From 4588086eeb7e234cd7ceb8a205d76dcaa5179d4a Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:00:37 -0400 Subject: [PATCH 01/15] Plan duplicate elimination work --- ...2026-05-23-eliminate-duplicate-findings.md | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md new file mode 100644 index 00000000..e417350c --- /dev/null +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -0,0 +1,107 @@ +# Duplicate Findings Refactor Plan + +## Baseline Scan + +Generated from the repo-local CLI on branch `elim-dups` after rebuilding `dist`. + +Commands: + +```bash +node ./dist/cli.js duplicates --root . ./src --min-confidence medium --limit 100 --include-same-file +node ./dist/cli.js duplicates --root . . --min-confidence high --limit 200 --include-same-file +``` + +Results: + +- `src` scan: 100 returned groups, 1234 omitted groups, 1669 omitted raw suggestions. +- whole-repo scan: 200 returned groups, 2097 omitted groups, 2995 omitted raw suggestions. +- Whole-repo top results are dominated by test helpers and setup snippets. +- Product-code top results are concentrated in `src/graphs`, `src/cli`, `src/impact`, `src/chunking`, `src/mcp`, and language definitions. + +The grouped output is usable for triage. Remaining caveats: + +- Some chunk findings are sub-ranges of a larger duplicate and should be handled through the larger refactor. +- Some renamed findings compare very different-sized chunks; those are analyzer noise unless a human can identify a clear shared behavior. +- Repeated declarative language-definition shapes are not automatically bad duplication. + +## Refactor Checklist + +### Product Code + +- [ ] Refactor symbol graph renderers. + - Findings: `src/graphs/symbol-render.ts:84-149` and `src/graphs/symbol-render.ts:151-218`. + - Extract shared file-node, symbol-node, and graph-edge collection. + - Keep Mermaid and DOT formatting separate so escaping and syntax remain explicit. + - Add or update renderer tests if output ordering or formatting can change. + +- [ ] Refactor compact graph symbol projection. + - Findings: `src/cli/graph.ts:121-146` and `src/cli/graph.ts:169-194`. + - Extract shared file index, symbol index, symbol array, and sorted symbol edge construction. + - Keep the difference between full compact graph output and symbols-only output visible at the call site. + +- [ ] Share AST range conversion. + - Findings: `src/impact/suggestions.ts:372-385` and `src/util/ast.ts:32-41`. + - Reuse `toRange` from `src/util/ast.ts` or move the non-null conversion into a shared helper. + - Preserve the existing null-node behavior used by current callers. + +- [ ] Share default token counting. + - Findings: `src/chunking/chunkFile.ts:28-31` and `src/chunking/chunkTextFile.ts:21-24`. + - Introduce a small shared tokenizer helper in `src/chunking`. + - Keep public chunking options unchanged. + +- [ ] Consolidate dependency and reverse-dependency wrappers where useful. + - Findings: `src/agent-tools.ts:366-441`, `src/mcp/server.ts:228-246`, and `src/mcp/tools.ts:88-114`. + - Prefer a small result-mapping helper over forcing identical public response shapes. + - Verify both CLI/agent tools and MCP tools still expose `dependencies` and `reverseDependencies` separately. + +- [ ] Revisit CSS and Less language definitions. + - Findings: `src/languages/definitions/css.ts:9-17` and `src/languages/definitions/less.ts:9-17`. + - Extract shared CSS-family structure/query pieces only if it keeps each language definition readable. + - Consider whether Vue/Svelte stylesheet definitions can reuse the same helper without hiding language-specific behavior. + +- [ ] Evaluate JS fallback type duplication. + - Findings: `packages/codegraph-js-fallback/js-fallback.d.ts` and `src/jsFallback.ts`. + - Prefer generating or importing a single declaration source if package boundaries allow it. + - Leave as-is if the duplication is required to keep the fallback package self-contained. + +- [ ] Review smaller wrapper candidates opportunistically. + - `src/cli/artifact.ts`, `src/cli/explain.ts`, and `src/cli/search.ts` command context interfaces. + - `src/cli/projectFile.ts` and `src/session.ts` file-input resolution. + - `src/cli/options.ts` positive and non-negative integer parsers. + - `src/sqlite/canned-query.ts` direct dependencies and dependents queries. + +### Tests + +- [ ] Add a shared temporary directory helper for tests. + - Findings: repeated `mkTmpDir` helpers across dynamic resolution, fast graph edge cases, node modules, resolution precedence, robust fast graph, TS paths workspace, cache, and parsed-cache tests. + - Put it near existing test helpers and migrate only obvious identical helpers first. + +- [ ] Add shared edge-normalization helpers for graph tests. + - Findings: `tests/fast-graph.test.ts`, `tests/monorepo-fast-graph.test.ts`, and related fast graph tests. + - Replace duplicated `normEdge`, `toKey`, and slash normalization only where it improves readability. + +- [ ] Consolidate repeated SQLite/test database setup blocks. + - Findings: repeated chunks in `tests/sqlite.test.ts`, `tests/sql-artifact-graph.test.ts`, and `tests/sql-review-context.test.ts`. + - Extract helpers that describe domain intent, not just line-for-line setup. + +- [ ] Leave intentional fixture repetition alone. + - Repeated sample snippets are often test data, not production maintenance debt. + - Do not refactor setup that would make an individual test harder to read. + +### Analyzer Follow-Ups + +- [ ] Consider a length-ratio guard for high-confidence renamed groups. + - Example noise: large C/C++ query chunks paired with tiny language-definition snippets. + - The detector already reports `lengthRatio`; ranking or confidence can use it more aggressively. + +- [ ] Consider collapsing adjacent same-file chunk findings under a larger group. + - Example: multiple `src/cli/graph.ts` chunk findings are one underlying helper extraction. + - Keep raw variants available through `--raw-pairs`. + +## Verification Plan + +- [ ] Run `npm run build`. +- [ ] Run `npm run lint`. +- [ ] Run focused tests for touched areas. +- [ ] Run `npm test` before pushing a completed refactor batch. +- [ ] Re-run the duplicate analyzer and compare top findings against this baseline. From 44f740b5e8df4b1081d5b9413408949987f680d3 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:02:40 -0400 Subject: [PATCH 02/15] Share chunk token and range helpers --- .../2026-05-23-eliminate-duplicate-findings.md | 4 ++-- src/chunking/chunkFile.ts | 8 ++------ src/chunking/chunkTextFile.ts | 11 ++++------- src/chunking/tokenizer.ts | 4 ++++ src/impact/suggestions.ts | 16 +--------------- 5 files changed, 13 insertions(+), 30 deletions(-) create mode 100644 src/chunking/tokenizer.ts diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index e417350c..6e2e3baa 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -39,12 +39,12 @@ The grouped output is usable for triage. Remaining caveats: - Extract shared file index, symbol index, symbol array, and sorted symbol edge construction. - Keep the difference between full compact graph output and symbols-only output visible at the call site. -- [ ] Share AST range conversion. +- [x] Share AST range conversion. - Findings: `src/impact/suggestions.ts:372-385` and `src/util/ast.ts:32-41`. - Reuse `toRange` from `src/util/ast.ts` or move the non-null conversion into a shared helper. - Preserve the existing null-node behavior used by current callers. -- [ ] Share default token counting. +- [x] Share default token counting. - Findings: `src/chunking/chunkFile.ts:28-31` and `src/chunking/chunkTextFile.ts:21-24`. - Introduce a small shared tokenizer helper in `src/chunking`. - Keep public chunking options unchanged. diff --git a/src/chunking/chunkFile.ts b/src/chunking/chunkFile.ts index bfd99ee4..3c13ff23 100644 --- a/src/chunking/chunkFile.ts +++ b/src/chunking/chunkFile.ts @@ -3,6 +3,7 @@ import { collectChunkBlockGroups } from "./chunkBlocks.js"; import { getChunkMatches } from "./chunkMatches.js"; import { fillGapsWithMiscChunks, mergeSmallChunks } from "./chunkMerge.js"; import { splitLargeBlockSimple, splitLargeBlockUsingInnerBlocks } from "./chunkSplit.js"; +import { countWhitespaceTokens } from "./tokenizer.js"; import type { Chunk, ChunkTokenizer } from "./types.js"; export type { Chunk } from "./types.js"; @@ -25,11 +26,6 @@ export interface ChunkFileOptions { tokenizer?: ChunkTokenizer | undefined; } -function defaultTokenizer(text: string): number { - if (!text.trim()) return 0; - return text.trim().split(/\s+/).length; -} - /** * Splits code into semantic chunks using Tree-sitter queries. * Chunks respect token budgets and preserve semantic boundaries like functions, classes, and methods. @@ -38,7 +34,7 @@ function defaultTokenizer(text: string): number { * @returns Array of semantic chunks */ export function chunkFile(opts: ChunkFileOptions): Chunk[] { - const { language, source, filePath, minTokens = 150, maxTokens = 400, tokenizer = defaultTokenizer } = opts; + const { language, source, filePath, minTokens = 150, maxTokens = 400, tokenizer = countWhitespaceTokens } = opts; const matches = getChunkMatches(language, source, filePath); const newlineOffsets = collectNewlineOffsets(source); const { mainBlocks, innerBlocks, comments } = collectChunkBlockGroups(language, matches); diff --git a/src/chunking/chunkTextFile.ts b/src/chunking/chunkTextFile.ts index 90e4d200..ed72e6ae 100644 --- a/src/chunking/chunkTextFile.ts +++ b/src/chunking/chunkTextFile.ts @@ -1,4 +1,6 @@ import type { Chunk } from "./chunkFile.js"; +import { countWhitespaceTokens } from "./tokenizer.js"; +import type { ChunkTokenizer } from "./types.js"; /** * Options for text file chunking (JSON, YAML, config files, etc.). @@ -15,12 +17,7 @@ export interface TextChunkOptions { /** Maximum tokens per chunk (default: 400). Larger chunks are split. */ maxTokens?: number; /** Custom token counting function (default: whitespace-based) */ - tokenizer?: ((text: string) => number) | undefined; -} - -function defaultTokenizer(text: string): number { - if (!text.trim()) return 0; - return text.trim().split(/\s+/).length; + tokenizer?: ChunkTokenizer | undefined; } /** @@ -31,7 +28,7 @@ function defaultTokenizer(text: string): number { * @returns Array of text chunks */ export function chunkTextFile(opts: TextChunkOptions): Chunk[] { - const { source, filePath, languageId = "text", maxTokens = 400, tokenizer = defaultTokenizer } = opts; + const { source, filePath, languageId = "text", maxTokens = 400, tokenizer = countWhitespaceTokens } = opts; const lines = source.split(/\r?\n/); const chunks: Chunk[] = []; diff --git a/src/chunking/tokenizer.ts b/src/chunking/tokenizer.ts new file mode 100644 index 00000000..bc5fa662 --- /dev/null +++ b/src/chunking/tokenizer.ts @@ -0,0 +1,4 @@ +export function countWhitespaceTokens(text: string): number { + if (!text.trim()) return 0; + return text.trim().split(/\s+/).length; +} diff --git a/src/impact/suggestions.ts b/src/impact/suggestions.ts index 55596e94..6fc74b12 100644 --- a/src/impact/suggestions.ts +++ b/src/impact/suggestions.ts @@ -4,6 +4,7 @@ import { goToDefinition } from "../indexer/navigation.js"; import { ensureParsedContext } from "../indexer/parse-context.js"; import type { LanguageSupport } from "../languages.js"; import type { SyntaxNodeLike, SyntaxTreeLike } from "../languages/types.js"; +import { toRange } from "../util/ast.js"; import { normalizePath, resolveFilePathFromRoot, toProjectRelativePath } from "../util/paths.js"; import { collectChangedLines } from "./hunks.js"; import type { FileChange, ImpactOptions, ImpactSuggestion, ImpactSuggestionConfidence } from "./types.js"; @@ -368,18 +369,3 @@ function pushUniqueSuggestion(output: ImpactSuggestion[], seen: Set, sug seen.add(key); output.push(suggestion); } - -function toRange(node: SyntaxNodeLike): Range { - return { - start: { - line: node.startPosition.row + 1, - column: node.startPosition.column + 1, - index: node.startIndex, - }, - end: { - line: node.endPosition.row + 1, - column: node.endPosition.column + 1, - index: node.endIndex, - }, - }; -} From 9280694c3a4a03a017f74018957f637ca9bc33ad Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:04:17 -0400 Subject: [PATCH 03/15] Share compact symbol projection --- ...2026-05-23-eliminate-duplicate-findings.md | 2 +- src/cli/graph.ts | 59 ++++++++----------- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 6e2e3baa..800460ca 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -34,7 +34,7 @@ The grouped output is usable for triage. Remaining caveats: - Keep Mermaid and DOT formatting separate so escaping and syntax remain explicit. - Add or update renderer tests if output ordering or formatting can change. -- [ ] Refactor compact graph symbol projection. +- [x] Refactor compact graph symbol projection. - Findings: `src/cli/graph.ts:121-146` and `src/cli/graph.ts:169-194`. - Extract shared file index, symbol index, symbol array, and sorted symbol edge construction. - Keep the difference between full compact graph output and symbols-only output visible at the call site. diff --git a/src/cli/graph.ts b/src/cli/graph.ts index b0c66e48..a7c19e09 100644 --- a/src/cli/graph.ts +++ b/src/cli/graph.ts @@ -34,6 +34,18 @@ type CompactFileEdge = { typeOnly?: boolean; }; type CompactSymbolEdge = { from: number; to: number; label?: string }; +type CompactSymbolNode = { + id: number; + file: number; + name: string; + kind: string; +}; +type CompactSymbolProjection = { + files: string[]; + symbols: CompactSymbolNode[]; + symbolEdges: CompactSymbolEdge[]; + symbolIdIndex: string[]; +}; type CommandTimingReport = { totalMs?: number; @@ -113,49 +125,26 @@ function compactGraphWithSymbols(fgraph: Graph, sgraph: SymbolGraph, stable = fa }); } - const symbolIds = [...sgraph.nodes.keys()]; - if (stable) symbolIds.sort(); - const symbolIndex = new Map(); - for (let i = 0; i < symbolIds.length; i++) symbolIndex.set(symbolIds[i]!, i); - - const symbols = symbolIds.map((id) => { - const n = sgraph.nodes.get(id)!; - return { - id: symbolIndex.get(id)!, - file: fileIndex.get(n.file)!, - name: n.name, - kind: n.kind, - }; - }); - - const symbolEdges: CompactSymbolEdge[] = sgraph.edges.map((e) => ({ - from: symbolIndex.get(e.from)!, - to: symbolIndex.get(e.to)!, - ...(e.label ? { label: e.label } : {}), - })); - if (stable) { - symbolEdges.sort((a, b) => { - const byFrom = a.from - b.from; - if (byFrom) return byFrom; - const byTo = a.to - b.to; - if (byTo) return byTo; - const al = String(a.label ?? ""); - const bl = String(b.label ?? ""); - if (al !== bl) return al < bl ? -1 : 1; - return 0; - }); - } + const symbolProjection = buildCompactSymbolProjection(files, sgraph, stable); return { files, fileEdges, - symbols, - symbolEdges, - symbolIdIndex: symbolIds, + symbols: symbolProjection.symbols, + symbolEdges: symbolProjection.symbolEdges, + symbolIdIndex: symbolProjection.symbolIdIndex, }; } function compactSymbolsOnly(allFiles: string[], sgraph: SymbolGraph, stable = false) { + return buildCompactSymbolProjection(allFiles, sgraph, stable); +} + +function buildCompactSymbolProjection( + allFiles: string[], + sgraph: SymbolGraph, + stable = false, +): CompactSymbolProjection { const files = [...allFiles]; if (stable) files.sort(); const fileIndex = new Map(); From 6255cb5e21f38b921c328b2997a0c58fe66ef72f Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:07:05 -0400 Subject: [PATCH 04/15] Share symbol render model --- ...2026-05-23-eliminate-duplicate-findings.md | 2 +- src/graphs/symbol-render.ts | 224 +++++++++--------- 2 files changed, 118 insertions(+), 108 deletions(-) diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 800460ca..87fffc72 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -28,7 +28,7 @@ The grouped output is usable for triage. Remaining caveats: ### Product Code -- [ ] Refactor symbol graph renderers. +- [x] Refactor symbol graph renderers. - Findings: `src/graphs/symbol-render.ts:84-149` and `src/graphs/symbol-render.ts:151-218`. - Extract shared file-node, symbol-node, and graph-edge collection. - Keep Mermaid and DOT formatting separate so escaping and syntax remain explicit. diff --git a/src/graphs/symbol-render.ts b/src/graphs/symbol-render.ts index 5c0b78b2..92a4f869 100644 --- a/src/graphs/symbol-render.ts +++ b/src/graphs/symbol-render.ts @@ -21,6 +21,29 @@ type SymbolGraphLike = { edges: SymbolEdgeLike[]; }; +type RenderNode = { + id: string; + label: string; +}; + +type FileRenderNode = RenderNode & { + external: boolean; +}; + +type RenderEdge = { + fromId: string; + toId: string; + label?: string; +}; + +type SymbolWithFileRenderModel = { + fileNodes: FileRenderNode[]; + symbolNodes: RenderNode[]; + fileEdges: RenderEdge[]; + fileSymbolEdges: RenderEdge[]; + symbolEdges: RenderEdge[]; +}; + function symbolDisplayLabel(node: SymbolNodeLike, projectRoot?: string): string { const relativeFile = toProjectDisplayPath(projectRoot, node.file); const base = path.basename(relativeFile); @@ -29,6 +52,73 @@ function symbolDisplayLabel(node: SymbolNodeLike, projectRoot?: string): string return `${base}:${node.name}`; } +function buildSymbolWithFileRenderModel( + sg: SymbolGraphLike, + fg: Graph, + projectRoot?: string, +): SymbolWithFileRenderModel { + const fileIdOf = new Map(); + const fileNodeMeta = new Map(); + let fileIndex = 0; + const fileLabel = (file: string) => toProjectDisplayPath(projectRoot, file); + const ensureFile = (file: string) => { + if (!fileIdOf.has(file)) { + const id = `f${fileIndex++}`; + fileIdOf.set(file, id); + fileNodeMeta.set(id, { label: fileLabel(file), external: false }); + } + }; + const ensureExternal = (name: string) => { + if (!fileIdOf.has(name)) { + const id = `f${fileIndex++}`; + fileIdOf.set(name, id); + fileNodeMeta.set(id, { label: name, external: true }); + } + }; + for (const file of fg.nodes) ensureFile(file); + for (const edge of fg.edges) { + ensureFile(edge.from); + if (edge.to.type === "file") ensureFile(edge.to.path); + else ensureExternal(edge.to.name); + } + + const symbolIdOf = new Map(); + const symbolNodes: RenderNode[] = []; + let symbolIndex = 0; + for (const [id, node] of sg.nodes) { + const symbolId = `s${symbolIndex++}`; + symbolIdOf.set(id, symbolId); + symbolNodes.push({ id: symbolId, label: symbolDisplayLabel(node) }); + } + + const fileNodes: FileRenderNode[] = []; + for (const [id, meta] of fileNodeMeta) { + fileNodes.push({ id, label: meta.label, external: meta.external }); + } + + const fileEdges = fg.edges.map((edge) => { + const fromId = fileIdOf.get(edge.from)!; + const targetKey = edge.to.type === "file" ? edge.to.path : edge.to.name; + const toId = fileIdOf.get(targetKey)!; + return { fromId, toId }; + }); + + const fileSymbolEdges: RenderEdge[] = []; + for (const [symbolKey, symbolId] of symbolIdOf) { + const node = sg.nodes.get(symbolKey)!; + const fileId = fileIdOf.get(node.file); + if (fileId) fileSymbolEdges.push({ fromId: fileId, toId: symbolId }); + } + + const symbolEdges = sg.edges.map((edge) => ({ + fromId: symbolIdOf.get(edge.from)!, + toId: symbolIdOf.get(edge.to)!, + ...(edge.label ? { label: edge.label } : {}), + })); + + return { fileNodes, symbolNodes, fileEdges, fileSymbolEdges, symbolEdges }; +} + export function graphToMermaidSymbols(sg: SymbolGraphLike, projectRoot?: string): string { const idOf = new Map(); const labels = new Map(); @@ -82,136 +172,56 @@ export function graphToDOTSymbols(sg: SymbolGraphLike, projectRoot?: string): st } export function graphToMermaidSymbolsWithFiles(sg: SymbolGraphLike, fg: Graph, projectRoot?: string): string { - const fileIdOf = new Map(); - const fileNodeMeta = new Map(); - let fileIndex = 0; - const fileLabel = (file: string) => toProjectDisplayPath(projectRoot, file); - const ensureFile = (file: string) => { - if (!fileIdOf.has(file)) { - const id = `f${fileIndex++}`; - fileIdOf.set(file, id); - fileNodeMeta.set(id, { label: fileLabel(file), external: false }); - } - }; - const ensureExternal = (name: string) => { - if (!fileIdOf.has(name)) { - const id = `f${fileIndex++}`; - fileIdOf.set(name, id); - fileNodeMeta.set(id, { label: name, external: true }); - } - }; - for (const file of fg.nodes) ensureFile(file); - for (const edge of fg.edges) { - ensureFile(edge.from); - if (edge.to.type === "file") ensureFile(edge.to.path); - else ensureExternal(edge.to.name); - } - - const symbolIdOf = new Map(); - const symbolLabels = new Map(); - let symbolIndex = 0; - for (const [id, node] of sg.nodes) { - const symbolId = `s${symbolIndex++}`; - symbolIdOf.set(id, symbolId); - symbolLabels.set(symbolId, symbolDisplayLabel(node)); - } - + const model = buildSymbolWithFileRenderModel(sg, fg, projectRoot); const declared = new Set(); const lines: string[] = ["flowchart LR"]; - for (const [id, meta] of fileNodeMeta) { - if (declared.has(id)) continue; - declared.add(id); - lines.push(meta.external ? `${id}(["${mermaidLabel(meta.label)}"])` : `${id}["${mermaidLabel(meta.label)}"]`); + for (const node of model.fileNodes) { + if (declared.has(node.id)) continue; + declared.add(node.id); + lines.push(node.external ? `${node.id}(["${mermaidLabel(node.label)}"])` : `${node.id}["${mermaidLabel(node.label)}"]`); } - for (const [id, label] of symbolLabels) { - if (declared.has(id)) continue; - declared.add(id); - lines.push(`${id}["${mermaidLabel(label)}"]`); + for (const node of model.symbolNodes) { + if (declared.has(node.id)) continue; + declared.add(node.id); + lines.push(`${node.id}["${mermaidLabel(node.label)}"]`); } - for (const edge of fg.edges) { - const fromId = fileIdOf.get(edge.from)!; - const targetKey = edge.to.type === "file" ? edge.to.path : edge.to.name; - const toId = fileIdOf.get(targetKey)!; - lines.push(`${fromId} --> ${toId}`); + for (const edge of model.fileEdges) { + lines.push(`${edge.fromId} --> ${edge.toId}`); } - for (const [symbolKey, symbolId] of symbolIdOf) { - const node = sg.nodes.get(symbolKey)!; - const fileId = fileIdOf.get(node.file); - if (fileId) lines.push(`${fileId} --> ${symbolId}`); + for (const edge of model.fileSymbolEdges) { + lines.push(`${edge.fromId} --> ${edge.toId}`); } - for (const edge of sg.edges) { - const fromId = symbolIdOf.get(edge.from)!; - const toId = symbolIdOf.get(edge.to)!; - if (edge.label) lines.push(`${fromId} -- "${mermaidLabel(edge.label)}" --> ${toId}`); - else lines.push(`${fromId} --> ${toId}`); + for (const edge of model.symbolEdges) { + if (edge.label) lines.push(`${edge.fromId} -- "${mermaidLabel(edge.label)}" --> ${edge.toId}`); + else lines.push(`${edge.fromId} --> ${edge.toId}`); } return lines.join("\n"); } export function graphToDOTSymbolsWithFiles(sg: SymbolGraphLike, fg: Graph, projectRoot?: string): string { - const fileIdOf = new Map(); - const fileNodeMeta = new Map(); - let fileIndex = 0; - const fileLabel = (file: string) => toProjectDisplayPath(projectRoot, file); - const ensureFile = (file: string) => { - if (!fileIdOf.has(file)) { - const id = `f${fileIndex++}`; - fileIdOf.set(file, id); - fileNodeMeta.set(id, { label: fileLabel(file), external: false }); - } - }; - const ensureExternal = (name: string) => { - if (!fileIdOf.has(name)) { - const id = `f${fileIndex++}`; - fileIdOf.set(name, id); - fileNodeMeta.set(id, { label: name, external: true }); - } - }; - for (const file of fg.nodes) ensureFile(file); - for (const edge of fg.edges) { - ensureFile(edge.from); - if (edge.to.type === "file") ensureFile(edge.to.path); - else ensureExternal(edge.to.name); - } - - const symbolIdOf = new Map(); - const symbolLabels = new Map(); - let symbolIndex = 0; - for (const [id, node] of sg.nodes) { - const symbolId = `s${symbolIndex++}`; - symbolIdOf.set(id, symbolId); - symbolLabels.set(symbolId, symbolDisplayLabel(node)); - } - + const model = buildSymbolWithFileRenderModel(sg, fg, projectRoot); const lines: string[] = []; lines.push("digraph G {"); lines.push(" rankdir=LR;"); lines.push(' node [shape=box, fontsize=10, fontname="Arial"];\n'); - for (const [id, meta] of fileNodeMeta) { + for (const node of model.fileNodes) { lines.push( - ` ${id} [label="${dotLabel(meta.label)}", ${meta.external ? "shape=ellipse, style=dashed" : "shape=box"}];`, + ` ${node.id} [label="${dotLabel(node.label)}", ${node.external ? "shape=ellipse, style=dashed" : "shape=box"}];`, ); } - for (const [id, label] of symbolLabels) { - lines.push(` ${id} [label="${dotLabel(label)}"];`); + for (const node of model.symbolNodes) { + lines.push(` ${node.id} [label="${dotLabel(node.label)}"];`); } - for (const edge of fg.edges) { - const fromId = fileIdOf.get(edge.from)!; - const targetKey = edge.to.type === "file" ? edge.to.path : edge.to.name; - const toId = fileIdOf.get(targetKey)!; - lines.push(` ${fromId} -> ${toId};`); + for (const edge of model.fileEdges) { + lines.push(` ${edge.fromId} -> ${edge.toId};`); } - for (const [symbolKey, symbolId] of symbolIdOf) { - const node = sg.nodes.get(symbolKey)!; - const fileId = fileIdOf.get(node.file); - if (fileId) lines.push(` ${fileId} -> ${symbolId};`); + for (const edge of model.fileSymbolEdges) { + lines.push(` ${edge.fromId} -> ${edge.toId};`); } - for (const edge of sg.edges) { - const fromId = symbolIdOf.get(edge.from)!; - const toId = symbolIdOf.get(edge.to)!; + for (const edge of model.symbolEdges) { const attrs: string[] = []; if (edge.label) attrs.push(`label="${dotLabel(edge.label)}"`); - lines.push(` ${fromId} -> ${toId}${attrs.length ? " [" + attrs.join(",") + "]" : ""};`); + lines.push(` ${edge.fromId} -> ${edge.toId}${attrs.length ? " [" + attrs.join(",") + "]" : ""};`); } lines.push("}"); return lines.join("\n"); From 89b7a35f6c36014b94820ab4ee5993464d600908 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:10:03 -0400 Subject: [PATCH 05/15] Consolidate dependency tool wrappers --- ...2026-05-23-eliminate-duplicate-findings.md | 2 +- src/agent-tools.ts | 92 ++++++++----------- src/mcp/server.ts | 63 +++++++------ src/mcp/tools.ts | 44 ++++----- 4 files changed, 93 insertions(+), 108 deletions(-) diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 87fffc72..e8f86bc7 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -49,7 +49,7 @@ The grouped output is usable for triage. Remaining caveats: - Introduce a small shared tokenizer helper in `src/chunking`. - Keep public chunking options unchanged. -- [ ] Consolidate dependency and reverse-dependency wrappers where useful. +- [x] Consolidate dependency and reverse-dependency wrappers where useful. - Findings: `src/agent-tools.ts:366-441`, `src/mcp/server.ts:228-246`, and `src/mcp/tools.ts:88-114`. - Prefer a small result-mapping helper over forcing identical public response shapes. - Verify both CLI/agent tools and MCP tools still expose `dependencies` and `reverseDependencies` separately. diff --git a/src/agent-tools.ts b/src/agent-tools.ts index a87a0541..be1d9150 100644 --- a/src/agent-tools.ts +++ b/src/agent-tools.ts @@ -115,6 +115,24 @@ type ToolDependencyListResult = error: string; reason?: "outside_project_root"; }; +type ToolDependencyOkResult = Extract; +type ToolDependencyFailureResult = Exclude; +type ToolDependenciesResponse = + | { + status: "ok"; + file: string; + dependencies: ToolDependencyEntry[]; + truncated: boolean; + } + | ToolDependencyFailureResult; +type ToolReverseDependenciesResponse = + | { + status: "ok"; + file: string; + dependents: ToolDependencyEntry[]; + truncated: boolean; + } + | ToolDependencyFailureResult; /** File-level hotspot metrics returned by `tool_getHotspots`. */ export type ToolHotspotEntry = { @@ -367,37 +385,16 @@ export async function tool_getDependencies( root: string, filePath: string, options: ToolDependencyOptions = {}, -): Promise< - | { - status: "ok"; - file: string; - dependencies: ToolDependencyEntry[]; - truncated: boolean; - } - | { - status: "not_found"; - file: string; - reason: "file_not_found" | "file_not_indexed"; - error: string; - } - | { - status: "error"; - error: string; - reason?: "outside_project_root"; - } -> { +): Promise { const result = await collectToolDependencyEntries(root, filePath, options, (index, file, traversalOptions) => getDependencies(index.graph, file, traversalOptions), ); - if (result.status !== "ok") { - return result; - } - return { + return mapToolDependencyResult(result, (ok) => ({ status: "ok", - file: result.file, - dependencies: result.entries, - truncated: result.truncated, - }; + file: ok.file, + dependencies: ok.entries, + truncated: ok.truncated, + })); } /** @@ -407,37 +404,24 @@ export async function tool_getReverseDependencies( root: string, filePath: string, options: ToolDependencyOptions = {}, -): Promise< - | { - status: "ok"; - file: string; - dependents: ToolDependencyEntry[]; - truncated: boolean; - } - | { - status: "not_found"; - file: string; - reason: "file_not_found" | "file_not_indexed"; - error: string; - } - | { - status: "error"; - error: string; - reason?: "outside_project_root"; - } -> { +): Promise { const result = await collectToolDependencyEntries(root, filePath, options, (index, file, traversalOptions) => getReverseDependencies(index.graph, file, traversalOptions), ); - if (result.status !== "ok") { - return result; - } - return { + return mapToolDependencyResult(result, (ok) => ({ status: "ok", - file: result.file, - dependents: result.entries, - truncated: result.truncated, - }; + file: ok.file, + dependents: ok.entries, + truncated: ok.truncated, + })); +} + +function mapToolDependencyResult( + result: ToolDependencyListResult, + buildOkResult: (result: ToolDependencyOkResult) => T, +): T | ToolDependencyFailureResult { + if (result.status !== "ok") return result; + return buildOkResult(result); } async function collectToolDependencyEntries( diff --git a/src/mcp/server.ts b/src/mcp/server.ts index a9402e8b..cbd12c5d 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -19,7 +19,7 @@ import { explainCodegraphTargetWithSession } from "../agent/explain.js"; import type { AgentExplanation, AgentExplanationReference } from "../agent/explain.js"; import { searchCodegraphWithSession } from "../agent/search.js"; import type { AgentSearchMode, AgentSearchResponse } from "../agent/search.js"; -import { getDependencies, getReverseDependencies, getShortestPath } from "../graphs/queries.js"; +import { getDependencies, getReverseDependencies, getShortestPath, type DependencyNode } from "../graphs/queries.js"; import { findReferences, goToDefinition } from "../indexer/navigation.js"; import { buildReviewReport, type ReviewDepth, type ReviewReport } from "../review.js"; import { queryGraphSqliteRaw, type RawSqlResult } from "../sqlite.js"; @@ -139,6 +139,17 @@ export type CodegraphMcpHandlers = { }) => Promise; }; +type McpDependencyRequest = { + file: string; + depth?: number | undefined; + limit?: number | undefined; +}; + +type McpDependencyEntry = { + file: string; + depth: number; +}; + const MCP_HTTP_PATH = "/mcp"; const MAX_MCP_HTTP_BODY_BYTES = 1_000_000; @@ -154,6 +165,28 @@ export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): if (typeof limit !== "number" || !Number.isFinite(limit)) return fallback; return Math.min(max, Math.max(0, Math.floor(limit))); }; + const collectMcpDependencyEntries = async ( + request: McpDependencyRequest, + collectEntries: ( + graph: Awaited>["fileGraph"], + file: string, + options: { depth?: number; limit: number }, + ) => DependencyNode[], + ): Promise => { + const snapshot = await session.loadProject(); + const queryOptions = { + ...(request.depth !== undefined ? { depth: request.depth } : {}), + limit: boundedLimit(request.limit, DEFAULT_MCP_COLLECTION_LIMIT, MAX_MCP_COLLECTION_LIMIT), + }; + return collectEntries( + snapshot.fileGraph, + await resolveProjectFile(await realRoot, root, request.file), + queryOptions, + ).map((dependency) => ({ + file: relative(dependency.file), + depth: dependency.depth, + })); + }; return { search: async (request) => @@ -224,36 +257,12 @@ export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): }, deps: async (request) => { - const snapshot = await session.loadProject(); - const queryOptions = { - ...(request.depth !== undefined ? { depth: request.depth } : {}), - limit: boundedLimit(request.limit, DEFAULT_MCP_COLLECTION_LIMIT, MAX_MCP_COLLECTION_LIMIT), - }; - const dependencies = getDependencies( - snapshot.fileGraph, - await resolveProjectFile(await realRoot, root, request.file), - queryOptions, - ).map((dependency) => ({ - file: relative(dependency.file), - depth: dependency.depth, - })); + const dependencies = await collectMcpDependencyEntries(request, getDependencies); return { dependencies }; }, rdeps: async (request) => { - const snapshot = await session.loadProject(); - const queryOptions = { - ...(request.depth !== undefined ? { depth: request.depth } : {}), - limit: boundedLimit(request.limit, DEFAULT_MCP_COLLECTION_LIMIT, MAX_MCP_COLLECTION_LIMIT), - }; - const reverseDependencies = getReverseDependencies( - snapshot.fileGraph, - await resolveProjectFile(await realRoot, root, request.file), - queryOptions, - ).map((dependency) => ({ - file: relative(dependency.file), - depth: dependency.depth, - })); + const reverseDependencies = await collectMcpDependencyEntries(request, getReverseDependencies); return { reverseDependencies }; }, diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 3c1cbf04..45071890 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -14,6 +14,22 @@ function objectSchema(properties: Record, required: string[] = [ const stringProperty = { type: "string" }; const booleanProperty = { type: "boolean" }; +function dependencyInputSchema(): Tool["inputSchema"] { + return objectSchema( + { + file: stringProperty, + depth: { type: "integer", minimum: 0, default: 1 }, + limit: { + type: "integer", + minimum: 0, + maximum: MAX_MCP_COLLECTION_LIMIT, + default: DEFAULT_MCP_COLLECTION_LIMIT, + }, + }, + ["file"], + ); +} + export const MCP_TOOLS: Tool[] = [ { name: "search", @@ -84,36 +100,12 @@ export const MCP_TOOLS: Tool[] = [ { name: "deps", description: "List file dependencies.", - inputSchema: objectSchema( - { - file: stringProperty, - depth: { type: "integer", minimum: 0, default: 1 }, - limit: { - type: "integer", - minimum: 0, - maximum: MAX_MCP_COLLECTION_LIMIT, - default: DEFAULT_MCP_COLLECTION_LIMIT, - }, - }, - ["file"], - ), + inputSchema: dependencyInputSchema(), }, { name: "rdeps", description: "List reverse file dependencies.", - inputSchema: objectSchema( - { - file: stringProperty, - depth: { type: "integer", minimum: 0, default: 1 }, - limit: { - type: "integer", - minimum: 0, - maximum: MAX_MCP_COLLECTION_LIMIT, - default: DEFAULT_MCP_COLLECTION_LIMIT, - }, - }, - ["file"], - ), + inputSchema: dependencyInputSchema(), }, { name: "path", From 2dc93db376aac7085cfebb223b76264e6da37bf8 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:11:56 -0400 Subject: [PATCH 06/15] Share CSS-like language definition pieces --- ...2026-05-23-eliminate-duplicate-findings.md | 2 +- src/languages/definitions/css.ts | 29 +++------------ src/languages/definitions/cssLike.ts | 35 +++++++++++++++++++ src/languages/definitions/less.ts | 29 +++------------ 4 files changed, 44 insertions(+), 51 deletions(-) create mode 100644 src/languages/definitions/cssLike.ts diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index e8f86bc7..7493a14d 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -54,7 +54,7 @@ The grouped output is usable for triage. Remaining caveats: - Prefer a small result-mapping helper over forcing identical public response shapes. - Verify both CLI/agent tools and MCP tools still expose `dependencies` and `reverseDependencies` separately. -- [ ] Revisit CSS and Less language definitions. +- [x] Revisit CSS and Less language definitions. - Findings: `src/languages/definitions/css.ts:9-17` and `src/languages/definitions/less.ts:9-17`. - Extract shared CSS-family structure/query pieces only if it keeps each language definition readable. - Consider whether Vue/Svelte stylesheet definitions can reuse the same helper without hiding language-specific behavior. diff --git a/src/languages/definitions/css.ts b/src/languages/definitions/css.ts index 116b68d8..f63fca7c 100644 --- a/src/languages/definitions/css.ts +++ b/src/languages/definitions/css.ts @@ -1,35 +1,14 @@ import type { LanguageDefinition } from "../types.js"; import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; +import { cssLikeGraph, cssLikeNodeTypes, cssLikeStructure } from "./cssLike.js"; export const CSS_DEF: LanguageDefinition = { id: "css", extensions: [".css"], grammar: () => loadTreeSitterLanguage("tree-sitter-css"), - structure: { - blocks: [ - { type: "rule_set", captureId: "rule" }, - { type: "media_statement", captureId: "media" }, - { type: "keyframes_statement", captureId: "keyframes" }, - ], - splitPoints: ["rule_set"], - comments: ["comment", "js_comment"], - }, - graph: { - imports: ` - (import_statement (string_value) @mod) @stmt - `, - exports: "", - locals: ` - (class_selector (class_name) @name) - (id_selector (id_name) @name) - `, - importBindings: ` - (import_statement (string_value) @from) @stmt - `, - }, - nodeTypes: { - identifier: ["class_name", "id_name"], - }, + structure: cssLikeStructure(), + graph: cssLikeGraph(), + nodeTypes: cssLikeNodeTypes(), }; registerLanguage(CSS_DEF); diff --git a/src/languages/definitions/cssLike.ts b/src/languages/definitions/cssLike.ts new file mode 100644 index 00000000..67c6a8c7 --- /dev/null +++ b/src/languages/definitions/cssLike.ts @@ -0,0 +1,35 @@ +import type { LanguageDefinition } from "../types.js"; + +export function cssLikeStructure(): LanguageDefinition["structure"] { + return { + blocks: [ + { type: "rule_set", captureId: "rule" }, + { type: "media_statement", captureId: "media" }, + { type: "keyframes_statement", captureId: "keyframes" }, + ], + splitPoints: ["rule_set"], + comments: ["comment", "js_comment"], + }; +} + +export function cssLikeGraph(): LanguageDefinition["graph"] { + return { + imports: ` + (import_statement (string_value) @mod) @stmt + `, + exports: "", + locals: ` + (class_selector (class_name) @name) + (id_selector (id_name) @name) + `, + importBindings: ` + (import_statement (string_value) @from) @stmt + `, + }; +} + +export function cssLikeNodeTypes(): NonNullable { + return { + identifier: ["class_name", "id_name"], + }; +} diff --git a/src/languages/definitions/less.ts b/src/languages/definitions/less.ts index 8df96c4d..3554b815 100644 --- a/src/languages/definitions/less.ts +++ b/src/languages/definitions/less.ts @@ -1,35 +1,14 @@ import type { LanguageDefinition } from "../types.js"; import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; +import { cssLikeGraph, cssLikeNodeTypes, cssLikeStructure } from "./cssLike.js"; export const LESS_DEF: LanguageDefinition = { id: "less", extensions: [".less"], grammar: () => loadTreeSitterLanguage("tree-sitter-css"), - structure: { - blocks: [ - { type: "rule_set", captureId: "rule" }, - { type: "media_statement", captureId: "media" }, - { type: "keyframes_statement", captureId: "keyframes" }, - ], - splitPoints: ["rule_set"], - comments: ["comment", "js_comment"], - }, - graph: { - imports: ` - (import_statement (string_value) @mod) @stmt - `, - exports: "", - locals: ` - (class_selector (class_name) @name) - (id_selector (id_name) @name) - `, - importBindings: ` - (import_statement (string_value) @from) @stmt - `, - }, - nodeTypes: { - identifier: ["class_name", "id_name"], - }, + structure: cssLikeStructure(), + graph: cssLikeGraph(), + nodeTypes: cssLikeNodeTypes(), }; registerLanguage(LESS_DEF); From 5b8e7baca66912985829ac66c4d0dc4b82a4d5f3 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:14:42 -0400 Subject: [PATCH 07/15] Share smaller CLI and query helpers --- ...2026-05-23-eliminate-duplicate-findings.md | 6 ++-- src/cli/artifact.ts | 12 ++----- src/cli/context.ts | 11 ++++++ src/cli/explain.ts | 12 ++----- src/cli/search.ts | 12 ++----- src/sqlite/canned-query.ts | 35 ++++++++++--------- 6 files changed, 39 insertions(+), 49 deletions(-) diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 7493a14d..7d8697f3 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -59,16 +59,18 @@ The grouped output is usable for triage. Remaining caveats: - Extract shared CSS-family structure/query pieces only if it keeps each language definition readable. - Consider whether Vue/Svelte stylesheet definitions can reuse the same helper without hiding language-specific behavior. -- [ ] Evaluate JS fallback type duplication. +- [x] Evaluate JS fallback type duplication. - Findings: `packages/codegraph-js-fallback/js-fallback.d.ts` and `src/jsFallback.ts`. - Prefer generating or importing a single declaration source if package boundaries allow it. - Leave as-is if the duplication is required to keep the fallback package self-contained. + - Decision: leave as-is because the fallback package publishes a self-contained `js-fallback.d.ts`. -- [ ] Review smaller wrapper candidates opportunistically. +- [x] Review smaller wrapper candidates opportunistically. - `src/cli/artifact.ts`, `src/cli/explain.ts`, and `src/cli/search.ts` command context interfaces. - `src/cli/projectFile.ts` and `src/session.ts` file-input resolution. - `src/cli/options.ts` positive and non-negative integer parsers. - `src/sqlite/canned-query.ts` direct dependencies and dependents queries. + - Decision: extracted the common agent CLI context and canned-query edge loader; left session file resolution and option parsing as-is because their existing helpers already separate concerns. ### Tests diff --git a/src/cli/artifact.ts b/src/cli/artifact.ts index 34b6244d..6448f772 100644 --- a/src/cli/artifact.ts +++ b/src/cli/artifact.ts @@ -1,16 +1,8 @@ import { buildCodegraphArtifact } from "../agent/artifact.js"; +import type { CliAgentCommandContext } from "./context.js"; import { ARTIFACT_HELP_TEXT } from "./help.js"; -export type ArtifactCommandContext = { - positionals: string[]; - root: string; - getOpt: (name: string) => string | undefined; - hasFlag: (name: string) => boolean; - writeJSONLine: (value: unknown) => void; - writeStdoutLine: (message: string) => void; - writeStderrLine: (message: string) => void; - exit: (code: number) => never; -}; +export type ArtifactCommandContext = CliAgentCommandContext; export async function handleArtifactCommand(context: ArtifactCommandContext): Promise { const artifactCommand = context.positionals[0]; diff --git a/src/cli/context.ts b/src/cli/context.ts index cd80c47d..315d6a73 100644 --- a/src/cli/context.ts +++ b/src/cli/context.ts @@ -71,6 +71,17 @@ export type CliRuntime = { readStdin: () => Promise; }; +export type CliAgentCommandContext = { + positionals: string[]; + root: string; + getOpt: (name: string) => string | undefined; + hasFlag: (name: string) => boolean; + writeJSONLine: (value: unknown) => void; + writeStdoutLine: (message: string) => void; + writeStderrLine: (message: string) => void; + exit: (code: number) => never; +}; + type CliContext = { runtime: CliRuntime; stderrFilePath: string | undefined; diff --git a/src/cli/explain.ts b/src/cli/explain.ts index 471b38e6..0db27b0c 100644 --- a/src/cli/explain.ts +++ b/src/cli/explain.ts @@ -1,17 +1,9 @@ import { explainCodegraphTarget, formatAgentExplanation } from "../agent/explain.js"; +import type { CliAgentCommandContext } from "./context.js"; import { EXPLAIN_HELP_TEXT } from "./help.js"; import { parsePositiveIntegerOption } from "./options.js"; -export type ExplainCommandContext = { - positionals: string[]; - root: string; - getOpt: (name: string) => string | undefined; - hasFlag: (name: string) => boolean; - writeJSONLine: (value: unknown) => void; - writeStdoutLine: (message: string) => void; - writeStderrLine: (message: string) => void; - exit: (code: number) => never; -}; +export type ExplainCommandContext = CliAgentCommandContext; export async function handleExplainCommand(context: ExplainCommandContext): Promise { const target = context.positionals.join(" ").trim(); diff --git a/src/cli/search.ts b/src/cli/search.ts index 78e6590e..c7fc2e8e 100644 --- a/src/cli/search.ts +++ b/src/cli/search.ts @@ -1,17 +1,9 @@ import { formatAgentSearchResponse, searchCodegraph, type AgentSearchMode } from "../agent/search.js"; +import type { CliAgentCommandContext } from "./context.js"; import { SEARCH_HELP_TEXT } from "./help.js"; import { parsePositiveIntegerOption } from "./options.js"; -export type SearchCommandContext = { - positionals: string[]; - root: string; - getOpt: (name: string) => string | undefined; - hasFlag: (name: string) => boolean; - writeJSONLine: (value: unknown) => void; - writeStdoutLine: (message: string) => void; - writeStderrLine: (message: string) => void; - exit: (code: number) => never; -}; +export type SearchCommandContext = CliAgentCommandContext; function parseAgentSearchMode(rawValue: string | undefined): AgentSearchMode { if (rawValue === undefined) return "hybrid"; diff --git a/src/sqlite/canned-query.ts b/src/sqlite/canned-query.ts index a8a6f3d7..fd994975 100644 --- a/src/sqlite/canned-query.ts +++ b/src/sqlite/canned-query.ts @@ -4,33 +4,34 @@ import type { GraphQueryResult } from "./types.js"; import { dedupePreservingOrder, execRowsParams, toSqliteText } from "./common.js"; import { withReadOnlySqliteDatabase } from "./database.js"; -const loadDirectFileDependencies = (db: SqliteDatabase, fromPath: string): string[] => - execRowsParams( +type DirectFileEdgeDirection = "dependencies" | "dependents"; + +const loadDirectFileEdges = (db: SqliteDatabase, filePath: string, direction: DirectFileEdgeDirection): string[] => { + let selectedColumn = "to_path"; + let constrainedColumn = "from_path"; + if (direction === "dependents") { + selectedColumn = "from_path"; + constrainedColumn = "to_path"; + } + return execRowsParams( db, ` - SELECT to_path + SELECT ${selectedColumn} FROM file_edges - WHERE to_type = ? AND from_path = ? + WHERE to_type = ? AND ${constrainedColumn} = ? ORDER BY rowid; `, - ["file", fromPath], + ["file", filePath], ) .map((row) => toSqliteText(row[0])) .filter(Boolean); +}; + +const loadDirectFileDependencies = (db: SqliteDatabase, fromPath: string): string[] => + loadDirectFileEdges(db, fromPath, "dependencies"); const loadDirectFileDependents = (db: SqliteDatabase, toPath: string): string[] => - execRowsParams( - db, - ` - SELECT from_path - FROM file_edges - WHERE to_type = ? AND to_path = ? - ORDER BY rowid; - `, - ["file", toPath], - ) - .map((row) => toSqliteText(row[0])) - .filter(Boolean); + loadDirectFileEdges(db, toPath, "dependents"); const bfsFileTraversal = (start: string, loadNeighbors: (file: string) => string[]): string[] => { const visited = new Set(); From 88112007dc3de8419f1f782029ce0322de066071 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:20:33 -0400 Subject: [PATCH 08/15] Share duplicate test helpers --- ...2026-05-23-eliminate-duplicate-findings.md | 8 ++--- tests/dynamic-resolution.test.ts | 14 ++------ tests/fast-graph-edgecases.test.ts | 13 ++----- tests/fast-graph.test.ts | 34 +++++-------------- tests/helpers/filesystem.ts | 12 +++++++ tests/helpers/graph.ts | 15 ++++++++ tests/monorepo-fast-graph.test.ts | 14 ++------ tests/node-modules-and-paths.test.ts | 8 ++--- tests/resolution-precedence.test.ts | 6 +--- tests/robust-fast-graph.test.ts | 24 +++++-------- tests/sql-artifact-graph.test.ts | 32 ++++++++--------- tests/sql-review-context.test.ts | 32 ++++++++--------- tests/sqlite.test.ts | 19 ++++------- tests/ts-paths-workspace.test.ts | 8 ++--- 14 files changed, 99 insertions(+), 140 deletions(-) create mode 100644 tests/helpers/graph.ts diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 7d8697f3..0ec29ba1 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -74,19 +74,19 @@ The grouped output is usable for triage. Remaining caveats: ### Tests -- [ ] Add a shared temporary directory helper for tests. +- [x] Add a shared temporary directory helper for tests. - Findings: repeated `mkTmpDir` helpers across dynamic resolution, fast graph edge cases, node modules, resolution precedence, robust fast graph, TS paths workspace, cache, and parsed-cache tests. - Put it near existing test helpers and migrate only obvious identical helpers first. -- [ ] Add shared edge-normalization helpers for graph tests. +- [x] Add shared edge-normalization helpers for graph tests. - Findings: `tests/fast-graph.test.ts`, `tests/monorepo-fast-graph.test.ts`, and related fast graph tests. - Replace duplicated `normEdge`, `toKey`, and slash normalization only where it improves readability. -- [ ] Consolidate repeated SQLite/test database setup blocks. +- [x] Consolidate repeated SQLite/test database setup blocks. - Findings: repeated chunks in `tests/sqlite.test.ts`, `tests/sql-artifact-graph.test.ts`, and `tests/sql-review-context.test.ts`. - Extract helpers that describe domain intent, not just line-for-line setup. -- [ ] Leave intentional fixture repetition alone. +- [x] Leave intentional fixture repetition alone. - Repeated sample snippets are often test data, not production maintenance debt. - Do not refactor setup that would make an individual test harder to read. diff --git a/tests/dynamic-resolution.test.ts b/tests/dynamic-resolution.test.ts index 192d3cd4..d6e835e4 100644 --- a/tests/dynamic-resolution.test.ts +++ b/tests/dynamic-resolution.test.ts @@ -1,16 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} - -function normalizeFile(p: string): string { - return p.replace(/\\/g, "/"); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; describe("Dynamic resolution heuristics", () => { it("resolves path.join(__dirname, ...) when enabled", async () => { @@ -25,7 +17,7 @@ describe("Dynamic resolution heuristics", () => { await fsp.writeFile(mainPath, source, "utf8"); await fsp.writeFile(utilPath, "module.exports = {};", "utf8"); - const graph = await collectGraph(root, [normalizeFile(mainPath), normalizeFile(utilPath)], { + const graph = await collectGraph(root, [normalizeTestPath(mainPath), normalizeTestPath(utilPath)], { dynamicImportHeuristics: true, }); @@ -48,7 +40,7 @@ describe("Dynamic resolution heuristics", () => { await fsp.writeFile(mainPath, `import Button from "components/button";\nconsole.log(Button);\n`, "utf8"); await fsp.writeFile(buttonPath, "export default function Button() {}", "utf8"); - const graph = await collectGraph(root, [normalizeFile(mainPath), normalizeFile(buttonPath)], { + const graph = await collectGraph(root, [normalizeTestPath(mainPath), normalizeTestPath(buttonPath)], { resolutionHints: ["src"], }); diff --git a/tests/fast-graph-edgecases.test.ts b/tests/fast-graph-edgecases.test.ts index 968654fb..d2ac3588 100644 --- a/tests/fast-graph-edgecases.test.ts +++ b/tests/fast-graph-edgecases.test.ts @@ -1,16 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} - -function edgeFrom(file: string) { - return (e: { from: string }) => e.from.replace(/\\/g, "/") === file.replace(/\\/g, "/"); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; +import { edgeFrom } from "./helpers/graph.js"; describe("Fast graph edge cases", () => { it("detects type-only import with typeOnly=true (TS)", async () => { @@ -21,7 +14,7 @@ describe("Fast graph edge cases", () => { const mainPath = path.join(root, "main.ts"); await fsp.writeFile(utilPath, util, "utf8"); await fsp.writeFile(mainPath, main, "utf8"); - const files = [mainPath.replace(/\\/g, "/"), utilPath.replace(/\\/g, "/")]; + const files = [normalizeTestPath(mainPath), normalizeTestPath(utilPath)]; const gNormal = await collectGraph(root, files); const gFast = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); diff --git a/tests/fast-graph.test.ts b/tests/fast-graph.test.ts index 2fdc5baf..eec56846 100644 --- a/tests/fast-graph.test.ts +++ b/tests/fast-graph.test.ts @@ -1,16 +1,12 @@ import { describe, it, expect, vi, afterEach } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph, type Edge } from "../src/index.js"; import * as nativeRuntime from "../src/native/treeSitterNative.js"; +import { mkTmpDir } from "./helpers/filesystem.js"; +import { graphEdgeKey } from "./helpers/graph.js"; import { getSamplePath } from "./test-utils.js"; -function normEdge(e: any) { - const toStr = (t: any) => (t.type === "file" ? t.path : t.name); - return { from: e.from.replace(/\\/g, "/"), to: toStr(e.to).replace(/\\/g, "/"), raw: e.raw, typeOnly: !!e.typeOnly }; -} - describe("Fast graph specifier extraction (--fast-graph)", () => { afterEach(() => { vi.restoreAllMocks(); @@ -26,12 +22,8 @@ describe("Fast graph specifier extraction (--fast-graph)", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); }); @@ -47,12 +39,8 @@ describe("Fast graph specifier extraction (--fast-graph)", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); }); it("matches regular graph edges for Python samples", async () => { @@ -66,17 +54,13 @@ describe("Fast graph specifier extraction (--fast-graph)", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); }); it("can miss multiline import edges that full parsing captures", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "dg-fast-graph-")); + const root = await mkTmpDir("dg-fast-graph-"); const entryPath = path.join(root, "entry.ts").replace(/\\/g, "/"); const depPath = path.join(root, "dep.ts").replace(/\\/g, "/"); await fsp.writeFile(entryPath, `import {\n value\n} from './dep';\n\nconsole.log(value);\n`, "utf8"); diff --git a/tests/helpers/filesystem.ts b/tests/helpers/filesystem.ts index 3a4a7fa8..70c341b3 100644 --- a/tests/helpers/filesystem.ts +++ b/tests/helpers/filesystem.ts @@ -1,3 +1,11 @@ +import path from "node:path"; +import os from "node:os"; +import fsp from "node:fs/promises"; + +export async function mkTmpDir(prefix: string): Promise { + return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); +} + export function isSymlinkUnavailable(error: unknown): boolean { return ( error instanceof Error && @@ -5,3 +13,7 @@ export function isSymlinkUnavailable(error: unknown): boolean { (error.code === "EPERM" || error.code === "EACCES" || error.code === "ENOTSUP") ); } + +export function normalizeTestPath(filePath: string): string { + return filePath.replace(/\\/g, "/"); +} diff --git a/tests/helpers/graph.ts b/tests/helpers/graph.ts new file mode 100644 index 00000000..816044c9 --- /dev/null +++ b/tests/helpers/graph.ts @@ -0,0 +1,15 @@ +import type { Edge } from "../../src/index.js"; +import { normalizeTestPath } from "./filesystem.js"; + +export function graphEdgeTargetKey(to: Edge["to"]): string { + if (to.type === "file") return to.path; + return to.name; +} + +export function graphEdgeKey(edge: Edge): string { + return `${edge.from}|${graphEdgeTargetKey(edge.to)}|${edge.raw}`; +} + +export function edgeFrom(filePath: string): (edge: Pick) => boolean { + return (edge) => normalizeTestPath(edge.from) === normalizeTestPath(filePath); +} diff --git a/tests/monorepo-fast-graph.test.ts b/tests/monorepo-fast-graph.test.ts index 49e14647..ede6a5b4 100644 --- a/tests/monorepo-fast-graph.test.ts +++ b/tests/monorepo-fast-graph.test.ts @@ -1,11 +1,7 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; import { collectGraph } from "../src/index.js"; - -function normEdge(e: any) { - const toStr = (t: any) => (t.type === "file" ? t.path : t.name); - return { from: e.from.replace(/\\/g, "/"), to: toStr(e.to).replace(/\\/g, "/"), raw: e.raw, typeOnly: !!e.typeOnly }; -} +import { graphEdgeKey } from "./helpers/graph.js"; describe("Monorepo fast graph parity", () => { it("fast mode matches normal mode in monorepo sample", async () => { @@ -17,12 +13,8 @@ describe("Monorepo fast graph parity", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); // Ensure workspace package edge is resolved to a file diff --git a/tests/node-modules-and-paths.test.ts b/tests/node-modules-and-paths.test.ts index e24ccda9..7c2c8bcd 100644 --- a/tests/node-modules-and-paths.test.ts +++ b/tests/node-modules-and-paths.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; describe("Node modules resolution (opt-in) and path normalization", () => { it("treats packages as external by default; resolves to file with flag", async () => { @@ -17,7 +13,7 @@ describe("Node modules resolution (opt-in) and path normalization", () => { await fsp.writeFile(path.join(nm, "package.json"), '{"name":"my-pkg","main":"index.js"}', "utf8"); const main = path.join(root, "main.js"); await fsp.writeFile(main, 'import "my-pkg";\n', "utf8"); - const files = [main].map((f) => f.replace(/\\/g, "/")); + const files = [main].map(normalizeTestPath); const g1 = await collectGraph(root, files); expect(g1.edges.some((e) => e.raw === "my-pkg" && e.to.type === "external")).toBe(true); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { resolveNodeModules: true }); diff --git a/tests/resolution-precedence.test.ts b/tests/resolution-precedence.test.ts index 401c7d12..3377501d 100644 --- a/tests/resolution-precedence.test.ts +++ b/tests/resolution-precedence.test.ts @@ -1,13 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("Resolution precedence", () => { it("prefers tsconfig paths over workspace package names", async () => { diff --git a/tests/robust-fast-graph.test.ts b/tests/robust-fast-graph.test.ts index bf7c9c7f..42474aa7 100644 --- a/tests/robust-fast-graph.test.ts +++ b/tests/robust-fast-graph.test.ts @@ -1,24 +1,16 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} - -function toKey(e: any) { - const toStr = (t: any) => (t.type === "file" ? t.path : t.name); - return `${e.from}|${toStr(e.to)}|${e.raw}`; -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; +import { graphEdgeKey } from "./helpers/graph.js"; describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { it("ignores require inside string literal", async () => { const root = await mkTmpDir("dg-fast-strings-"); const file = path.join(root, "a.ts"); await fsp.writeFile(file, `const s = 'require("x")';\n`, "utf8"); - const g = await (await import("../src/graphs.js")).collectGraph(root, [file.replace(/\\/g, "/")], { fast: true }); + const g = await (await import("../src/graphs.js")).collectGraph(root, [normalizeTestPath(file)], { fast: true }); expect(g.edges.length).toBe(0); }); @@ -26,7 +18,7 @@ describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { const root = await mkTmpDir("dg-fast-templates-"); const file = path.join(root, "a.ts"); await fsp.writeFile(file, "const s = `import('x')`\n", "utf8"); - const g = await (await import("../src/graphs.js")).collectGraph(root, [file.replace(/\\/g, "/")], { fast: true }); + const g = await (await import("../src/graphs.js")).collectGraph(root, [normalizeTestPath(file)], { fast: true }); expect(g.edges.length).toBe(0); }); @@ -36,9 +28,9 @@ describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { const main = path.join(root, "main.js"); await fsp.writeFile(a, "export const x = 1;\n", "utf8"); await fsp.writeFile(main, "async function run(){ await import('./a.js'); }\n", "utf8"); - const files = [main, a].map((f) => f.replace(/\\/g, "/")); + const files = [main, a].map(normalizeTestPath); const g = await collectGraph(root, files); - const keys = new Set(g.edges.map(toKey)); + const keys = new Set(g.edges.map(graphEdgeKey)); expect([...keys].some((k) => k.includes("main.js") && k.includes("./a.js"))).toBe(true); }); @@ -48,9 +40,9 @@ describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { const main = path.join(root, "main.js"); await fsp.writeFile(a, "exports.helper = () => 1;\n", "utf8"); await fsp.writeFile(main, "const { helper: h } = require('./a');\n", "utf8"); - const files = [main, a].map((f) => f.replace(/\\/g, "/")); + const files = [main, a].map(normalizeTestPath); const g = await collectGraph(root, files); - const keys = new Set(g.edges.map(toKey)); + const keys = new Set(g.edges.map(graphEdgeKey)); expect([...keys].some((k) => k.includes("main.js") && k.includes("./a"))).toBe(true); }); }); diff --git a/tests/sql-artifact-graph.test.ts b/tests/sql-artifact-graph.test.ts index 3106a6ca..6fd8bcc7 100644 --- a/tests/sql-artifact-graph.test.ts +++ b/tests/sql-artifact-graph.test.ts @@ -1,5 +1,4 @@ import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { describe, expect, it, vi } from "vitest"; import { collectGraph } from "../src/index.js"; @@ -8,6 +7,7 @@ import { buildSqlFactCache, buildSqlModuleIndex, collectSqlEdgesForFile } from " import { SymbolKind } from "../src/indexer/types.js"; import { computeFileSymbolHashes } from "../src/util/symbolHash.js"; import type { GraphCacheEntry } from "../src/graphs/types.js"; +import { mkTmpDir } from "./helpers/filesystem.js"; const fixtureRoot = path.resolve(process.cwd(), "tests", "samples", "sql", "graph"); const sqlFiles = ["001_create_users.sql", "002_alter_users.sql", "report.sql"].map((file) => @@ -49,7 +49,7 @@ describe("SQL artifact graph", () => { }); it("adds SQL-to-SQL edges for read dependencies inside write statements", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-dml-graph-")); + const root = await mkTmpDir("cg-sql-dml-graph-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const pipelineFile = path.join(root, "pipeline.sql").replace(/\\/g, "/"); @@ -101,7 +101,7 @@ describe("SQL artifact graph", () => { }); it("links schema-qualified SQL references to unqualified object definitions", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-qualified-edge-")); + const root = await mkTmpDir("cg-sql-qualified-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const publicSchemaFile = path.join(root, "public_schema.sql").replace(/\\/g, "/"); @@ -141,7 +141,7 @@ describe("SQL artifact graph", () => { }); it("emits heuristic SQL edges for basename fallback matches", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-fallback-edge-")); + const root = await mkTmpDir("cg-sql-fallback-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -165,7 +165,7 @@ describe("SQL artifact graph", () => { }); it("links unqualified SQL references to a single schema-qualified object definition", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-unqualified-qualified-edge-")); + const root = await mkTmpDir("cg-sql-unqualified-qualified-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -189,7 +189,7 @@ describe("SQL artifact graph", () => { }); it("does not guess ambiguous SQL edges from unqualified references to schema-qualified definitions", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-ambiguous-unqualified-edge-")); + const root = await mkTmpDir("cg-sql-ambiguous-unqualified-edge-"); try { const publicSchemaFile = path.join(root, "public_schema.sql").replace(/\\/g, "/"); const archiveSchemaFile = path.join(root, "archive_schema.sql").replace(/\\/g, "/"); @@ -211,7 +211,7 @@ describe("SQL artifact graph", () => { }); it("does not emit duplicate join edges for the primary FROM object", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-join-primary-edge-")); + const root = await mkTmpDir("cg-sql-join-primary-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -249,7 +249,7 @@ describe("SQL artifact graph", () => { }); it("does not emit SQL dependency self-edges inside a single SQL file", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-self-edge-")); + const root = await mkTmpDir("cg-sql-self-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); await fsp.writeFile(schemaFile, "CREATE TABLE users (id integer);\nSELECT id FROM users;\n", "utf8"); @@ -267,7 +267,7 @@ describe("SQL artifact graph", () => { }); it("links SQL rename statements to the source definition without targeting the new name", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-rename-edge-")); + const root = await mkTmpDir("cg-sql-rename-edge-"); try { const oldSchemaFile = path.join(root, "old_schema.sql").replace(/\\/g, "/"); const unrelatedNewSchemaFile = path.join(root, "unrelated_new_schema.sql").replace(/\\/g, "/"); @@ -337,7 +337,7 @@ describe("SQL artifact graph", () => { }); it("includes related SQL objects in artifact graph candidate mentions", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-related-artifacts-")); + const root = await mkTmpDir("cg-sql-related-artifacts-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); await fsp.writeFile( @@ -376,7 +376,7 @@ describe("SQL artifact graph", () => { }); it("reuses SQL facts across per-file edge collection", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-cache-")); + const root = await mkTmpDir("cg-sql-cache-"); const files: string[] = []; try { for (let index = 0; index < 12; index += 1) { @@ -403,7 +403,7 @@ describe("SQL artifact graph", () => { }); it("bounds concurrent SQL fact reads when building the corpus cache", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-cache-concurrency-")); + const root = await mkTmpDir("cg-sql-cache-concurrency-"); const files: string[] = []; try { for (let index = 0; index < 40; index += 1) { @@ -440,7 +440,7 @@ describe("SQL artifact graph", () => { }); it("reuses SQL edge cache entries when the SQL corpus signature is unchanged", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-edge-cache-reuse-")); + const root = await mkTmpDir("cg-sql-edge-cache-reuse-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -502,7 +502,7 @@ describe("SQL artifact graph", () => { }); it("rejects stale SQL edge cache entries when another SQL file changes", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-edge-cache-invalidate-")); + const root = await mkTmpDir("cg-sql-edge-cache-invalidate-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -546,7 +546,7 @@ describe("SQL artifact graph", () => { }); it("recomputes unchanged SQL query edges when changed SQL definitions add targets", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-incremental-add-")); + const root = await mkTmpDir("cg-sql-incremental-add-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -581,7 +581,7 @@ describe("SQL artifact graph", () => { }); it("removes unchanged SQL query edges when changed SQL definitions drop targets", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-incremental-remove-")); + const root = await mkTmpDir("cg-sql-incremental-remove-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); diff --git a/tests/sql-review-context.test.ts b/tests/sql-review-context.test.ts index 67843a43..6b6948e2 100644 --- a/tests/sql-review-context.test.ts +++ b/tests/sql-review-context.test.ts @@ -1,13 +1,9 @@ import fs from "node:fs/promises"; -import os from "node:os"; import path from "node:path"; import { describe, expect, it, vi } from "vitest"; import { buildReviewReport } from "../src/index.js"; import { collectSqlReviewContext } from "../src/sql/index.js"; - -async function mkTmpDir(): Promise { - return await fs.mkdtemp(path.join(os.tmpdir(), "cg-sql-review-")); -} +import { mkTmpDir } from "./helpers/filesystem.js"; async function rmTmpDir(root: string): Promise { await fs.rm(root, { recursive: true, force: true }); @@ -21,7 +17,7 @@ async function writeFile(filePath: string, contents: string): Promise { describe("SQL review context", () => { it("surfaces SQL candidates for changed code SQL literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const schema = await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const repo = await writeFile( @@ -44,7 +40,7 @@ describe("SQL review context", () => { }); it("includes changed SQL files without creating source dependency impact", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const migration = await writeFile( path.join(root, "db", "migrations", "20190101000000_legacy.sql"), @@ -68,7 +64,7 @@ describe("SQL review context", () => { }); it("keeps seed SQL visible only when the seed file changes", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const seed = await writeFile(path.join(root, "db", "seeds", "users.sql"), "INSERT INTO users (id) VALUES (1);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), "export const ok = true;\n"); @@ -90,7 +86,7 @@ describe("SQL review context", () => { }); it("does not surface stale SQL for unrelated code-only reviews", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile( path.join(root, "db", "migrations", "20190101000000_legacy.sql"), @@ -108,7 +104,7 @@ describe("SQL review context", () => { }); it("skips the SQL corpus when changed code has no SQL-like text", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), "export const count = 1;\n"); @@ -126,7 +122,7 @@ describe("SQL review context", () => { }); it("does not treat ES module imports as SQL literal hints", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), 'import users from "./users";\n'); @@ -144,7 +140,7 @@ describe("SQL review context", () => { }); it("does not treat a bare with token as a SQL literal hint", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), 'export const label = "created with care";\n'); @@ -162,7 +158,7 @@ describe("SQL review context", () => { }); it("does not treat a bare update phrase as a SQL literal hint", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), 'export const label = "update x";\n'); @@ -180,7 +176,7 @@ describe("SQL review context", () => { }); it("does not treat a bare select identifier as a SQL literal hint", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile( @@ -201,7 +197,7 @@ describe("SQL review context", () => { }); it("uses provided project files when matching code SQL literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "userRepo.ts"), "export const query = `SELECT * FROM users`;\n"); @@ -216,7 +212,7 @@ describe("SQL review context", () => { }); it("keeps full SQL discovery for sparse reviews that include a changed SQL file", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const schema = await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const migration = await writeFile(path.join(root, "db", "migrations", "20260513000000_orders.sql"), [ @@ -247,7 +243,7 @@ describe("SQL review context", () => { }); it("bounds concurrent SQL fact reads when matching changed code literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { for (let index = 0; index < 40; index += 1) { await writeFile(path.join(root, "db", `schema_${index}.sql`), `CREATE TABLE table_${index} (id integer);\n`); @@ -287,7 +283,7 @@ describe("SQL review context", () => { }); it("surfaces SQL candidates for changed code update literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const schema = await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile( diff --git a/tests/sqlite.test.ts b/tests/sqlite.test.ts index 55d6a28a..bcaf7a1e 100644 --- a/tests/sqlite.test.ts +++ b/tests/sqlite.test.ts @@ -1,6 +1,5 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { DatabaseSync } from "node:sqlite"; import { @@ -11,11 +10,7 @@ import { queryGraphSqlite, queryGraphSqliteRaw, } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - const dir = await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); - return dir; -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; const dbQuery = (db: DatabaseSync, sql: string): string[] => { const stmt = db.prepare(sql); @@ -326,20 +321,20 @@ export const run = () => helper(); fileGraph: nextIndex.graph, symbolGraph: nextSgraph, outputPath: dbPath, - changedFiles: [path.join(root, "main.ts").replace(/\\/g, "/")], - deletedFiles: [path.join(root, "util.ts").replace(/\\/g, "/")], + changedFiles: [normalizeTestPath(path.join(root, "main.ts"))], + deletedFiles: [normalizeTestPath(path.join(root, "util.ts"))], fullGraphSync: true, }); const db = new DatabaseSync(dbPath); const utilFiles = dbQuery( db, - `SELECT path FROM files WHERE path = '${path.join(root, "util.ts").replace(/\\/g, "/")}';`, + `SELECT path FROM files WHERE path = '${normalizeTestPath(path.join(root, "util.ts"))}';`, ); const utilSymbols = dbQuery(db, "SELECT name FROM symbols WHERE name = 'helper';"); const staleEdges = dbQuery( db, - `SELECT to_path FROM file_edges WHERE to_path = '${path.join(root, "util.ts").replace(/\\/g, "/")}';`, + `SELECT to_path FROM file_edges WHERE to_path = '${normalizeTestPath(path.join(root, "util.ts"))}';`, ); expect(utilFiles).toEqual([]); @@ -481,8 +476,8 @@ export const run = () => helper(); fileGraph: nextIndex.graph, symbolGraph: nextSgraph, outputPath: dbPath, - changedFiles: [path.join(root, "main.ts").replace(/\\/g, "/")], - deletedFiles: [path.join(root, "util.ts").replace(/\\/g, "/")], + changedFiles: [normalizeTestPath(path.join(root, "main.ts"))], + deletedFiles: [normalizeTestPath(path.join(root, "util.ts"))], fullGraphSync: true, }); diff --git a/tests/ts-paths-workspace.test.ts b/tests/ts-paths-workspace.test.ts index 496cdb49..ae181baa 100644 --- a/tests/ts-paths-workspace.test.ts +++ b/tests/ts-paths-workspace.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; describe("TypeScript paths/baseUrl resolution via tsconfig", () => { it("resolves @lib/util to lib/util.ts using tsconfig paths", async () => { @@ -28,7 +24,7 @@ describe("TypeScript paths/baseUrl resolution via tsconfig", () => { const main = path.join(root, "main.ts"); await fsp.writeFile(util, "export const fn = () => 1;\n", "utf8"); await fsp.writeFile(main, "import { fn } from '@lib/util';\nconst x = fn();\n", "utf8"); - const files = [util, main].map((f) => f.replace(/\\/g, "/")); + const files = [util, main].map(normalizeTestPath); const g = await collectGraph(root, files); expect( g.edges.some( From b0304685fd3cfb9ec4afe93fd5e7cfbf768002a4 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:29:52 -0400 Subject: [PATCH 09/15] Refine duplicate grouping and remaining test helpers --- ...2026-05-23-eliminate-duplicate-findings.md | 11 +++++-- src/duplicates.ts | 33 ++++++++++++++++--- tests/disk-cache-sqlite.test.ts | 6 +--- tests/parsed-cache-eviction.test.ts | 6 +--- tests/parsed-cache-reuse.test.ts | 6 +--- tests/symbols-detailed-edgecases.test.ts | 6 +--- 6 files changed, 42 insertions(+), 26 deletions(-) diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 0ec29ba1..0975b82e 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -18,6 +18,13 @@ Results: - Whole-repo top results are dominated by test helpers and setup snippets. - Product-code top results are concentrated in `src/graphs`, `src/cli`, `src/impact`, `src/chunking`, `src/mcp`, and language definitions. +Post-refactor comparison: + +- `src` scan: 100 returned groups, 869 omitted groups, 1611 omitted raw suggestions. +- whole-repo scan: 200 returned groups, 899 omitted groups, 2904 omitted raw suggestions. +- The previous `src/graphs/symbol-render.ts`, `src/cli/graph.ts`, chunk tokenizer, AST range, CSS/LESS, and first-pass test helper findings dropped out of the top product-code results. +- The analyzer no longer ranks the large C/C++ query chunk against tiny language snippets as a top medium/high finding. + The grouped output is usable for triage. Remaining caveats: - Some chunk findings are sub-ranges of a larger duplicate and should be handled through the larger refactor. @@ -92,11 +99,11 @@ The grouped output is usable for triage. Remaining caveats: ### Analyzer Follow-Ups -- [ ] Consider a length-ratio guard for high-confidence renamed groups. +- [x] Consider a length-ratio guard for high-confidence renamed groups. - Example noise: large C/C++ query chunks paired with tiny language-definition snippets. - The detector already reports `lengthRatio`; ranking or confidence can use it more aggressively. -- [ ] Consider collapsing adjacent same-file chunk findings under a larger group. +- [x] Consider collapsing adjacent same-file chunk findings under a larger group. - Example: multiple `src/cli/graph.ts` chunk findings are one underlying helper extraction. - Keep raw variants available through `--raw-pairs`. diff --git a/src/duplicates.ts b/src/duplicates.ts index e8a99722..5699d329 100644 --- a/src/duplicates.ts +++ b/src/duplicates.ts @@ -140,6 +140,8 @@ const DEFAULT_GROUP_VARIANT_LIMIT = 5; const DEFAULT_SHINGLE_SIZE = 5; const DEFAULT_WINDOW_SIZE = 4; const DEFAULT_MAX_FINGERPRINTS = 128; +const GROUP_PRIMARY_LENGTH_RATIO_FLOOR = 0.7; +const NEARBY_CHUNK_VARIANT_MAX_GAP = 2; const textLanguageByExtension: Record = { ".json": "json", @@ -334,6 +336,18 @@ function rangesSubstantiallyOverlap(left: DuplicateUnitRef, right: DuplicateUnit return overlap / Math.min(lineSpan(left), lineSpan(right)) >= 0.8; } +function lineGap(left: DuplicateUnitRef, right: DuplicateUnitRef): number { + if (left.endLine < right.startLine) return right.startLine - left.endLine - 1; + if (right.endLine < left.startLine) return left.startLine - right.endLine - 1; + return 0; +} + +function rangesAreNearbyChunkVariants(left: DuplicateUnitRef, right: DuplicateUnitRef): boolean { + if (left.file !== right.file || left.languageId !== right.languageId) return false; + if (left.kind !== "chunk" || right.kind !== "chunk") return false; + return lineGap(left, right) <= NEARBY_CHUNK_VARIANT_MAX_GAP; +} + function ratio(left: number, right: number): number { if (!left || !right) return 0; return Math.min(left, right) / Math.max(left, right); @@ -952,8 +966,10 @@ function createUnitClusters(refs: readonly DuplicateUnitRef[]): Map left.ref.endLine) break; - if (rangesSubstantiallyOverlap(left.ref, right.ref)) union(left.key, right.key); + if (right.ref.startLine > left.ref.endLine + NEARBY_CHUNK_VARIANT_MAX_GAP + 1) break; + if (rangesSubstantiallyOverlap(left.ref, right.ref) || rangesAreNearbyChunkVariants(left.ref, right.ref)) { + union(left.key, right.key); + } } } } @@ -1009,6 +1025,13 @@ function groupForSuggestions( confidence = bestConfidence(confidence, suggestion.confidence); cloneType = bestCloneType(cloneType, suggestion.cloneType); } + let reasons = mergeReasons(suggestions); + const primaryLengthRatio = ratio(left.primary.tokenCount, right.primary.tokenCount); + if (primaryLengthRatio < GROUP_PRIMARY_LENGTH_RATIO_FLOOR) { + score = Math.min(score, 64); + confidence = "low"; + reasons = Array.from(new Set([...reasons, "different-sized grouped units"])).sort(); + } return { id: shortHashText(key), score, @@ -1021,7 +1044,7 @@ function groupForSuggestions( rawPairCount: suggestions.length, omittedVariantCount: Math.max(0, suggestions.length - variants.length), metrics: primary.metrics, - reasons: mergeReasons(suggestions), + reasons, }; } @@ -1120,7 +1143,9 @@ export async function findDuplicates( suggestions.sort(compareSuggestions); const includeRawPairs = options.includeRawPairs ?? false; - const groups = groupSuggestions(suggestions, includeRawPairs); + const groups = groupSuggestions(suggestions, includeRawPairs).filter( + (group) => confidenceRank[group.confidence] >= confidenceRank[minConfidence], + ); const limitedGroups = groups.slice(0, limit); const omittedGroups = Math.max(0, groups.length - limitedGroups.length); const limitedRawSuggestions = includeRawPairs ? suggestions.slice(0, limit) : []; diff --git a/tests/disk-cache-sqlite.test.ts b/tests/disk-cache-sqlite.test.ts index dccc192b..ac96deb3 100644 --- a/tests/disk-cache-sqlite.test.ts +++ b/tests/disk-cache-sqlite.test.ts @@ -1,13 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex, type BuildReport } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("disk cache uses sqlite backend", () => { it("persists module cache in sqlite and reuses entries", async () => { diff --git a/tests/parsed-cache-eviction.test.ts b/tests/parsed-cache-eviction.test.ts index 2f002d18..5de8fd98 100644 --- a/tests/parsed-cache-eviction.test.ts +++ b/tests/parsed-cache-eviction.test.ts @@ -1,13 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("parsed AST cache eviction", () => { it("limits parsed entries to configured max", async () => { diff --git a/tests/parsed-cache-reuse.test.ts b/tests/parsed-cache-reuse.test.ts index 8c710305..95966170 100644 --- a/tests/parsed-cache-reuse.test.ts +++ b/tests/parsed-cache-reuse.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex, type BuildReport } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("Parsed cache reuse", () => { it("reuses cached parse outputs on the second build", async () => { diff --git a/tests/symbols-detailed-edgecases.test.ts b/tests/symbols-detailed-edgecases.test.ts index 4a4b5dd3..c7c3dd99 100644 --- a/tests/symbols-detailed-edgecases.test.ts +++ b/tests/symbols-detailed-edgecases.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("Symbols-detailed edge cases", () => { it("membersOnly keeps namespace member edges but drops direct alias uses", async () => { From 7f2fcbe2d6494b8fa2731e0d1cdb799f5577d736 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:33:01 -0400 Subject: [PATCH 10/15] Mark duplicate refactor plan complete --- .../plans/2026-05-23-eliminate-duplicate-findings.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 0975b82e..2126a5db 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -109,8 +109,8 @@ The grouped output is usable for triage. Remaining caveats: ## Verification Plan -- [ ] Run `npm run build`. -- [ ] Run `npm run lint`. -- [ ] Run focused tests for touched areas. -- [ ] Run `npm test` before pushing a completed refactor batch. -- [ ] Re-run the duplicate analyzer and compare top findings against this baseline. +- [x] Run `npm run build`. +- [x] Run `npm run lint`. +- [x] Run focused tests for touched areas. +- [x] Run `npm test` before pushing a completed refactor batch. +- [x] Re-run the duplicate analyzer and compare top findings against this baseline. From 918c66048044a468ff3c3d60d126c5e475fadb42 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 22:35:37 -0400 Subject: [PATCH 11/15] Use static canned query SQL --- src/sqlite/canned-query.ts | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/sqlite/canned-query.ts b/src/sqlite/canned-query.ts index fd994975..527a0466 100644 --- a/src/sqlite/canned-query.ts +++ b/src/sqlite/canned-query.ts @@ -7,22 +7,21 @@ import { withReadOnlySqliteDatabase } from "./database.js"; type DirectFileEdgeDirection = "dependencies" | "dependents"; const loadDirectFileEdges = (db: SqliteDatabase, filePath: string, direction: DirectFileEdgeDirection): string[] => { - let selectedColumn = "to_path"; - let constrainedColumn = "from_path"; + let sql = ` + SELECT to_path + FROM file_edges + WHERE to_type = ? AND from_path = ? + ORDER BY rowid; + `; if (direction === "dependents") { - selectedColumn = "from_path"; - constrainedColumn = "to_path"; - } - return execRowsParams( - db, - ` - SELECT ${selectedColumn} + sql = ` + SELECT from_path FROM file_edges - WHERE to_type = ? AND ${constrainedColumn} = ? + WHERE to_type = ? AND to_path = ? ORDER BY rowid; - `, - ["file", filePath], - ) + `; + } + return execRowsParams(db, sql, ["file", filePath]) .map((row) => toSqliteText(row[0])) .filter(Boolean); }; From 6b55db6249152815cd6bef4d74effc63af97c110 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Fri, 22 May 2026 23:01:48 -0400 Subject: [PATCH 12/15] Surface duplicate opportunities in inspect --- README.md | 11 +++ codegraph-skill/codegraph/SKILL.md | 3 +- docs/cli.md | 2 + ...2026-05-23-eliminate-duplicate-findings.md | 68 +++++++++++++ src/agent/handles.ts | 17 ++-- src/cli/context.ts | 18 +--- src/cli/inspect.ts | 84 ++++++++++++++-- src/duplicates.ts | 95 ++++++++++++++++++- src/impact/report-suggestions.ts | 20 ++-- src/indexer/imports/languageSpecific.ts | 28 ++++-- src/languages/definitions/c.ts | 64 ++++--------- src/languages/definitions/cFamily.ts | 52 ++++++++++ src/languages/definitions/cpp.ts | 68 +++++-------- src/util/packageExports.ts | 9 ++ src/util/projectFiles.ts | 2 +- src/util/resolution/node.ts | 13 +-- src/util/workspace.ts | 14 +-- tests/cli-regressions.test.ts | 43 ++++++++- tests/duplicates.test.ts | 47 +++++++++ 19 files changed, 492 insertions(+), 166 deletions(-) create mode 100644 src/util/packageExports.ts diff --git a/README.md b/README.md index 4fecf0bd..f9344cdf 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,17 @@ Use Codegraph when you need fast structural answers about a repo without relying "score": 59 } ], + "duplicates": { + "minConfidence": "high", + "top": [ + { + "confidence": "high", + "cloneType": "exact", + "left": { "file": "src/a.ts", "startLine": 10, "endLine": 24 }, + "right": { "file": "src/b.ts", "startLine": 8, "endLine": 22 } + } + ] + }, "recommendedCommands": ["codegraph hotspots --root \"/workspace/codegraph/src\" --limit 20 --json"] } ``` diff --git a/codegraph-skill/codegraph/SKILL.md b/codegraph-skill/codegraph/SKILL.md index 9550586a..7703838c 100644 --- a/codegraph-skill/codegraph/SKILL.md +++ b/codegraph-skill/codegraph/SKILL.md @@ -53,7 +53,7 @@ Then choose the narrowest follow-up command: - Artifact bundle: `codegraph artifact build --root . --out codegraph-out --json` - MCP server: `codegraph mcp serve --root . --stdio` or `codegraph mcp serve --root . --port 7331` -Use `--json` when the output will feed later reasoning, scripts, or another agent step. `search` is deterministic and returns project-relative explainable handles, evidence, neighbors, follow-up commands, result counts, limits, and omission counts. `explain` accepts those handles plus file paths, symbol names, and SQL object names, then returns bounded symbols, dependencies, reverse dependencies, references, snippets, SQL relation facts, changed-context review tasks/candidate tests, explicit limits, omission counts, and next commands. Generated command strings POSIX-shell-quote dynamic arguments when needed. For SQL objects, use search handles or schema-qualified names when basenames may be ambiguous. Reference and snippet omission counts are lower bounds after bounded navigation hits its cap. `artifact build` writes a durable SQLite, self-describing project-relative graph JSON, report, questions with unique stable-handle command IDs, and manifest bundle for handoff while excluding its own in-repo output directory and linked outside-root files. With `--force`, recognizable stale artifact files are removed, unrelated operator files are preserved, and unrecognized reserved-name collisions are refused. `codegraph doctor ` recognizes manifest-backed artifact bundle directories and reports expected artifact presence. `mcp serve` exposes the same primitives as read-only MCP tools by default over stdio, or over Streamable HTTP with `--port ` at `/mcp`; HTTP binds to `127.0.0.1` unless `--host ` is passed, validates Host headers, and allows loopback Host headers for wildcard binds. File/artifact paths are confined after realpath resolution, SQLite query results are row- and byte-bounded, synthetic payload functions are rejected, and `--allow-build` is required before an agent may write artifact output. +Use `--json` when the output will feed later reasoning, scripts, or another agent step. `inspect` includes compact high-confidence duplicate opportunities plus a recommended `duplicates` command for full grouped JSON. `search` is deterministic and returns project-relative explainable handles, evidence, neighbors, follow-up commands, result counts, limits, and omission counts. `explain` accepts those handles plus file paths, symbol names, and SQL object names, then returns bounded symbols, dependencies, reverse dependencies, references, snippets, SQL relation facts, changed-context review tasks/candidate tests, explicit limits, omission counts, and next commands. Generated command strings POSIX-shell-quote dynamic arguments when needed. For SQL objects, use search handles or schema-qualified names when basenames may be ambiguous. Reference and snippet omission counts are lower bounds after bounded navigation hits its cap. `artifact build` writes a durable SQLite, self-describing project-relative graph JSON, report, questions with unique stable-handle command IDs, and manifest bundle for handoff while excluding its own in-repo output directory and linked outside-root files. With `--force`, recognizable stale artifact files are removed, unrelated operator files are preserved, and unrecognized reserved-name collisions are refused. `codegraph doctor ` recognizes manifest-backed artifact bundle directories and reports expected artifact presence. `mcp serve` exposes the same primitives as read-only MCP tools by default over stdio, or over Streamable HTTP with `--port ` at `/mcp`; HTTP binds to `127.0.0.1` unless `--host ` is passed, validates Host headers, and allows loopback Host headers for wildcard binds. File/artifact paths are confined after realpath resolution, SQLite query results are row- and byte-bounded, synthetic payload functions are rejected, and `--allow-build` is required before an agent may write artifact output. Numeric options such as `--limit`, `--threads`, `--depth`, `--max-refs`, and token bounds must be integers in their documented ranges; invalid numeric values fail instead of being silently clamped or ignored. @@ -200,6 +200,7 @@ For git-provider impact and git-scoped review/index/graph commands, `WORKTREE` c - Start here when you need an architecture summary: `codegraph inspect ./src --limit 20` + Includes compact high-confidence duplicate opportunities and follow-up commands. - Dependencies of a file: `codegraph deps ` - Reverse dependencies: diff --git a/docs/cli.md b/docs/cli.md index a48299c7..87102313 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -96,6 +96,8 @@ codegraph index --report codegraph review --report --report-file review.report.json ``` +`inspect` emits bounded hotspots, unresolved imports, cycles, and high-confidence duplicate opportunities. Duplicate opportunities are intentionally compact and include file ranges, confidence, clone type, score, token counts, and raw pair counts; run the recommended `duplicates` command for full grouped JSON. + Graph, index, and review reports include `backend.native.byLanguage` so native usage and fallback remain visible per language. Build reports also include `backend.parser` when syntax-tree backend degradation leaves files without parser context. Reports also include `graph.fallbackImportExtraction.byLanguage` and `byReason` when regex import extraction is used. Review JSON reports `diagnostics.symbolMappingParseFailures`, `diagnostics.missingFiles`, `changedFiles[].status` as `updated`, `deleted`, or `missing`, and `sqlContext` when changed SQL files or changed SQL literals make SQL artifact facts relevant. ### Symbols, navigation, grep, and chunking diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md index 2126a5db..d0153913 100644 --- a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -107,6 +107,74 @@ The grouped output is usable for triage. Remaining caveats: - Example: multiple `src/cli/graph.ts` chunk findings are one underlying helper extraction. - Keep raw variants available through `--raw-pairs`. +## Follow-Up Scan: 2026-05-23 + +Generated after the first duplicate cleanup pass on branch `elim-dups`. + +Commands: + +```bash +node ./dist/cli.js duplicates --root . ./src --min-confidence medium --limit 120 --include-same-file +node ./dist/cli.js duplicates --root . . --min-confidence high --limit 120 --include-same-file +node ./dist/cli.js inspect --root . ./src --limit 8 +node ./dist/cli.js review --base main --head HEAD --summary +node ./dist/cli.js doctor +``` + +Results: + +- `src` scan: 120 returned groups, 849 omitted groups, 1611 omitted raw suggestions. +- whole-repo scan: 120 returned groups, 973 omitted groups, 2898 omitted raw suggestions. +- `doctor` reports native runtime availability and artifact health only; it does not surface duplicate cleanup opportunities. +- `review --summary` reports diff risk and candidate tests only; it does not surface duplicate cleanup opportunities. +- `inspect` reports hotspots, unresolved imports, cycles, and recommended commands only; it is the best current home for an at-a-glance duplicate opportunity section. + +### Product Output Follow-Ups + +- [x] Coalesce repeated grouped duplicate findings with the same primary ranges. + - Finding: `src/indexer/imports/languageSpecific.ts:249-258 applyJavaStatementOverride` vs `src/indexer/imports/languageSpecific.ts:260-269 applyKotlinStatementOverride` appeared multiple times with the same primary pair. + - Preserve raw evidence counts through `rawPairCount` and bounded `variants`. + - Keep `--raw-pairs` as the explicit escape hatch for low-level pair inspection. + +- [x] Surface bounded duplicate opportunities in `inspect` output. + - Include high-signal fields only: confidence, clone type, score, files/ranges, token counts, and raw pair count. + - Keep the summary bounded by `--limit`. + - Add a `duplicates` follow-up command to `recommendedCommands` so agents can drill into full grouped JSON. + - Leave `doctor` focused on package/runtime/artifact health. + +### Remaining Source Cleanup Candidates + +- [x] Share C/C++ language-definition scaffolding where it stays readable. + - Findings: `src/languages/definitions/c.ts:14-147` and `src/languages/definitions/cpp.ts:14-165`. + - Preserve C-specific macro support and C++ namespace/class/alias/using/lambda behavior. + +- [x] Share package export target selection between node-module and workspace resolution. + - Findings: `src/util/resolution/node.ts:22-34 tryResolveRelative` and `src/util/workspace.ts:298-307 pickExportTarget`. + - Prefer a small resolver helper over duplicating `exports` target precedence. + +- [x] Share Java/Kotlin import override plumbing. + - Findings: `src/indexer/imports/languageSpecific.ts:249-258` and `src/indexer/imports/languageSpecific.ts:260-269`. + - Keep Java and Kotlin parsing differences explicit at the call site. + +- [x] Share range line-counting for coverage suggestions. + - Findings: `src/impact/report-suggestions.ts:820-827` and `src/impact/report-suggestions.ts:829-836`. + - Preserve zero-count behavior when no coverage is available. + +- [x] Share file-like agent handle parsing. + - Findings: `src/agent/handles.ts:54-60` and `src/agent/handles.ts:82-88`. + - Keep public handle prefixes and return types unchanged. + +- [x] Share discovery glob relative-path matching. + - Findings: `src/cli/context.ts:25-35` and `src/util/projectFiles.ts:278-288`. + - Keep CLI include/ignore globs relative to active scan roots and config globs relative to project roots. + +Follow-up verification: + +- [x] `src` scan reports `0` repeated displayed primary pairs in the first 80 medium-or-higher groups. +- [x] `inspect --root . ./src --limit 5` emits compact high-confidence duplicate opportunities and a `duplicates` follow-up command. +- [x] Focused duplicate, inspect, C/C++, resolution, references, impact-suggestion, and agent-search tests pass. +- [x] Full test suite passes. + ## Verification Plan - [x] Run `npm run build`. diff --git a/src/agent/handles.ts b/src/agent/handles.ts index 5596080b..c32fe014 100644 --- a/src/agent/handles.ts +++ b/src/agent/handles.ts @@ -51,14 +51,19 @@ export function formatAgentFileHandle(handle: AgentFileHandle): string { return ["file", encodeURIComponent(handle.file)].join(":"); } -export function parseAgentFileHandle(handle: string): AgentFileHandle | null { - if (!handle.startsWith("file:")) return null; - const encodedFile = handle.slice("file:".length); +function parseAgentFileLikeHandle(handle: string, prefix: "file" | "graph"): AgentFileHandle | null { + const handlePrefix = `${prefix}:`; + if (!handle.startsWith(handlePrefix)) return null; + const encodedFile = handle.slice(handlePrefix.length); const file = decodeHandlePart(encodedFile) ?? encodedFile; if (!file) return null; return { file }; } +export function parseAgentFileHandle(handle: string): AgentFileHandle | null { + return parseAgentFileLikeHandle(handle, "file"); +} + export function formatAgentChunkHandle(handle: AgentChunkHandle): string { return ["chunk", encodeURIComponent(handle.file), String(handle.line)].join(":"); } @@ -80,11 +85,7 @@ export function formatAgentGraphHandle(handle: AgentFileHandle): string { } export function parseAgentGraphHandle(handle: string): AgentFileHandle | null { - if (!handle.startsWith("graph:")) return null; - const encodedFile = handle.slice("graph:".length); - const file = decodeHandlePart(encodedFile) ?? encodedFile; - if (!file) return null; - return { file }; + return parseAgentFileLikeHandle(handle, "graph"); } export function formatAgentSqlHandle(handle: AgentSqlHandle): string { diff --git a/src/cli/context.ts b/src/cli/context.ts index 315d6a73..ff965fcd 100644 --- a/src/cli/context.ts +++ b/src/cli/context.ts @@ -6,7 +6,7 @@ import picomatch from "picomatch"; import type { BuildReport } from "../indexer/types.js"; import type { ReviewBuildReport } from "../review.js"; import { normalizePath, resolveFilePathFromRoot } from "../util/paths.js"; -import { type ProjectFileDiscoveryOptions } from "../util/projectFiles.js"; +import { matchesDiscoveryGlob, type ProjectFileDiscoveryOptions } from "../util/projectFiles.js"; import { isRelativePathInside } from "../util/projectFiles.js"; import { isCliValueOption } from "./options.js"; @@ -22,18 +22,6 @@ export function isCliDiscoveryRelativePathInside(relativePath: string): boolean return isRelativePathInside(relativePath); } -function matchesCliDiscoveryGlob( - absolutePath: string, - scanRoot: string, - matcher: (relativePath: string) => boolean, -): boolean { - const relativePath = path.relative(scanRoot, absolutePath); - if (!isCliDiscoveryRelativePathInside(relativePath)) { - return false; - } - return matcher(normalizePath(relativePath)); -} - export function filterFilesByCliDiscoveryGlobs( files: readonly string[], scanRoot: string, @@ -55,11 +43,11 @@ export function filterFilesByCliDiscoveryGlobs( return files.filter((filePath) => { if ( includeMatchers.length && - !includeMatchers.some((matcher) => matchesCliDiscoveryGlob(filePath, scanRoot, matcher)) + !includeMatchers.some((matcher) => matchesDiscoveryGlob(filePath, scanRoot, matcher)) ) { return false; } - return !ignoreMatchers.some((matcher) => matchesCliDiscoveryGlob(filePath, scanRoot, matcher)); + return !ignoreMatchers.some((matcher) => matchesDiscoveryGlob(filePath, scanRoot, matcher)); }); } diff --git a/src/cli/inspect.ts b/src/cli/inspect.ts index a56ee9a5..22d954e9 100644 --- a/src/cli/inspect.ts +++ b/src/cli/inspect.ts @@ -1,5 +1,6 @@ import fs from "node:fs"; import path from "node:path"; +import { findDuplicates, type DuplicateConfidence, type DuplicateGroup } from "../duplicates.js"; import { collectGraph } from "../graph-builder.js"; import { findDetailedCycles, getUnresolvedImports, sortDetailedCycles } from "../graphs/queries.js"; import { getHotspots } from "../graphs/hotspots.js"; @@ -19,6 +20,7 @@ import { type ProjectFileDiscoveryOptions } from "../util/projectFiles.js"; import { parseCacheModeOption, parsePositiveIntegerOption } from "./options.js"; type CacheMode = "off" | "memory" | "disk"; +const INSPECT_DUPLICATE_MIN_TOKENS = 60; type IndexCacheMetadata = { manifestPath: string; @@ -59,9 +61,33 @@ type InspectReport = { size: number; }>; }; + duplicates: { + total: number; + omitted: number; + minConfidence: DuplicateConfidence; + top: DuplicateOpportunitySummary[]; + }; recommendedCommands: string[]; }; +type DuplicateOpportunitySummary = { + confidence: DuplicateConfidence; + cloneType: DuplicateGroup["cloneType"]; + score: number; + left: DuplicateOpportunitySide; + right: DuplicateOpportunitySide; + rawPairCount: number; + reasons: string[]; +}; + +type DuplicateOpportunitySide = { + file: string; + startLine: number; + endLine: number; + tokenCount: number; + name?: string; +}; + export type InspectCommandContext = { projectRootFs: string; includeRootsAbs: string[]; @@ -182,6 +208,28 @@ async function buildScopedReportGraph( }; } +function summarizeDuplicateSide(side: DuplicateGroup["primaryLeft"]): DuplicateOpportunitySide { + return { + file: side.file, + startLine: side.startLine, + endLine: side.endLine, + tokenCount: side.tokenCount, + ...(side.name ? { name: side.name } : {}), + }; +} + +function summarizeDuplicateGroup(group: DuplicateGroup): DuplicateOpportunitySummary { + return { + confidence: group.confidence, + cloneType: group.cloneType, + score: group.score, + left: summarizeDuplicateSide(group.primaryLeft), + right: summarizeDuplicateSide(group.primaryRight), + rawPairCount: group.rawPairCount, + reasons: group.reasons, + }; +} + function countFilesByLanguage(files: Iterable): Record { const byLanguage: Record = {}; for (const file of files) { @@ -204,6 +252,7 @@ function buildRecommendedInspectCommands( const commands = [ `codegraph hotspots ${rootFlag}${targetSuffix} --limit 20 --json`, `codegraph graph ${rootFlag}${targetSuffix} --json --symbols-detailed --compact-json`, + `codegraph duplicates ${rootFlag}${targetSuffix} --min-confidence medium --limit 20 --include-same-file`, ]; if (hasUnresolvedImports) { commands.push(`codegraph unresolved ${rootFlag}${targetSuffix} --json`); @@ -228,18 +277,33 @@ async function buildInspectReport( limit: number, writeStderrLine: (message: string) => void, ): Promise { - const { graph, indexCache } = await buildScopedReportGraph(projectRoot, includeRoots, files, { - ...(cache ? { cache } : {}), + const useDiskCache = cache === "disk" || cache === undefined; + const indexCache = useDiskCache ? readIndexCacheMetadata(projectRoot) : null; + if (indexCache) { + writeStderrLine(formatIndexCacheMetadata(indexCache)); + } + const index = await buildProjectIndexIncremental(projectRoot, { + files, + cache: cache ?? "disk", discovery, - ...(graphOptions ? { graphOptions } : {}), - nativeMode, - workerOpts, - ...(progressHandler ? { progressHandler } : {}), - writeStderrLine, + ...(progressHandler ? { onProgress: progressHandler } : {}), + ...(nativeMode !== "auto" ? { native: nativeMode } : {}), + ...workerOpts, + ...(graphOptions ? { graph: graphOptions } : {}), }); + const graph = restrictGraphToIncludeRoots(index.graph, includeRoots); const hotspots = getHotspots(graph, { limit }); const unresolved = getUnresolvedImports(graph, { projectRoot }); const cycles = sortDetailedCycles(findDetailedCycles(graph), "priority"); + const duplicateMinConfidence: DuplicateConfidence = "high"; + const duplicateResult = await findDuplicates(index, { + projectRoot, + files, + includeSameFile: true, + minConfidence: duplicateMinConfidence, + minTokens: INSPECT_DUPLICATE_MIN_TOKENS, + limit, + }); const loadError = getNativeTreeSitterLoadError(nativeMode); return { root: normalizePathForDisplay(projectRoot), @@ -272,6 +336,12 @@ async function buildInspectReport( size: cycle.files.length, })), }, + duplicates: { + total: duplicateResult.groups.length + duplicateResult.omittedCounts.groups, + omitted: duplicateResult.omittedCounts.groups, + minConfidence: duplicateMinConfidence, + top: duplicateResult.groups.map(summarizeDuplicateGroup), + }, recommendedCommands: buildRecommendedInspectCommands( projectRoot, includeRoots, diff --git a/src/duplicates.ts b/src/duplicates.ts index 5699d329..e1c9c033 100644 --- a/src/duplicates.ts +++ b/src/duplicates.ts @@ -876,6 +876,10 @@ function unitRefIdentity(ref: DuplicateUnitRef): string { ].join("\u0000"); } +function unitRefRangeIdentity(ref: DuplicateUnitRef): string { + return [ref.file, ref.startLine, ref.endLine, ref.languageId].join("\u0000"); +} + function compareUnitRefs(left: DuplicateUnitRef, right: DuplicateUnitRef): number { const fileCompare = left.file.localeCompare(right.file); if (fileCompare) return fileCompare; @@ -999,14 +1003,45 @@ function orderedGroupKey(left: UnitCluster, right: UnitCluster): string { return `${right.id}\u0000${left.id}`; } -function mergeReasons(suggestions: readonly DuplicateSuggestion[]): string[] { +function orderedUnitPairKey(left: DuplicateUnitRef, right: DuplicateUnitRef): string { + const leftKey = unitRefIdentity(left); + const rightKey = unitRefIdentity(right); + if (leftKey < rightKey) return `${leftKey}\u0000${rightKey}`; + return `${rightKey}\u0000${leftKey}`; +} + +function orderedUnitRangePairKey(left: DuplicateUnitRef, right: DuplicateUnitRef): string { + const leftKey = unitRefRangeIdentity(left); + const rightKey = unitRefRangeIdentity(right); + if (leftKey < rightKey) return `${leftKey}\u0000${rightKey}`; + return `${rightKey}\u0000${leftKey}`; +} + +function suggestionVariantKey(suggestion: DuplicateSuggestion): string { + return [ + orderedUnitPairKey(suggestion.left, suggestion.right), + suggestion.score, + suggestion.confidence, + suggestion.cloneType, + ].join("\u0000"); +} + +function mergeReasonLists(reasonLists: Iterable): string[] { const reasons = new Set(); - for (const suggestion of suggestions) { - for (const reason of suggestion.reasons) reasons.add(reason); + for (const reasonList of reasonLists) { + for (const reason of reasonList) reasons.add(reason); } return Array.from(reasons).sort(); } +function mergeReasons(suggestions: readonly DuplicateSuggestion[]): string[] { + return mergeReasonLists(suggestions.map((suggestion) => suggestion.reasons)); +} + +function mergeGroupReasons(groups: readonly DuplicateGroup[]): string[] { + return mergeReasonLists(groups.map((group) => group.reasons)); +} + function groupForSuggestions( key: string, suggestions: DuplicateSuggestion[], @@ -1063,6 +1098,57 @@ function compareGroups(left: DuplicateGroup, right: DuplicateGroup): number { return compareUnitRefs(left.primaryRight, right.primaryRight); } +function coalesceDuplicateGroups(groups: DuplicateGroup[], variantLimit: number): DuplicateGroup[] { + const groupsByPrimaryPair = new Map(); + for (const group of groups) { + const key = orderedUnitRangePairKey(group.primaryLeft, group.primaryRight); + const existing = groupsByPrimaryPair.get(key); + if (existing) existing.push(group); + else groupsByPrimaryPair.set(key, [group]); + } + + const coalesced: DuplicateGroup[] = []; + for (const [key, grouped] of groupsByPrimaryPair) { + if (grouped.length === 1) { + coalesced.push(grouped[0]!); + continue; + } + + grouped.sort(compareGroups); + const primary = grouped[0]!; + const variantsByKey = new Map(); + for (const group of grouped) { + for (const variant of group.variants) { + variantsByKey.set(suggestionVariantKey(variant), variant); + } + } + const variants = Array.from(variantsByKey.values()).sort(compareSuggestionsForPrimary).slice(0, variantLimit); + const rawPairCount = grouped.reduce((count, group) => count + group.rawPairCount, 0); + let score = primary.score; + let confidence = primary.confidence; + let cloneType = primary.cloneType; + for (const group of grouped.slice(1)) { + score = Math.max(score, group.score); + confidence = bestConfidence(confidence, group.confidence); + cloneType = bestCloneType(cloneType, group.cloneType); + } + coalesced.push({ + ...primary, + id: shortHashText(key), + score, + confidence, + cloneType, + variants, + variantCount: variants.length, + rawPairCount, + omittedVariantCount: Math.max(0, rawPairCount - variants.length), + reasons: mergeGroupReasons(grouped), + }); + } + coalesced.sort(compareGroups); + return coalesced; +} + function groupSuggestions(suggestions: readonly DuplicateSuggestion[], includeRawPairs: boolean): DuplicateGroup[] { const refs = suggestions.flatMap((suggestion) => [suggestion.left, suggestion.right]); const clusters = createUnitClusters(refs); @@ -1085,8 +1171,7 @@ function groupSuggestions(suggestions: readonly DuplicateSuggestion[], includeRa const groups = Array.from(suggestionsByGroup, ([key, value]) => groupForSuggestions(key, value.suggestions, value.left, value.right, variantLimit), ); - groups.sort(compareGroups); - return groups; + return coalesceDuplicateGroups(groups, variantLimit); } /** Finds scored duplicate candidates from an already-built project index. */ diff --git a/src/impact/report-suggestions.ts b/src/impact/report-suggestions.ts index b14f497b..cbb08923 100644 --- a/src/impact/report-suggestions.ts +++ b/src/impact/report-suggestions.ts @@ -817,22 +817,26 @@ async function loadCoverageByFile( return coverage; } -function countCoveredLinesForRange(coverage: FileCoverage | undefined, range: ChangedSymbol["range"]): number { +function countLinesForRange( + coverage: FileCoverage | undefined, + range: ChangedSymbol["range"], + linesForCoverage: (coverage: FileCoverage) => ReadonlySet, +): number { if (!coverage) return 0; let count = 0; + const lines = linesForCoverage(coverage); for (let line = range.start.line; line <= range.end.line; line += 1) { - if (coverage.coveredLines.has(line)) count += 1; + if (lines.has(line)) count += 1; } return count; } +function countCoveredLinesForRange(coverage: FileCoverage | undefined, range: ChangedSymbol["range"]): number { + return countLinesForRange(coverage, range, (entry) => entry.coveredLines); +} + function countTotalLinesForRange(coverage: FileCoverage | undefined, range: ChangedSymbol["range"]): number { - if (!coverage) return 0; - let count = 0; - for (let line = range.start.line; line <= range.end.line; line += 1) { - if (coverage.allLines.has(line)) count += 1; - } - return count; + return countLinesForRange(coverage, range, (entry) => entry.allLines); } export async function collectImpactReportSuggestions( diff --git a/src/indexer/imports/languageSpecific.ts b/src/indexer/imports/languageSpecific.ts index b2bc6afa..56622269 100644 --- a/src/indexer/imports/languageSpecific.ts +++ b/src/indexer/imports/languageSpecific.ts @@ -28,6 +28,8 @@ export function createStatementImportOverrideState(): StatementImportOverrideSta return { handledStatements: new Set() }; } +type ParsedJvmImportStatement = { kind: "star"; from: string } | { kind: "named"; from: string; imported: string }; + function normalizeGoImports(context: LanguageSpecificImportContext): void { const imports = context.getBindings(); if (context.languageId !== "go" || !imports.length) { @@ -251,10 +253,10 @@ async function applyJavaStatementOverride( normalizedStmt: string, typeOnly: boolean, ): Promise { - const parsed = parseJavaImportStatement(normalizedStmt); - if (!parsed) return false; - const local = parsed.kind === "named" ? parsed.imported : undefined; - return await pushJvmImportBinding(context, parsed, local, typeOnly); + return await applyJvmStatementOverride(context, normalizedStmt, typeOnly, parseJavaImportStatement, (parsed) => { + if (parsed.kind === "named") return parsed.imported; + return undefined; + }); } async function applyKotlinStatementOverride( @@ -262,10 +264,22 @@ async function applyKotlinStatementOverride( normalizedStmt: string, typeOnly: boolean, ): Promise { - const parsed = parseKotlinImportStatement(normalizedStmt); + return await applyJvmStatementOverride(context, normalizedStmt, typeOnly, parseKotlinImportStatement, (parsed) => { + if (parsed.kind === "named") return parsed.local; + return undefined; + }); +} + +async function applyJvmStatementOverride( + context: LanguageSpecificImportContext, + normalizedStmt: string, + typeOnly: boolean, + parseStatement: (statement: string) => TParsed | null, + localNameFor: (parsed: TParsed) => string | undefined, +): Promise { + const parsed = parseStatement(normalizedStmt); if (!parsed) return false; - const local = parsed.kind === "named" ? parsed.local : undefined; - return await pushJvmImportBinding(context, parsed, local, typeOnly); + return await pushJvmImportBinding(context, parsed, localNameFor(parsed), typeOnly); } async function pushJvmImportBinding( diff --git a/src/languages/definitions/c.ts b/src/languages/definitions/c.ts index c58f74f6..e2c82f6c 100644 --- a/src/languages/definitions/c.ts +++ b/src/languages/definitions/c.ts @@ -3,12 +3,18 @@ import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; import { cFamilyContainerTypes, + cFamilyControlSplitPoints, + cFamilyCoreExportQueries, + cFamilyCoreLocalQueries, + cFamilyIncludeBindingsQuery, + cFamilyIncludeImportsQuery, cFunctionNameQuery, findAncestor, isFunctionDeclarator, isInAncestorDeclarator, isInField, isInParameterList, + joinQueryPatterns, } from "./cFamily.js"; const FUNCTION_NAME_QUERY = cFunctionNameQuery("chunk.name", false); @@ -56,52 +62,24 @@ export const C_DEF: LanguageDefinition = { captureId: "macro", }, ], - splitPoints: [ - "if_statement", - "for_statement", - "while_statement", - "do_statement", - "switch_statement", - "case_statement", - ], + splitPoints: cFamilyControlSplitPoints, comments: ["comment"], }, graph: { - imports: ` - (preproc_include path: (string_literal) @mod) @stmt - (preproc_include path: (system_lib_string) @mod) @stmt - (preproc_include path: (identifier) @mod) @stmt - `, - exports: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (declaration ${GRAPH_FUNCTION_NAME_QUERY}) - (struct_specifier name: (type_identifier) @name) - (union_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (preproc_def name: (identifier) @name) - (preproc_function_def name: (identifier) @name) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - `, - locals: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (struct_specifier name: (type_identifier) @name) - (union_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - (parameter_declaration declarator: (identifier) @name) - (field_declaration declarator: (field_identifier) @name) - (preproc_def name: (identifier) @name) - (preproc_function_def name: (identifier) @name) - `, - importBindings: ` - (preproc_include path: (string_literal) @from) @stmt - (preproc_include path: (system_lib_string) @from) @stmt - (preproc_include path: (identifier) @from) @stmt - `, + imports: cFamilyIncludeImportsQuery, + exports: joinQueryPatterns([ + ...cFamilyCoreExportQueries(GRAPH_FUNCTION_NAME_QUERY), + `(union_specifier name: (type_identifier) @name)`, + `(preproc_def name: (identifier) @name)`, + `(preproc_function_def name: (identifier) @name)`, + ]), + locals: joinQueryPatterns([ + ...cFamilyCoreLocalQueries(GRAPH_FUNCTION_NAME_QUERY), + `(union_specifier name: (type_identifier) @name)`, + `(preproc_def name: (identifier) @name)`, + `(preproc_function_def name: (identifier) @name)`, + ]), + importBindings: cFamilyIncludeBindingsQuery, }, nodeTypes: { identifier: ["identifier", "field_identifier", "type_identifier"], diff --git a/src/languages/definitions/cFamily.ts b/src/languages/definitions/cFamily.ts index 80c5668d..539df049 100644 --- a/src/languages/definitions/cFamily.ts +++ b/src/languages/definitions/cFamily.ts @@ -9,6 +9,27 @@ export const cFamilyContainerTypes = new Set([ "init_declarator", ]); +export const cFamilyControlSplitPoints = [ + "if_statement", + "for_statement", + "while_statement", + "do_statement", + "switch_statement", + "case_statement", +]; + +export const cFamilyIncludeImportsQuery = ` + (preproc_include path: (string_literal) @mod) @stmt + (preproc_include path: (system_lib_string) @mod) @stmt + (preproc_include path: (identifier) @mod) @stmt + `; + +export const cFamilyIncludeBindingsQuery = ` + (preproc_include path: (string_literal) @from) @stmt + (preproc_include path: (system_lib_string) @from) @stmt + (preproc_include path: (identifier) @from) @stmt + `; + const cFamilyParameterListTypes = new Set(["parameter_declaration", "parameter_list"]); export function cFunctionNameQuery(captureName: string, includeFieldIdentifier: boolean): string { @@ -33,6 +54,37 @@ export function cFunctionNameQuery(captureName: string, includeFieldIdentifier: `; } +export function cFamilyCoreExportQueries(functionNameQuery: string): string[] { + return [ + `(function_definition ${functionNameQuery})`, + `(declaration ${functionNameQuery})`, + `(struct_specifier name: (type_identifier) @name)`, + `(enum_specifier name: (type_identifier) @name)`, + `(type_definition declarator: (type_identifier) @name)`, + `(declaration declarator: (identifier) @name)`, + `(declaration declarator: (init_declarator declarator: (identifier) @name))`, + ]; +} + +export function cFamilyCoreLocalQueries(functionNameQuery: string): string[] { + return [ + `(function_definition ${functionNameQuery})`, + `(struct_specifier name: (type_identifier) @name)`, + `(enum_specifier name: (type_identifier) @name)`, + `(type_definition declarator: (type_identifier) @name)`, + `(declaration declarator: (identifier) @name)`, + `(declaration declarator: (init_declarator declarator: (identifier) @name))`, + `(parameter_declaration declarator: (identifier) @name)`, + `(field_declaration declarator: (field_identifier) @name)`, + ]; +} + +export function joinQueryPatterns(patterns: readonly string[]): string { + return ` + ${patterns.join("\n ")} + `; +} + export function isWithin(node: SyntaxNodeLike, ancestor: SyntaxNodeLike | null): boolean { let current: SyntaxNodeLike | null = node; while (current) { diff --git a/src/languages/definitions/cpp.ts b/src/languages/definitions/cpp.ts index aae4559b..5c548526 100644 --- a/src/languages/definitions/cpp.ts +++ b/src/languages/definitions/cpp.ts @@ -3,12 +3,18 @@ import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; import { cFamilyContainerTypes, + cFamilyControlSplitPoints, + cFamilyCoreExportQueries, + cFamilyCoreLocalQueries, + cFamilyIncludeBindingsQuery, + cFamilyIncludeImportsQuery, cFunctionNameQuery, findAncestor, isFunctionDeclarator, isInAncestorDeclarator, isInField, isInParameterList, + joinQueryPatterns, } from "./cFamily.js"; const FUNCTION_NAME_QUERY = cFunctionNameQuery("chunk.name", true); @@ -56,55 +62,25 @@ export const CPP_DEF: LanguageDefinition = { captureId: "type", }, ], - splitPoints: [ - "if_statement", - "for_statement", - "while_statement", - "do_statement", - "switch_statement", - "case_statement", - "try_statement", - "catch_clause", - ], + splitPoints: [...cFamilyControlSplitPoints, "try_statement", "catch_clause"], comments: ["comment"], }, graph: { - imports: ` - (preproc_include path: (string_literal) @mod) @stmt - (preproc_include path: (system_lib_string) @mod) @stmt - (preproc_include path: (identifier) @mod) @stmt - `, - exports: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (declaration ${GRAPH_FUNCTION_NAME_QUERY}) - (class_specifier name: (type_identifier) @name) - (struct_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (namespace_definition name: (namespace_identifier) @name) - (alias_declaration name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (using_declaration (qualified_identifier name: (identifier) @name)) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - `, - locals: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (class_specifier name: (type_identifier) @name) - (struct_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (namespace_definition name: (namespace_identifier) @name) - (alias_declaration name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - (parameter_declaration declarator: (identifier) @name) - (field_declaration declarator: (field_identifier) @name) - `, - importBindings: ` - (preproc_include path: (string_literal) @from) @stmt - (preproc_include path: (system_lib_string) @from) @stmt - (preproc_include path: (identifier) @from) @stmt - `, + imports: cFamilyIncludeImportsQuery, + exports: joinQueryPatterns([ + ...cFamilyCoreExportQueries(GRAPH_FUNCTION_NAME_QUERY), + `(class_specifier name: (type_identifier) @name)`, + `(namespace_definition name: (namespace_identifier) @name)`, + `(alias_declaration name: (type_identifier) @name)`, + `(using_declaration (qualified_identifier name: (identifier) @name))`, + ]), + locals: joinQueryPatterns([ + ...cFamilyCoreLocalQueries(GRAPH_FUNCTION_NAME_QUERY), + `(class_specifier name: (type_identifier) @name)`, + `(namespace_definition name: (namespace_identifier) @name)`, + `(alias_declaration name: (type_identifier) @name)`, + ]), + importBindings: cFamilyIncludeBindingsQuery, }, nodeTypes: { identifier: ["identifier", "field_identifier", "type_identifier", "namespace_identifier"], diff --git a/src/util/packageExports.ts b/src/util/packageExports.ts new file mode 100644 index 00000000..2343e87e --- /dev/null +++ b/src/util/packageExports.ts @@ -0,0 +1,9 @@ +export function pickPackageExportTarget(target: unknown): string | null { + if (!target) return null; + if (typeof target === "string") return target; + if (typeof target !== "object") return null; + const exportTarget = target as Record; + const candidate = exportTarget.import ?? exportTarget.default ?? exportTarget.require ?? exportTarget.module; + if (typeof candidate === "string") return candidate; + return null; +} diff --git a/src/util/projectFiles.ts b/src/util/projectFiles.ts index 93af25c3..cc645cbb 100644 --- a/src/util/projectFiles.ts +++ b/src/util/projectFiles.ts @@ -275,7 +275,7 @@ async function loadGitignoreRulesForRootAliases(projectRoot: string): Promise boolean, diff --git a/src/util/resolution/node.ts b/src/util/resolution/node.ts index feae0191..fd8ab70a 100644 --- a/src/util/resolution/node.ts +++ b/src/util/resolution/node.ts @@ -1,5 +1,6 @@ import path from "node:path"; import { findFirstExistingResolutionCandidate } from "./findFirstExisting.js"; +import { pickPackageExportTarget } from "../packageExports.js"; import { directoryExists, loadJSON, type MinimalPackageJson } from "../workspace.js"; export async function resolveFromNodeModules( @@ -22,16 +23,6 @@ export async function resolveFromNodeModules( const tryResolveRelative = async (rel: string): Promise => { return await findFirstExistingResolutionCandidate(path.resolve(baseDir, rel), resolutionExtensions); }; - const pickExportTarget = (target: unknown): string | null => { - if (!target) return null; - if (typeof target === "string") return target; - if (typeof target === "object" && target !== null) { - const t = target as Record; - const cand = t.import ?? t.default ?? t.require ?? t.module; - if (typeof cand === "string") return cand; - } - return null; - }; if (pkg?.exports) { const key = subpath ? `./${subpath}` : "."; if (typeof pkg.exports === "string" && key === ".") { @@ -40,7 +31,7 @@ export async function resolveFromNodeModules( } else if (typeof pkg.exports === "object" && pkg.exports !== null) { const map = pkg.exports as Record; const target = map[key] ?? (key === "." ? map["."] : undefined); - const rel = pickExportTarget(target); + const rel = pickPackageExportTarget(target); if (rel) { const hit = await tryResolveRelative(rel); if (hit) return hit; diff --git a/src/util/workspace.ts b/src/util/workspace.ts index ee70220c..ad7c515e 100644 --- a/src/util/workspace.ts +++ b/src/util/workspace.ts @@ -4,6 +4,7 @@ const directoryExistsCache = new Map(); import fsp from "node:fs/promises"; import path from "node:path"; import fg from "fast-glob"; +import { pickPackageExportTarget } from "./packageExports.js"; import { listResolutionCandidates } from "./resolutionCandidates.js"; async function pathMatchesCachedStat( @@ -295,17 +296,6 @@ export function listWorkspacePackageResolutionCandidates( const pushRelativeCandidates = (rel: string): void => { candidates.push(...listResolutionCandidates(path.resolve(baseDir, rel), resolutionExtensions)); }; - const pickExportTarget = (target: unknown): string | null => { - if (!target) return null; - if (typeof target === "string") return target; - if (typeof target === "object" && target !== null) { - const typedTarget = target as Record; - const candidate = typedTarget.import ?? typedTarget.default ?? typedTarget.require ?? typedTarget.module; - if (typeof candidate === "string") return candidate; - } - return null; - }; - if (pkg.exports) { const key = subpath ? `./${subpath}` : "."; if (typeof pkg.exports === "string" && key === ".") { @@ -313,7 +303,7 @@ export function listWorkspacePackageResolutionCandidates( } else if (typeof pkg.exports === "object") { const exportMap = pkg.exports as Record; const target = exportMap[key] ?? (key === "." ? exportMap["."] : undefined); - const rel = pickExportTarget(target); + const rel = pickPackageExportTarget(target); if (rel) { pushRelativeCandidates(rel); } diff --git a/tests/cli-regressions.test.ts b/tests/cli-regressions.test.ts index e1284edc..a1c57fa8 100644 --- a/tests/cli-regressions.test.ts +++ b/tests/cli-regressions.test.ts @@ -814,6 +814,20 @@ describe("CLI regressions", () => { "utf8", ); await fsp.writeFile(path.join(srcDir, "b.ts"), "export const b = 1;\n", "utf8"); + const duplicateSource = ` +export function summarizeInvoices(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); +} +`; + await fsp.writeFile(path.join(srcDir, "c.ts"), duplicateSource, "utf8"); + await fsp.writeFile(path.join(srcDir, "d.ts"), duplicateSource, "utf8"); const stdout = await runCliCommand(["inspect", "--root", tmpDir, srcDir, "--limit", "1"]); const report = JSON.parse(stdout) as { @@ -832,6 +846,18 @@ describe("CLI regressions", () => { hotspots: Array<{ file: string; fanIn: number; fanOut: number; score: number }>; unresolved: { total: number; top: Array<{ name: string; importerCount: number }> }; cycles: { total: number; top: Array<{ files: string[]; priorityScore: number; size: number }> }; + duplicates: { + total: number; + omitted: number; + minConfidence: string; + top: Array<{ + confidence: string; + cloneType: string; + left: { file: string; startLine: number; endLine: number; tokenCount: number; name?: string }; + right: { file: string; startLine: number; endLine: number; tokenCount: number; name?: string }; + rawPairCount: number; + }>; + }; recommendedCommands: string[]; }; @@ -839,20 +865,33 @@ describe("CLI regressions", () => { expect(report.includeRoots).toEqual([normalize(srcDir)]); expect(typeof report.backend.native.available).toBe("boolean"); expect(Array.isArray(report.backend.native.supportedLanguageIds)).toBe(true); - expect(report.files.total).toBe(2); - expect(report.files.byLanguage.ts).toBe(2); + expect(report.files.total).toBe(4); + expect(report.files.byLanguage.ts).toBe(4); expect(report.hotspots.length).toBe(1); expect(report.hotspots[0].file).toBe(normalize(path.join(srcDir, "b.ts"))); expect(report.unresolved.total).toBe(0); expect(report.unresolved.top).toEqual([]); expect(report.cycles.total).toBe(0); expect(report.cycles.top).toEqual([]); + expect(report.duplicates.total).toBeGreaterThan(0); + expect(report.duplicates.omitted).toBeGreaterThanOrEqual(0); + expect(report.duplicates.minConfidence).toBe("high"); + expect(report.duplicates.top).toHaveLength(1); + expect(report.duplicates.top[0]?.confidence).toBe("high"); + expect(report.duplicates.top[0]?.cloneType).toBe("exact"); + expect(report.duplicates.top[0]?.left.file).toBe("src/c.ts"); + expect(report.duplicates.top[0]?.right.file).toBe("src/d.ts"); + expect(report.duplicates.top[0]?.left.tokenCount).toBeGreaterThan(40); + expect(report.duplicates.top[0]?.rawPairCount).toBeGreaterThan(0); expect(report.recommendedCommands).toContain( `codegraph hotspots --root "${normalize(tmpDir)}" "${normalize(srcDir)}" --limit 20 --json`, ); expect(report.recommendedCommands).toContain( `codegraph graph --root "${normalize(tmpDir)}" "${normalize(srcDir)}" --json --symbols-detailed --compact-json`, ); + expect(report.recommendedCommands).toContain( + `codegraph duplicates --root "${normalize(tmpDir)}" "${normalize(srcDir)}" --min-confidence medium --limit 20 --include-same-file`, + ); expect(report.recommendedCommands).toContain( `codegraph doctor "${normalize(path.join(tmpDir, ".codegraph-cache", "index-v1"))}"`, ); diff --git a/tests/duplicates.test.ts b/tests/duplicates.test.ts index e09354f2..2abed138 100644 --- a/tests/duplicates.test.ts +++ b/tests/duplicates.test.ts @@ -491,6 +491,53 @@ export class InvoiceNormalizer { } }); + test("coalesces repeated groups with the same primary ranges", async () => { + const root = await makeTempProject(); + const source = ` +export class InvoiceNormalizer { + normalizeDomestic(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); + } + + normalizeInternational(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); + } +} +`; + + await writeProjectFile(root, "src/a.ts", source); + await writeProjectFile(root, "src/b.ts", source); + + const index = await buildProjectIndex(root); + const result = await findDuplicates(index, { + includeSameFile: true, + minConfidence: "high", + limit: 20, + }); + const primaryPairKeys = result.groups.map((group) => { + const left = `${group.primaryLeft.file}:${group.primaryLeft.startLine}-${group.primaryLeft.endLine}`; + const right = `${group.primaryRight.file}:${group.primaryRight.startLine}-${group.primaryRight.endLine}`; + return [left, right].sort().join("="); + }); + + expect(result.groups.length).toBeGreaterThan(0); + expect(new Set(primaryPairKeys).size).toBe(primaryPairKeys.length); + }); + test("duplicates CLI emits bounded JSON groups", async () => { const root = await makeTempProject(); const source = ` From 47a5098a5beb2ff6c7c955b2d4b4703f14b00dab Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Sat, 23 May 2026 00:22:37 -0400 Subject: [PATCH 13/15] Address PR duplicate cleanup review comments --- README.md | 13 +++++++++++-- tests/helpers/graph.ts | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f9344cdf..b165ae19 100644 --- a/README.md +++ b/README.md @@ -47,17 +47,26 @@ Use Codegraph when you need fast structural answers about a repo without relying } ], "duplicates": { + "total": 1, + "omitted": 0, "minConfidence": "high", "top": [ { "confidence": "high", "cloneType": "exact", + "score": 100, "left": { "file": "src/a.ts", "startLine": 10, "endLine": 24 }, - "right": { "file": "src/b.ts", "startLine": 8, "endLine": 22 } + "right": { "file": "src/b.ts", "startLine": 8, "endLine": 22 }, + "rawPairCount": 1 } ] }, - "recommendedCommands": ["codegraph hotspots --root \"/workspace/codegraph/src\" --limit 20 --json"] + "recommendedCommands": [ + "codegraph hotspots --root \"/workspace/codegraph\" \"/workspace/codegraph/src\" --limit 20 --json", + "codegraph graph --root \"/workspace/codegraph\" \"/workspace/codegraph/src\" --json --symbols-detailed --compact-json", + "codegraph duplicates --root \"/workspace/codegraph\" \"/workspace/codegraph/src\" --min-confidence medium --limit 20 --include-same-file", + "codegraph doctor \"/workspace/codegraph/.codegraph-cache/index-v1\"" + ] } ``` diff --git a/tests/helpers/graph.ts b/tests/helpers/graph.ts index 816044c9..21332a1f 100644 --- a/tests/helpers/graph.ts +++ b/tests/helpers/graph.ts @@ -2,12 +2,12 @@ import type { Edge } from "../../src/index.js"; import { normalizeTestPath } from "./filesystem.js"; export function graphEdgeTargetKey(to: Edge["to"]): string { - if (to.type === "file") return to.path; + if (to.type === "file") return normalizeTestPath(to.path); return to.name; } export function graphEdgeKey(edge: Edge): string { - return `${edge.from}|${graphEdgeTargetKey(edge.to)}|${edge.raw}`; + return `${normalizeTestPath(edge.from)}|${graphEdgeTargetKey(edge.to)}|${edge.raw}`; } export function edgeFrom(filePath: string): (edge: Pick) => boolean { From f05a22240d0a56639b50287a58ff1c2472391ff2 Mon Sep 17 00:00:00 2001 From: Luke Zehrung Date: Sat, 23 May 2026 00:49:38 -0400 Subject: [PATCH 14/15] Address follow-up PR review comments --- README.md | 7 ++++--- src/graphs/symbol-render.ts | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b165ae19..df5125f2 100644 --- a/README.md +++ b/README.md @@ -55,9 +55,10 @@ Use Codegraph when you need fast structural answers about a repo without relying "confidence": "high", "cloneType": "exact", "score": 100, - "left": { "file": "src/a.ts", "startLine": 10, "endLine": 24 }, - "right": { "file": "src/b.ts", "startLine": 8, "endLine": 22 }, - "rawPairCount": 1 + "left": { "file": "src/a.ts", "startLine": 10, "endLine": 24, "tokenCount": 86 }, + "right": { "file": "src/b.ts", "startLine": 8, "endLine": 22, "tokenCount": 86 }, + "rawPairCount": 1, + "reasons": ["identical text", "matching normalized token stream"] } ] }, diff --git a/src/graphs/symbol-render.ts b/src/graphs/symbol-render.ts index 92a4f869..61cb44dd 100644 --- a/src/graphs/symbol-render.ts +++ b/src/graphs/symbol-render.ts @@ -88,7 +88,7 @@ function buildSymbolWithFileRenderModel( for (const [id, node] of sg.nodes) { const symbolId = `s${symbolIndex++}`; symbolIdOf.set(id, symbolId); - symbolNodes.push({ id: symbolId, label: symbolDisplayLabel(node) }); + symbolNodes.push({ id: symbolId, label: symbolDisplayLabel(node, projectRoot) }); } const fileNodes: FileRenderNode[] = []; From 3470d052cc356b1da6a5236f5b047d6270ab0630 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 15:58:13 +0000 Subject: [PATCH 15/15] Fix omitted variant count for coalesced duplicate groups Base coalesced omittedVariantCount on the deduped variant pool rather than summed rawPairCount, so deduping merged groups no longer reports omissions when nothing was truncated (notably variantLimit = Infinity under includeRawPairs). https://claude.ai/code/session_01YPYNDfeufXG585YrMW3GH3 --- src/duplicates.ts | 5 +++-- tests/duplicates.test.ts | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/duplicates.ts b/src/duplicates.ts index e1c9c033..d7e9775d 100644 --- a/src/duplicates.ts +++ b/src/duplicates.ts @@ -1122,7 +1122,8 @@ function coalesceDuplicateGroups(groups: DuplicateGroup[], variantLimit: number) variantsByKey.set(suggestionVariantKey(variant), variant); } } - const variants = Array.from(variantsByKey.values()).sort(compareSuggestionsForPrimary).slice(0, variantLimit); + const dedupedVariants = Array.from(variantsByKey.values()).sort(compareSuggestionsForPrimary); + const variants = dedupedVariants.slice(0, variantLimit); const rawPairCount = grouped.reduce((count, group) => count + group.rawPairCount, 0); let score = primary.score; let confidence = primary.confidence; @@ -1141,7 +1142,7 @@ function coalesceDuplicateGroups(groups: DuplicateGroup[], variantLimit: number) variants, variantCount: variants.length, rawPairCount, - omittedVariantCount: Math.max(0, rawPairCount - variants.length), + omittedVariantCount: Math.max(0, dedupedVariants.length - variants.length), reasons: mergeGroupReasons(grouped), }); } diff --git a/tests/duplicates.test.ts b/tests/duplicates.test.ts index 2abed138..4bea9651 100644 --- a/tests/duplicates.test.ts +++ b/tests/duplicates.test.ts @@ -114,6 +114,9 @@ export function normalizeInvoiceRows(rows: Array<{ amount: number; tax: number } expect(result.groups[0]?.primaryLeft.kind).toBe("symbol"); expect(result.groups[0]?.primaryRight.kind).toBe("symbol"); expect(result.suggestions?.length).toBeGreaterThan(result.groups.length); + // With includeRawPairs the variant list is unbounded, so coalescing must not + // report omitted variants from deduping merged groups. + expect(result.groups[0]?.omittedVariantCount).toBe(0); }); test("omits raw unit pairs unless requested", async () => {