diff --git a/README.md b/README.md index 4fecf0bd..df5125f2 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,28 @@ Use Codegraph when you need fast structural answers about a repo without relying "score": 59 } ], - "recommendedCommands": ["codegraph hotspots --root \"/workspace/codegraph/src\" --limit 20 --json"] + "duplicates": { + "total": 1, + "omitted": 0, + "minConfidence": "high", + "top": [ + { + "confidence": "high", + "cloneType": "exact", + "score": 100, + "left": { "file": "src/a.ts", "startLine": 10, "endLine": 24, "tokenCount": 86 }, + "right": { "file": "src/b.ts", "startLine": 8, "endLine": 22, "tokenCount": 86 }, + "rawPairCount": 1, + "reasons": ["identical text", "matching normalized token stream"] + } + ] + }, + "recommendedCommands": [ + "codegraph hotspots --root \"/workspace/codegraph\" \"/workspace/codegraph/src\" --limit 20 --json", + "codegraph graph --root \"/workspace/codegraph\" \"/workspace/codegraph/src\" --json --symbols-detailed --compact-json", + "codegraph duplicates --root \"/workspace/codegraph\" \"/workspace/codegraph/src\" --min-confidence medium --limit 20 --include-same-file", + "codegraph doctor \"/workspace/codegraph/.codegraph-cache/index-v1\"" + ] } ``` diff --git a/codegraph-skill/codegraph/SKILL.md b/codegraph-skill/codegraph/SKILL.md index 9550586a..7703838c 100644 --- a/codegraph-skill/codegraph/SKILL.md +++ b/codegraph-skill/codegraph/SKILL.md @@ -53,7 +53,7 @@ Then choose the narrowest follow-up command: - Artifact bundle: `codegraph artifact build --root . --out codegraph-out --json` - MCP server: `codegraph mcp serve --root . --stdio` or `codegraph mcp serve --root . --port 7331` -Use `--json` when the output will feed later reasoning, scripts, or another agent step. `search` is deterministic and returns project-relative explainable handles, evidence, neighbors, follow-up commands, result counts, limits, and omission counts. `explain` accepts those handles plus file paths, symbol names, and SQL object names, then returns bounded symbols, dependencies, reverse dependencies, references, snippets, SQL relation facts, changed-context review tasks/candidate tests, explicit limits, omission counts, and next commands. Generated command strings POSIX-shell-quote dynamic arguments when needed. For SQL objects, use search handles or schema-qualified names when basenames may be ambiguous. Reference and snippet omission counts are lower bounds after bounded navigation hits its cap. `artifact build` writes a durable SQLite, self-describing project-relative graph JSON, report, questions with unique stable-handle command IDs, and manifest bundle for handoff while excluding its own in-repo output directory and linked outside-root files. With `--force`, recognizable stale artifact files are removed, unrelated operator files are preserved, and unrecognized reserved-name collisions are refused. `codegraph doctor ` recognizes manifest-backed artifact bundle directories and reports expected artifact presence. `mcp serve` exposes the same primitives as read-only MCP tools by default over stdio, or over Streamable HTTP with `--port ` at `/mcp`; HTTP binds to `127.0.0.1` unless `--host ` is passed, validates Host headers, and allows loopback Host headers for wildcard binds. File/artifact paths are confined after realpath resolution, SQLite query results are row- and byte-bounded, synthetic payload functions are rejected, and `--allow-build` is required before an agent may write artifact output. +Use `--json` when the output will feed later reasoning, scripts, or another agent step. `inspect` includes compact high-confidence duplicate opportunities plus a recommended `duplicates` command for full grouped JSON. `search` is deterministic and returns project-relative explainable handles, evidence, neighbors, follow-up commands, result counts, limits, and omission counts. `explain` accepts those handles plus file paths, symbol names, and SQL object names, then returns bounded symbols, dependencies, reverse dependencies, references, snippets, SQL relation facts, changed-context review tasks/candidate tests, explicit limits, omission counts, and next commands. Generated command strings POSIX-shell-quote dynamic arguments when needed. For SQL objects, use search handles or schema-qualified names when basenames may be ambiguous. Reference and snippet omission counts are lower bounds after bounded navigation hits its cap. `artifact build` writes a durable SQLite, self-describing project-relative graph JSON, report, questions with unique stable-handle command IDs, and manifest bundle for handoff while excluding its own in-repo output directory and linked outside-root files. With `--force`, recognizable stale artifact files are removed, unrelated operator files are preserved, and unrecognized reserved-name collisions are refused. `codegraph doctor ` recognizes manifest-backed artifact bundle directories and reports expected artifact presence. `mcp serve` exposes the same primitives as read-only MCP tools by default over stdio, or over Streamable HTTP with `--port ` at `/mcp`; HTTP binds to `127.0.0.1` unless `--host ` is passed, validates Host headers, and allows loopback Host headers for wildcard binds. File/artifact paths are confined after realpath resolution, SQLite query results are row- and byte-bounded, synthetic payload functions are rejected, and `--allow-build` is required before an agent may write artifact output. Numeric options such as `--limit`, `--threads`, `--depth`, `--max-refs`, and token bounds must be integers in their documented ranges; invalid numeric values fail instead of being silently clamped or ignored. @@ -200,6 +200,7 @@ For git-provider impact and git-scoped review/index/graph commands, `WORKTREE` c - Start here when you need an architecture summary: `codegraph inspect ./src --limit 20` + Includes compact high-confidence duplicate opportunities and follow-up commands. - Dependencies of a file: `codegraph deps ` - Reverse dependencies: diff --git a/docs/cli.md b/docs/cli.md index a48299c7..87102313 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -96,6 +96,8 @@ codegraph index --report codegraph review --report --report-file review.report.json ``` +`inspect` emits bounded hotspots, unresolved imports, cycles, and high-confidence duplicate opportunities. Duplicate opportunities are intentionally compact and include file ranges, confidence, clone type, score, token counts, and raw pair counts; run the recommended `duplicates` command for full grouped JSON. + Graph, index, and review reports include `backend.native.byLanguage` so native usage and fallback remain visible per language. Build reports also include `backend.parser` when syntax-tree backend degradation leaves files without parser context. Reports also include `graph.fallbackImportExtraction.byLanguage` and `byReason` when regex import extraction is used. Review JSON reports `diagnostics.symbolMappingParseFailures`, `diagnostics.missingFiles`, `changedFiles[].status` as `updated`, `deleted`, or `missing`, and `sqlContext` when changed SQL files or changed SQL literals make SQL artifact facts relevant. ### Symbols, navigation, grep, and chunking diff --git a/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md new file mode 100644 index 00000000..d0153913 --- /dev/null +++ b/docs/superpowers/plans/2026-05-23-eliminate-duplicate-findings.md @@ -0,0 +1,184 @@ +# Duplicate Findings Refactor Plan + +## Baseline Scan + +Generated from the repo-local CLI on branch `elim-dups` after rebuilding `dist`. + +Commands: + +```bash +node ./dist/cli.js duplicates --root . ./src --min-confidence medium --limit 100 --include-same-file +node ./dist/cli.js duplicates --root . . --min-confidence high --limit 200 --include-same-file +``` + +Results: + +- `src` scan: 100 returned groups, 1234 omitted groups, 1669 omitted raw suggestions. +- whole-repo scan: 200 returned groups, 2097 omitted groups, 2995 omitted raw suggestions. +- Whole-repo top results are dominated by test helpers and setup snippets. +- Product-code top results are concentrated in `src/graphs`, `src/cli`, `src/impact`, `src/chunking`, `src/mcp`, and language definitions. + +Post-refactor comparison: + +- `src` scan: 100 returned groups, 869 omitted groups, 1611 omitted raw suggestions. +- whole-repo scan: 200 returned groups, 899 omitted groups, 2904 omitted raw suggestions. +- The previous `src/graphs/symbol-render.ts`, `src/cli/graph.ts`, chunk tokenizer, AST range, CSS/LESS, and first-pass test helper findings dropped out of the top product-code results. +- The analyzer no longer ranks the large C/C++ query chunk against tiny language snippets as a top medium/high finding. + +The grouped output is usable for triage. Remaining caveats: + +- Some chunk findings are sub-ranges of a larger duplicate and should be handled through the larger refactor. +- Some renamed findings compare very different-sized chunks; those are analyzer noise unless a human can identify a clear shared behavior. +- Repeated declarative language-definition shapes are not automatically bad duplication. + +## Refactor Checklist + +### Product Code + +- [x] Refactor symbol graph renderers. + - Findings: `src/graphs/symbol-render.ts:84-149` and `src/graphs/symbol-render.ts:151-218`. + - Extract shared file-node, symbol-node, and graph-edge collection. + - Keep Mermaid and DOT formatting separate so escaping and syntax remain explicit. + - Add or update renderer tests if output ordering or formatting can change. + +- [x] Refactor compact graph symbol projection. + - Findings: `src/cli/graph.ts:121-146` and `src/cli/graph.ts:169-194`. + - Extract shared file index, symbol index, symbol array, and sorted symbol edge construction. + - Keep the difference between full compact graph output and symbols-only output visible at the call site. + +- [x] Share AST range conversion. + - Findings: `src/impact/suggestions.ts:372-385` and `src/util/ast.ts:32-41`. + - Reuse `toRange` from `src/util/ast.ts` or move the non-null conversion into a shared helper. + - Preserve the existing null-node behavior used by current callers. + +- [x] Share default token counting. + - Findings: `src/chunking/chunkFile.ts:28-31` and `src/chunking/chunkTextFile.ts:21-24`. + - Introduce a small shared tokenizer helper in `src/chunking`. + - Keep public chunking options unchanged. + +- [x] Consolidate dependency and reverse-dependency wrappers where useful. + - Findings: `src/agent-tools.ts:366-441`, `src/mcp/server.ts:228-246`, and `src/mcp/tools.ts:88-114`. + - Prefer a small result-mapping helper over forcing identical public response shapes. + - Verify both CLI/agent tools and MCP tools still expose `dependencies` and `reverseDependencies` separately. + +- [x] Revisit CSS and Less language definitions. + - Findings: `src/languages/definitions/css.ts:9-17` and `src/languages/definitions/less.ts:9-17`. + - Extract shared CSS-family structure/query pieces only if it keeps each language definition readable. + - Consider whether Vue/Svelte stylesheet definitions can reuse the same helper without hiding language-specific behavior. + +- [x] Evaluate JS fallback type duplication. + - Findings: `packages/codegraph-js-fallback/js-fallback.d.ts` and `src/jsFallback.ts`. + - Prefer generating or importing a single declaration source if package boundaries allow it. + - Leave as-is if the duplication is required to keep the fallback package self-contained. + - Decision: leave as-is because the fallback package publishes a self-contained `js-fallback.d.ts`. + +- [x] Review smaller wrapper candidates opportunistically. + - `src/cli/artifact.ts`, `src/cli/explain.ts`, and `src/cli/search.ts` command context interfaces. + - `src/cli/projectFile.ts` and `src/session.ts` file-input resolution. + - `src/cli/options.ts` positive and non-negative integer parsers. + - `src/sqlite/canned-query.ts` direct dependencies and dependents queries. + - Decision: extracted the common agent CLI context and canned-query edge loader; left session file resolution and option parsing as-is because their existing helpers already separate concerns. + +### Tests + +- [x] Add a shared temporary directory helper for tests. + - Findings: repeated `mkTmpDir` helpers across dynamic resolution, fast graph edge cases, node modules, resolution precedence, robust fast graph, TS paths workspace, cache, and parsed-cache tests. + - Put it near existing test helpers and migrate only obvious identical helpers first. + +- [x] Add shared edge-normalization helpers for graph tests. + - Findings: `tests/fast-graph.test.ts`, `tests/monorepo-fast-graph.test.ts`, and related fast graph tests. + - Replace duplicated `normEdge`, `toKey`, and slash normalization only where it improves readability. + +- [x] Consolidate repeated SQLite/test database setup blocks. + - Findings: repeated chunks in `tests/sqlite.test.ts`, `tests/sql-artifact-graph.test.ts`, and `tests/sql-review-context.test.ts`. + - Extract helpers that describe domain intent, not just line-for-line setup. + +- [x] Leave intentional fixture repetition alone. + - Repeated sample snippets are often test data, not production maintenance debt. + - Do not refactor setup that would make an individual test harder to read. + +### Analyzer Follow-Ups + +- [x] Consider a length-ratio guard for high-confidence renamed groups. + - Example noise: large C/C++ query chunks paired with tiny language-definition snippets. + - The detector already reports `lengthRatio`; ranking or confidence can use it more aggressively. + +- [x] Consider collapsing adjacent same-file chunk findings under a larger group. + - Example: multiple `src/cli/graph.ts` chunk findings are one underlying helper extraction. + - Keep raw variants available through `--raw-pairs`. + +## Follow-Up Scan: 2026-05-23 + +Generated after the first duplicate cleanup pass on branch `elim-dups`. + +Commands: + +```bash +node ./dist/cli.js duplicates --root . ./src --min-confidence medium --limit 120 --include-same-file +node ./dist/cli.js duplicates --root . . --min-confidence high --limit 120 --include-same-file +node ./dist/cli.js inspect --root . ./src --limit 8 +node ./dist/cli.js review --base main --head HEAD --summary +node ./dist/cli.js doctor +``` + +Results: + +- `src` scan: 120 returned groups, 849 omitted groups, 1611 omitted raw suggestions. +- whole-repo scan: 120 returned groups, 973 omitted groups, 2898 omitted raw suggestions. +- `doctor` reports native runtime availability and artifact health only; it does not surface duplicate cleanup opportunities. +- `review --summary` reports diff risk and candidate tests only; it does not surface duplicate cleanup opportunities. +- `inspect` reports hotspots, unresolved imports, cycles, and recommended commands only; it is the best current home for an at-a-glance duplicate opportunity section. + +### Product Output Follow-Ups + +- [x] Coalesce repeated grouped duplicate findings with the same primary ranges. + - Finding: `src/indexer/imports/languageSpecific.ts:249-258 applyJavaStatementOverride` vs `src/indexer/imports/languageSpecific.ts:260-269 applyKotlinStatementOverride` appeared multiple times with the same primary pair. + - Preserve raw evidence counts through `rawPairCount` and bounded `variants`. + - Keep `--raw-pairs` as the explicit escape hatch for low-level pair inspection. + +- [x] Surface bounded duplicate opportunities in `inspect` output. + - Include high-signal fields only: confidence, clone type, score, files/ranges, token counts, and raw pair count. + - Keep the summary bounded by `--limit`. + - Add a `duplicates` follow-up command to `recommendedCommands` so agents can drill into full grouped JSON. + - Leave `doctor` focused on package/runtime/artifact health. + +### Remaining Source Cleanup Candidates + +- [x] Share C/C++ language-definition scaffolding where it stays readable. + - Findings: `src/languages/definitions/c.ts:14-147` and `src/languages/definitions/cpp.ts:14-165`. + - Preserve C-specific macro support and C++ namespace/class/alias/using/lambda behavior. + +- [x] Share package export target selection between node-module and workspace resolution. + - Findings: `src/util/resolution/node.ts:22-34 tryResolveRelative` and `src/util/workspace.ts:298-307 pickExportTarget`. + - Prefer a small resolver helper over duplicating `exports` target precedence. + +- [x] Share Java/Kotlin import override plumbing. + - Findings: `src/indexer/imports/languageSpecific.ts:249-258` and `src/indexer/imports/languageSpecific.ts:260-269`. + - Keep Java and Kotlin parsing differences explicit at the call site. + +- [x] Share range line-counting for coverage suggestions. + - Findings: `src/impact/report-suggestions.ts:820-827` and `src/impact/report-suggestions.ts:829-836`. + - Preserve zero-count behavior when no coverage is available. + +- [x] Share file-like agent handle parsing. + - Findings: `src/agent/handles.ts:54-60` and `src/agent/handles.ts:82-88`. + - Keep public handle prefixes and return types unchanged. + +- [x] Share discovery glob relative-path matching. + - Findings: `src/cli/context.ts:25-35` and `src/util/projectFiles.ts:278-288`. + - Keep CLI include/ignore globs relative to active scan roots and config globs relative to project roots. + +Follow-up verification: + +- [x] `src` scan reports `0` repeated displayed primary pairs in the first 80 medium-or-higher groups. +- [x] `inspect --root . ./src --limit 5` emits compact high-confidence duplicate opportunities and a `duplicates` follow-up command. +- [x] Focused duplicate, inspect, C/C++, resolution, references, impact-suggestion, and agent-search tests pass. +- [x] Full test suite passes. + +## Verification Plan + +- [x] Run `npm run build`. +- [x] Run `npm run lint`. +- [x] Run focused tests for touched areas. +- [x] Run `npm test` before pushing a completed refactor batch. +- [x] Re-run the duplicate analyzer and compare top findings against this baseline. diff --git a/src/agent-tools.ts b/src/agent-tools.ts index a87a0541..be1d9150 100644 --- a/src/agent-tools.ts +++ b/src/agent-tools.ts @@ -115,6 +115,24 @@ type ToolDependencyListResult = error: string; reason?: "outside_project_root"; }; +type ToolDependencyOkResult = Extract; +type ToolDependencyFailureResult = Exclude; +type ToolDependenciesResponse = + | { + status: "ok"; + file: string; + dependencies: ToolDependencyEntry[]; + truncated: boolean; + } + | ToolDependencyFailureResult; +type ToolReverseDependenciesResponse = + | { + status: "ok"; + file: string; + dependents: ToolDependencyEntry[]; + truncated: boolean; + } + | ToolDependencyFailureResult; /** File-level hotspot metrics returned by `tool_getHotspots`. */ export type ToolHotspotEntry = { @@ -367,37 +385,16 @@ export async function tool_getDependencies( root: string, filePath: string, options: ToolDependencyOptions = {}, -): Promise< - | { - status: "ok"; - file: string; - dependencies: ToolDependencyEntry[]; - truncated: boolean; - } - | { - status: "not_found"; - file: string; - reason: "file_not_found" | "file_not_indexed"; - error: string; - } - | { - status: "error"; - error: string; - reason?: "outside_project_root"; - } -> { +): Promise { const result = await collectToolDependencyEntries(root, filePath, options, (index, file, traversalOptions) => getDependencies(index.graph, file, traversalOptions), ); - if (result.status !== "ok") { - return result; - } - return { + return mapToolDependencyResult(result, (ok) => ({ status: "ok", - file: result.file, - dependencies: result.entries, - truncated: result.truncated, - }; + file: ok.file, + dependencies: ok.entries, + truncated: ok.truncated, + })); } /** @@ -407,37 +404,24 @@ export async function tool_getReverseDependencies( root: string, filePath: string, options: ToolDependencyOptions = {}, -): Promise< - | { - status: "ok"; - file: string; - dependents: ToolDependencyEntry[]; - truncated: boolean; - } - | { - status: "not_found"; - file: string; - reason: "file_not_found" | "file_not_indexed"; - error: string; - } - | { - status: "error"; - error: string; - reason?: "outside_project_root"; - } -> { +): Promise { const result = await collectToolDependencyEntries(root, filePath, options, (index, file, traversalOptions) => getReverseDependencies(index.graph, file, traversalOptions), ); - if (result.status !== "ok") { - return result; - } - return { + return mapToolDependencyResult(result, (ok) => ({ status: "ok", - file: result.file, - dependents: result.entries, - truncated: result.truncated, - }; + file: ok.file, + dependents: ok.entries, + truncated: ok.truncated, + })); +} + +function mapToolDependencyResult( + result: ToolDependencyListResult, + buildOkResult: (result: ToolDependencyOkResult) => T, +): T | ToolDependencyFailureResult { + if (result.status !== "ok") return result; + return buildOkResult(result); } async function collectToolDependencyEntries( diff --git a/src/agent/handles.ts b/src/agent/handles.ts index 5596080b..c32fe014 100644 --- a/src/agent/handles.ts +++ b/src/agent/handles.ts @@ -51,14 +51,19 @@ export function formatAgentFileHandle(handle: AgentFileHandle): string { return ["file", encodeURIComponent(handle.file)].join(":"); } -export function parseAgentFileHandle(handle: string): AgentFileHandle | null { - if (!handle.startsWith("file:")) return null; - const encodedFile = handle.slice("file:".length); +function parseAgentFileLikeHandle(handle: string, prefix: "file" | "graph"): AgentFileHandle | null { + const handlePrefix = `${prefix}:`; + if (!handle.startsWith(handlePrefix)) return null; + const encodedFile = handle.slice(handlePrefix.length); const file = decodeHandlePart(encodedFile) ?? encodedFile; if (!file) return null; return { file }; } +export function parseAgentFileHandle(handle: string): AgentFileHandle | null { + return parseAgentFileLikeHandle(handle, "file"); +} + export function formatAgentChunkHandle(handle: AgentChunkHandle): string { return ["chunk", encodeURIComponent(handle.file), String(handle.line)].join(":"); } @@ -80,11 +85,7 @@ export function formatAgentGraphHandle(handle: AgentFileHandle): string { } export function parseAgentGraphHandle(handle: string): AgentFileHandle | null { - if (!handle.startsWith("graph:")) return null; - const encodedFile = handle.slice("graph:".length); - const file = decodeHandlePart(encodedFile) ?? encodedFile; - if (!file) return null; - return { file }; + return parseAgentFileLikeHandle(handle, "graph"); } export function formatAgentSqlHandle(handle: AgentSqlHandle): string { diff --git a/src/chunking/chunkFile.ts b/src/chunking/chunkFile.ts index bfd99ee4..3c13ff23 100644 --- a/src/chunking/chunkFile.ts +++ b/src/chunking/chunkFile.ts @@ -3,6 +3,7 @@ import { collectChunkBlockGroups } from "./chunkBlocks.js"; import { getChunkMatches } from "./chunkMatches.js"; import { fillGapsWithMiscChunks, mergeSmallChunks } from "./chunkMerge.js"; import { splitLargeBlockSimple, splitLargeBlockUsingInnerBlocks } from "./chunkSplit.js"; +import { countWhitespaceTokens } from "./tokenizer.js"; import type { Chunk, ChunkTokenizer } from "./types.js"; export type { Chunk } from "./types.js"; @@ -25,11 +26,6 @@ export interface ChunkFileOptions { tokenizer?: ChunkTokenizer | undefined; } -function defaultTokenizer(text: string): number { - if (!text.trim()) return 0; - return text.trim().split(/\s+/).length; -} - /** * Splits code into semantic chunks using Tree-sitter queries. * Chunks respect token budgets and preserve semantic boundaries like functions, classes, and methods. @@ -38,7 +34,7 @@ function defaultTokenizer(text: string): number { * @returns Array of semantic chunks */ export function chunkFile(opts: ChunkFileOptions): Chunk[] { - const { language, source, filePath, minTokens = 150, maxTokens = 400, tokenizer = defaultTokenizer } = opts; + const { language, source, filePath, minTokens = 150, maxTokens = 400, tokenizer = countWhitespaceTokens } = opts; const matches = getChunkMatches(language, source, filePath); const newlineOffsets = collectNewlineOffsets(source); const { mainBlocks, innerBlocks, comments } = collectChunkBlockGroups(language, matches); diff --git a/src/chunking/chunkTextFile.ts b/src/chunking/chunkTextFile.ts index 90e4d200..ed72e6ae 100644 --- a/src/chunking/chunkTextFile.ts +++ b/src/chunking/chunkTextFile.ts @@ -1,4 +1,6 @@ import type { Chunk } from "./chunkFile.js"; +import { countWhitespaceTokens } from "./tokenizer.js"; +import type { ChunkTokenizer } from "./types.js"; /** * Options for text file chunking (JSON, YAML, config files, etc.). @@ -15,12 +17,7 @@ export interface TextChunkOptions { /** Maximum tokens per chunk (default: 400). Larger chunks are split. */ maxTokens?: number; /** Custom token counting function (default: whitespace-based) */ - tokenizer?: ((text: string) => number) | undefined; -} - -function defaultTokenizer(text: string): number { - if (!text.trim()) return 0; - return text.trim().split(/\s+/).length; + tokenizer?: ChunkTokenizer | undefined; } /** @@ -31,7 +28,7 @@ function defaultTokenizer(text: string): number { * @returns Array of text chunks */ export function chunkTextFile(opts: TextChunkOptions): Chunk[] { - const { source, filePath, languageId = "text", maxTokens = 400, tokenizer = defaultTokenizer } = opts; + const { source, filePath, languageId = "text", maxTokens = 400, tokenizer = countWhitespaceTokens } = opts; const lines = source.split(/\r?\n/); const chunks: Chunk[] = []; diff --git a/src/chunking/tokenizer.ts b/src/chunking/tokenizer.ts new file mode 100644 index 00000000..bc5fa662 --- /dev/null +++ b/src/chunking/tokenizer.ts @@ -0,0 +1,4 @@ +export function countWhitespaceTokens(text: string): number { + if (!text.trim()) return 0; + return text.trim().split(/\s+/).length; +} diff --git a/src/cli/artifact.ts b/src/cli/artifact.ts index 34b6244d..6448f772 100644 --- a/src/cli/artifact.ts +++ b/src/cli/artifact.ts @@ -1,16 +1,8 @@ import { buildCodegraphArtifact } from "../agent/artifact.js"; +import type { CliAgentCommandContext } from "./context.js"; import { ARTIFACT_HELP_TEXT } from "./help.js"; -export type ArtifactCommandContext = { - positionals: string[]; - root: string; - getOpt: (name: string) => string | undefined; - hasFlag: (name: string) => boolean; - writeJSONLine: (value: unknown) => void; - writeStdoutLine: (message: string) => void; - writeStderrLine: (message: string) => void; - exit: (code: number) => never; -}; +export type ArtifactCommandContext = CliAgentCommandContext; export async function handleArtifactCommand(context: ArtifactCommandContext): Promise { const artifactCommand = context.positionals[0]; diff --git a/src/cli/context.ts b/src/cli/context.ts index cd80c47d..ff965fcd 100644 --- a/src/cli/context.ts +++ b/src/cli/context.ts @@ -6,7 +6,7 @@ import picomatch from "picomatch"; import type { BuildReport } from "../indexer/types.js"; import type { ReviewBuildReport } from "../review.js"; import { normalizePath, resolveFilePathFromRoot } from "../util/paths.js"; -import { type ProjectFileDiscoveryOptions } from "../util/projectFiles.js"; +import { matchesDiscoveryGlob, type ProjectFileDiscoveryOptions } from "../util/projectFiles.js"; import { isRelativePathInside } from "../util/projectFiles.js"; import { isCliValueOption } from "./options.js"; @@ -22,18 +22,6 @@ export function isCliDiscoveryRelativePathInside(relativePath: string): boolean return isRelativePathInside(relativePath); } -function matchesCliDiscoveryGlob( - absolutePath: string, - scanRoot: string, - matcher: (relativePath: string) => boolean, -): boolean { - const relativePath = path.relative(scanRoot, absolutePath); - if (!isCliDiscoveryRelativePathInside(relativePath)) { - return false; - } - return matcher(normalizePath(relativePath)); -} - export function filterFilesByCliDiscoveryGlobs( files: readonly string[], scanRoot: string, @@ -55,11 +43,11 @@ export function filterFilesByCliDiscoveryGlobs( return files.filter((filePath) => { if ( includeMatchers.length && - !includeMatchers.some((matcher) => matchesCliDiscoveryGlob(filePath, scanRoot, matcher)) + !includeMatchers.some((matcher) => matchesDiscoveryGlob(filePath, scanRoot, matcher)) ) { return false; } - return !ignoreMatchers.some((matcher) => matchesCliDiscoveryGlob(filePath, scanRoot, matcher)); + return !ignoreMatchers.some((matcher) => matchesDiscoveryGlob(filePath, scanRoot, matcher)); }); } @@ -71,6 +59,17 @@ export type CliRuntime = { readStdin: () => Promise; }; +export type CliAgentCommandContext = { + positionals: string[]; + root: string; + getOpt: (name: string) => string | undefined; + hasFlag: (name: string) => boolean; + writeJSONLine: (value: unknown) => void; + writeStdoutLine: (message: string) => void; + writeStderrLine: (message: string) => void; + exit: (code: number) => never; +}; + type CliContext = { runtime: CliRuntime; stderrFilePath: string | undefined; diff --git a/src/cli/explain.ts b/src/cli/explain.ts index 471b38e6..0db27b0c 100644 --- a/src/cli/explain.ts +++ b/src/cli/explain.ts @@ -1,17 +1,9 @@ import { explainCodegraphTarget, formatAgentExplanation } from "../agent/explain.js"; +import type { CliAgentCommandContext } from "./context.js"; import { EXPLAIN_HELP_TEXT } from "./help.js"; import { parsePositiveIntegerOption } from "./options.js"; -export type ExplainCommandContext = { - positionals: string[]; - root: string; - getOpt: (name: string) => string | undefined; - hasFlag: (name: string) => boolean; - writeJSONLine: (value: unknown) => void; - writeStdoutLine: (message: string) => void; - writeStderrLine: (message: string) => void; - exit: (code: number) => never; -}; +export type ExplainCommandContext = CliAgentCommandContext; export async function handleExplainCommand(context: ExplainCommandContext): Promise { const target = context.positionals.join(" ").trim(); diff --git a/src/cli/graph.ts b/src/cli/graph.ts index b0c66e48..a7c19e09 100644 --- a/src/cli/graph.ts +++ b/src/cli/graph.ts @@ -34,6 +34,18 @@ type CompactFileEdge = { typeOnly?: boolean; }; type CompactSymbolEdge = { from: number; to: number; label?: string }; +type CompactSymbolNode = { + id: number; + file: number; + name: string; + kind: string; +}; +type CompactSymbolProjection = { + files: string[]; + symbols: CompactSymbolNode[]; + symbolEdges: CompactSymbolEdge[]; + symbolIdIndex: string[]; +}; type CommandTimingReport = { totalMs?: number; @@ -113,49 +125,26 @@ function compactGraphWithSymbols(fgraph: Graph, sgraph: SymbolGraph, stable = fa }); } - const symbolIds = [...sgraph.nodes.keys()]; - if (stable) symbolIds.sort(); - const symbolIndex = new Map(); - for (let i = 0; i < symbolIds.length; i++) symbolIndex.set(symbolIds[i]!, i); - - const symbols = symbolIds.map((id) => { - const n = sgraph.nodes.get(id)!; - return { - id: symbolIndex.get(id)!, - file: fileIndex.get(n.file)!, - name: n.name, - kind: n.kind, - }; - }); - - const symbolEdges: CompactSymbolEdge[] = sgraph.edges.map((e) => ({ - from: symbolIndex.get(e.from)!, - to: symbolIndex.get(e.to)!, - ...(e.label ? { label: e.label } : {}), - })); - if (stable) { - symbolEdges.sort((a, b) => { - const byFrom = a.from - b.from; - if (byFrom) return byFrom; - const byTo = a.to - b.to; - if (byTo) return byTo; - const al = String(a.label ?? ""); - const bl = String(b.label ?? ""); - if (al !== bl) return al < bl ? -1 : 1; - return 0; - }); - } + const symbolProjection = buildCompactSymbolProjection(files, sgraph, stable); return { files, fileEdges, - symbols, - symbolEdges, - symbolIdIndex: symbolIds, + symbols: symbolProjection.symbols, + symbolEdges: symbolProjection.symbolEdges, + symbolIdIndex: symbolProjection.symbolIdIndex, }; } function compactSymbolsOnly(allFiles: string[], sgraph: SymbolGraph, stable = false) { + return buildCompactSymbolProjection(allFiles, sgraph, stable); +} + +function buildCompactSymbolProjection( + allFiles: string[], + sgraph: SymbolGraph, + stable = false, +): CompactSymbolProjection { const files = [...allFiles]; if (stable) files.sort(); const fileIndex = new Map(); diff --git a/src/cli/inspect.ts b/src/cli/inspect.ts index a56ee9a5..22d954e9 100644 --- a/src/cli/inspect.ts +++ b/src/cli/inspect.ts @@ -1,5 +1,6 @@ import fs from "node:fs"; import path from "node:path"; +import { findDuplicates, type DuplicateConfidence, type DuplicateGroup } from "../duplicates.js"; import { collectGraph } from "../graph-builder.js"; import { findDetailedCycles, getUnresolvedImports, sortDetailedCycles } from "../graphs/queries.js"; import { getHotspots } from "../graphs/hotspots.js"; @@ -19,6 +20,7 @@ import { type ProjectFileDiscoveryOptions } from "../util/projectFiles.js"; import { parseCacheModeOption, parsePositiveIntegerOption } from "./options.js"; type CacheMode = "off" | "memory" | "disk"; +const INSPECT_DUPLICATE_MIN_TOKENS = 60; type IndexCacheMetadata = { manifestPath: string; @@ -59,9 +61,33 @@ type InspectReport = { size: number; }>; }; + duplicates: { + total: number; + omitted: number; + minConfidence: DuplicateConfidence; + top: DuplicateOpportunitySummary[]; + }; recommendedCommands: string[]; }; +type DuplicateOpportunitySummary = { + confidence: DuplicateConfidence; + cloneType: DuplicateGroup["cloneType"]; + score: number; + left: DuplicateOpportunitySide; + right: DuplicateOpportunitySide; + rawPairCount: number; + reasons: string[]; +}; + +type DuplicateOpportunitySide = { + file: string; + startLine: number; + endLine: number; + tokenCount: number; + name?: string; +}; + export type InspectCommandContext = { projectRootFs: string; includeRootsAbs: string[]; @@ -182,6 +208,28 @@ async function buildScopedReportGraph( }; } +function summarizeDuplicateSide(side: DuplicateGroup["primaryLeft"]): DuplicateOpportunitySide { + return { + file: side.file, + startLine: side.startLine, + endLine: side.endLine, + tokenCount: side.tokenCount, + ...(side.name ? { name: side.name } : {}), + }; +} + +function summarizeDuplicateGroup(group: DuplicateGroup): DuplicateOpportunitySummary { + return { + confidence: group.confidence, + cloneType: group.cloneType, + score: group.score, + left: summarizeDuplicateSide(group.primaryLeft), + right: summarizeDuplicateSide(group.primaryRight), + rawPairCount: group.rawPairCount, + reasons: group.reasons, + }; +} + function countFilesByLanguage(files: Iterable): Record { const byLanguage: Record = {}; for (const file of files) { @@ -204,6 +252,7 @@ function buildRecommendedInspectCommands( const commands = [ `codegraph hotspots ${rootFlag}${targetSuffix} --limit 20 --json`, `codegraph graph ${rootFlag}${targetSuffix} --json --symbols-detailed --compact-json`, + `codegraph duplicates ${rootFlag}${targetSuffix} --min-confidence medium --limit 20 --include-same-file`, ]; if (hasUnresolvedImports) { commands.push(`codegraph unresolved ${rootFlag}${targetSuffix} --json`); @@ -228,18 +277,33 @@ async function buildInspectReport( limit: number, writeStderrLine: (message: string) => void, ): Promise { - const { graph, indexCache } = await buildScopedReportGraph(projectRoot, includeRoots, files, { - ...(cache ? { cache } : {}), + const useDiskCache = cache === "disk" || cache === undefined; + const indexCache = useDiskCache ? readIndexCacheMetadata(projectRoot) : null; + if (indexCache) { + writeStderrLine(formatIndexCacheMetadata(indexCache)); + } + const index = await buildProjectIndexIncremental(projectRoot, { + files, + cache: cache ?? "disk", discovery, - ...(graphOptions ? { graphOptions } : {}), - nativeMode, - workerOpts, - ...(progressHandler ? { progressHandler } : {}), - writeStderrLine, + ...(progressHandler ? { onProgress: progressHandler } : {}), + ...(nativeMode !== "auto" ? { native: nativeMode } : {}), + ...workerOpts, + ...(graphOptions ? { graph: graphOptions } : {}), }); + const graph = restrictGraphToIncludeRoots(index.graph, includeRoots); const hotspots = getHotspots(graph, { limit }); const unresolved = getUnresolvedImports(graph, { projectRoot }); const cycles = sortDetailedCycles(findDetailedCycles(graph), "priority"); + const duplicateMinConfidence: DuplicateConfidence = "high"; + const duplicateResult = await findDuplicates(index, { + projectRoot, + files, + includeSameFile: true, + minConfidence: duplicateMinConfidence, + minTokens: INSPECT_DUPLICATE_MIN_TOKENS, + limit, + }); const loadError = getNativeTreeSitterLoadError(nativeMode); return { root: normalizePathForDisplay(projectRoot), @@ -272,6 +336,12 @@ async function buildInspectReport( size: cycle.files.length, })), }, + duplicates: { + total: duplicateResult.groups.length + duplicateResult.omittedCounts.groups, + omitted: duplicateResult.omittedCounts.groups, + minConfidence: duplicateMinConfidence, + top: duplicateResult.groups.map(summarizeDuplicateGroup), + }, recommendedCommands: buildRecommendedInspectCommands( projectRoot, includeRoots, diff --git a/src/cli/search.ts b/src/cli/search.ts index 78e6590e..c7fc2e8e 100644 --- a/src/cli/search.ts +++ b/src/cli/search.ts @@ -1,17 +1,9 @@ import { formatAgentSearchResponse, searchCodegraph, type AgentSearchMode } from "../agent/search.js"; +import type { CliAgentCommandContext } from "./context.js"; import { SEARCH_HELP_TEXT } from "./help.js"; import { parsePositiveIntegerOption } from "./options.js"; -export type SearchCommandContext = { - positionals: string[]; - root: string; - getOpt: (name: string) => string | undefined; - hasFlag: (name: string) => boolean; - writeJSONLine: (value: unknown) => void; - writeStdoutLine: (message: string) => void; - writeStderrLine: (message: string) => void; - exit: (code: number) => never; -}; +export type SearchCommandContext = CliAgentCommandContext; function parseAgentSearchMode(rawValue: string | undefined): AgentSearchMode { if (rawValue === undefined) return "hybrid"; diff --git a/src/duplicates.ts b/src/duplicates.ts index e8a99722..d7e9775d 100644 --- a/src/duplicates.ts +++ b/src/duplicates.ts @@ -140,6 +140,8 @@ const DEFAULT_GROUP_VARIANT_LIMIT = 5; const DEFAULT_SHINGLE_SIZE = 5; const DEFAULT_WINDOW_SIZE = 4; const DEFAULT_MAX_FINGERPRINTS = 128; +const GROUP_PRIMARY_LENGTH_RATIO_FLOOR = 0.7; +const NEARBY_CHUNK_VARIANT_MAX_GAP = 2; const textLanguageByExtension: Record = { ".json": "json", @@ -334,6 +336,18 @@ function rangesSubstantiallyOverlap(left: DuplicateUnitRef, right: DuplicateUnit return overlap / Math.min(lineSpan(left), lineSpan(right)) >= 0.8; } +function lineGap(left: DuplicateUnitRef, right: DuplicateUnitRef): number { + if (left.endLine < right.startLine) return right.startLine - left.endLine - 1; + if (right.endLine < left.startLine) return left.startLine - right.endLine - 1; + return 0; +} + +function rangesAreNearbyChunkVariants(left: DuplicateUnitRef, right: DuplicateUnitRef): boolean { + if (left.file !== right.file || left.languageId !== right.languageId) return false; + if (left.kind !== "chunk" || right.kind !== "chunk") return false; + return lineGap(left, right) <= NEARBY_CHUNK_VARIANT_MAX_GAP; +} + function ratio(left: number, right: number): number { if (!left || !right) return 0; return Math.min(left, right) / Math.max(left, right); @@ -862,6 +876,10 @@ function unitRefIdentity(ref: DuplicateUnitRef): string { ].join("\u0000"); } +function unitRefRangeIdentity(ref: DuplicateUnitRef): string { + return [ref.file, ref.startLine, ref.endLine, ref.languageId].join("\u0000"); +} + function compareUnitRefs(left: DuplicateUnitRef, right: DuplicateUnitRef): number { const fileCompare = left.file.localeCompare(right.file); if (fileCompare) return fileCompare; @@ -952,8 +970,10 @@ function createUnitClusters(refs: readonly DuplicateUnitRef[]): Map left.ref.endLine) break; - if (rangesSubstantiallyOverlap(left.ref, right.ref)) union(left.key, right.key); + if (right.ref.startLine > left.ref.endLine + NEARBY_CHUNK_VARIANT_MAX_GAP + 1) break; + if (rangesSubstantiallyOverlap(left.ref, right.ref) || rangesAreNearbyChunkVariants(left.ref, right.ref)) { + union(left.key, right.key); + } } } } @@ -983,14 +1003,45 @@ function orderedGroupKey(left: UnitCluster, right: UnitCluster): string { return `${right.id}\u0000${left.id}`; } -function mergeReasons(suggestions: readonly DuplicateSuggestion[]): string[] { +function orderedUnitPairKey(left: DuplicateUnitRef, right: DuplicateUnitRef): string { + const leftKey = unitRefIdentity(left); + const rightKey = unitRefIdentity(right); + if (leftKey < rightKey) return `${leftKey}\u0000${rightKey}`; + return `${rightKey}\u0000${leftKey}`; +} + +function orderedUnitRangePairKey(left: DuplicateUnitRef, right: DuplicateUnitRef): string { + const leftKey = unitRefRangeIdentity(left); + const rightKey = unitRefRangeIdentity(right); + if (leftKey < rightKey) return `${leftKey}\u0000${rightKey}`; + return `${rightKey}\u0000${leftKey}`; +} + +function suggestionVariantKey(suggestion: DuplicateSuggestion): string { + return [ + orderedUnitPairKey(suggestion.left, suggestion.right), + suggestion.score, + suggestion.confidence, + suggestion.cloneType, + ].join("\u0000"); +} + +function mergeReasonLists(reasonLists: Iterable): string[] { const reasons = new Set(); - for (const suggestion of suggestions) { - for (const reason of suggestion.reasons) reasons.add(reason); + for (const reasonList of reasonLists) { + for (const reason of reasonList) reasons.add(reason); } return Array.from(reasons).sort(); } +function mergeReasons(suggestions: readonly DuplicateSuggestion[]): string[] { + return mergeReasonLists(suggestions.map((suggestion) => suggestion.reasons)); +} + +function mergeGroupReasons(groups: readonly DuplicateGroup[]): string[] { + return mergeReasonLists(groups.map((group) => group.reasons)); +} + function groupForSuggestions( key: string, suggestions: DuplicateSuggestion[], @@ -1009,6 +1060,13 @@ function groupForSuggestions( confidence = bestConfidence(confidence, suggestion.confidence); cloneType = bestCloneType(cloneType, suggestion.cloneType); } + let reasons = mergeReasons(suggestions); + const primaryLengthRatio = ratio(left.primary.tokenCount, right.primary.tokenCount); + if (primaryLengthRatio < GROUP_PRIMARY_LENGTH_RATIO_FLOOR) { + score = Math.min(score, 64); + confidence = "low"; + reasons = Array.from(new Set([...reasons, "different-sized grouped units"])).sort(); + } return { id: shortHashText(key), score, @@ -1021,7 +1079,7 @@ function groupForSuggestions( rawPairCount: suggestions.length, omittedVariantCount: Math.max(0, suggestions.length - variants.length), metrics: primary.metrics, - reasons: mergeReasons(suggestions), + reasons, }; } @@ -1040,6 +1098,58 @@ function compareGroups(left: DuplicateGroup, right: DuplicateGroup): number { return compareUnitRefs(left.primaryRight, right.primaryRight); } +function coalesceDuplicateGroups(groups: DuplicateGroup[], variantLimit: number): DuplicateGroup[] { + const groupsByPrimaryPair = new Map(); + for (const group of groups) { + const key = orderedUnitRangePairKey(group.primaryLeft, group.primaryRight); + const existing = groupsByPrimaryPair.get(key); + if (existing) existing.push(group); + else groupsByPrimaryPair.set(key, [group]); + } + + const coalesced: DuplicateGroup[] = []; + for (const [key, grouped] of groupsByPrimaryPair) { + if (grouped.length === 1) { + coalesced.push(grouped[0]!); + continue; + } + + grouped.sort(compareGroups); + const primary = grouped[0]!; + const variantsByKey = new Map(); + for (const group of grouped) { + for (const variant of group.variants) { + variantsByKey.set(suggestionVariantKey(variant), variant); + } + } + const dedupedVariants = Array.from(variantsByKey.values()).sort(compareSuggestionsForPrimary); + const variants = dedupedVariants.slice(0, variantLimit); + const rawPairCount = grouped.reduce((count, group) => count + group.rawPairCount, 0); + let score = primary.score; + let confidence = primary.confidence; + let cloneType = primary.cloneType; + for (const group of grouped.slice(1)) { + score = Math.max(score, group.score); + confidence = bestConfidence(confidence, group.confidence); + cloneType = bestCloneType(cloneType, group.cloneType); + } + coalesced.push({ + ...primary, + id: shortHashText(key), + score, + confidence, + cloneType, + variants, + variantCount: variants.length, + rawPairCount, + omittedVariantCount: Math.max(0, dedupedVariants.length - variants.length), + reasons: mergeGroupReasons(grouped), + }); + } + coalesced.sort(compareGroups); + return coalesced; +} + function groupSuggestions(suggestions: readonly DuplicateSuggestion[], includeRawPairs: boolean): DuplicateGroup[] { const refs = suggestions.flatMap((suggestion) => [suggestion.left, suggestion.right]); const clusters = createUnitClusters(refs); @@ -1062,8 +1172,7 @@ function groupSuggestions(suggestions: readonly DuplicateSuggestion[], includeRa const groups = Array.from(suggestionsByGroup, ([key, value]) => groupForSuggestions(key, value.suggestions, value.left, value.right, variantLimit), ); - groups.sort(compareGroups); - return groups; + return coalesceDuplicateGroups(groups, variantLimit); } /** Finds scored duplicate candidates from an already-built project index. */ @@ -1120,7 +1229,9 @@ export async function findDuplicates( suggestions.sort(compareSuggestions); const includeRawPairs = options.includeRawPairs ?? false; - const groups = groupSuggestions(suggestions, includeRawPairs); + const groups = groupSuggestions(suggestions, includeRawPairs).filter( + (group) => confidenceRank[group.confidence] >= confidenceRank[minConfidence], + ); const limitedGroups = groups.slice(0, limit); const omittedGroups = Math.max(0, groups.length - limitedGroups.length); const limitedRawSuggestions = includeRawPairs ? suggestions.slice(0, limit) : []; diff --git a/src/graphs/symbol-render.ts b/src/graphs/symbol-render.ts index 5c0b78b2..61cb44dd 100644 --- a/src/graphs/symbol-render.ts +++ b/src/graphs/symbol-render.ts @@ -21,6 +21,29 @@ type SymbolGraphLike = { edges: SymbolEdgeLike[]; }; +type RenderNode = { + id: string; + label: string; +}; + +type FileRenderNode = RenderNode & { + external: boolean; +}; + +type RenderEdge = { + fromId: string; + toId: string; + label?: string; +}; + +type SymbolWithFileRenderModel = { + fileNodes: FileRenderNode[]; + symbolNodes: RenderNode[]; + fileEdges: RenderEdge[]; + fileSymbolEdges: RenderEdge[]; + symbolEdges: RenderEdge[]; +}; + function symbolDisplayLabel(node: SymbolNodeLike, projectRoot?: string): string { const relativeFile = toProjectDisplayPath(projectRoot, node.file); const base = path.basename(relativeFile); @@ -29,6 +52,73 @@ function symbolDisplayLabel(node: SymbolNodeLike, projectRoot?: string): string return `${base}:${node.name}`; } +function buildSymbolWithFileRenderModel( + sg: SymbolGraphLike, + fg: Graph, + projectRoot?: string, +): SymbolWithFileRenderModel { + const fileIdOf = new Map(); + const fileNodeMeta = new Map(); + let fileIndex = 0; + const fileLabel = (file: string) => toProjectDisplayPath(projectRoot, file); + const ensureFile = (file: string) => { + if (!fileIdOf.has(file)) { + const id = `f${fileIndex++}`; + fileIdOf.set(file, id); + fileNodeMeta.set(id, { label: fileLabel(file), external: false }); + } + }; + const ensureExternal = (name: string) => { + if (!fileIdOf.has(name)) { + const id = `f${fileIndex++}`; + fileIdOf.set(name, id); + fileNodeMeta.set(id, { label: name, external: true }); + } + }; + for (const file of fg.nodes) ensureFile(file); + for (const edge of fg.edges) { + ensureFile(edge.from); + if (edge.to.type === "file") ensureFile(edge.to.path); + else ensureExternal(edge.to.name); + } + + const symbolIdOf = new Map(); + const symbolNodes: RenderNode[] = []; + let symbolIndex = 0; + for (const [id, node] of sg.nodes) { + const symbolId = `s${symbolIndex++}`; + symbolIdOf.set(id, symbolId); + symbolNodes.push({ id: symbolId, label: symbolDisplayLabel(node, projectRoot) }); + } + + const fileNodes: FileRenderNode[] = []; + for (const [id, meta] of fileNodeMeta) { + fileNodes.push({ id, label: meta.label, external: meta.external }); + } + + const fileEdges = fg.edges.map((edge) => { + const fromId = fileIdOf.get(edge.from)!; + const targetKey = edge.to.type === "file" ? edge.to.path : edge.to.name; + const toId = fileIdOf.get(targetKey)!; + return { fromId, toId }; + }); + + const fileSymbolEdges: RenderEdge[] = []; + for (const [symbolKey, symbolId] of symbolIdOf) { + const node = sg.nodes.get(symbolKey)!; + const fileId = fileIdOf.get(node.file); + if (fileId) fileSymbolEdges.push({ fromId: fileId, toId: symbolId }); + } + + const symbolEdges = sg.edges.map((edge) => ({ + fromId: symbolIdOf.get(edge.from)!, + toId: symbolIdOf.get(edge.to)!, + ...(edge.label ? { label: edge.label } : {}), + })); + + return { fileNodes, symbolNodes, fileEdges, fileSymbolEdges, symbolEdges }; +} + export function graphToMermaidSymbols(sg: SymbolGraphLike, projectRoot?: string): string { const idOf = new Map(); const labels = new Map(); @@ -82,136 +172,56 @@ export function graphToDOTSymbols(sg: SymbolGraphLike, projectRoot?: string): st } export function graphToMermaidSymbolsWithFiles(sg: SymbolGraphLike, fg: Graph, projectRoot?: string): string { - const fileIdOf = new Map(); - const fileNodeMeta = new Map(); - let fileIndex = 0; - const fileLabel = (file: string) => toProjectDisplayPath(projectRoot, file); - const ensureFile = (file: string) => { - if (!fileIdOf.has(file)) { - const id = `f${fileIndex++}`; - fileIdOf.set(file, id); - fileNodeMeta.set(id, { label: fileLabel(file), external: false }); - } - }; - const ensureExternal = (name: string) => { - if (!fileIdOf.has(name)) { - const id = `f${fileIndex++}`; - fileIdOf.set(name, id); - fileNodeMeta.set(id, { label: name, external: true }); - } - }; - for (const file of fg.nodes) ensureFile(file); - for (const edge of fg.edges) { - ensureFile(edge.from); - if (edge.to.type === "file") ensureFile(edge.to.path); - else ensureExternal(edge.to.name); - } - - const symbolIdOf = new Map(); - const symbolLabels = new Map(); - let symbolIndex = 0; - for (const [id, node] of sg.nodes) { - const symbolId = `s${symbolIndex++}`; - symbolIdOf.set(id, symbolId); - symbolLabels.set(symbolId, symbolDisplayLabel(node)); - } - + const model = buildSymbolWithFileRenderModel(sg, fg, projectRoot); const declared = new Set(); const lines: string[] = ["flowchart LR"]; - for (const [id, meta] of fileNodeMeta) { - if (declared.has(id)) continue; - declared.add(id); - lines.push(meta.external ? `${id}(["${mermaidLabel(meta.label)}"])` : `${id}["${mermaidLabel(meta.label)}"]`); + for (const node of model.fileNodes) { + if (declared.has(node.id)) continue; + declared.add(node.id); + lines.push(node.external ? `${node.id}(["${mermaidLabel(node.label)}"])` : `${node.id}["${mermaidLabel(node.label)}"]`); } - for (const [id, label] of symbolLabels) { - if (declared.has(id)) continue; - declared.add(id); - lines.push(`${id}["${mermaidLabel(label)}"]`); + for (const node of model.symbolNodes) { + if (declared.has(node.id)) continue; + declared.add(node.id); + lines.push(`${node.id}["${mermaidLabel(node.label)}"]`); } - for (const edge of fg.edges) { - const fromId = fileIdOf.get(edge.from)!; - const targetKey = edge.to.type === "file" ? edge.to.path : edge.to.name; - const toId = fileIdOf.get(targetKey)!; - lines.push(`${fromId} --> ${toId}`); + for (const edge of model.fileEdges) { + lines.push(`${edge.fromId} --> ${edge.toId}`); } - for (const [symbolKey, symbolId] of symbolIdOf) { - const node = sg.nodes.get(symbolKey)!; - const fileId = fileIdOf.get(node.file); - if (fileId) lines.push(`${fileId} --> ${symbolId}`); + for (const edge of model.fileSymbolEdges) { + lines.push(`${edge.fromId} --> ${edge.toId}`); } - for (const edge of sg.edges) { - const fromId = symbolIdOf.get(edge.from)!; - const toId = symbolIdOf.get(edge.to)!; - if (edge.label) lines.push(`${fromId} -- "${mermaidLabel(edge.label)}" --> ${toId}`); - else lines.push(`${fromId} --> ${toId}`); + for (const edge of model.symbolEdges) { + if (edge.label) lines.push(`${edge.fromId} -- "${mermaidLabel(edge.label)}" --> ${edge.toId}`); + else lines.push(`${edge.fromId} --> ${edge.toId}`); } return lines.join("\n"); } export function graphToDOTSymbolsWithFiles(sg: SymbolGraphLike, fg: Graph, projectRoot?: string): string { - const fileIdOf = new Map(); - const fileNodeMeta = new Map(); - let fileIndex = 0; - const fileLabel = (file: string) => toProjectDisplayPath(projectRoot, file); - const ensureFile = (file: string) => { - if (!fileIdOf.has(file)) { - const id = `f${fileIndex++}`; - fileIdOf.set(file, id); - fileNodeMeta.set(id, { label: fileLabel(file), external: false }); - } - }; - const ensureExternal = (name: string) => { - if (!fileIdOf.has(name)) { - const id = `f${fileIndex++}`; - fileIdOf.set(name, id); - fileNodeMeta.set(id, { label: name, external: true }); - } - }; - for (const file of fg.nodes) ensureFile(file); - for (const edge of fg.edges) { - ensureFile(edge.from); - if (edge.to.type === "file") ensureFile(edge.to.path); - else ensureExternal(edge.to.name); - } - - const symbolIdOf = new Map(); - const symbolLabels = new Map(); - let symbolIndex = 0; - for (const [id, node] of sg.nodes) { - const symbolId = `s${symbolIndex++}`; - symbolIdOf.set(id, symbolId); - symbolLabels.set(symbolId, symbolDisplayLabel(node)); - } - + const model = buildSymbolWithFileRenderModel(sg, fg, projectRoot); const lines: string[] = []; lines.push("digraph G {"); lines.push(" rankdir=LR;"); lines.push(' node [shape=box, fontsize=10, fontname="Arial"];\n'); - for (const [id, meta] of fileNodeMeta) { + for (const node of model.fileNodes) { lines.push( - ` ${id} [label="${dotLabel(meta.label)}", ${meta.external ? "shape=ellipse, style=dashed" : "shape=box"}];`, + ` ${node.id} [label="${dotLabel(node.label)}", ${node.external ? "shape=ellipse, style=dashed" : "shape=box"}];`, ); } - for (const [id, label] of symbolLabels) { - lines.push(` ${id} [label="${dotLabel(label)}"];`); + for (const node of model.symbolNodes) { + lines.push(` ${node.id} [label="${dotLabel(node.label)}"];`); } - for (const edge of fg.edges) { - const fromId = fileIdOf.get(edge.from)!; - const targetKey = edge.to.type === "file" ? edge.to.path : edge.to.name; - const toId = fileIdOf.get(targetKey)!; - lines.push(` ${fromId} -> ${toId};`); + for (const edge of model.fileEdges) { + lines.push(` ${edge.fromId} -> ${edge.toId};`); } - for (const [symbolKey, symbolId] of symbolIdOf) { - const node = sg.nodes.get(symbolKey)!; - const fileId = fileIdOf.get(node.file); - if (fileId) lines.push(` ${fileId} -> ${symbolId};`); + for (const edge of model.fileSymbolEdges) { + lines.push(` ${edge.fromId} -> ${edge.toId};`); } - for (const edge of sg.edges) { - const fromId = symbolIdOf.get(edge.from)!; - const toId = symbolIdOf.get(edge.to)!; + for (const edge of model.symbolEdges) { const attrs: string[] = []; if (edge.label) attrs.push(`label="${dotLabel(edge.label)}"`); - lines.push(` ${fromId} -> ${toId}${attrs.length ? " [" + attrs.join(",") + "]" : ""};`); + lines.push(` ${edge.fromId} -> ${edge.toId}${attrs.length ? " [" + attrs.join(",") + "]" : ""};`); } lines.push("}"); return lines.join("\n"); diff --git a/src/impact/report-suggestions.ts b/src/impact/report-suggestions.ts index b14f497b..cbb08923 100644 --- a/src/impact/report-suggestions.ts +++ b/src/impact/report-suggestions.ts @@ -817,22 +817,26 @@ async function loadCoverageByFile( return coverage; } -function countCoveredLinesForRange(coverage: FileCoverage | undefined, range: ChangedSymbol["range"]): number { +function countLinesForRange( + coverage: FileCoverage | undefined, + range: ChangedSymbol["range"], + linesForCoverage: (coverage: FileCoverage) => ReadonlySet, +): number { if (!coverage) return 0; let count = 0; + const lines = linesForCoverage(coverage); for (let line = range.start.line; line <= range.end.line; line += 1) { - if (coverage.coveredLines.has(line)) count += 1; + if (lines.has(line)) count += 1; } return count; } +function countCoveredLinesForRange(coverage: FileCoverage | undefined, range: ChangedSymbol["range"]): number { + return countLinesForRange(coverage, range, (entry) => entry.coveredLines); +} + function countTotalLinesForRange(coverage: FileCoverage | undefined, range: ChangedSymbol["range"]): number { - if (!coverage) return 0; - let count = 0; - for (let line = range.start.line; line <= range.end.line; line += 1) { - if (coverage.allLines.has(line)) count += 1; - } - return count; + return countLinesForRange(coverage, range, (entry) => entry.allLines); } export async function collectImpactReportSuggestions( diff --git a/src/impact/suggestions.ts b/src/impact/suggestions.ts index 55596e94..6fc74b12 100644 --- a/src/impact/suggestions.ts +++ b/src/impact/suggestions.ts @@ -4,6 +4,7 @@ import { goToDefinition } from "../indexer/navigation.js"; import { ensureParsedContext } from "../indexer/parse-context.js"; import type { LanguageSupport } from "../languages.js"; import type { SyntaxNodeLike, SyntaxTreeLike } from "../languages/types.js"; +import { toRange } from "../util/ast.js"; import { normalizePath, resolveFilePathFromRoot, toProjectRelativePath } from "../util/paths.js"; import { collectChangedLines } from "./hunks.js"; import type { FileChange, ImpactOptions, ImpactSuggestion, ImpactSuggestionConfidence } from "./types.js"; @@ -368,18 +369,3 @@ function pushUniqueSuggestion(output: ImpactSuggestion[], seen: Set, sug seen.add(key); output.push(suggestion); } - -function toRange(node: SyntaxNodeLike): Range { - return { - start: { - line: node.startPosition.row + 1, - column: node.startPosition.column + 1, - index: node.startIndex, - }, - end: { - line: node.endPosition.row + 1, - column: node.endPosition.column + 1, - index: node.endIndex, - }, - }; -} diff --git a/src/indexer/imports/languageSpecific.ts b/src/indexer/imports/languageSpecific.ts index b2bc6afa..56622269 100644 --- a/src/indexer/imports/languageSpecific.ts +++ b/src/indexer/imports/languageSpecific.ts @@ -28,6 +28,8 @@ export function createStatementImportOverrideState(): StatementImportOverrideSta return { handledStatements: new Set() }; } +type ParsedJvmImportStatement = { kind: "star"; from: string } | { kind: "named"; from: string; imported: string }; + function normalizeGoImports(context: LanguageSpecificImportContext): void { const imports = context.getBindings(); if (context.languageId !== "go" || !imports.length) { @@ -251,10 +253,10 @@ async function applyJavaStatementOverride( normalizedStmt: string, typeOnly: boolean, ): Promise { - const parsed = parseJavaImportStatement(normalizedStmt); - if (!parsed) return false; - const local = parsed.kind === "named" ? parsed.imported : undefined; - return await pushJvmImportBinding(context, parsed, local, typeOnly); + return await applyJvmStatementOverride(context, normalizedStmt, typeOnly, parseJavaImportStatement, (parsed) => { + if (parsed.kind === "named") return parsed.imported; + return undefined; + }); } async function applyKotlinStatementOverride( @@ -262,10 +264,22 @@ async function applyKotlinStatementOverride( normalizedStmt: string, typeOnly: boolean, ): Promise { - const parsed = parseKotlinImportStatement(normalizedStmt); + return await applyJvmStatementOverride(context, normalizedStmt, typeOnly, parseKotlinImportStatement, (parsed) => { + if (parsed.kind === "named") return parsed.local; + return undefined; + }); +} + +async function applyJvmStatementOverride( + context: LanguageSpecificImportContext, + normalizedStmt: string, + typeOnly: boolean, + parseStatement: (statement: string) => TParsed | null, + localNameFor: (parsed: TParsed) => string | undefined, +): Promise { + const parsed = parseStatement(normalizedStmt); if (!parsed) return false; - const local = parsed.kind === "named" ? parsed.local : undefined; - return await pushJvmImportBinding(context, parsed, local, typeOnly); + return await pushJvmImportBinding(context, parsed, localNameFor(parsed), typeOnly); } async function pushJvmImportBinding( diff --git a/src/languages/definitions/c.ts b/src/languages/definitions/c.ts index c58f74f6..e2c82f6c 100644 --- a/src/languages/definitions/c.ts +++ b/src/languages/definitions/c.ts @@ -3,12 +3,18 @@ import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; import { cFamilyContainerTypes, + cFamilyControlSplitPoints, + cFamilyCoreExportQueries, + cFamilyCoreLocalQueries, + cFamilyIncludeBindingsQuery, + cFamilyIncludeImportsQuery, cFunctionNameQuery, findAncestor, isFunctionDeclarator, isInAncestorDeclarator, isInField, isInParameterList, + joinQueryPatterns, } from "./cFamily.js"; const FUNCTION_NAME_QUERY = cFunctionNameQuery("chunk.name", false); @@ -56,52 +62,24 @@ export const C_DEF: LanguageDefinition = { captureId: "macro", }, ], - splitPoints: [ - "if_statement", - "for_statement", - "while_statement", - "do_statement", - "switch_statement", - "case_statement", - ], + splitPoints: cFamilyControlSplitPoints, comments: ["comment"], }, graph: { - imports: ` - (preproc_include path: (string_literal) @mod) @stmt - (preproc_include path: (system_lib_string) @mod) @stmt - (preproc_include path: (identifier) @mod) @stmt - `, - exports: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (declaration ${GRAPH_FUNCTION_NAME_QUERY}) - (struct_specifier name: (type_identifier) @name) - (union_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (preproc_def name: (identifier) @name) - (preproc_function_def name: (identifier) @name) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - `, - locals: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (struct_specifier name: (type_identifier) @name) - (union_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - (parameter_declaration declarator: (identifier) @name) - (field_declaration declarator: (field_identifier) @name) - (preproc_def name: (identifier) @name) - (preproc_function_def name: (identifier) @name) - `, - importBindings: ` - (preproc_include path: (string_literal) @from) @stmt - (preproc_include path: (system_lib_string) @from) @stmt - (preproc_include path: (identifier) @from) @stmt - `, + imports: cFamilyIncludeImportsQuery, + exports: joinQueryPatterns([ + ...cFamilyCoreExportQueries(GRAPH_FUNCTION_NAME_QUERY), + `(union_specifier name: (type_identifier) @name)`, + `(preproc_def name: (identifier) @name)`, + `(preproc_function_def name: (identifier) @name)`, + ]), + locals: joinQueryPatterns([ + ...cFamilyCoreLocalQueries(GRAPH_FUNCTION_NAME_QUERY), + `(union_specifier name: (type_identifier) @name)`, + `(preproc_def name: (identifier) @name)`, + `(preproc_function_def name: (identifier) @name)`, + ]), + importBindings: cFamilyIncludeBindingsQuery, }, nodeTypes: { identifier: ["identifier", "field_identifier", "type_identifier"], diff --git a/src/languages/definitions/cFamily.ts b/src/languages/definitions/cFamily.ts index 80c5668d..539df049 100644 --- a/src/languages/definitions/cFamily.ts +++ b/src/languages/definitions/cFamily.ts @@ -9,6 +9,27 @@ export const cFamilyContainerTypes = new Set([ "init_declarator", ]); +export const cFamilyControlSplitPoints = [ + "if_statement", + "for_statement", + "while_statement", + "do_statement", + "switch_statement", + "case_statement", +]; + +export const cFamilyIncludeImportsQuery = ` + (preproc_include path: (string_literal) @mod) @stmt + (preproc_include path: (system_lib_string) @mod) @stmt + (preproc_include path: (identifier) @mod) @stmt + `; + +export const cFamilyIncludeBindingsQuery = ` + (preproc_include path: (string_literal) @from) @stmt + (preproc_include path: (system_lib_string) @from) @stmt + (preproc_include path: (identifier) @from) @stmt + `; + const cFamilyParameterListTypes = new Set(["parameter_declaration", "parameter_list"]); export function cFunctionNameQuery(captureName: string, includeFieldIdentifier: boolean): string { @@ -33,6 +54,37 @@ export function cFunctionNameQuery(captureName: string, includeFieldIdentifier: `; } +export function cFamilyCoreExportQueries(functionNameQuery: string): string[] { + return [ + `(function_definition ${functionNameQuery})`, + `(declaration ${functionNameQuery})`, + `(struct_specifier name: (type_identifier) @name)`, + `(enum_specifier name: (type_identifier) @name)`, + `(type_definition declarator: (type_identifier) @name)`, + `(declaration declarator: (identifier) @name)`, + `(declaration declarator: (init_declarator declarator: (identifier) @name))`, + ]; +} + +export function cFamilyCoreLocalQueries(functionNameQuery: string): string[] { + return [ + `(function_definition ${functionNameQuery})`, + `(struct_specifier name: (type_identifier) @name)`, + `(enum_specifier name: (type_identifier) @name)`, + `(type_definition declarator: (type_identifier) @name)`, + `(declaration declarator: (identifier) @name)`, + `(declaration declarator: (init_declarator declarator: (identifier) @name))`, + `(parameter_declaration declarator: (identifier) @name)`, + `(field_declaration declarator: (field_identifier) @name)`, + ]; +} + +export function joinQueryPatterns(patterns: readonly string[]): string { + return ` + ${patterns.join("\n ")} + `; +} + export function isWithin(node: SyntaxNodeLike, ancestor: SyntaxNodeLike | null): boolean { let current: SyntaxNodeLike | null = node; while (current) { diff --git a/src/languages/definitions/cpp.ts b/src/languages/definitions/cpp.ts index aae4559b..5c548526 100644 --- a/src/languages/definitions/cpp.ts +++ b/src/languages/definitions/cpp.ts @@ -3,12 +3,18 @@ import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; import { cFamilyContainerTypes, + cFamilyControlSplitPoints, + cFamilyCoreExportQueries, + cFamilyCoreLocalQueries, + cFamilyIncludeBindingsQuery, + cFamilyIncludeImportsQuery, cFunctionNameQuery, findAncestor, isFunctionDeclarator, isInAncestorDeclarator, isInField, isInParameterList, + joinQueryPatterns, } from "./cFamily.js"; const FUNCTION_NAME_QUERY = cFunctionNameQuery("chunk.name", true); @@ -56,55 +62,25 @@ export const CPP_DEF: LanguageDefinition = { captureId: "type", }, ], - splitPoints: [ - "if_statement", - "for_statement", - "while_statement", - "do_statement", - "switch_statement", - "case_statement", - "try_statement", - "catch_clause", - ], + splitPoints: [...cFamilyControlSplitPoints, "try_statement", "catch_clause"], comments: ["comment"], }, graph: { - imports: ` - (preproc_include path: (string_literal) @mod) @stmt - (preproc_include path: (system_lib_string) @mod) @stmt - (preproc_include path: (identifier) @mod) @stmt - `, - exports: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (declaration ${GRAPH_FUNCTION_NAME_QUERY}) - (class_specifier name: (type_identifier) @name) - (struct_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (namespace_definition name: (namespace_identifier) @name) - (alias_declaration name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (using_declaration (qualified_identifier name: (identifier) @name)) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - `, - locals: ` - (function_definition ${GRAPH_FUNCTION_NAME_QUERY}) - (class_specifier name: (type_identifier) @name) - (struct_specifier name: (type_identifier) @name) - (enum_specifier name: (type_identifier) @name) - (namespace_definition name: (namespace_identifier) @name) - (alias_declaration name: (type_identifier) @name) - (type_definition declarator: (type_identifier) @name) - (declaration declarator: (identifier) @name) - (declaration declarator: (init_declarator declarator: (identifier) @name)) - (parameter_declaration declarator: (identifier) @name) - (field_declaration declarator: (field_identifier) @name) - `, - importBindings: ` - (preproc_include path: (string_literal) @from) @stmt - (preproc_include path: (system_lib_string) @from) @stmt - (preproc_include path: (identifier) @from) @stmt - `, + imports: cFamilyIncludeImportsQuery, + exports: joinQueryPatterns([ + ...cFamilyCoreExportQueries(GRAPH_FUNCTION_NAME_QUERY), + `(class_specifier name: (type_identifier) @name)`, + `(namespace_definition name: (namespace_identifier) @name)`, + `(alias_declaration name: (type_identifier) @name)`, + `(using_declaration (qualified_identifier name: (identifier) @name))`, + ]), + locals: joinQueryPatterns([ + ...cFamilyCoreLocalQueries(GRAPH_FUNCTION_NAME_QUERY), + `(class_specifier name: (type_identifier) @name)`, + `(namespace_definition name: (namespace_identifier) @name)`, + `(alias_declaration name: (type_identifier) @name)`, + ]), + importBindings: cFamilyIncludeBindingsQuery, }, nodeTypes: { identifier: ["identifier", "field_identifier", "type_identifier", "namespace_identifier"], diff --git a/src/languages/definitions/css.ts b/src/languages/definitions/css.ts index 116b68d8..f63fca7c 100644 --- a/src/languages/definitions/css.ts +++ b/src/languages/definitions/css.ts @@ -1,35 +1,14 @@ import type { LanguageDefinition } from "../types.js"; import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; +import { cssLikeGraph, cssLikeNodeTypes, cssLikeStructure } from "./cssLike.js"; export const CSS_DEF: LanguageDefinition = { id: "css", extensions: [".css"], grammar: () => loadTreeSitterLanguage("tree-sitter-css"), - structure: { - blocks: [ - { type: "rule_set", captureId: "rule" }, - { type: "media_statement", captureId: "media" }, - { type: "keyframes_statement", captureId: "keyframes" }, - ], - splitPoints: ["rule_set"], - comments: ["comment", "js_comment"], - }, - graph: { - imports: ` - (import_statement (string_value) @mod) @stmt - `, - exports: "", - locals: ` - (class_selector (class_name) @name) - (id_selector (id_name) @name) - `, - importBindings: ` - (import_statement (string_value) @from) @stmt - `, - }, - nodeTypes: { - identifier: ["class_name", "id_name"], - }, + structure: cssLikeStructure(), + graph: cssLikeGraph(), + nodeTypes: cssLikeNodeTypes(), }; registerLanguage(CSS_DEF); diff --git a/src/languages/definitions/cssLike.ts b/src/languages/definitions/cssLike.ts new file mode 100644 index 00000000..67c6a8c7 --- /dev/null +++ b/src/languages/definitions/cssLike.ts @@ -0,0 +1,35 @@ +import type { LanguageDefinition } from "../types.js"; + +export function cssLikeStructure(): LanguageDefinition["structure"] { + return { + blocks: [ + { type: "rule_set", captureId: "rule" }, + { type: "media_statement", captureId: "media" }, + { type: "keyframes_statement", captureId: "keyframes" }, + ], + splitPoints: ["rule_set"], + comments: ["comment", "js_comment"], + }; +} + +export function cssLikeGraph(): LanguageDefinition["graph"] { + return { + imports: ` + (import_statement (string_value) @mod) @stmt + `, + exports: "", + locals: ` + (class_selector (class_name) @name) + (id_selector (id_name) @name) + `, + importBindings: ` + (import_statement (string_value) @from) @stmt + `, + }; +} + +export function cssLikeNodeTypes(): NonNullable { + return { + identifier: ["class_name", "id_name"], + }; +} diff --git a/src/languages/definitions/less.ts b/src/languages/definitions/less.ts index 8df96c4d..3554b815 100644 --- a/src/languages/definitions/less.ts +++ b/src/languages/definitions/less.ts @@ -1,35 +1,14 @@ import type { LanguageDefinition } from "../types.js"; import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; +import { cssLikeGraph, cssLikeNodeTypes, cssLikeStructure } from "./cssLike.js"; export const LESS_DEF: LanguageDefinition = { id: "less", extensions: [".less"], grammar: () => loadTreeSitterLanguage("tree-sitter-css"), - structure: { - blocks: [ - { type: "rule_set", captureId: "rule" }, - { type: "media_statement", captureId: "media" }, - { type: "keyframes_statement", captureId: "keyframes" }, - ], - splitPoints: ["rule_set"], - comments: ["comment", "js_comment"], - }, - graph: { - imports: ` - (import_statement (string_value) @mod) @stmt - `, - exports: "", - locals: ` - (class_selector (class_name) @name) - (id_selector (id_name) @name) - `, - importBindings: ` - (import_statement (string_value) @from) @stmt - `, - }, - nodeTypes: { - identifier: ["class_name", "id_name"], - }, + structure: cssLikeStructure(), + graph: cssLikeGraph(), + nodeTypes: cssLikeNodeTypes(), }; registerLanguage(LESS_DEF); diff --git a/src/mcp/server.ts b/src/mcp/server.ts index a9402e8b..cbd12c5d 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -19,7 +19,7 @@ import { explainCodegraphTargetWithSession } from "../agent/explain.js"; import type { AgentExplanation, AgentExplanationReference } from "../agent/explain.js"; import { searchCodegraphWithSession } from "../agent/search.js"; import type { AgentSearchMode, AgentSearchResponse } from "../agent/search.js"; -import { getDependencies, getReverseDependencies, getShortestPath } from "../graphs/queries.js"; +import { getDependencies, getReverseDependencies, getShortestPath, type DependencyNode } from "../graphs/queries.js"; import { findReferences, goToDefinition } from "../indexer/navigation.js"; import { buildReviewReport, type ReviewDepth, type ReviewReport } from "../review.js"; import { queryGraphSqliteRaw, type RawSqlResult } from "../sqlite.js"; @@ -139,6 +139,17 @@ export type CodegraphMcpHandlers = { }) => Promise; }; +type McpDependencyRequest = { + file: string; + depth?: number | undefined; + limit?: number | undefined; +}; + +type McpDependencyEntry = { + file: string; + depth: number; +}; + const MCP_HTTP_PATH = "/mcp"; const MAX_MCP_HTTP_BODY_BYTES = 1_000_000; @@ -154,6 +165,28 @@ export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): if (typeof limit !== "number" || !Number.isFinite(limit)) return fallback; return Math.min(max, Math.max(0, Math.floor(limit))); }; + const collectMcpDependencyEntries = async ( + request: McpDependencyRequest, + collectEntries: ( + graph: Awaited>["fileGraph"], + file: string, + options: { depth?: number; limit: number }, + ) => DependencyNode[], + ): Promise => { + const snapshot = await session.loadProject(); + const queryOptions = { + ...(request.depth !== undefined ? { depth: request.depth } : {}), + limit: boundedLimit(request.limit, DEFAULT_MCP_COLLECTION_LIMIT, MAX_MCP_COLLECTION_LIMIT), + }; + return collectEntries( + snapshot.fileGraph, + await resolveProjectFile(await realRoot, root, request.file), + queryOptions, + ).map((dependency) => ({ + file: relative(dependency.file), + depth: dependency.depth, + })); + }; return { search: async (request) => @@ -224,36 +257,12 @@ export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): }, deps: async (request) => { - const snapshot = await session.loadProject(); - const queryOptions = { - ...(request.depth !== undefined ? { depth: request.depth } : {}), - limit: boundedLimit(request.limit, DEFAULT_MCP_COLLECTION_LIMIT, MAX_MCP_COLLECTION_LIMIT), - }; - const dependencies = getDependencies( - snapshot.fileGraph, - await resolveProjectFile(await realRoot, root, request.file), - queryOptions, - ).map((dependency) => ({ - file: relative(dependency.file), - depth: dependency.depth, - })); + const dependencies = await collectMcpDependencyEntries(request, getDependencies); return { dependencies }; }, rdeps: async (request) => { - const snapshot = await session.loadProject(); - const queryOptions = { - ...(request.depth !== undefined ? { depth: request.depth } : {}), - limit: boundedLimit(request.limit, DEFAULT_MCP_COLLECTION_LIMIT, MAX_MCP_COLLECTION_LIMIT), - }; - const reverseDependencies = getReverseDependencies( - snapshot.fileGraph, - await resolveProjectFile(await realRoot, root, request.file), - queryOptions, - ).map((dependency) => ({ - file: relative(dependency.file), - depth: dependency.depth, - })); + const reverseDependencies = await collectMcpDependencyEntries(request, getReverseDependencies); return { reverseDependencies }; }, diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 3c1cbf04..45071890 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -14,6 +14,22 @@ function objectSchema(properties: Record, required: string[] = [ const stringProperty = { type: "string" }; const booleanProperty = { type: "boolean" }; +function dependencyInputSchema(): Tool["inputSchema"] { + return objectSchema( + { + file: stringProperty, + depth: { type: "integer", minimum: 0, default: 1 }, + limit: { + type: "integer", + minimum: 0, + maximum: MAX_MCP_COLLECTION_LIMIT, + default: DEFAULT_MCP_COLLECTION_LIMIT, + }, + }, + ["file"], + ); +} + export const MCP_TOOLS: Tool[] = [ { name: "search", @@ -84,36 +100,12 @@ export const MCP_TOOLS: Tool[] = [ { name: "deps", description: "List file dependencies.", - inputSchema: objectSchema( - { - file: stringProperty, - depth: { type: "integer", minimum: 0, default: 1 }, - limit: { - type: "integer", - minimum: 0, - maximum: MAX_MCP_COLLECTION_LIMIT, - default: DEFAULT_MCP_COLLECTION_LIMIT, - }, - }, - ["file"], - ), + inputSchema: dependencyInputSchema(), }, { name: "rdeps", description: "List reverse file dependencies.", - inputSchema: objectSchema( - { - file: stringProperty, - depth: { type: "integer", minimum: 0, default: 1 }, - limit: { - type: "integer", - minimum: 0, - maximum: MAX_MCP_COLLECTION_LIMIT, - default: DEFAULT_MCP_COLLECTION_LIMIT, - }, - }, - ["file"], - ), + inputSchema: dependencyInputSchema(), }, { name: "path", diff --git a/src/sqlite/canned-query.ts b/src/sqlite/canned-query.ts index a8a6f3d7..527a0466 100644 --- a/src/sqlite/canned-query.ts +++ b/src/sqlite/canned-query.ts @@ -4,33 +4,33 @@ import type { GraphQueryResult } from "./types.js"; import { dedupePreservingOrder, execRowsParams, toSqliteText } from "./common.js"; import { withReadOnlySqliteDatabase } from "./database.js"; -const loadDirectFileDependencies = (db: SqliteDatabase, fromPath: string): string[] => - execRowsParams( - db, - ` +type DirectFileEdgeDirection = "dependencies" | "dependents"; + +const loadDirectFileEdges = (db: SqliteDatabase, filePath: string, direction: DirectFileEdgeDirection): string[] => { + let sql = ` SELECT to_path FROM file_edges WHERE to_type = ? AND from_path = ? ORDER BY rowid; - `, - ["file", fromPath], - ) - .map((row) => toSqliteText(row[0])) - .filter(Boolean); - -const loadDirectFileDependents = (db: SqliteDatabase, toPath: string): string[] => - execRowsParams( - db, - ` + `; + if (direction === "dependents") { + sql = ` SELECT from_path FROM file_edges WHERE to_type = ? AND to_path = ? ORDER BY rowid; - `, - ["file", toPath], - ) + `; + } + return execRowsParams(db, sql, ["file", filePath]) .map((row) => toSqliteText(row[0])) .filter(Boolean); +}; + +const loadDirectFileDependencies = (db: SqliteDatabase, fromPath: string): string[] => + loadDirectFileEdges(db, fromPath, "dependencies"); + +const loadDirectFileDependents = (db: SqliteDatabase, toPath: string): string[] => + loadDirectFileEdges(db, toPath, "dependents"); const bfsFileTraversal = (start: string, loadNeighbors: (file: string) => string[]): string[] => { const visited = new Set(); diff --git a/src/util/packageExports.ts b/src/util/packageExports.ts new file mode 100644 index 00000000..2343e87e --- /dev/null +++ b/src/util/packageExports.ts @@ -0,0 +1,9 @@ +export function pickPackageExportTarget(target: unknown): string | null { + if (!target) return null; + if (typeof target === "string") return target; + if (typeof target !== "object") return null; + const exportTarget = target as Record; + const candidate = exportTarget.import ?? exportTarget.default ?? exportTarget.require ?? exportTarget.module; + if (typeof candidate === "string") return candidate; + return null; +} diff --git a/src/util/projectFiles.ts b/src/util/projectFiles.ts index 93af25c3..cc645cbb 100644 --- a/src/util/projectFiles.ts +++ b/src/util/projectFiles.ts @@ -275,7 +275,7 @@ async function loadGitignoreRulesForRootAliases(projectRoot: string): Promise boolean, diff --git a/src/util/resolution/node.ts b/src/util/resolution/node.ts index feae0191..fd8ab70a 100644 --- a/src/util/resolution/node.ts +++ b/src/util/resolution/node.ts @@ -1,5 +1,6 @@ import path from "node:path"; import { findFirstExistingResolutionCandidate } from "./findFirstExisting.js"; +import { pickPackageExportTarget } from "../packageExports.js"; import { directoryExists, loadJSON, type MinimalPackageJson } from "../workspace.js"; export async function resolveFromNodeModules( @@ -22,16 +23,6 @@ export async function resolveFromNodeModules( const tryResolveRelative = async (rel: string): Promise => { return await findFirstExistingResolutionCandidate(path.resolve(baseDir, rel), resolutionExtensions); }; - const pickExportTarget = (target: unknown): string | null => { - if (!target) return null; - if (typeof target === "string") return target; - if (typeof target === "object" && target !== null) { - const t = target as Record; - const cand = t.import ?? t.default ?? t.require ?? t.module; - if (typeof cand === "string") return cand; - } - return null; - }; if (pkg?.exports) { const key = subpath ? `./${subpath}` : "."; if (typeof pkg.exports === "string" && key === ".") { @@ -40,7 +31,7 @@ export async function resolveFromNodeModules( } else if (typeof pkg.exports === "object" && pkg.exports !== null) { const map = pkg.exports as Record; const target = map[key] ?? (key === "." ? map["."] : undefined); - const rel = pickExportTarget(target); + const rel = pickPackageExportTarget(target); if (rel) { const hit = await tryResolveRelative(rel); if (hit) return hit; diff --git a/src/util/workspace.ts b/src/util/workspace.ts index ee70220c..ad7c515e 100644 --- a/src/util/workspace.ts +++ b/src/util/workspace.ts @@ -4,6 +4,7 @@ const directoryExistsCache = new Map(); import fsp from "node:fs/promises"; import path from "node:path"; import fg from "fast-glob"; +import { pickPackageExportTarget } from "./packageExports.js"; import { listResolutionCandidates } from "./resolutionCandidates.js"; async function pathMatchesCachedStat( @@ -295,17 +296,6 @@ export function listWorkspacePackageResolutionCandidates( const pushRelativeCandidates = (rel: string): void => { candidates.push(...listResolutionCandidates(path.resolve(baseDir, rel), resolutionExtensions)); }; - const pickExportTarget = (target: unknown): string | null => { - if (!target) return null; - if (typeof target === "string") return target; - if (typeof target === "object" && target !== null) { - const typedTarget = target as Record; - const candidate = typedTarget.import ?? typedTarget.default ?? typedTarget.require ?? typedTarget.module; - if (typeof candidate === "string") return candidate; - } - return null; - }; - if (pkg.exports) { const key = subpath ? `./${subpath}` : "."; if (typeof pkg.exports === "string" && key === ".") { @@ -313,7 +303,7 @@ export function listWorkspacePackageResolutionCandidates( } else if (typeof pkg.exports === "object") { const exportMap = pkg.exports as Record; const target = exportMap[key] ?? (key === "." ? exportMap["."] : undefined); - const rel = pickExportTarget(target); + const rel = pickPackageExportTarget(target); if (rel) { pushRelativeCandidates(rel); } diff --git a/tests/cli-regressions.test.ts b/tests/cli-regressions.test.ts index e1284edc..a1c57fa8 100644 --- a/tests/cli-regressions.test.ts +++ b/tests/cli-regressions.test.ts @@ -814,6 +814,20 @@ describe("CLI regressions", () => { "utf8", ); await fsp.writeFile(path.join(srcDir, "b.ts"), "export const b = 1;\n", "utf8"); + const duplicateSource = ` +export function summarizeInvoices(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); +} +`; + await fsp.writeFile(path.join(srcDir, "c.ts"), duplicateSource, "utf8"); + await fsp.writeFile(path.join(srcDir, "d.ts"), duplicateSource, "utf8"); const stdout = await runCliCommand(["inspect", "--root", tmpDir, srcDir, "--limit", "1"]); const report = JSON.parse(stdout) as { @@ -832,6 +846,18 @@ describe("CLI regressions", () => { hotspots: Array<{ file: string; fanIn: number; fanOut: number; score: number }>; unresolved: { total: number; top: Array<{ name: string; importerCount: number }> }; cycles: { total: number; top: Array<{ files: string[]; priorityScore: number; size: number }> }; + duplicates: { + total: number; + omitted: number; + minConfidence: string; + top: Array<{ + confidence: string; + cloneType: string; + left: { file: string; startLine: number; endLine: number; tokenCount: number; name?: string }; + right: { file: string; startLine: number; endLine: number; tokenCount: number; name?: string }; + rawPairCount: number; + }>; + }; recommendedCommands: string[]; }; @@ -839,20 +865,33 @@ describe("CLI regressions", () => { expect(report.includeRoots).toEqual([normalize(srcDir)]); expect(typeof report.backend.native.available).toBe("boolean"); expect(Array.isArray(report.backend.native.supportedLanguageIds)).toBe(true); - expect(report.files.total).toBe(2); - expect(report.files.byLanguage.ts).toBe(2); + expect(report.files.total).toBe(4); + expect(report.files.byLanguage.ts).toBe(4); expect(report.hotspots.length).toBe(1); expect(report.hotspots[0].file).toBe(normalize(path.join(srcDir, "b.ts"))); expect(report.unresolved.total).toBe(0); expect(report.unresolved.top).toEqual([]); expect(report.cycles.total).toBe(0); expect(report.cycles.top).toEqual([]); + expect(report.duplicates.total).toBeGreaterThan(0); + expect(report.duplicates.omitted).toBeGreaterThanOrEqual(0); + expect(report.duplicates.minConfidence).toBe("high"); + expect(report.duplicates.top).toHaveLength(1); + expect(report.duplicates.top[0]?.confidence).toBe("high"); + expect(report.duplicates.top[0]?.cloneType).toBe("exact"); + expect(report.duplicates.top[0]?.left.file).toBe("src/c.ts"); + expect(report.duplicates.top[0]?.right.file).toBe("src/d.ts"); + expect(report.duplicates.top[0]?.left.tokenCount).toBeGreaterThan(40); + expect(report.duplicates.top[0]?.rawPairCount).toBeGreaterThan(0); expect(report.recommendedCommands).toContain( `codegraph hotspots --root "${normalize(tmpDir)}" "${normalize(srcDir)}" --limit 20 --json`, ); expect(report.recommendedCommands).toContain( `codegraph graph --root "${normalize(tmpDir)}" "${normalize(srcDir)}" --json --symbols-detailed --compact-json`, ); + expect(report.recommendedCommands).toContain( + `codegraph duplicates --root "${normalize(tmpDir)}" "${normalize(srcDir)}" --min-confidence medium --limit 20 --include-same-file`, + ); expect(report.recommendedCommands).toContain( `codegraph doctor "${normalize(path.join(tmpDir, ".codegraph-cache", "index-v1"))}"`, ); diff --git a/tests/disk-cache-sqlite.test.ts b/tests/disk-cache-sqlite.test.ts index dccc192b..ac96deb3 100644 --- a/tests/disk-cache-sqlite.test.ts +++ b/tests/disk-cache-sqlite.test.ts @@ -1,13 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex, type BuildReport } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("disk cache uses sqlite backend", () => { it("persists module cache in sqlite and reuses entries", async () => { diff --git a/tests/duplicates.test.ts b/tests/duplicates.test.ts index e09354f2..4bea9651 100644 --- a/tests/duplicates.test.ts +++ b/tests/duplicates.test.ts @@ -114,6 +114,9 @@ export function normalizeInvoiceRows(rows: Array<{ amount: number; tax: number } expect(result.groups[0]?.primaryLeft.kind).toBe("symbol"); expect(result.groups[0]?.primaryRight.kind).toBe("symbol"); expect(result.suggestions?.length).toBeGreaterThan(result.groups.length); + // With includeRawPairs the variant list is unbounded, so coalescing must not + // report omitted variants from deduping merged groups. + expect(result.groups[0]?.omittedVariantCount).toBe(0); }); test("omits raw unit pairs unless requested", async () => { @@ -491,6 +494,53 @@ export class InvoiceNormalizer { } }); + test("coalesces repeated groups with the same primary ranges", async () => { + const root = await makeTempProject(); + const source = ` +export class InvoiceNormalizer { + normalizeDomestic(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); + } + + normalizeInternational(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); + } +} +`; + + await writeProjectFile(root, "src/a.ts", source); + await writeProjectFile(root, "src/b.ts", source); + + const index = await buildProjectIndex(root); + const result = await findDuplicates(index, { + includeSameFile: true, + minConfidence: "high", + limit: 20, + }); + const primaryPairKeys = result.groups.map((group) => { + const left = `${group.primaryLeft.file}:${group.primaryLeft.startLine}-${group.primaryLeft.endLine}`; + const right = `${group.primaryRight.file}:${group.primaryRight.startLine}-${group.primaryRight.endLine}`; + return [left, right].sort().join("="); + }); + + expect(result.groups.length).toBeGreaterThan(0); + expect(new Set(primaryPairKeys).size).toBe(primaryPairKeys.length); + }); + test("duplicates CLI emits bounded JSON groups", async () => { const root = await makeTempProject(); const source = ` diff --git a/tests/dynamic-resolution.test.ts b/tests/dynamic-resolution.test.ts index 192d3cd4..d6e835e4 100644 --- a/tests/dynamic-resolution.test.ts +++ b/tests/dynamic-resolution.test.ts @@ -1,16 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} - -function normalizeFile(p: string): string { - return p.replace(/\\/g, "/"); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; describe("Dynamic resolution heuristics", () => { it("resolves path.join(__dirname, ...) when enabled", async () => { @@ -25,7 +17,7 @@ describe("Dynamic resolution heuristics", () => { await fsp.writeFile(mainPath, source, "utf8"); await fsp.writeFile(utilPath, "module.exports = {};", "utf8"); - const graph = await collectGraph(root, [normalizeFile(mainPath), normalizeFile(utilPath)], { + const graph = await collectGraph(root, [normalizeTestPath(mainPath), normalizeTestPath(utilPath)], { dynamicImportHeuristics: true, }); @@ -48,7 +40,7 @@ describe("Dynamic resolution heuristics", () => { await fsp.writeFile(mainPath, `import Button from "components/button";\nconsole.log(Button);\n`, "utf8"); await fsp.writeFile(buttonPath, "export default function Button() {}", "utf8"); - const graph = await collectGraph(root, [normalizeFile(mainPath), normalizeFile(buttonPath)], { + const graph = await collectGraph(root, [normalizeTestPath(mainPath), normalizeTestPath(buttonPath)], { resolutionHints: ["src"], }); diff --git a/tests/fast-graph-edgecases.test.ts b/tests/fast-graph-edgecases.test.ts index 968654fb..d2ac3588 100644 --- a/tests/fast-graph-edgecases.test.ts +++ b/tests/fast-graph-edgecases.test.ts @@ -1,16 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} - -function edgeFrom(file: string) { - return (e: { from: string }) => e.from.replace(/\\/g, "/") === file.replace(/\\/g, "/"); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; +import { edgeFrom } from "./helpers/graph.js"; describe("Fast graph edge cases", () => { it("detects type-only import with typeOnly=true (TS)", async () => { @@ -21,7 +14,7 @@ describe("Fast graph edge cases", () => { const mainPath = path.join(root, "main.ts"); await fsp.writeFile(utilPath, util, "utf8"); await fsp.writeFile(mainPath, main, "utf8"); - const files = [mainPath.replace(/\\/g, "/"), utilPath.replace(/\\/g, "/")]; + const files = [normalizeTestPath(mainPath), normalizeTestPath(utilPath)]; const gNormal = await collectGraph(root, files); const gFast = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); diff --git a/tests/fast-graph.test.ts b/tests/fast-graph.test.ts index 2fdc5baf..eec56846 100644 --- a/tests/fast-graph.test.ts +++ b/tests/fast-graph.test.ts @@ -1,16 +1,12 @@ import { describe, it, expect, vi, afterEach } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph, type Edge } from "../src/index.js"; import * as nativeRuntime from "../src/native/treeSitterNative.js"; +import { mkTmpDir } from "./helpers/filesystem.js"; +import { graphEdgeKey } from "./helpers/graph.js"; import { getSamplePath } from "./test-utils.js"; -function normEdge(e: any) { - const toStr = (t: any) => (t.type === "file" ? t.path : t.name); - return { from: e.from.replace(/\\/g, "/"), to: toStr(e.to).replace(/\\/g, "/"), raw: e.raw, typeOnly: !!e.typeOnly }; -} - describe("Fast graph specifier extraction (--fast-graph)", () => { afterEach(() => { vi.restoreAllMocks(); @@ -26,12 +22,8 @@ describe("Fast graph specifier extraction (--fast-graph)", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); }); @@ -47,12 +39,8 @@ describe("Fast graph specifier extraction (--fast-graph)", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); }); it("matches regular graph edges for Python samples", async () => { @@ -66,17 +54,13 @@ describe("Fast graph specifier extraction (--fast-graph)", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); }); it("can miss multiline import edges that full parsing captures", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "dg-fast-graph-")); + const root = await mkTmpDir("dg-fast-graph-"); const entryPath = path.join(root, "entry.ts").replace(/\\/g, "/"); const depPath = path.join(root, "dep.ts").replace(/\\/g, "/"); await fsp.writeFile(entryPath, `import {\n value\n} from './dep';\n\nconsole.log(value);\n`, "utf8"); diff --git a/tests/helpers/filesystem.ts b/tests/helpers/filesystem.ts index 3a4a7fa8..70c341b3 100644 --- a/tests/helpers/filesystem.ts +++ b/tests/helpers/filesystem.ts @@ -1,3 +1,11 @@ +import path from "node:path"; +import os from "node:os"; +import fsp from "node:fs/promises"; + +export async function mkTmpDir(prefix: string): Promise { + return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); +} + export function isSymlinkUnavailable(error: unknown): boolean { return ( error instanceof Error && @@ -5,3 +13,7 @@ export function isSymlinkUnavailable(error: unknown): boolean { (error.code === "EPERM" || error.code === "EACCES" || error.code === "ENOTSUP") ); } + +export function normalizeTestPath(filePath: string): string { + return filePath.replace(/\\/g, "/"); +} diff --git a/tests/helpers/graph.ts b/tests/helpers/graph.ts new file mode 100644 index 00000000..21332a1f --- /dev/null +++ b/tests/helpers/graph.ts @@ -0,0 +1,15 @@ +import type { Edge } from "../../src/index.js"; +import { normalizeTestPath } from "./filesystem.js"; + +export function graphEdgeTargetKey(to: Edge["to"]): string { + if (to.type === "file") return normalizeTestPath(to.path); + return to.name; +} + +export function graphEdgeKey(edge: Edge): string { + return `${normalizeTestPath(edge.from)}|${graphEdgeTargetKey(edge.to)}|${edge.raw}`; +} + +export function edgeFrom(filePath: string): (edge: Pick) => boolean { + return (edge) => normalizeTestPath(edge.from) === normalizeTestPath(filePath); +} diff --git a/tests/monorepo-fast-graph.test.ts b/tests/monorepo-fast-graph.test.ts index 49e14647..ede6a5b4 100644 --- a/tests/monorepo-fast-graph.test.ts +++ b/tests/monorepo-fast-graph.test.ts @@ -1,11 +1,7 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; import { collectGraph } from "../src/index.js"; - -function normEdge(e: any) { - const toStr = (t: any) => (t.type === "file" ? t.path : t.name); - return { from: e.from.replace(/\\/g, "/"), to: toStr(e.to).replace(/\\/g, "/"), raw: e.raw, typeOnly: !!e.typeOnly }; -} +import { graphEdgeKey } from "./helpers/graph.js"; describe("Monorepo fast graph parity", () => { it("fast mode matches normal mode in monorepo sample", async () => { @@ -17,12 +13,8 @@ describe("Monorepo fast graph parity", () => { const g1 = await collectGraph(root, files); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { fast: true }); - const toKey = (to: unknown) => { - const t = to as { type: "file"; path: string } | { type: "external"; name: string }; - return t.type === "file" ? t.path : t.name; - }; - const aSet = new Set(g1.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); - const bSet = new Set(g2.edges.map((e) => `${e.from}|${toKey(e.to)}|${e.raw}`)); + const aSet = new Set(g1.edges.map(graphEdgeKey)); + const bSet = new Set(g2.edges.map(graphEdgeKey)); expect(bSet).toEqual(aSet); // Ensure workspace package edge is resolved to a file diff --git a/tests/node-modules-and-paths.test.ts b/tests/node-modules-and-paths.test.ts index e24ccda9..7c2c8bcd 100644 --- a/tests/node-modules-and-paths.test.ts +++ b/tests/node-modules-and-paths.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; describe("Node modules resolution (opt-in) and path normalization", () => { it("treats packages as external by default; resolves to file with flag", async () => { @@ -17,7 +13,7 @@ describe("Node modules resolution (opt-in) and path normalization", () => { await fsp.writeFile(path.join(nm, "package.json"), '{"name":"my-pkg","main":"index.js"}', "utf8"); const main = path.join(root, "main.js"); await fsp.writeFile(main, 'import "my-pkg";\n', "utf8"); - const files = [main].map((f) => f.replace(/\\/g, "/")); + const files = [main].map(normalizeTestPath); const g1 = await collectGraph(root, files); expect(g1.edges.some((e) => e.raw === "my-pkg" && e.to.type === "external")).toBe(true); const g2 = await (await import("../src/graphs.js")).collectGraph(root, files, { resolveNodeModules: true }); diff --git a/tests/parsed-cache-eviction.test.ts b/tests/parsed-cache-eviction.test.ts index 2f002d18..5de8fd98 100644 --- a/tests/parsed-cache-eviction.test.ts +++ b/tests/parsed-cache-eviction.test.ts @@ -1,13 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("parsed AST cache eviction", () => { it("limits parsed entries to configured max", async () => { diff --git a/tests/parsed-cache-reuse.test.ts b/tests/parsed-cache-reuse.test.ts index 8c710305..95966170 100644 --- a/tests/parsed-cache-reuse.test.ts +++ b/tests/parsed-cache-reuse.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex, type BuildReport } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("Parsed cache reuse", () => { it("reuses cached parse outputs on the second build", async () => { diff --git a/tests/resolution-precedence.test.ts b/tests/resolution-precedence.test.ts index 401c7d12..3377501d 100644 --- a/tests/resolution-precedence.test.ts +++ b/tests/resolution-precedence.test.ts @@ -1,13 +1,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("Resolution precedence", () => { it("prefers tsconfig paths over workspace package names", async () => { diff --git a/tests/robust-fast-graph.test.ts b/tests/robust-fast-graph.test.ts index bf7c9c7f..42474aa7 100644 --- a/tests/robust-fast-graph.test.ts +++ b/tests/robust-fast-graph.test.ts @@ -1,24 +1,16 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} - -function toKey(e: any) { - const toStr = (t: any) => (t.type === "file" ? t.path : t.name); - return `${e.from}|${toStr(e.to)}|${e.raw}`; -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; +import { graphEdgeKey } from "./helpers/graph.js"; describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { it("ignores require inside string literal", async () => { const root = await mkTmpDir("dg-fast-strings-"); const file = path.join(root, "a.ts"); await fsp.writeFile(file, `const s = 'require("x")';\n`, "utf8"); - const g = await (await import("../src/graphs.js")).collectGraph(root, [file.replace(/\\/g, "/")], { fast: true }); + const g = await (await import("../src/graphs.js")).collectGraph(root, [normalizeTestPath(file)], { fast: true }); expect(g.edges.length).toBe(0); }); @@ -26,7 +18,7 @@ describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { const root = await mkTmpDir("dg-fast-templates-"); const file = path.join(root, "a.ts"); await fsp.writeFile(file, "const s = `import('x')`\n", "utf8"); - const g = await (await import("../src/graphs.js")).collectGraph(root, [file.replace(/\\/g, "/")], { fast: true }); + const g = await (await import("../src/graphs.js")).collectGraph(root, [normalizeTestPath(file)], { fast: true }); expect(g.edges.length).toBe(0); }); @@ -36,9 +28,9 @@ describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { const main = path.join(root, "main.js"); await fsp.writeFile(a, "export const x = 1;\n", "utf8"); await fsp.writeFile(main, "async function run(){ await import('./a.js'); }\n", "utf8"); - const files = [main, a].map((f) => f.replace(/\\/g, "/")); + const files = [main, a].map(normalizeTestPath); const g = await collectGraph(root, files); - const keys = new Set(g.edges.map(toKey)); + const keys = new Set(g.edges.map(graphEdgeKey)); expect([...keys].some((k) => k.includes("main.js") && k.includes("./a.js"))).toBe(true); }); @@ -48,9 +40,9 @@ describe("Robust fast graph (strings/templates, dynamic import, CJS)", () => { const main = path.join(root, "main.js"); await fsp.writeFile(a, "exports.helper = () => 1;\n", "utf8"); await fsp.writeFile(main, "const { helper: h } = require('./a');\n", "utf8"); - const files = [main, a].map((f) => f.replace(/\\/g, "/")); + const files = [main, a].map(normalizeTestPath); const g = await collectGraph(root, files); - const keys = new Set(g.edges.map(toKey)); + const keys = new Set(g.edges.map(graphEdgeKey)); expect([...keys].some((k) => k.includes("main.js") && k.includes("./a"))).toBe(true); }); }); diff --git a/tests/sql-artifact-graph.test.ts b/tests/sql-artifact-graph.test.ts index 3106a6ca..6fd8bcc7 100644 --- a/tests/sql-artifact-graph.test.ts +++ b/tests/sql-artifact-graph.test.ts @@ -1,5 +1,4 @@ import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { describe, expect, it, vi } from "vitest"; import { collectGraph } from "../src/index.js"; @@ -8,6 +7,7 @@ import { buildSqlFactCache, buildSqlModuleIndex, collectSqlEdgesForFile } from " import { SymbolKind } from "../src/indexer/types.js"; import { computeFileSymbolHashes } from "../src/util/symbolHash.js"; import type { GraphCacheEntry } from "../src/graphs/types.js"; +import { mkTmpDir } from "./helpers/filesystem.js"; const fixtureRoot = path.resolve(process.cwd(), "tests", "samples", "sql", "graph"); const sqlFiles = ["001_create_users.sql", "002_alter_users.sql", "report.sql"].map((file) => @@ -49,7 +49,7 @@ describe("SQL artifact graph", () => { }); it("adds SQL-to-SQL edges for read dependencies inside write statements", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-dml-graph-")); + const root = await mkTmpDir("cg-sql-dml-graph-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const pipelineFile = path.join(root, "pipeline.sql").replace(/\\/g, "/"); @@ -101,7 +101,7 @@ describe("SQL artifact graph", () => { }); it("links schema-qualified SQL references to unqualified object definitions", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-qualified-edge-")); + const root = await mkTmpDir("cg-sql-qualified-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const publicSchemaFile = path.join(root, "public_schema.sql").replace(/\\/g, "/"); @@ -141,7 +141,7 @@ describe("SQL artifact graph", () => { }); it("emits heuristic SQL edges for basename fallback matches", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-fallback-edge-")); + const root = await mkTmpDir("cg-sql-fallback-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -165,7 +165,7 @@ describe("SQL artifact graph", () => { }); it("links unqualified SQL references to a single schema-qualified object definition", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-unqualified-qualified-edge-")); + const root = await mkTmpDir("cg-sql-unqualified-qualified-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -189,7 +189,7 @@ describe("SQL artifact graph", () => { }); it("does not guess ambiguous SQL edges from unqualified references to schema-qualified definitions", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-ambiguous-unqualified-edge-")); + const root = await mkTmpDir("cg-sql-ambiguous-unqualified-edge-"); try { const publicSchemaFile = path.join(root, "public_schema.sql").replace(/\\/g, "/"); const archiveSchemaFile = path.join(root, "archive_schema.sql").replace(/\\/g, "/"); @@ -211,7 +211,7 @@ describe("SQL artifact graph", () => { }); it("does not emit duplicate join edges for the primary FROM object", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-join-primary-edge-")); + const root = await mkTmpDir("cg-sql-join-primary-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -249,7 +249,7 @@ describe("SQL artifact graph", () => { }); it("does not emit SQL dependency self-edges inside a single SQL file", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-self-edge-")); + const root = await mkTmpDir("cg-sql-self-edge-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); await fsp.writeFile(schemaFile, "CREATE TABLE users (id integer);\nSELECT id FROM users;\n", "utf8"); @@ -267,7 +267,7 @@ describe("SQL artifact graph", () => { }); it("links SQL rename statements to the source definition without targeting the new name", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-rename-edge-")); + const root = await mkTmpDir("cg-sql-rename-edge-"); try { const oldSchemaFile = path.join(root, "old_schema.sql").replace(/\\/g, "/"); const unrelatedNewSchemaFile = path.join(root, "unrelated_new_schema.sql").replace(/\\/g, "/"); @@ -337,7 +337,7 @@ describe("SQL artifact graph", () => { }); it("includes related SQL objects in artifact graph candidate mentions", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-related-artifacts-")); + const root = await mkTmpDir("cg-sql-related-artifacts-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); await fsp.writeFile( @@ -376,7 +376,7 @@ describe("SQL artifact graph", () => { }); it("reuses SQL facts across per-file edge collection", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-cache-")); + const root = await mkTmpDir("cg-sql-cache-"); const files: string[] = []; try { for (let index = 0; index < 12; index += 1) { @@ -403,7 +403,7 @@ describe("SQL artifact graph", () => { }); it("bounds concurrent SQL fact reads when building the corpus cache", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-cache-concurrency-")); + const root = await mkTmpDir("cg-sql-cache-concurrency-"); const files: string[] = []; try { for (let index = 0; index < 40; index += 1) { @@ -440,7 +440,7 @@ describe("SQL artifact graph", () => { }); it("reuses SQL edge cache entries when the SQL corpus signature is unchanged", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-edge-cache-reuse-")); + const root = await mkTmpDir("cg-sql-edge-cache-reuse-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -502,7 +502,7 @@ describe("SQL artifact graph", () => { }); it("rejects stale SQL edge cache entries when another SQL file changes", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-edge-cache-invalidate-")); + const root = await mkTmpDir("cg-sql-edge-cache-invalidate-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -546,7 +546,7 @@ describe("SQL artifact graph", () => { }); it("recomputes unchanged SQL query edges when changed SQL definitions add targets", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-incremental-add-")); + const root = await mkTmpDir("cg-sql-incremental-add-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); @@ -581,7 +581,7 @@ describe("SQL artifact graph", () => { }); it("removes unchanged SQL query edges when changed SQL definitions drop targets", async () => { - const root = await fsp.mkdtemp(path.join(os.tmpdir(), "cg-sql-incremental-remove-")); + const root = await mkTmpDir("cg-sql-incremental-remove-"); try { const schemaFile = path.join(root, "schema.sql").replace(/\\/g, "/"); const reportFile = path.join(root, "report.sql").replace(/\\/g, "/"); diff --git a/tests/sql-review-context.test.ts b/tests/sql-review-context.test.ts index 67843a43..6b6948e2 100644 --- a/tests/sql-review-context.test.ts +++ b/tests/sql-review-context.test.ts @@ -1,13 +1,9 @@ import fs from "node:fs/promises"; -import os from "node:os"; import path from "node:path"; import { describe, expect, it, vi } from "vitest"; import { buildReviewReport } from "../src/index.js"; import { collectSqlReviewContext } from "../src/sql/index.js"; - -async function mkTmpDir(): Promise { - return await fs.mkdtemp(path.join(os.tmpdir(), "cg-sql-review-")); -} +import { mkTmpDir } from "./helpers/filesystem.js"; async function rmTmpDir(root: string): Promise { await fs.rm(root, { recursive: true, force: true }); @@ -21,7 +17,7 @@ async function writeFile(filePath: string, contents: string): Promise { describe("SQL review context", () => { it("surfaces SQL candidates for changed code SQL literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const schema = await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const repo = await writeFile( @@ -44,7 +40,7 @@ describe("SQL review context", () => { }); it("includes changed SQL files without creating source dependency impact", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const migration = await writeFile( path.join(root, "db", "migrations", "20190101000000_legacy.sql"), @@ -68,7 +64,7 @@ describe("SQL review context", () => { }); it("keeps seed SQL visible only when the seed file changes", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const seed = await writeFile(path.join(root, "db", "seeds", "users.sql"), "INSERT INTO users (id) VALUES (1);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), "export const ok = true;\n"); @@ -90,7 +86,7 @@ describe("SQL review context", () => { }); it("does not surface stale SQL for unrelated code-only reviews", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile( path.join(root, "db", "migrations", "20190101000000_legacy.sql"), @@ -108,7 +104,7 @@ describe("SQL review context", () => { }); it("skips the SQL corpus when changed code has no SQL-like text", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), "export const count = 1;\n"); @@ -126,7 +122,7 @@ describe("SQL review context", () => { }); it("does not treat ES module imports as SQL literal hints", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), 'import users from "./users";\n'); @@ -144,7 +140,7 @@ describe("SQL review context", () => { }); it("does not treat a bare with token as a SQL literal hint", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), 'export const label = "created with care";\n'); @@ -162,7 +158,7 @@ describe("SQL review context", () => { }); it("does not treat a bare update phrase as a SQL literal hint", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "app.ts"), 'export const label = "update x";\n'); @@ -180,7 +176,7 @@ describe("SQL review context", () => { }); it("does not treat a bare select identifier as a SQL literal hint", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile( @@ -201,7 +197,7 @@ describe("SQL review context", () => { }); it("uses provided project files when matching code SQL literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile(path.join(root, "src", "userRepo.ts"), "export const query = `SELECT * FROM users`;\n"); @@ -216,7 +212,7 @@ describe("SQL review context", () => { }); it("keeps full SQL discovery for sparse reviews that include a changed SQL file", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const schema = await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const migration = await writeFile(path.join(root, "db", "migrations", "20260513000000_orders.sql"), [ @@ -247,7 +243,7 @@ describe("SQL review context", () => { }); it("bounds concurrent SQL fact reads when matching changed code literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { for (let index = 0; index < 40; index += 1) { await writeFile(path.join(root, "db", `schema_${index}.sql`), `CREATE TABLE table_${index} (id integer);\n`); @@ -287,7 +283,7 @@ describe("SQL review context", () => { }); it("surfaces SQL candidates for changed code update literals", async () => { - const root = await mkTmpDir(); + const root = await mkTmpDir("cg-sql-review-"); try { const schema = await writeFile(path.join(root, "db", "schema.sql"), "CREATE TABLE users (id integer);\n"); const code = await writeFile( diff --git a/tests/sqlite.test.ts b/tests/sqlite.test.ts index 55d6a28a..bcaf7a1e 100644 --- a/tests/sqlite.test.ts +++ b/tests/sqlite.test.ts @@ -1,6 +1,5 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { DatabaseSync } from "node:sqlite"; import { @@ -11,11 +10,7 @@ import { queryGraphSqlite, queryGraphSqliteRaw, } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - const dir = await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); - return dir; -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; const dbQuery = (db: DatabaseSync, sql: string): string[] => { const stmt = db.prepare(sql); @@ -326,20 +321,20 @@ export const run = () => helper(); fileGraph: nextIndex.graph, symbolGraph: nextSgraph, outputPath: dbPath, - changedFiles: [path.join(root, "main.ts").replace(/\\/g, "/")], - deletedFiles: [path.join(root, "util.ts").replace(/\\/g, "/")], + changedFiles: [normalizeTestPath(path.join(root, "main.ts"))], + deletedFiles: [normalizeTestPath(path.join(root, "util.ts"))], fullGraphSync: true, }); const db = new DatabaseSync(dbPath); const utilFiles = dbQuery( db, - `SELECT path FROM files WHERE path = '${path.join(root, "util.ts").replace(/\\/g, "/")}';`, + `SELECT path FROM files WHERE path = '${normalizeTestPath(path.join(root, "util.ts"))}';`, ); const utilSymbols = dbQuery(db, "SELECT name FROM symbols WHERE name = 'helper';"); const staleEdges = dbQuery( db, - `SELECT to_path FROM file_edges WHERE to_path = '${path.join(root, "util.ts").replace(/\\/g, "/")}';`, + `SELECT to_path FROM file_edges WHERE to_path = '${normalizeTestPath(path.join(root, "util.ts"))}';`, ); expect(utilFiles).toEqual([]); @@ -481,8 +476,8 @@ export const run = () => helper(); fileGraph: nextIndex.graph, symbolGraph: nextSgraph, outputPath: dbPath, - changedFiles: [path.join(root, "main.ts").replace(/\\/g, "/")], - deletedFiles: [path.join(root, "util.ts").replace(/\\/g, "/")], + changedFiles: [normalizeTestPath(path.join(root, "main.ts"))], + deletedFiles: [normalizeTestPath(path.join(root, "util.ts"))], fullGraphSync: true, }); diff --git a/tests/symbols-detailed-edgecases.test.ts b/tests/symbols-detailed-edgecases.test.ts index 4a4b5dd3..c7c3dd99 100644 --- a/tests/symbols-detailed-edgecases.test.ts +++ b/tests/symbols-detailed-edgecases.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { buildProjectIndex } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir } from "./helpers/filesystem.js"; describe("Symbols-detailed edge cases", () => { it("membersOnly keeps namespace member edges but drops direct alias uses", async () => { diff --git a/tests/ts-paths-workspace.test.ts b/tests/ts-paths-workspace.test.ts index 496cdb49..ae181baa 100644 --- a/tests/ts-paths-workspace.test.ts +++ b/tests/ts-paths-workspace.test.ts @@ -1,12 +1,8 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import os from "node:os"; import fsp from "node:fs/promises"; import { collectGraph } from "../src/index.js"; - -async function mkTmpDir(prefix: string): Promise { - return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); -} +import { mkTmpDir, normalizeTestPath } from "./helpers/filesystem.js"; describe("TypeScript paths/baseUrl resolution via tsconfig", () => { it("resolves @lib/util to lib/util.ts using tsconfig paths", async () => { @@ -28,7 +24,7 @@ describe("TypeScript paths/baseUrl resolution via tsconfig", () => { const main = path.join(root, "main.ts"); await fsp.writeFile(util, "export const fn = () => 1;\n", "utf8"); await fsp.writeFile(main, "import { fn } from '@lib/util';\nconst x = fn();\n", "utf8"); - const files = [util, main].map((f) => f.replace(/\\/g, "/")); + const files = [util, main].map(normalizeTestPath); const g = await collectGraph(root, files); expect( g.edges.some(