diff --git a/README.md b/README.md index ca86f695..4fecf0bd 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ The supported package import surface is the root export, `@lzehrung/codegraph`. ## Common workflows - Repo triage: run `codegraph inspect ./src --limit 20`, then follow with `codegraph hotspots ./src --limit 20` or `codegraph unresolved` to focus the next pass. -- Duplicate cleanup: run `codegraph duplicates ./src --min-confidence medium` before refactors to find shared extraction candidates. +- Duplicate cleanup: run `codegraph duplicates ./src --min-confidence medium` before refactors to find grouped extraction candidates. - Symbol navigation: use `codegraph goto ` and `codegraph refs --file --line --col --pretty` when a question is about definitions or semantic usages rather than matching strings. - PR review: run `codegraph impact --base origin/main --head HEAD --pretty` for a ranked map, `codegraph review --base origin/main --head HEAD --summary` for a compact reviewer handoff with actionable candidate tests, or redirect plain `review` output when a downstream tool needs the full JSON bundle. - Worktree review: run `codegraph impact --base HEAD --head WORKTREE --pretty` for current staged and unstaged tracked-file changes, then `codegraph review --base HEAD --head WORKTREE --summary` for a compact handoff. Use `--head STAGED` to compare `HEAD` against the current index. @@ -287,7 +287,7 @@ npm run test:ci `npm run test:ci` writes a Vitest JSON timing report and prints a slow-test summary. Tests over 2 seconds are review-required, and tests over 10 seconds should be treated as integration-tier candidates unless they have a documented reason. -Use `npm run test:fast` for the shorter non-integration suite. Use `npm run test:integration` for CLI and native-runtime integration coverage. +Use `npm test` or `npm run test:fast` for the shorter non-integration suite. Use `npm run test:integration` for CLI and native-runtime integration coverage. If you are touching the native workspace directly, also run `npm run build:native` and `npm run test:native`. Benchmark harness coverage lives behind `npm run test:bench`. diff --git a/codegraph-skill/codegraph/SKILL.md b/codegraph-skill/codegraph/SKILL.md index 50afa023..9550586a 100644 --- a/codegraph-skill/codegraph/SKILL.md +++ b/codegraph-skill/codegraph/SKILL.md @@ -214,9 +214,13 @@ For git-provider impact and git-scoped review/index/graph commands, `WORKTREE` c - Duplicate and near-duplicate code: `codegraph duplicates --root . ./src --min-confidence medium` Covers indexed symbols, semantic chunks, and text chunks. + Reports grouped findings by default so overlapping symbol/chunk variants collapse into one clone. + Uses duplicate JSON `schemaVersion: 2`. + Bounds per-group variants by default and reports hidden evidence with counts. A single positional directory becomes the project root unless `--root` is set. Use `--include-small` for tiny helpers. Use `--include-same-file` for local clone cleanup. + Use `--raw-pairs` to include low-level scored unit-pair suggestions. - Unresolved project imports: `codegraph unresolved` Excludes graph-only document/template link edges plus known runtime/package externals: supported-language standard libraries, URL imports, and dependencies declared in nearby manifests such as `package.json`, Python, PHP, Rust, Go, Zig, Ruby, Java/Kotlin, .NET, C/C++, and Swift package manifests. diff --git a/docs/cli.md b/docs/cli.md index adcc9fa2..a48299c7 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -150,6 +150,7 @@ codegraph chunk config.yaml --language yaml --min-tokens 100 --max-tokens 300 # Detect duplicate and near-duplicate code units codegraph duplicates ./src --min-confidence medium --limit 20 codegraph duplicates --root . ./src ./packages/app --include-same-file +codegraph duplicates ./src --raw-pairs codegraph duplicates --help # Go to definition @@ -166,14 +167,18 @@ codegraph grep --query '(function_declaration name: (identifier) @name)' codegraph grep --pattern 'eval\(' --ignore-case ``` -`duplicates` always reports scored exact, renamed, near, and weak clone candidates as JSON. +`duplicates` always reports grouped exact, renamed, near, and weak clone candidates as JSON. - It combines indexed symbols, semantic chunks, and text chunks. -- It reports project-relative paths, confidence, clone type, metrics, omission counts, and pair stats. +- It emits `schemaVersion: 2` for grouped duplicate output. +- It reports project-relative paths, confidence, clone type, metrics, variant counts, omission counts, and pair stats. +- Groups collapse overlapping symbol/chunk variants so one underlying clone appears as one finding. +- Group `variants` are bounded by default; use `rawPairCount` and `omittedVariantCount` to see hidden evidence counts. - A single positional directory becomes the project root unless `--root` is set. - Use `--root . ./src` for scoped scans with repository-relative paths. - Use `--include-small` for tiny helpers. - Use `--include-same-file` for non-overlapping clones inside one file. +- Use `--raw-pairs` when debugging the low-level pair evidence behind each group. `search`, `explain`, `artifact`, and `mcp` each support command-specific `--help` output so agents do not have to infer their options from the top-level help. `search` is deterministic and vectorless. It returns ranked results with project-relative stable handles, rank reasons, evidence, graph neighbors, follow-up commands, result counts, per-packet limits, and omission counts. `explain` resolves file paths, symbol names, SQL object names, and search handles, including file/chunk/graph handles, into bounded packets with symbols, dependencies, reverse dependencies, references, snippets, SQL object relation facts, changed-context review tasks/candidate tests, explicit limits, omission counts, and follow-ups. Generated follow-up and suggested-question commands POSIX-shell-quote dynamic arguments when needed. SQL object names resolve by exact name first; unqualified basenames resolve only when unique, so handles or schema-qualified names are preferred. Reference and snippet omission counts are lower bounds after the bounded navigation scan reaches its cap. `artifact build` writes `codegraph.sqlite`, self-describing project-relative `graph.json`, `CODEGRAPH_REPORT.md`, `questions.json`, and `manifest.json` by default; suggested questions use unique IDs backed by stable handles when a handle is available. Use artifact flags to select a subset. `--force` permits non-empty output directories, removes recognizable stale Codegraph artifacts, preserves unrelated operator files, and refuses unrecognized reserved-name collisions. Artifact contents exclude their own output directory and linked outside-root files. `mcp serve` exposes `search`, `get_file`, `get_symbol`, `goto`, `refs`, `deps`, `rdeps`, `path`, `impact`, `review`, `query_sqlite`, and `artifact_build` over stdio by default or Streamable HTTP with `--port `. HTTP serves `/mcp`, binds to `127.0.0.1` unless `--host ` is passed, validates the Host header, allows loopback Host headers for wildcard binds, and rejects oversized request bodies. MCP file and artifact paths are confined to `--root` after realpath resolution; tools are read-only by default, `query_sqlite` is row- and byte-bounded and rejects synthetic payload functions, and `--allow-build` enables artifact output only. `chunk` uses semantic Tree-sitter chunking for registered source and stylesheet languages, Vue and Svelte block-aware chunking for single-file components, and text chunking for JSON, YAML, and unsupported extensions. Use `--text` to force text chunking. diff --git a/docs/library-api.md b/docs/library-api.md index c0732f43..a47ba608 100644 --- a/docs/library-api.md +++ b/docs/library-api.md @@ -218,7 +218,10 @@ The integration examples demonstrate semantic chunking with type-based filtering `findDuplicates()` scans a built `ProjectIndex` for exact, renamed, near, and weak clone candidates. - It uses indexed symbols, semantic chunks, and text chunks. -- Results include confidence, score, clone type, metrics, omission counts, and pair stats. +- Grouped duplicate output uses `schemaVersion: 2`. +- Results include grouped findings, confidence, score, clone type, metrics, omission counts, and pair stats. +- Group `variants` are bounded by default and expose hidden evidence through `rawPairCount` and `omittedVariantCount`. +- Raw unit-pair suggestions and full group variants are available when `includeRawPairs` is enabled. - Paths are project-relative when the index has a project root. ```ts @@ -231,7 +234,7 @@ const duplicates = await findDuplicates(index, { limit: 20, }); -console.log(duplicates.suggestions); +console.log(duplicates.groups); ``` Useful options: @@ -239,6 +242,7 @@ Useful options: - `minConfidence`: `high`, `medium`, or `low`; default `medium`. - `includeSameFile`: report non-overlapping clones in the same file. - `includeSmall`: include units below the default token floor. +- `includeRawPairs`: include low-level symbol/chunk pair evidence as `suggestions`. - `minTokens` and `maxTokens`: tune unit and fallback chunk bounds. Tests: diff --git a/package.json b/package.json index 00bcc190..23141089 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ "lint": "npx eslint \"src/**/*.ts\" \"tests/**/*.test.ts\"", "lint:fix": "npx eslint \"src/**/*.ts\" \"tests/**/*.test.ts\" --fix", "prepare": "node ./scripts/prepare-package.mjs", - "test": "node ./scripts/ensure-dist-for-tests.mjs && vitest", + "test": "npm run test:fast", "test:all": "npm run test:native && npm run test:ci", "test:fast": "node ./scripts/ensure-dist-for-tests.mjs && vitest run --exclude tests/bench-harness.test.ts --exclude tests/cli-regressions.test.ts --exclude tests/impact-cli.test.ts --exclude tests/esm-language-loading.test.ts --exclude tests/native-fallback-reporting.test.ts --exclude tests/native-tree-sitter.test.ts --exclude tests/native-semantic-parity.test.ts --exclude tests/native-worker-parity.test.ts --exclude tests/native-parser-ownership.test.ts --exclude tests/detailed-symbol-native-only.test.ts", "test:integration": "node ./scripts/ensure-dist-for-tests.mjs && vitest run tests/cli-regressions.test.ts tests/impact-cli.test.ts tests/esm-language-loading.test.ts tests/native-fallback-reporting.test.ts tests/native-tree-sitter.test.ts tests/native-semantic-parity.test.ts tests/native-worker-parity.test.ts tests/native-parser-ownership.test.ts tests/detailed-symbol-native-only.test.ts", diff --git a/src/agent-tools.ts b/src/agent-tools.ts index 7dabc3c5..a87a0541 100644 --- a/src/agent-tools.ts +++ b/src/agent-tools.ts @@ -13,7 +13,7 @@ import { analyzeImpactFromDiff } from "./impact/index.js"; import type { CompactImpactReport, ImpactOptions, ImpactReport } from "./impact/types.js"; import type { Edge, Range } from "./types.js"; import { collectGraph } from "./graph-builder.js"; -import { getDependencies, getReverseDependencies } from "./graphs/queries.js"; +import { getDependencies, getReverseDependencies, type DependencyNode } from "./graphs/queries.js"; import { getHotspots } from "./graphs/hotspots.js"; import type { NativeRuntimeMode } from "./native/treeSitterNative.js"; import { fileExists } from "./util/workspace.js"; @@ -92,6 +92,30 @@ export type ToolDependencyEntry = { depth: number; }; +type ToolDependencyOptions = ToolRuntimeOptions & { + depth?: number; + limit?: number; +}; + +type ToolDependencyListResult = + | { + status: "ok"; + file: string; + entries: ToolDependencyEntry[]; + truncated: boolean; + } + | { + status: "not_found"; + file: string; + reason: "file_not_found" | "file_not_indexed"; + error: string; + } + | { + status: "error"; + error: string; + reason?: "outside_project_root"; + }; + /** File-level hotspot metrics returned by `tool_getHotspots`. */ export type ToolHotspotEntry = { file: string; @@ -342,12 +366,7 @@ export async function tool_getGraph( export async function tool_getDependencies( root: string, filePath: string, - options: { - depth?: number; - limit?: number; - index?: ProjectIndex; - native?: NativeRuntimeMode; - } = {}, + options: ToolDependencyOptions = {}, ): Promise< | { status: "ok"; @@ -367,37 +386,18 @@ export async function tool_getDependencies( reason?: "outside_project_root"; } > { - try { - const resolvedFile = resolveToolFileInput(root, filePath); - if (resolvedFile.status === "error") { - return resolvedFile; - } - - const index = await getToolIndex(root, options); - const missing = await getToolMissingFileResult(index, resolvedFile.absPath, resolvedFile.relativeFile); - if (missing) { - return missing; - } - - const limit = getToolLimit(options.limit) ?? 20; - const dependencies = getDependencies(index.graph, resolvedFile.absPath, { - ...(options.depth !== undefined ? { depth: options.depth } : {}), - limit: limit + 1, - }); - const limited = boundAgentList(dependencies, limit).items.map((entry) => ({ - file: normalizeToolFileOutput(root, entry.file), - depth: entry.depth, - })); - - return { - status: "ok", - file: resolvedFile.relativeFile, - dependencies: limited, - truncated: dependencies.length !== limited.length, - }; - } catch (error) { - return { status: "error", error: String(error) }; + const result = await collectToolDependencyEntries(root, filePath, options, (index, file, traversalOptions) => + getDependencies(index.graph, file, traversalOptions), + ); + if (result.status !== "ok") { + return result; } + return { + status: "ok", + file: result.file, + dependencies: result.entries, + truncated: result.truncated, + }; } /** @@ -406,12 +406,7 @@ export async function tool_getDependencies( export async function tool_getReverseDependencies( root: string, filePath: string, - options: { - depth?: number; - limit?: number; - index?: ProjectIndex; - native?: NativeRuntimeMode; - } = {}, + options: ToolDependencyOptions = {}, ): Promise< | { status: "ok"; @@ -431,6 +426,30 @@ export async function tool_getReverseDependencies( reason?: "outside_project_root"; } > { + const result = await collectToolDependencyEntries(root, filePath, options, (index, file, traversalOptions) => + getReverseDependencies(index.graph, file, traversalOptions), + ); + if (result.status !== "ok") { + return result; + } + return { + status: "ok", + file: result.file, + dependents: result.entries, + truncated: result.truncated, + }; +} + +async function collectToolDependencyEntries( + root: string, + filePath: string, + options: ToolDependencyOptions, + collectEntries: ( + index: ProjectIndex, + file: string, + traversalOptions: { depth?: number; limit: number }, + ) => DependencyNode[], +): Promise { try { const resolvedFile = resolveToolFileInput(root, filePath); if (resolvedFile.status === "error") { @@ -444,11 +463,11 @@ export async function tool_getReverseDependencies( } const limit = getToolLimit(options.limit) ?? 20; - const dependents = getReverseDependencies(index.graph, resolvedFile.absPath, { + const entries = collectEntries(index, resolvedFile.absPath, { ...(options.depth !== undefined ? { depth: options.depth } : {}), limit: limit + 1, }); - const limited = boundAgentList(dependents, limit).items.map((entry) => ({ + const limited = boundAgentList(entries, limit).items.map((entry) => ({ file: normalizeToolFileOutput(root, entry.file), depth: entry.depth, })); @@ -456,8 +475,8 @@ export async function tool_getReverseDependencies( return { status: "ok", file: resolvedFile.relativeFile, - dependents: limited, - truncated: dependents.length !== limited.length, + entries: limited, + truncated: entries.length !== limited.length, }; } catch (error) { return { status: "error", error: String(error) }; diff --git a/src/agent/artifact.ts b/src/agent/artifact.ts index 207db125..d6702175 100644 --- a/src/agent/artifact.ts +++ b/src/agent/artifact.ts @@ -4,6 +4,7 @@ import { getHotspots } from "../graphs/hotspots.js"; import { type SymbolNode } from "../graphs/symbol-graph.js"; import { defNodeId } from "../graphs/symbol-graph.js"; import { queryGraphSqliteRaw, writeGraphSqlite } from "../sqlite.js"; +import { isPlainRecord } from "../util/guards.js"; import { isFilePathWithinRoot, normalizePath, toProjectRelativePath } from "../util/paths.js"; import { formatAgentSqlHandle, formatAgentSymbolHandle } from "./handles.js"; import { createAgentFileLookup, normalizeAgentFilePath } from "./normalize.js"; @@ -266,9 +267,9 @@ async function fileExists(filePath: string): Promise { async function readCodegraphManifest(outDir: string): Promise { const value = await readJsonIfPresent(path.join(outDir, MANIFEST_FILE)); - if (!isRecord(value)) return undefined; + if (!isPlainRecord(value)) return undefined; if (value.schemaVersion !== 1 || value.graphJsonSchema !== "codegraph.graph-json") return undefined; - if (!isRecord(value.artifacts)) return undefined; + if (!isPlainRecord(value.artifacts)) return undefined; const artifacts: CodegraphArtifactBuildResult["artifacts"] = {}; for (const [key, artifactFile] of Object.entries(value.artifacts)) { if (typeof artifactFile !== "string") continue; @@ -297,11 +298,11 @@ async function isRecognizedCodegraphArtifact(filePath: string, fileName: string) } if (fileName === GRAPH_JSON_FILE) { const value = await readJsonIfPresent(filePath); - return isRecord(value) && value.format === "codegraph.graph-json"; + return isPlainRecord(value) && value.format === "codegraph.graph-json"; } if (fileName === QUESTIONS_FILE) { const value = await readJsonIfPresent(filePath); - return isRecord(value) && value.format === "codegraph.questions"; + return isPlainRecord(value) && value.format === "codegraph.questions"; } if (fileName === REPORT_FILE) { try { @@ -370,10 +371,6 @@ async function readJsonIfPresent(filePath: string): Promise { } } -function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - async function readDirectoryIfPresent(outDir: string): Promise { try { return await fs.readdir(outDir); diff --git a/src/cli/doctor.ts b/src/cli/doctor.ts index 9834c980..0dcdd2aa 100644 --- a/src/cli/doctor.ts +++ b/src/cli/doctor.ts @@ -11,6 +11,7 @@ import { pathExists, type CodegraphPackageIdentity, } from "./packageInfo.js"; +import { isPlainRecord } from "../util/guards.js"; type IndexedArtifactReport = { type: "jsonGraph" | "sqliteGraph" | "diskCache" | "artifactBundle" | "unknown"; @@ -40,9 +41,9 @@ function statIfExists(filePath: string): fs.Stats | null { function readArtifactManifest(dirPath: string): { artifacts: Record } | null { try { const parsed = JSON.parse(fs.readFileSync(path.join(dirPath, "manifest.json"), "utf8")); - if (!isRecord(parsed)) return null; + if (!isPlainRecord(parsed)) return null; if (parsed.schemaVersion !== 1 || parsed.graphJsonSchema !== "codegraph.graph-json") return null; - if (!isRecord(parsed.artifacts)) return null; + if (!isPlainRecord(parsed.artifacts)) return null; const artifacts: Record = {}; for (const [key, value] of Object.entries(parsed.artifacts)) { if (typeof value === "string") artifacts[key] = value; @@ -53,10 +54,6 @@ function readArtifactManifest(dirPath: string): { artifacts: Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - function detectIndexedArtifactType(filePath: string, stats: fs.Stats | null): IndexedArtifactReport["type"] { if (stats?.isDirectory() && readArtifactManifest(filePath)) { return "artifactBundle"; diff --git a/src/cli/duplicates.ts b/src/cli/duplicates.ts index 88221012..90ae48fc 100644 --- a/src/cli/duplicates.ts +++ b/src/cli/duplicates.ts @@ -32,6 +32,7 @@ function parseDuplicateDetectionOptions(context: DuplicatesCommandContext): Dupl maxBucketSize: parsePositiveIntegerOption(context.getOpt("--max-bucket-size"), "--max-bucket-size", 200), ...(context.hasFlag("--include-same-file") ? { includeSameFile: true } : {}), ...(context.hasFlag("--include-small") ? { includeSmall: true } : {}), + ...(context.hasFlag("--raw-pairs") ? { includeRawPairs: true } : {}), }; if (options.maxTokens !== undefined && options.minTokens !== undefined && options.maxTokens < options.minTokens) { diff --git a/src/cli/graphQueries.ts b/src/cli/graphQueries.ts index 662cf827..9829124c 100644 --- a/src/cli/graphQueries.ts +++ b/src/cli/graphQueries.ts @@ -12,15 +12,12 @@ import { } from "../graphs/queries.js"; import { type GraphBuildOptions } from "../graphs/types.js"; import type { Graph } from "../types.js"; -import { assertFilePathWithinRoot, toProjectDisplayPath } from "../util/paths.js"; +import { toProjectDisplayPath } from "../util/paths.js"; import { parseOptionalNonNegativeIntegerOption } from "./options.js"; +import { resolveCliProjectFile, writeCliProjectFileError } from "./projectFile.js"; export type GraphQueryCommand = "deps" | "rdeps" | "path" | "cycles" | "unresolved" | "apisurface"; -type CliProjectFileInput = - | { status: "ok"; file: string } - | { status: "error"; reason: "outside_project_root"; error: string }; - export type GraphQueryCommandContext = { command: GraphQueryCommand; positionals: string[]; @@ -44,33 +41,6 @@ type LoadedGraph = { adjacency?: GraphAdjacencyIndex; }; -function resolveCliProjectFile(projectRoot: string, fileArg: string, label: string): CliProjectFileInput { - try { - return { - status: "ok", - file: assertFilePathWithinRoot(projectRoot, fileArg, label), - }; - } catch (error) { - return { - status: "error", - reason: "outside_project_root", - error: error instanceof Error ? error.message : String(error), - }; - } -} - -function writeCliProjectFileError( - context: Pick, - result: Extract, - output: "json" | "text" = "json", -): void { - if (output === "json") { - context.writeJSONLine(result); - return; - } - context.writeStdoutLine(`error: ${result.reason}: ${result.error}`); -} - async function loadGraph(context: GraphQueryCommandContext): Promise { if (context.collectGraph) { const graph = await context.collectGraph( diff --git a/src/cli/help.ts b/src/cli/help.ts index 79807d83..8f167d57 100644 --- a/src/cli/help.ts +++ b/src/cli/help.ts @@ -199,7 +199,7 @@ Defaults: export const DUPLICATES_HELP_TEXT = `codegraph duplicates - Detect duplicate and near-duplicate code units -Usage: codegraph duplicates [path ...] [--root ] [--min-confidence high|medium|low] [--limit ] [--include-same-file] [--include-small] +Usage: codegraph duplicates [path ...] [--root ] [--min-confidence high|medium|low] [--limit ] [--include-same-file] [--include-small] [--raw-pairs] Path behavior: A single positional directory becomes the project root when --root is omitted. @@ -207,15 +207,19 @@ Path behavior: Options: --min-confidence Minimum confidence to report. Defaults to medium. - --limit Maximum suggestions to return. Defaults to 50. + --limit Maximum duplicate groups to return. Defaults to 50. --include-same-file Report non-overlapping clones in the same file. --include-small Include units below the default token floor. + --raw-pairs Include low-level scored unit pairs as suggestions. --min-tokens Minimum unit tokens. Defaults to 40. --max-tokens Maximum fallback chunk tokens. Defaults to 800. --max-bucket-size Skip candidate buckets larger than this value. Defaults to 200. Output: - Always emits JSON with scored suggestions, confidence, clone type, metrics, omission counts, and pair stats. + Always emits JSON with grouped duplicate findings, confidence, clone type, metrics, omission counts, and pair stats. + Grouped duplicate output uses schemaVersion 2. + Group variants are bounded by default and include rawPairCount/omittedVariantCount for hidden evidence. + Use --raw-pairs to include the underlying scored unit-pair suggestions. `; export function helpTextForCommand(command: string, positionals: readonly string[]): string | undefined { diff --git a/src/cli/navigation.ts b/src/cli/navigation.ts index b5ec25a1..89ba4952 100644 --- a/src/cli/navigation.ts +++ b/src/cli/navigation.ts @@ -1,13 +1,10 @@ import { buildProjectIndex } from "../indexer/build-index.js"; import { findReferences, goToDefinition } from "../indexer/navigation.js"; import type { NativeRuntimeMode } from "../native/treeSitterNative.js"; -import { assertFilePathWithinRoot, toProjectDisplayPath } from "../util/paths.js"; +import { toProjectDisplayPath } from "../util/paths.js"; import { type ProjectFileDiscoveryOptions } from "../util/projectFiles.js"; import { parsePositiveIntegerOption } from "./options.js"; - -type CliProjectFileInput = - | { status: "ok"; file: string } - | { status: "error"; reason: "outside_project_root"; error: string }; +import { resolveCliProjectFile, writeCliProjectFileError } from "./projectFile.js"; export type NavigationCommandContext = { projectRootFs: string; @@ -24,33 +21,6 @@ export type NavigationCommandContext = { exit: (code: number) => never; }; -function resolveCliProjectFile(projectRoot: string, fileArg: string, label: string): CliProjectFileInput { - try { - return { - status: "ok", - file: assertFilePathWithinRoot(projectRoot, fileArg, label), - }; - } catch (error) { - return { - status: "error", - reason: "outside_project_root", - error: error instanceof Error ? error.message : String(error), - }; - } -} - -function writeCliProjectFileError( - context: Pick, - result: Extract, - output: "json" | "text" = "json", -): void { - if (output === "json") { - context.writeJSONLine(result); - return; - } - context.writeStdoutLine(`error: ${result.reason}: ${result.error}`); -} - function indexOptions(context: NavigationCommandContext) { return { onProgress: context.progressHandler, diff --git a/src/cli/options.ts b/src/cli/options.ts index 0497072b..bb636da8 100644 --- a/src/cli/options.ts +++ b/src/cli/options.ts @@ -95,25 +95,46 @@ function parseIntegerOptionValue( return parsedValue; } -export function parsePositiveIntegerOption( +function parseDefaultedIntegerOption( rawValue: string | undefined, optionName: string, defaultValue: number, + expectedDescription: string, + minValue: number, + maxValue?: number, ): number { if (rawValue === undefined) { return defaultValue; } - return parseIntegerOptionValue(rawValue, optionName, "a positive integer", 1); + return parseIntegerOptionValue(rawValue, optionName, expectedDescription, minValue, maxValue); } -export function parseOptionalPositiveIntegerOption( +function parseOptionalIntegerOption( rawValue: string | undefined, optionName: string, + expectedDescription: string, + minValue: number, + maxValue?: number, ): number | undefined { if (rawValue === undefined) { return undefined; } - return parseIntegerOptionValue(rawValue, optionName, "a positive integer", 1); + return parseIntegerOptionValue(rawValue, optionName, expectedDescription, minValue, maxValue); +} + +export function parsePositiveIntegerOption( + rawValue: string | undefined, + optionName: string, + defaultValue: number, +): number { + return parseDefaultedIntegerOption(rawValue, optionName, defaultValue, "a positive integer", 1); +} + +export function parseOptionalPositiveIntegerOption( + rawValue: string | undefined, + optionName: string, +): number | undefined { + return parseOptionalIntegerOption(rawValue, optionName, "a positive integer", 1); } export function parseNonNegativeIntegerOption( @@ -121,20 +142,14 @@ export function parseNonNegativeIntegerOption( optionName: string, defaultValue: number, ): number { - if (rawValue === undefined) { - return defaultValue; - } - return parseIntegerOptionValue(rawValue, optionName, "a non-negative integer", 0); + return parseDefaultedIntegerOption(rawValue, optionName, defaultValue, "a non-negative integer", 0); } export function parseOptionalNonNegativeIntegerOption( rawValue: string | undefined, optionName: string, ): number | undefined { - if (rawValue === undefined) { - return undefined; - } - return parseIntegerOptionValue(rawValue, optionName, "a non-negative integer", 0); + return parseOptionalIntegerOption(rawValue, optionName, "a non-negative integer", 0); } export function parseBoundedIntegerOption( @@ -144,12 +159,10 @@ export function parseBoundedIntegerOption( minValue: number, maxValue: number, ): number { - if (rawValue === undefined) { - return defaultValue; - } - return parseIntegerOptionValue( + return parseDefaultedIntegerOption( rawValue, optionName, + defaultValue, `an integer from ${minValue} to ${maxValue}`, minValue, maxValue, @@ -162,10 +175,7 @@ export function parseOptionalBoundedIntegerOption( minValue: number, maxValue: number, ): number | undefined { - if (rawValue === undefined) { - return undefined; - } - return parseIntegerOptionValue( + return parseOptionalIntegerOption( rawValue, optionName, `an integer from ${minValue} to ${maxValue}`, diff --git a/src/cli/projectFile.ts b/src/cli/projectFile.ts new file mode 100644 index 00000000..ffb20a8e --- /dev/null +++ b/src/cli/projectFile.ts @@ -0,0 +1,39 @@ +import { assertFilePathWithinRoot } from "../util/paths.js"; + +export type CliProjectFileInput = + | { status: "ok"; file: string } + | { status: "error"; reason: "outside_project_root"; error: string }; + +export type CliProjectFileErrorOutput = "json" | "text"; + +export type CliProjectFileErrorContext = { + writeJSONLine: (value: unknown) => void; + writeStdoutLine: (message: string) => void; +}; + +export function resolveCliProjectFile(projectRoot: string, fileArg: string, label: string): CliProjectFileInput { + try { + return { + status: "ok", + file: assertFilePathWithinRoot(projectRoot, fileArg, label), + }; + } catch (error) { + return { + status: "error", + reason: "outside_project_root", + error: error instanceof Error ? error.message : String(error), + }; + } +} + +export function writeCliProjectFileError( + context: CliProjectFileErrorContext, + result: Extract, + output: CliProjectFileErrorOutput = "json", +): void { + if (output === "json") { + context.writeJSONLine(result); + return; + } + context.writeStdoutLine(`error: ${result.reason}: ${result.error}`); +} diff --git a/src/duplicates.ts b/src/duplicates.ts index 4223659c..e8a99722 100644 --- a/src/duplicates.ts +++ b/src/duplicates.ts @@ -42,6 +42,21 @@ export type DuplicateSuggestion = { reasons: string[]; }; +export type DuplicateGroup = { + id: string; + score: number; + confidence: DuplicateConfidence; + cloneType: DuplicateCloneType; + primaryLeft: DuplicateUnitRef; + primaryRight: DuplicateUnitRef; + variants: DuplicateSuggestion[]; + variantCount: number; + rawPairCount: number; + omittedVariantCount: number; + metrics: DuplicateMetrics; + reasons: string[]; +}; + export type DuplicateDetectionOptions = { projectRoot?: string; files?: readonly string[]; @@ -55,10 +70,14 @@ export type DuplicateDetectionOptions = { maxBucketSize?: number; shingleSize?: number; windowSize?: number; + includeRawPairs?: boolean; }; export type DuplicateDetectionOmittedCounts = { + groups: number; + /** @deprecated Use `groups`; retained as a compatibility alias. */ suggestions: number; + rawSuggestions: number; oversizedBuckets: number; belowThresholdUnits: number; overlappingPairs: number; @@ -70,13 +89,20 @@ export type DuplicateDetectionStats = { }; export type DuplicateDetectionResult = { - schemaVersion: 1; + schemaVersion: 2; units: number; - suggestions: DuplicateSuggestion[]; + groups: DuplicateGroup[]; + suggestions?: DuplicateSuggestion[]; omittedCounts: DuplicateDetectionOmittedCounts; stats: DuplicateDetectionStats; }; +type UnitCluster = { + id: string; + refs: DuplicateUnitRef[]; + primary: DuplicateUnitRef; +}; + type DuplicateInternalUnit = DuplicateUnitRef & { id: string; absoluteFile: string; @@ -110,6 +136,7 @@ const DEFAULT_MIN_TOKENS = 40; const DEFAULT_MAX_TOKENS = 800; const DEFAULT_LIMIT = 50; const DEFAULT_MAX_BUCKET_SIZE = 200; +const DEFAULT_GROUP_VARIANT_LIMIT = 5; const DEFAULT_SHINGLE_SIZE = 5; const DEFAULT_WINDOW_SIZE = 4; const DEFAULT_MAX_FINGERPRINTS = 128; @@ -135,6 +162,13 @@ const confidenceRank: Record = { high: 3, }; +const cloneTypeRank: Record = { + weak: 1, + near: 2, + renamed: 3, + exact: 4, +}; + const symbolUnitKinds = new Set([ SymbolKind.Function, SymbolKind.Class, @@ -284,6 +318,22 @@ function lineSpan(unit: Pick): number return Math.max(1, unit.endLine - unit.startLine + 1); } +function lineOverlap( + left: Pick, + right: Pick, +): number { + const startLine = Math.max(left.startLine, right.startLine); + const endLine = Math.min(left.endLine, right.endLine); + return Math.max(0, endLine - startLine + 1); +} + +function rangesSubstantiallyOverlap(left: DuplicateUnitRef, right: DuplicateUnitRef): boolean { + if (left.file !== right.file || left.languageId !== right.languageId) return false; + const overlap = lineOverlap(left, right); + if (!overlap) return false; + return overlap / Math.min(lineSpan(left), lineSpan(right)) >= 0.8; +} + function ratio(left: number, right: number): number { if (!left || !right) return 0; return Math.min(left, right) / Math.max(left, right); @@ -305,20 +355,36 @@ function normalizeConfidence(value: DuplicateConfidence | undefined): DuplicateC return value ?? "medium"; } -function normalizePositiveIntegerOption(value: number | undefined, optionName: string, fallback: number): number { +function normalizeIntegerOption( + value: number | undefined, + optionName: string, + fallback: number, + minValue: number, + expectedDescription: string, +): number { const resolved = value ?? fallback; - if (!Number.isInteger(resolved) || resolved < 1) { - throw new Error(`Invalid ${optionName} value "${String(resolved)}". Expected a positive integer.`); + if (!Number.isInteger(resolved) || resolved < minValue) { + throw new Error(`Invalid ${optionName} value "${String(resolved)}". Expected ${expectedDescription}.`); } return resolved; } +function normalizePositiveIntegerOption(value: number | undefined, optionName: string, fallback: number): number { + return normalizeIntegerOption(value, optionName, fallback, 1, "a positive integer"); +} + function normalizeNonNegativeIntegerOption(value: number | undefined, optionName: string, fallback: number): number { - const resolved = value ?? fallback; - if (!Number.isInteger(resolved) || resolved < 0) { - throw new Error(`Invalid ${optionName} value "${String(resolved)}". Expected a non-negative integer.`); - } - return resolved; + return normalizeIntegerOption(value, optionName, fallback, 0, "a non-negative integer"); +} + +function bestConfidence(left: DuplicateConfidence, right: DuplicateConfidence): DuplicateConfidence { + if (confidenceRank[left] >= confidenceRank[right]) return left; + return right; +} + +function bestCloneType(left: DuplicateCloneType, right: DuplicateCloneType): DuplicateCloneType { + if (cloneTypeRank[left] >= cloneTypeRank[right]) return left; + return right; } function confidenceForScore(score: number): DuplicateConfidence { @@ -784,6 +850,222 @@ function unitRef(unit: DuplicateInternalUnit): DuplicateUnitRef { }; } +function unitRefIdentity(ref: DuplicateUnitRef): string { + return [ + ref.file, + ref.startLine, + ref.endLine, + ref.languageId, + ref.kind, + ref.name ?? "", + ref.symbolKind ?? "", + ].join("\u0000"); +} + +function compareUnitRefs(left: DuplicateUnitRef, right: DuplicateUnitRef): number { + const fileCompare = left.file.localeCompare(right.file); + if (fileCompare) return fileCompare; + const startCompare = left.startLine - right.startLine; + if (startCompare) return startCompare; + const endCompare = left.endLine - right.endLine; + if (endCompare) return endCompare; + return (left.name ?? "").localeCompare(right.name ?? ""); +} + +function unitPrimaryRank(ref: DuplicateUnitRef): number { + let rank = 0; + if (ref.kind === "symbol") rank += 8; + if (ref.name) rank += 4; + if (ref.symbolKind !== undefined) rank += 2; + return rank; +} + +function comparePrimaryUnitRefs(left: DuplicateUnitRef, right: DuplicateUnitRef): number { + const rankCompare = unitPrimaryRank(right) - unitPrimaryRank(left); + if (rankCompare) return rankCompare; + const spanCompare = lineSpan(left) - lineSpan(right); + if (spanCompare) return spanCompare; + return compareUnitRefs(left, right); +} + +function suggestionPrimaryRank(suggestion: DuplicateSuggestion): number { + let rank = 0; + if (suggestion.left.kind === "symbol") rank += 8; + if (suggestion.right.kind === "symbol") rank += 8; + if (suggestion.left.name) rank += 2; + if (suggestion.right.name) rank += 2; + return rank; +} + +function compareSuggestions(left: DuplicateSuggestion, right: DuplicateSuggestion): number { + const scoreCompare = right.score - left.score; + if (scoreCompare) return scoreCompare; + const confidenceCompare = confidenceRank[right.confidence] - confidenceRank[left.confidence]; + if (confidenceCompare) return confidenceCompare; + const cloneTypeCompare = cloneTypeRank[right.cloneType] - cloneTypeRank[left.cloneType]; + if (cloneTypeCompare) return cloneTypeCompare; + const leftFileCompare = left.left.file.localeCompare(right.left.file); + if (leftFileCompare) return leftFileCompare; + const rightFileCompare = left.right.file.localeCompare(right.right.file); + if (rightFileCompare) return rightFileCompare; + return left.left.startLine - right.left.startLine; +} + +function compareSuggestionsForPrimary(left: DuplicateSuggestion, right: DuplicateSuggestion): number { + const rankCompare = suggestionPrimaryRank(right) - suggestionPrimaryRank(left); + if (rankCompare) return rankCompare; + return compareSuggestions(left, right); +} + +function createUnitClusters(refs: readonly DuplicateUnitRef[]): Map { + const uniqueRefs = new Map(); + for (const ref of refs) uniqueRefs.set(unitRefIdentity(ref), ref); + const parent = new Map(); + for (const key of uniqueRefs.keys()) parent.set(key, key); + const find = (key: string): string => { + const current = parent.get(key); + if (current === undefined || current === key) return key; + const root = find(current); + parent.set(key, root); + return root; + }; + const union = (left: string, right: string): void => { + const leftRoot = find(left); + const rightRoot = find(right); + if (leftRoot === rightRoot) return; + if (leftRoot < rightRoot) { + parent.set(rightRoot, leftRoot); + return; + } + parent.set(leftRoot, rightRoot); + }; + const refsByFile = new Map>(); + for (const [key, ref] of uniqueRefs) { + const fileKey = `${ref.file}\u0000${ref.languageId}`; + const existing = refsByFile.get(fileKey); + if (existing) existing.push({ key, ref }); + else refsByFile.set(fileKey, [{ key, ref }]); + } + for (const fileRefs of refsByFile.values()) { + fileRefs.sort((left, right) => compareUnitRefs(left.ref, right.ref)); + for (let i = 0; i < fileRefs.length; i++) { + const left = fileRefs[i]!; + for (let j = i + 1; j < fileRefs.length; j++) { + const right = fileRefs[j]!; + if (right.ref.startLine > left.ref.endLine) break; + if (rangesSubstantiallyOverlap(left.ref, right.ref)) union(left.key, right.key); + } + } + } + const refsByRoot = new Map(); + for (const [key, ref] of uniqueRefs) { + const root = find(key); + const existing = refsByRoot.get(root); + if (existing) existing.push(ref); + else refsByRoot.set(root, [ref]); + } + const clustersByRef = new Map(); + for (const refsInCluster of refsByRoot.values()) { + refsInCluster.sort(comparePrimaryUnitRefs); + const primary = refsInCluster[0]!; + const cluster = { id: shortHashText(unitRefIdentity(primary)), refs: refsInCluster, primary }; + for (const ref of refsInCluster) clustersByRef.set(unitRefIdentity(ref), cluster); + } + return clustersByRef; +} + +function singletonUnitCluster(ref: DuplicateUnitRef): UnitCluster { + return { id: shortHashText(unitRefIdentity(ref)), refs: [ref], primary: ref }; +} + +function orderedGroupKey(left: UnitCluster, right: UnitCluster): string { + if (left.id < right.id) return `${left.id}\u0000${right.id}`; + return `${right.id}\u0000${left.id}`; +} + +function mergeReasons(suggestions: readonly DuplicateSuggestion[]): string[] { + const reasons = new Set(); + for (const suggestion of suggestions) { + for (const reason of suggestion.reasons) reasons.add(reason); + } + return Array.from(reasons).sort(); +} + +function groupForSuggestions( + key: string, + suggestions: DuplicateSuggestion[], + left: UnitCluster, + right: UnitCluster, + variantLimit: number, +): DuplicateGroup { + suggestions.sort(compareSuggestionsForPrimary); + const primary = suggestions[0]!; + const variants = suggestions.slice(0, variantLimit); + let score = primary.score; + let confidence = primary.confidence; + let cloneType = primary.cloneType; + for (const suggestion of suggestions.slice(1)) { + score = Math.max(score, suggestion.score); + confidence = bestConfidence(confidence, suggestion.confidence); + cloneType = bestCloneType(cloneType, suggestion.cloneType); + } + return { + id: shortHashText(key), + score, + confidence, + cloneType, + primaryLeft: left.primary, + primaryRight: right.primary, + variants, + variantCount: variants.length, + rawPairCount: suggestions.length, + omittedVariantCount: Math.max(0, suggestions.length - variants.length), + metrics: primary.metrics, + reasons: mergeReasons(suggestions), + }; +} + +function compareGroups(left: DuplicateGroup, right: DuplicateGroup): number { + const scoreCompare = right.score - left.score; + if (scoreCompare) return scoreCompare; + const confidenceCompare = confidenceRank[right.confidence] - confidenceRank[left.confidence]; + if (confidenceCompare) return confidenceCompare; + const cloneTypeCompare = cloneTypeRank[right.cloneType] - cloneTypeRank[left.cloneType]; + if (cloneTypeCompare) return cloneTypeCompare; + const tokenCompare = + right.primaryLeft.tokenCount + right.primaryRight.tokenCount - (left.primaryLeft.tokenCount + left.primaryRight.tokenCount); + if (tokenCompare) return tokenCompare; + const leftCompare = compareUnitRefs(left.primaryLeft, right.primaryLeft); + if (leftCompare) return leftCompare; + return compareUnitRefs(left.primaryRight, right.primaryRight); +} + +function groupSuggestions(suggestions: readonly DuplicateSuggestion[], includeRawPairs: boolean): DuplicateGroup[] { + const refs = suggestions.flatMap((suggestion) => [suggestion.left, suggestion.right]); + const clusters = createUnitClusters(refs); + const variantLimit = includeRawPairs ? Number.POSITIVE_INFINITY : DEFAULT_GROUP_VARIANT_LIMIT; + const suggestionsByGroup = new Map(); + for (const suggestion of suggestions) { + let leftCluster = clusters.get(unitRefIdentity(suggestion.left)); + let rightCluster = clusters.get(unitRefIdentity(suggestion.right)); + if (!leftCluster || !rightCluster) continue; + if (leftCluster.id === rightCluster.id) { + if (rangesSubstantiallyOverlap(suggestion.left, suggestion.right)) continue; + leftCluster = singletonUnitCluster(suggestion.left); + rightCluster = singletonUnitCluster(suggestion.right); + } + const key = orderedGroupKey(leftCluster, rightCluster); + const existing = suggestionsByGroup.get(key); + if (existing) existing.suggestions.push(suggestion); + else suggestionsByGroup.set(key, { left: leftCluster, right: rightCluster, suggestions: [suggestion] }); + } + const groups = Array.from(suggestionsByGroup, ([key, value]) => + groupForSuggestions(key, value.suggestions, value.left, value.right, variantLimit), + ); + groups.sort(compareGroups); + return groups; +} + /** Finds scored duplicate candidates from an already-built project index. */ export async function findDuplicates( index: ProjectIndex, @@ -835,23 +1117,21 @@ export async function findDuplicates( suggestions.push(suggestion); } - suggestions.sort((left, right) => { - const scoreCompare = right.score - left.score; - if (scoreCompare) return scoreCompare; - const leftFileCompare = left.left.file.localeCompare(right.left.file); - if (leftFileCompare) return leftFileCompare; - const rightFileCompare = left.right.file.localeCompare(right.right.file); - if (rightFileCompare) return rightFileCompare; - return left.left.startLine - right.left.startLine; - }); + suggestions.sort(compareSuggestions); - const limitedSuggestions = suggestions.slice(0, limit); - return { - schemaVersion: 1, + const includeRawPairs = options.includeRawPairs ?? false; + const groups = groupSuggestions(suggestions, includeRawPairs); + const limitedGroups = groups.slice(0, limit); + const omittedGroups = Math.max(0, groups.length - limitedGroups.length); + const limitedRawSuggestions = includeRawPairs ? suggestions.slice(0, limit) : []; + const result: DuplicateDetectionResult = { + schemaVersion: 2, units: units.length, - suggestions: limitedSuggestions, + groups: limitedGroups, omittedCounts: { - suggestions: Math.max(0, suggestions.length - limitedSuggestions.length), + groups: omittedGroups, + suggestions: omittedGroups, + rawSuggestions: Math.max(0, suggestions.length - limitedRawSuggestions.length), oversizedBuckets, belowThresholdUnits, overlappingPairs, @@ -861,4 +1141,8 @@ export async function findDuplicates( candidatePairs: pairs.size, }, }; + if (includeRawPairs) { + result.suggestions = limitedRawSuggestions; + } + return result; } diff --git a/src/graphs/traversal.ts b/src/graphs/traversal.ts index 63ba80f7..c21bec92 100644 --- a/src/graphs/traversal.ts +++ b/src/graphs/traversal.ts @@ -9,10 +9,13 @@ import { getFiniteNonNegativeLimit } from "./limits.js"; export type DependencyNode = { file: FileId; depth: number }; -export function getDependencies( +type NeighborProvider = (adjacency: GraphAdjacencyIndex, file: FileId) => readonly FileId[]; + +function walkDependencies( graph: Graph, startFile: FileId, - opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex } = {}, + opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex }, + getNeighbors: NeighborProvider, ): DependencyNode[] { const maxDepth = opts.depth ?? Number.POSITIVE_INFINITY; const finiteLimit = getFiniteNonNegativeLimit(opts.limit); @@ -37,7 +40,7 @@ export function getDependencies( } if (depth >= maxDepth) continue; - for (const neighbor of getForwardNeighbors(adjacency, file)) { + for (const neighbor of getNeighbors(adjacency, file)) { if (!visited.has(neighbor)) { visited.add(neighbor); queue.push({ file: neighbor, depth: depth + 1 }); @@ -47,42 +50,20 @@ export function getDependencies( return out; } +export function getDependencies( + graph: Graph, + startFile: FileId, + opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex } = {}, +): DependencyNode[] { + return walkDependencies(graph, startFile, opts, getForwardNeighbors); +} + export function getReverseDependencies( graph: Graph, targetFile: FileId, opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex } = {}, ): DependencyNode[] { - const maxDepth = opts.depth ?? Number.POSITIVE_INFINITY; - const finiteLimit = getFiniteNonNegativeLimit(opts.limit); - const maxResults = finiteLimit ?? Number.POSITIVE_INFINITY; - if (maxResults === 0) { - return []; - } - const out: DependencyNode[] = []; - const visited = new Set(); - const queue: Array<{ file: string; depth: number }> = [{ file: targetFile, depth: 0 }]; - const adjacency = opts.adjacency ?? graphAdjacencyFor(graph); - visited.add(targetFile); - - let index = 0; - while (index < queue.length) { - const { file, depth } = queue[index++]!; - if (depth > 0) { - out.push({ file, depth }); - if (out.length >= maxResults) { - break; - } - } - if (depth >= maxDepth) continue; - - for (const neighbor of getReverseNeighbors(adjacency, file)) { - if (!visited.has(neighbor)) { - visited.add(neighbor); - queue.push({ file: neighbor, depth: depth + 1 }); - } - } - } - return out; + return walkDependencies(graph, targetFile, opts, getReverseNeighbors); } export function getShortestPath( diff --git a/src/impact/reportCompact.ts b/src/impact/reportCompact.ts index b349e3a7..171bca80 100644 --- a/src/impact/reportCompact.ts +++ b/src/impact/reportCompact.ts @@ -1,5 +1,6 @@ import type { FileId } from "../types.js"; import { type ProjectIndex } from "../indexer/types.js"; +import { mapExportSummary, mapReexportChains, mapTopImpacts } from "./reportShared.js"; import { IMPACT_SCHEMA_VERSION } from "./types.js"; import type { ChangedSymbol, @@ -194,10 +195,7 @@ function buildCompactExportSummary( ): Pick { if (!exportSummary.length) return {}; return { - exportSummary: exportSummary.map((entry) => ({ - file: fileId(entry.file), - symbols: entry.symbols, - })), + exportSummary: mapExportSummary(exportSummary, fileId), }; } @@ -206,14 +204,10 @@ function buildCompactReexportChains( fileId: (file: FileId) => number, ): Pick { if (!reexportChains) return {}; + const mappedChains = mapReexportChains(reexportChains, fileId); + if (!mappedChains) return {}; return { - reexportChains: { - chains: reexportChains.chains.map((entry) => ({ - symbol: entry.symbol, - file: fileId(entry.file), - paths: entry.paths.map((pathChain) => pathChain.map((file) => fileId(file))), - })), - }, + reexportChains: mappedChains, }; } @@ -223,16 +217,7 @@ function buildCompactTopImpacts( ): Pick { if (!topImpacts.length) return {}; return { - topImpacts: topImpacts.map((item) => ({ - file: fileId(item.file), - symbols: item.symbols, - reasons: item.reasons, - severity: item.severity, - ...(item.confidence !== undefined ? { confidence: item.confidence } : {}), - ...(item.depth !== undefined ? { depth: item.depth } : {}), - ...(item.typeOnly !== undefined ? { typeOnly: item.typeOnly } : {}), - ...(item.explain ? { explain: item.explain } : {}), - })), + topImpacts: mapTopImpacts(topImpacts, fileId), }; } diff --git a/src/impact/reportFull.ts b/src/impact/reportFull.ts index 43d2e65f..1ed5dc7f 100644 --- a/src/impact/reportFull.ts +++ b/src/impact/reportFull.ts @@ -1,5 +1,6 @@ import type { FileId } from "../types.js"; import { type ProjectIndex } from "../indexer/types.js"; +import { mapExportSummary, mapReexportChains, mapTopImpacts } from "./reportShared.js"; import { IMPACT_SCHEMA_VERSION } from "./types.js"; import type { ChangedSymbol, @@ -112,10 +113,7 @@ function buildFullExportSummary( ): Pick { if (!exportSummary.length) return {}; return { - exportSummary: exportSummary.map((entry) => ({ - ...entry, - file: displayFile(entry.file), - })), + exportSummary: mapExportSummary(exportSummary, displayFile), }; } @@ -124,14 +122,10 @@ function buildFullReexportChains( displayFile: (file: FileId) => FileId, ): Pick { if (!reexportChains) return {}; + const mappedChains = mapReexportChains(reexportChains, displayFile); + if (!mappedChains) return {}; return { - reexportChains: { - chains: reexportChains.chains.map((entry) => ({ - ...entry, - file: displayFile(entry.file), - paths: entry.paths.map((pathChain) => pathChain.map((file) => displayFile(file))), - })), - }, + reexportChains: mappedChains, }; } @@ -141,10 +135,7 @@ function buildFullTopImpacts( ): Pick { if (!topImpacts.length) return {}; return { - topImpacts: topImpacts.map((item) => ({ - ...item, - file: displayFile(item.file), - })), + topImpacts: mapTopImpacts(topImpacts, displayFile), }; } diff --git a/src/impact/reportShared.ts b/src/impact/reportShared.ts new file mode 100644 index 00000000..a50d748b --- /dev/null +++ b/src/impact/reportShared.ts @@ -0,0 +1,49 @@ +import type { FileId } from "../types.js"; +import type { ExportSummaryEntry, ImpactTopItem, ReexportChainEntry } from "./types.js"; + +export type MappedExportSummaryEntry = Omit & { + file: TFile; +}; + +export type MappedReexportChainEntry = Omit & { + file: TFile; + paths: TFile[][]; +}; + +export type MappedImpactTopItem = Omit & { + file: TFile; +}; + +export function mapExportSummary( + exportSummary: readonly ExportSummaryEntry[], + mapFile: (file: FileId) => TFile, +): Array> { + return exportSummary.map((entry) => ({ + ...entry, + file: mapFile(entry.file), + })); +} + +export function mapReexportChains( + reexportChains: { chains: ReexportChainEntry[] } | undefined, + mapFile: (file: FileId) => TFile, +): { chains: Array> } | undefined { + if (!reexportChains) return undefined; + return { + chains: reexportChains.chains.map((entry) => ({ + ...entry, + file: mapFile(entry.file), + paths: entry.paths.map((pathChain) => pathChain.map((file) => mapFile(file))), + })), + }; +} + +export function mapTopImpacts( + topImpacts: readonly ImpactTopItem[], + mapFile: (file: FileId) => TFile, +): Array> { + return topImpacts.map((item) => ({ + ...item, + file: mapFile(item.file), + })); +} diff --git a/src/index.ts b/src/index.ts index 11794243..01918b20 100644 --- a/src/index.ts +++ b/src/index.ts @@ -325,6 +325,7 @@ export { type DuplicateDetectionOptions, type DuplicateDetectionResult, type DuplicateDetectionStats, + type DuplicateGroup, type DuplicateMetrics, type DuplicateSuggestion, type DuplicateUnitKind, diff --git a/src/indexer/imports/languageSpecific.ts b/src/indexer/imports/languageSpecific.ts index 4e32cbe6..b2bc6afa 100644 --- a/src/indexer/imports/languageSpecific.ts +++ b/src/indexer/imports/languageSpecific.ts @@ -253,27 +253,8 @@ async function applyJavaStatementOverride( ): Promise { const parsed = parseJavaImportStatement(normalizedStmt); if (!parsed) return false; - - const resolved = await context.resolveFrom(parsed.from); - if (parsed.kind === "star") { - context.pushBinding({ - kind: "star", - from: parsed.from, - resolved, - typeOnly, - }); - return true; - } - - context.pushBinding({ - kind: "named", - local: parsed.imported, - imported: parsed.imported, - from: parsed.from, - resolved, - typeOnly, - }); - return true; + const local = parsed.kind === "named" ? parsed.imported : undefined; + return await pushJvmImportBinding(context, parsed, local, typeOnly); } async function applyKotlinStatementOverride( @@ -283,7 +264,16 @@ async function applyKotlinStatementOverride( ): Promise { const parsed = parseKotlinImportStatement(normalizedStmt); if (!parsed) return false; + const local = parsed.kind === "named" ? parsed.local : undefined; + return await pushJvmImportBinding(context, parsed, local, typeOnly); +} +async function pushJvmImportBinding( + context: LanguageSpecificImportContext, + parsed: { kind: "star"; from: string } | { kind: "named"; from: string; imported: string }, + local: string | undefined, + typeOnly: boolean, +): Promise { const resolved = await context.resolveFrom(parsed.from); if (parsed.kind === "star") { context.pushBinding({ @@ -297,7 +287,7 @@ async function applyKotlinStatementOverride( context.pushBinding({ kind: "named", - local: parsed.local, + local: local ?? parsed.imported, imported: parsed.imported, from: parsed.from, resolved, diff --git a/src/indexer/shared.ts b/src/indexer/shared.ts index 27ac98da..f7a8555a 100644 --- a/src/indexer/shared.ts +++ b/src/indexer/shared.ts @@ -1,7 +1,4 @@ -import path from "node:path"; - -import { normalizePath } from "../util/paths.js"; -import type { Edge } from "../types.js"; +export { compareEdges, edgeKey, toRelativeEdge } from "../util/graphEdges.js"; export const DEFAULT_REF_CONTEXT_LINES = 5; @@ -23,41 +20,3 @@ export function parseGoImportAlias(stmtText: string): string | null { const match = importBody.match(/^([._A-Za-z][\w]*)\s+["'`]/); return match?.[1] ?? null; } - -export function edgeKey(edge: Edge): string { - const toKey = edge.to.type === "file" ? `file:${edge.to.path}` : `external:${edge.to.name}`; - const typeOnly = edge.typeOnly ? "1" : "0"; - return `${edge.from}|${toKey}|${edge.raw}|${typeOnly}`; -} - -export function compareEdges(left: Edge, right: Edge): number { - const fromCompare = left.from.localeCompare(right.from); - if (fromCompare !== 0) return fromCompare; - if (left.to.type !== right.to.type) { - return left.to.type === "file" ? -1 : 1; - } - const leftTo = left.to.type === "file" ? left.to.path : left.to.name; - const rightTo = right.to.type === "file" ? right.to.path : right.to.name; - const toCompare = leftTo.localeCompare(rightTo); - if (toCompare !== 0) return toCompare; - const rawCompare = left.raw.localeCompare(right.raw); - if (rawCompare !== 0) return rawCompare; - const leftTypeOnly = left.typeOnly ? 1 : 0; - const rightTypeOnly = right.typeOnly ? 1 : 0; - return leftTypeOnly - rightTypeOnly; -} - -export function toRelativeEdge(projectRoot: string, edge: Edge): Edge { - return { - from: normalizePath(path.relative(projectRoot, edge.from)), - to: - edge.to.type === "file" - ? { - type: "file", - path: normalizePath(path.relative(projectRoot, edge.to.path)), - } - : edge.to, - raw: edge.raw, - ...(edge.typeOnly ? { typeOnly: edge.typeOnly } : {}), - }; -} diff --git a/src/languages/definitions/c.ts b/src/languages/definitions/c.ts index 38880e21..c58f74f6 100644 --- a/src/languages/definitions/c.ts +++ b/src/languages/definitions/c.ts @@ -1,90 +1,18 @@ -import type { LanguageDefinition, SyntaxNodeLike } from "../types.js"; +import type { LanguageDefinition } from "../types.js"; import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; +import { + cFamilyContainerTypes, + cFunctionNameQuery, + findAncestor, + isFunctionDeclarator, + isInAncestorDeclarator, + isInField, + isInParameterList, +} from "./cFamily.js"; -const FUNCTION_NAME_QUERY = ` - declarator: [ - (function_declarator declarator: (identifier) @chunk.name) - (function_declarator declarator: (pointer_declarator declarator: (identifier) @chunk.name)) - (pointer_declarator declarator: (function_declarator declarator: (identifier) @chunk.name)) - (function_declarator declarator: (parenthesized_declarator (pointer_declarator declarator: (identifier) @chunk.name))) - ] -`; - -const GRAPH_FUNCTION_NAME_QUERY = ` - declarator: [ - (function_declarator declarator: (identifier) @name) - (function_declarator declarator: (pointer_declarator declarator: (identifier) @name)) - (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)) - (function_declarator declarator: (parenthesized_declarator (pointer_declarator declarator: (identifier) @name))) - ] -`; - -const isWithin = (node: SyntaxNodeLike, ancestor: SyntaxNodeLike | null): boolean => { - let current: SyntaxNodeLike | null = node; - while (current) { - if (ancestor && current.id === ancestor.id) return true; - current = current.parent; - } - return false; -}; - -const isInField = (node: SyntaxNodeLike, parent: SyntaxNodeLike, field: string): boolean => - isWithin(node, parent.childForFieldName(field)); - -const findAncestor = (node: SyntaxNodeLike, types: Set): SyntaxNodeLike | null => { - let current: SyntaxNodeLike | null = node.parent; - while (current) { - if (types.has(current.type)) return current; - current = current.parent; - } - return null; -}; - -const containerTypes = new Set([ - "function_definition", - "declaration", - "parameter_declaration", - "field_declaration", - "type_definition", - "init_declarator", -]); - -const paramListTypes = new Set(["parameter_declaration", "parameter_list"]); - -const isInParameterList = (node: SyntaxNodeLike): boolean => !!findAncestor(node, paramListTypes); - -const resolveDeclaratorRoot = (ancestor: SyntaxNodeLike): SyntaxNodeLike | null => { - let declaratorNode = ancestor.childForFieldName("declarator"); - if (!declaratorNode) return null; - if (declaratorNode.type === "init_declarator") { - const inner = declaratorNode.childForFieldName("declarator"); - if (inner) declaratorNode = inner; - } - if (declaratorNode.type === "function_declarator") { - const inner = declaratorNode.childForFieldName("declarator"); - if (inner) declaratorNode = inner; - } - return declaratorNode; -}; - -const isInAncestorDeclarator = (node: SyntaxNodeLike, ancestorTypes: Set): boolean => { - const ancestor = findAncestor(node, ancestorTypes); - if (!ancestor) return false; - const declaratorNode = resolveDeclaratorRoot(ancestor); - if (!declaratorNode) return false; - return isWithin(node, declaratorNode); -}; - -const isFunctionDeclarator = (node: SyntaxNodeLike): boolean => { - let current: SyntaxNodeLike | null = node.parent; - while (current) { - if (current.type === "function_declarator") return true; - if (containerTypes.has(current.type)) return false; - current = current.parent; - } - return false; -}; +const FUNCTION_NAME_QUERY = cFunctionNameQuery("chunk.name", false); +const GRAPH_FUNCTION_NAME_QUERY = cFunctionNameQuery("name", false); export const C_DEF: LanguageDefinition = { id: "c", @@ -186,7 +114,7 @@ export const C_DEF: LanguageDefinition = { if (parent.type === "struct_specifier" || parent.type === "union_specifier" || parent.type === "enum_specifier") return "class"; if (parent.type === "type_definition" && isInField(node, parent, "declarator")) return "type"; - const container = findAncestor(node, containerTypes); + const container = findAncestor(node, cFamilyContainerTypes); if (container?.type === "function_definition") return "function"; if (container?.type === "declaration" && isFunctionDeclarator(node)) return "function"; return "variable"; diff --git a/src/languages/definitions/cFamily.ts b/src/languages/definitions/cFamily.ts new file mode 100644 index 00000000..80c5668d --- /dev/null +++ b/src/languages/definitions/cFamily.ts @@ -0,0 +1,92 @@ +import type { SyntaxNodeLike } from "../types.js"; + +export const cFamilyContainerTypes = new Set([ + "function_definition", + "declaration", + "parameter_declaration", + "field_declaration", + "type_definition", + "init_declarator", +]); + +const cFamilyParameterListTypes = new Set(["parameter_declaration", "parameter_list"]); + +export function cFunctionNameQuery(captureName: string, includeFieldIdentifier: boolean): string { + const identifierTypes = includeFieldIdentifier ? ["identifier", "field_identifier"] : ["identifier"]; + const patterns: string[] = []; + for (const identifierType of identifierTypes) { + patterns.push(`(function_declarator declarator: (${identifierType}) @${captureName})`); + patterns.push( + `(function_declarator declarator: (pointer_declarator declarator: (${identifierType}) @${captureName}))`, + ); + patterns.push( + `(pointer_declarator declarator: (function_declarator declarator: (${identifierType}) @${captureName}))`, + ); + patterns.push( + `(function_declarator declarator: (parenthesized_declarator (pointer_declarator declarator: (${identifierType}) @${captureName})))`, + ); + } + return ` + declarator: [ + ${patterns.join("\n ")} + ] +`; +} + +export function isWithin(node: SyntaxNodeLike, ancestor: SyntaxNodeLike | null): boolean { + let current: SyntaxNodeLike | null = node; + while (current) { + if (ancestor && current.id === ancestor.id) return true; + current = current.parent; + } + return false; +} + +export function isInField(node: SyntaxNodeLike, parent: SyntaxNodeLike, field: string): boolean { + return isWithin(node, parent.childForFieldName(field)); +} + +export function findAncestor(node: SyntaxNodeLike, types: Set): SyntaxNodeLike | null { + let current: SyntaxNodeLike | null = node.parent; + while (current) { + if (types.has(current.type)) return current; + current = current.parent; + } + return null; +} + +export function isInParameterList(node: SyntaxNodeLike): boolean { + return !!findAncestor(node, cFamilyParameterListTypes); +} + +function resolveDeclaratorRoot(ancestor: SyntaxNodeLike): SyntaxNodeLike | null { + let declaratorNode = ancestor.childForFieldName("declarator"); + if (!declaratorNode) return null; + if (declaratorNode.type === "init_declarator") { + const inner = declaratorNode.childForFieldName("declarator"); + if (inner) declaratorNode = inner; + } + if (declaratorNode.type === "function_declarator") { + const inner = declaratorNode.childForFieldName("declarator"); + if (inner) declaratorNode = inner; + } + return declaratorNode; +} + +export function isInAncestorDeclarator(node: SyntaxNodeLike, ancestorTypes: Set): boolean { + const ancestor = findAncestor(node, ancestorTypes); + if (!ancestor) return false; + const declaratorNode = resolveDeclaratorRoot(ancestor); + if (!declaratorNode) return false; + return isWithin(node, declaratorNode); +} + +export function isFunctionDeclarator(node: SyntaxNodeLike): boolean { + let current: SyntaxNodeLike | null = node.parent; + while (current) { + if (current.type === "function_declarator") return true; + if (cFamilyContainerTypes.has(current.type)) return false; + current = current.parent; + } + return false; +} diff --git a/src/languages/definitions/cpp.ts b/src/languages/definitions/cpp.ts index 0f719559..aae4559b 100644 --- a/src/languages/definitions/cpp.ts +++ b/src/languages/definitions/cpp.ts @@ -1,98 +1,18 @@ -import type { LanguageDefinition, SyntaxNodeLike } from "../types.js"; +import type { LanguageDefinition } from "../types.js"; import { loadTreeSitterLanguage } from "./loadLanguage.js"; import { registerLanguage } from "../registry.js"; +import { + cFamilyContainerTypes, + cFunctionNameQuery, + findAncestor, + isFunctionDeclarator, + isInAncestorDeclarator, + isInField, + isInParameterList, +} from "./cFamily.js"; -const FUNCTION_NAME_QUERY = ` - declarator: [ - (function_declarator declarator: (identifier) @chunk.name) - (function_declarator declarator: (field_identifier) @chunk.name) - (function_declarator declarator: (pointer_declarator declarator: (identifier) @chunk.name)) - (function_declarator declarator: (pointer_declarator declarator: (field_identifier) @chunk.name)) - (pointer_declarator declarator: (function_declarator declarator: (identifier) @chunk.name)) - (pointer_declarator declarator: (function_declarator declarator: (field_identifier) @chunk.name)) - (function_declarator declarator: (parenthesized_declarator (pointer_declarator declarator: (identifier) @chunk.name))) - (function_declarator declarator: (parenthesized_declarator (pointer_declarator declarator: (field_identifier) @chunk.name))) - ] -`; - -const GRAPH_FUNCTION_NAME_QUERY = ` - declarator: [ - (function_declarator declarator: (identifier) @name) - (function_declarator declarator: (field_identifier) @name) - (function_declarator declarator: (pointer_declarator declarator: (identifier) @name)) - (function_declarator declarator: (pointer_declarator declarator: (field_identifier) @name)) - (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)) - (pointer_declarator declarator: (function_declarator declarator: (field_identifier) @name)) - (function_declarator declarator: (parenthesized_declarator (pointer_declarator declarator: (identifier) @name))) - (function_declarator declarator: (parenthesized_declarator (pointer_declarator declarator: (field_identifier) @name))) - ] -`; - -const isWithin = (node: SyntaxNodeLike, ancestor: SyntaxNodeLike | null): boolean => { - let current: SyntaxNodeLike | null = node; - while (current) { - if (ancestor && current.id === ancestor.id) return true; - current = current.parent; - } - return false; -}; - -const isInField = (node: SyntaxNodeLike, parent: SyntaxNodeLike, field: string): boolean => - isWithin(node, parent.childForFieldName(field)); - -const findAncestor = (node: SyntaxNodeLike, types: Set): SyntaxNodeLike | null => { - let current: SyntaxNodeLike | null = node.parent; - while (current) { - if (types.has(current.type)) return current; - current = current.parent; - } - return null; -}; - -const containerTypes = new Set([ - "function_definition", - "declaration", - "parameter_declaration", - "field_declaration", - "type_definition", - "init_declarator", -]); - -const paramListTypes = new Set(["parameter_declaration", "parameter_list"]); - -const isInParameterList = (node: SyntaxNodeLike): boolean => !!findAncestor(node, paramListTypes); - -const resolveDeclaratorRoot = (ancestor: SyntaxNodeLike): SyntaxNodeLike | null => { - let declaratorNode = ancestor.childForFieldName("declarator"); - if (!declaratorNode) return null; - if (declaratorNode.type === "init_declarator") { - const inner = declaratorNode.childForFieldName("declarator"); - if (inner) declaratorNode = inner; - } - if (declaratorNode.type === "function_declarator") { - const inner = declaratorNode.childForFieldName("declarator"); - if (inner) declaratorNode = inner; - } - return declaratorNode; -}; - -const isInAncestorDeclarator = (node: SyntaxNodeLike, ancestorTypes: Set): boolean => { - const ancestor = findAncestor(node, ancestorTypes); - if (!ancestor) return false; - const declaratorNode = resolveDeclaratorRoot(ancestor); - if (!declaratorNode) return false; - return isWithin(node, declaratorNode); -}; - -const isFunctionDeclarator = (node: SyntaxNodeLike): boolean => { - let current: SyntaxNodeLike | null = node.parent; - while (current) { - if (current.type === "function_declarator") return true; - if (containerTypes.has(current.type)) return false; - current = current.parent; - } - return false; -}; +const FUNCTION_NAME_QUERY = cFunctionNameQuery("chunk.name", true); +const GRAPH_FUNCTION_NAME_QUERY = cFunctionNameQuery("name", true); export const CPP_DEF: LanguageDefinition = { id: "cpp", @@ -206,7 +126,7 @@ export const CPP_DEF: LanguageDefinition = { (parent.type === "type_definition" && isInField(node, parent, "declarator")) ) return "type"; - const container = findAncestor(node, containerTypes); + const container = findAncestor(node, cFamilyContainerTypes); if (container?.type === "function_definition") return "function"; if (container?.type === "declaration" && isFunctionDeclarator(node)) return "function"; return "variable"; diff --git a/src/languages/types.ts b/src/languages/types.ts index f2932bda..3ec167c4 100644 --- a/src/languages/types.ts +++ b/src/languages/types.ts @@ -1,27 +1,8 @@ -import type { JsLanguage } from "../jsFallback.js"; +import type { JsLanguage, JsPoint, JsSyntaxNode } from "../jsFallback.js"; export type { JsLanguage }; -export interface SyntaxPositionLike { - row: number; - column: number; -} - -export interface SyntaxNodeLike { - id?: number; - type: string; - text: string; - startIndex: number; - endIndex: number; - startPosition: SyntaxPositionLike; - endPosition: SyntaxPositionLike; - parent: SyntaxNodeLike | null; - namedChildren: SyntaxNodeLike[]; - previousSibling?: SyntaxNodeLike | null; - previousNamedSibling?: SyntaxNodeLike | null; - child(index: number): SyntaxNodeLike | null; - childForFieldName(fieldName: string): SyntaxNodeLike | null; -} - +export type SyntaxPositionLike = JsPoint; +export type SyntaxNodeLike = JsSyntaxNode; export interface SyntaxTreeLike { rootNode: SyntaxNodeLike & { descendantForIndex(startIndex: number, endIndex: number): SyntaxNodeLike; diff --git a/src/native/execution.ts b/src/native/execution.ts index 19a62453..87cf56c0 100644 --- a/src/native/execution.ts +++ b/src/native/execution.ts @@ -180,16 +180,14 @@ export function getNativeSyntaxTreeExecution( function unavailableQueryExecution(state: Extract): NativeQueryExecution { return { results: null, - fallbackReason: "unavailable", - ...nativeError(state), + ...unavailableNativeFailure(state), }; } function unavailableCompactExecution(state: Extract): CompactImportsExecution { return { results: null, - fallbackReason: "unavailable", - ...nativeError(state), + ...unavailableNativeFailure(state), }; } @@ -198,8 +196,7 @@ function unavailableSingleQueryExecution( ): NativeSingleQueryExecution { return { matches: null, - fallbackReason: "unavailable", - ...nativeError(state), + ...unavailableNativeFailure(state), }; } @@ -208,17 +205,19 @@ function unavailableSyntaxTreeExecution( ): NativeSyntaxTreeExecution { return { tree: null, - fallbackReason: "unavailable", - ...nativeError(state), + ...unavailableNativeFailure(state), }; } -function nativeError(state: Extract): { error?: string } { +function unavailableNativeFailure(state: Extract): { + fallbackReason: "unavailable"; + error?: string; +} { if (!state.error) { - return {}; + return { fallbackReason: "unavailable" }; } if (state.error instanceof Error) { - return { error: state.error.message }; + return { fallbackReason: "unavailable", error: state.error.message }; } - return { error: stringifyUnknown(state.error) }; + return { fallbackReason: "unavailable", error: stringifyUnknown(state.error) }; } diff --git a/src/presets.ts b/src/presets.ts index 189ec817..96fea437 100644 --- a/src/presets.ts +++ b/src/presets.ts @@ -10,6 +10,7 @@ import { type BuildOptions } from "./indexer/types.js"; import type { ImpactOptions } from "./impact/types.js"; +import { isPlainRecord } from "./util/guards.js"; export type PresetName = "code-review" | "ci-fast" | "development" | "production"; @@ -210,7 +211,7 @@ export function mergePreset>(preset: T, custom if (customValue === undefined) continue; // Deep merge for nested objects - if (isPlainObject(customValue) && isPlainObject(presetValue)) { + if (isPlainRecord(customValue) && isPlainRecord(presetValue)) { merged[key] = { ...presetValue, ...customValue }; } else { merged[key] = customValue; @@ -219,7 +220,3 @@ export function mergePreset>(preset: T, custom return merged as T; } - -function isPlainObject(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} diff --git a/src/review/deleted.ts b/src/review/deleted.ts index 7bea1450..6f818eeb 100644 --- a/src/review/deleted.ts +++ b/src/review/deleted.ts @@ -10,13 +10,14 @@ import { collectLocalsAndExportsFromSource } from "../indexer/locals-and-exports import { type ExportEntry, type ImportBinding, type ModuleIndex, type ProjectIndex } from "../indexer/types.js"; import { supportForFile } from "../languages.js"; import type { Edge, FileId } from "../types.js"; +import { edgeKey, toRelativeEdge } from "../util/graphEdges.js"; import { listResolutionCandidates, loadNearestTsconfigFor } from "../util/resolution.js"; import { listWorkspacePackageResolutionCandidates, loadWorkspaceConfig, type WorkspaceConfig, } from "../util/workspace.js"; -import { normalizePath, toProjectDisplayPath } from "../util/paths.js"; +import { normalizePath } from "../util/paths.js"; import type { GraphBuildOptions } from "../graphs/types.js"; const execFileAsync = promisify(execFile); @@ -28,10 +29,6 @@ export type DeletedFileSnapshot = { type ReviewableExportEntry = Exclude; -function relativePath(root: string, file: string): string { - return toProjectDisplayPath(root, file); -} - function normalizeSpecifierBase(fromFile: string, spec: string): string { return normalizePath(path.resolve(path.dirname(fromFile), spec)); } @@ -328,27 +325,6 @@ async function resolveDeletedSnapshotTarget(input: { return { type: "external", name: spec }; } -export function edgeKey(edge: Edge): string { - const toKey = edge.to.type === "file" ? `file:${edge.to.path}` : `external:${edge.to.name}`; - const typeOnly = edge.typeOnly ? "1" : "0"; - return `${edge.from}|${toKey}|${edge.raw}|${typeOnly}`; -} - -export function toRelativeEdge(projectRoot: string, edge: Edge): Edge { - return { - from: relativePath(projectRoot, edge.from), - to: - edge.to.type === "file" - ? { - type: "file", - path: relativePath(projectRoot, edge.to.path), - } - : edge.to, - raw: edge.raw, - ...(edge.typeOnly ? { typeOnly: edge.typeOnly } : {}), - }; -} - export async function collectDeletedImporterEdges( index: ProjectIndex, deletedFiles: readonly string[], diff --git a/src/review/report.ts b/src/review/report.ts index 95424fd8..28ac26c5 100644 --- a/src/review/report.ts +++ b/src/review/report.ts @@ -3,42 +3,16 @@ import type { CandidateTestFile } from "../impact/context.js"; import { type ProjectIndex } from "../indexer/types.js"; import { collectSqlReviewContext, type SqlReviewContext } from "../sql/review.js"; import type { Edge, FileId } from "../types.js"; +import { compareEdges, edgeKey, toRelativeEdge } from "../util/graphEdges.js"; import { normalizePath } from "../util/paths.js"; import { type ProjectFileInfo } from "../util/projectFiles.js"; import type { ReviewDiagnostics, ReviewOptions, ReviewReport } from "../review.js"; -import { - collectDeletedImporterEdges, - collectDeletedSnapshotEdges, - edgeKey, - toRelativeEdge, - type DeletedFileSnapshot, -} from "./deleted.js"; +import { collectDeletedImporterEdges, collectDeletedSnapshotEdges, type DeletedFileSnapshot } from "./deleted.js"; import { buildReviewTasks, computeRiskSummary, hasDiagnostics } from "./risk.js"; import type { ReviewFileSummary } from "./summaries.js"; export const REVIEW_SCHEMA_VERSION = 2; -function comparePaths(left: string, right: string): number { - return left.localeCompare(right); -} - -function compareEdges(left: Edge, right: Edge): number { - const fromCompare = comparePaths(left.from, right.from); - if (fromCompare !== 0) return fromCompare; - if (left.to.type !== right.to.type) { - return left.to.type === "file" ? -1 : 1; - } - const leftTarget = left.to.type === "file" ? left.to.path : left.to.name; - const rightTarget = right.to.type === "file" ? right.to.path : right.to.name; - const toCompare = comparePaths(leftTarget, rightTarget); - if (toCompare !== 0) return toCompare; - const rawCompare = left.raw.localeCompare(right.raw); - if (rawCompare !== 0) return rawCompare; - const leftTypeOnly = left.typeOnly ? 1 : 0; - const rightTypeOnly = right.typeOnly ? 1 : 0; - return leftTypeOnly - rightTypeOnly; -} - export async function collectReviewGraphDelta(input: { projectRoot: string; index: ProjectIndex; diff --git a/src/sql/extractFacts.ts b/src/sql/extractFacts.ts index 6cb4e928..cde9a86b 100644 --- a/src/sql/extractFacts.ts +++ b/src/sql/extractFacts.ts @@ -9,6 +9,7 @@ import { sqlObjectBaseName, sqlParenDepthAt, } from "./lex.js"; +import { sqlObjectLookupKeys } from "./lookup.js"; import type { SqlFactKind, SqlFileRole, SqlStatementFact } from "./types.js"; export { maskSqlStringsAndComments, normalizeSqlObjectName, sqlObjectBaseName } from "./lex.js"; @@ -362,12 +363,6 @@ function findMatchingParen(text: string, openIndex: number): number { return -1; } -function cteNameKeys(name: string): string[] { - const normalized = name.toLowerCase(); - const baseName = sqlObjectBaseName(name).toLowerCase(); - return normalized === baseName ? [normalized] : [normalized, baseName]; -} - function collectCteReads(text: string): { names: Set; facts: SqlFactDraft[] } { const names = new Set(); const facts: SqlFactDraft[] = []; @@ -380,7 +375,7 @@ function collectCteReads(text: string): { names: Set; facts: SqlFactDraf if (sqlParenDepthAt(text, match.index ?? 0) > 0) continue; const name = normalizeSqlObjectName(match[1]); if (!name) continue; - for (const key of cteNameKeys(name)) names.add(key); + for (const key of sqlObjectLookupKeys(name)) names.add(key); const bodyStart = (match.index ?? 0) + match[0].length; const bodyEnd = findMatchingParen(text, bodyStart - 1); if (bodyEnd < 0) continue; @@ -433,7 +428,7 @@ function extractCreateTableConstraintFacts(text: string, tableName: string | nul function extractReadFacts(text: string): SqlFactDraft[] { const cteReads = collectCteReads(text); - const isCteName = (name: string): boolean => cteNameKeys(name).some((key) => cteReads.names.has(key)); + const isCteName = (name: string): boolean => sqlObjectLookupKeys(name).some((key) => cteReads.names.has(key)); const fromObjects = collectCommaSeparatedObjectsAfterKeywords(text, ["from", "using"]).filter( (name) => !isCteName(name), ); diff --git a/src/sql/lookup.ts b/src/sql/lookup.ts new file mode 100644 index 00000000..22eeed4a --- /dev/null +++ b/src/sql/lookup.ts @@ -0,0 +1,16 @@ +import { sqlObjectBaseName } from "./lex.js"; + +export function sqlObjectLookupKeys(name: string): string[] { + const normalized = name.toLowerCase(); + const baseName = sqlObjectBaseName(name).toLowerCase(); + return normalized === baseName ? [normalized] : [normalized, baseName]; +} + +export function pushSqlLookupValue(lookup: Map, key: string, value: T): void { + const existing = lookup.get(key); + if (existing) { + existing.push(value); + return; + } + lookup.set(key, [value]); +} diff --git a/src/sql/navigation.ts b/src/sql/navigation.ts index 3ebf5a79..0e6627dd 100644 --- a/src/sql/navigation.ts +++ b/src/sql/navigation.ts @@ -13,6 +13,7 @@ import type { import type { Range } from "../types.js"; import { normalizePath } from "../util/paths.js"; import { extractSqlFactsFromSource } from "./extractFacts.js"; +import { pushSqlLookupValue } from "./lookup.js"; import { maskSqlStringsAndComments, normalizeSqlObjectName, @@ -59,12 +60,6 @@ function sqlFiles(index: ProjectIndex): string[] { .sort((left, right) => left.localeCompare(right)); } -function pushDefinition(lookup: Map, key: string, definition: SymbolDef): void { - const existing = lookup.get(key); - if (existing) existing.push(definition); - else lookup.set(key, [definition]); -} - function buildSqlDefinitionLookup(index: ProjectIndex): SqlDefinitionLookup { const exact = new Map(); const basename = new Map(); @@ -72,8 +67,8 @@ function buildSqlDefinitionLookup(index: ProjectIndex): SqlDefinitionLookup { const module = index.byFile.get(file); if (!module) continue; for (const local of module.locals) { - pushDefinition(exact, local.localName.toLowerCase(), local); - pushDefinition(basename, sqlObjectBaseName(local.localName).toLowerCase(), local); + pushSqlLookupValue(exact, local.localName.toLowerCase(), local); + pushSqlLookupValue(basename, sqlObjectBaseName(local.localName).toLowerCase(), local); } } return { exact, basename }; diff --git a/src/sql/review.ts b/src/sql/review.ts index 1c19e8de..1acb4c0d 100644 --- a/src/sql/review.ts +++ b/src/sql/review.ts @@ -4,6 +4,7 @@ import { listProjectFiles } from "../util/projectFiles.js"; import { normalizePath } from "../util/paths.js"; import { mapLimit } from "../util/concurrency.js"; import { extractSqlFactsFromSource, sqlObjectBaseName } from "./extractFacts.js"; +import { sqlObjectLookupKeys } from "./lookup.js"; import type { SqlBridgeReason, SqlStatementFact } from "./types.js"; export type SqlReviewContextEntry = { @@ -97,12 +98,6 @@ async function collectChangedSqlLiteralSources(changedFiles: readonly string[]): return sources; } -function objectLookupKeys(name: string): string[] { - const normalized = name.toLowerCase(); - const baseName = sqlObjectBaseName(name).toLowerCase(); - return normalized === baseName ? [normalized] : [normalized, baseName]; -} - function changedSourceObjectMentions(source: string): Set { const mentions = new Set(); const objectRe = /[A-Za-z_][A-Za-z0-9_$]*(?:\s*\.\s*[A-Za-z_][A-Za-z0-9_$]*)*/g; @@ -122,7 +117,7 @@ function collectChangedSqlLiteralObjects( for (const fact of facts) { if (!fact.objectName) continue; const canonicalName = fact.objectName.toLowerCase(); - for (const key of objectLookupKeys(fact.objectName)) { + for (const key of sqlObjectLookupKeys(fact.objectName)) { const existing = objectNamesByKey.get(key); if (existing) existing.add(canonicalName); else objectNamesByKey.set(key, new Set([canonicalName])); diff --git a/src/sql/sourceGraph.ts b/src/sql/sourceGraph.ts index a16af2a7..276e921a 100644 --- a/src/sql/sourceGraph.ts +++ b/src/sql/sourceGraph.ts @@ -8,6 +8,7 @@ import type { Edge, Range } from "../types.js"; import { normalizePath } from "../util/paths.js"; import { mapLimit } from "../util/concurrency.js"; import { extractSqlFactsFromSource, sqlObjectBaseName } from "./extractFacts.js"; +import { pushSqlLookupValue } from "./lookup.js"; import type { SqlFactKind, SqlStatementFact } from "./types.js"; const SQL_FACT_READ_CONCURRENCY = 32; @@ -99,12 +100,6 @@ function referenceObjectNames(fact: SqlStatementFact): string[] { return Array.from(new Set(names)); } -function pushDefinition(lookup: Map, key: string, fact: SqlStatementFact): void { - const existing = lookup.get(key); - if (existing) existing.push(fact); - else lookup.set(key, [fact]); -} - function uniqueFacts(candidates: readonly SqlStatementFact[]): SqlStatementFact[] { const unique: SqlStatementFact[] = []; const seen = new Set(); @@ -149,8 +144,8 @@ function addDefinition( ): void { if (!fact.objectName) return; const keys = definitionKeys(fact.objectName); - pushDefinition(definitionsByExactName, keys.exact, fact); - pushDefinition(definitionsByBaseName, keys.base, fact); + pushSqlLookupValue(definitionsByExactName, keys.exact, fact); + pushSqlLookupValue(definitionsByBaseName, keys.base, fact); } function sqlEdgesForCandidates( diff --git a/src/util/graphEdges.ts b/src/util/graphEdges.ts new file mode 100644 index 00000000..936fc7a7 --- /dev/null +++ b/src/util/graphEdges.ts @@ -0,0 +1,42 @@ +import type { Edge } from "../types.js"; +import { toProjectDisplayPath } from "./paths.js"; + +export function edgeKey(edge: Edge): string { + const toKey = edge.to.type === "file" ? `file:${edge.to.path}` : `external:${edge.to.name}`; + const typeOnly = edge.typeOnly ? "1" : "0"; + return `${edge.from}|${toKey}|${edge.raw}|${typeOnly}`; +} + +export function compareEdges(left: Edge, right: Edge): number { + const fromCompare = left.from.localeCompare(right.from); + if (fromCompare) return fromCompare; + if (left.to.type !== right.to.type) { + return left.to.type === "file" ? -1 : 1; + } + const leftTo = left.to.type === "file" ? left.to.path : left.to.name; + const rightTo = right.to.type === "file" ? right.to.path : right.to.name; + const toCompare = leftTo.localeCompare(rightTo); + if (toCompare) return toCompare; + const rawCompare = left.raw.localeCompare(right.raw); + if (rawCompare) return rawCompare; + const leftTypeOnly = left.typeOnly ? 1 : 0; + const rightTypeOnly = right.typeOnly ? 1 : 0; + return leftTypeOnly - rightTypeOnly; +} + +export function toRelativeEdge(projectRoot: string, edge: Edge): Edge { + const from = toProjectDisplayPath(projectRoot, edge.from); + let to = edge.to; + if (edge.to.type === "file") { + to = { + type: "file", + path: toProjectDisplayPath(projectRoot, edge.to.path), + }; + } + return { + from, + to, + raw: edge.raw, + ...(edge.typeOnly ? { typeOnly: edge.typeOnly } : {}), + }; +} diff --git a/src/util/guards.ts b/src/util/guards.ts new file mode 100644 index 00000000..90ed3597 --- /dev/null +++ b/src/util/guards.ts @@ -0,0 +1,3 @@ +export function isPlainRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} diff --git a/src/util/projectFiles/parsers.ts b/src/util/projectFiles/parsers.ts index bf254263..03a6f67c 100644 --- a/src/util/projectFiles/parsers.ts +++ b/src/util/projectFiles/parsers.ts @@ -1,12 +1,10 @@ +import { isPlainRecord } from "../guards.js"; + export function trimToNull(value: string | null | undefined): string | null { const trimmed = value?.trim(); return trimmed ? trimmed : null; } -function isPlainRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - export function parseJsonName(raw: string): string | null { try { const data: unknown = JSON.parse(raw); @@ -80,8 +78,7 @@ export function parseIniName(raw: string, section: string, key: string): string } export function parseSetupPyName(raw: string): string | null { - const match = raw.match(/\bname\s*=\s*["']([^"']+)["']/); - return trimToNull(match?.[1]); + return parseAssignedStringName(raw); } export function parsePomName(raw: string): string | null { @@ -141,6 +138,10 @@ export function parseGoModuleName(raw: string): string | null { } export function parseGemspecName(raw: string): string | null { + return parseAssignedStringName(raw); +} + +function parseAssignedStringName(raw: string): string | null { const match = raw.match(/\bname\s*=\s*["']([^"']+)["']/); return trimToNull(match?.[1]); } diff --git a/src/util/resolution.ts b/src/util/resolution.ts index bea156af..fd8e2140 100644 --- a/src/util/resolution.ts +++ b/src/util/resolution.ts @@ -3,7 +3,7 @@ import fsp from "node:fs/promises"; import path from "node:path"; import { GRAPH_ONLY_RESOLUTION_EXTENSIONS } from "./graphOnlyExtensions.js"; import { normalizePath, normalizeResolutionHints } from "./paths.js"; -import { DEFAULT_RESOLUTION_EXTENSIONS, listResolutionCandidates } from "./resolutionCandidates.js"; +import { DEFAULT_RESOLUTION_EXTENSIONS, getResolutionExtensions } from "./resolutionCandidates.js"; import { clearWorkspaceCaches, clearFileExistsCache, @@ -13,6 +13,7 @@ import { type WorkspaceConfig, } from "./workspace.js"; import { clearJvmResolutionCaches, resolveJavaImportPath, resolveKotlinImportPath } from "./resolution/jvm.js"; +import { findFirstExistingResolutionCandidate } from "./resolution/findFirstExisting.js"; import { resolveGoImportPath } from "./resolution/go.js"; import { resolveFromNodeModules } from "./resolution/node.js"; import { clearPhpResolutionCaches, getPhpComposerImplicitFiles, resolvePhpImportPath } from "./resolution/php.js"; @@ -52,11 +53,6 @@ const GRAPH_ONLY_LANGUAGE_SOURCE_RESOLUTION_EXTENSIONS: Record { - for (const candidate of listResolutionCandidates(base, resolutionExtensions)) { - if (await fileExists(candidate)) { - return path.resolve(candidate); - } - } - return null; -} - async function findFirstExistingScssPartialCandidate(base: string): Promise { const basename = path.basename(base); if (!basename || basename.startsWith("_")) return null; diff --git a/src/util/resolution/findFirstExisting.ts b/src/util/resolution/findFirstExisting.ts new file mode 100644 index 00000000..6f844214 --- /dev/null +++ b/src/util/resolution/findFirstExisting.ts @@ -0,0 +1,15 @@ +import path from "node:path"; +import { listResolutionCandidates } from "../resolutionCandidates.js"; +import { fileExists } from "../workspace.js"; + +export async function findFirstExistingResolutionCandidate( + base: string, + resolutionExtensions?: readonly string[], +): Promise { + for (const candidate of listResolutionCandidates(base, resolutionExtensions)) { + if (await fileExists(candidate)) { + return path.resolve(candidate); + } + } + return null; +} diff --git a/src/util/resolution/node.ts b/src/util/resolution/node.ts index d9f9e432..feae0191 100644 --- a/src/util/resolution/node.ts +++ b/src/util/resolution/node.ts @@ -1,18 +1,6 @@ import path from "node:path"; -import { listResolutionCandidates } from "../resolutionCandidates.js"; -import { directoryExists, fileExists, loadJSON, type MinimalPackageJson } from "../workspace.js"; - -async function findFirstExistingResolutionCandidate( - base: string, - resolutionExtensions?: readonly string[], -): Promise { - for (const candidate of listResolutionCandidates(base, resolutionExtensions)) { - if (await fileExists(candidate)) { - return path.resolve(candidate); - } - } - return null; -} +import { findFirstExistingResolutionCandidate } from "./findFirstExisting.js"; +import { directoryExists, loadJSON, type MinimalPackageJson } from "../workspace.js"; export async function resolveFromNodeModules( spec: string, diff --git a/src/util/resolution/php.ts b/src/util/resolution/php.ts index 0d9ffb4c..159dfd5c 100644 --- a/src/util/resolution/php.ts +++ b/src/util/resolution/php.ts @@ -1,9 +1,8 @@ import fsp from "node:fs/promises"; import path from "node:path"; import { stringifyUnknown } from "../ast.js"; -import { listResolutionCandidates } from "../resolutionCandidates.js"; import { mapLimitSemaphore } from "../concurrency.js"; -import { fileExists } from "../workspace.js"; +import { findFirstExistingResolutionCandidate } from "./findFirstExisting.js"; import { clearPhpComposerResolutionCaches, findPhpComposerPath, @@ -23,18 +22,6 @@ import { type FileId = string; -async function findFirstExistingResolutionCandidate( - base: string, - resolutionExtensions?: readonly string[], -): Promise { - for (const candidate of listResolutionCandidates(base, resolutionExtensions)) { - if (await fileExists(candidate)) { - return path.resolve(candidate); - } - } - return null; -} - async function resolvePhpPathLikeSpecifier( projectRoot: string, fromFile: string, diff --git a/src/util/resolution/phpComposer.ts b/src/util/resolution/phpComposer.ts index 6f3bd903..8c12d9ff 100644 --- a/src/util/resolution/phpComposer.ts +++ b/src/util/resolution/phpComposer.ts @@ -2,8 +2,8 @@ import fsp from "node:fs/promises"; import path from "node:path"; import { normalizePath } from "../paths.js"; import { listProjectFiles } from "../projectFiles.js"; -import { listResolutionCandidates } from "../resolutionCandidates.js"; import { fileExists } from "../workspace.js"; +import { findFirstExistingResolutionCandidate } from "./findFirstExisting.js"; import { findNearestFile } from "./files.js"; export type PhpComposerConfig = { @@ -24,18 +24,6 @@ type PhpComposerAutoloadRoot = { namespacePrefixes: string[]; }; -async function findFirstExistingResolutionCandidate( - base: string, - resolutionExtensions?: readonly string[], -): Promise { - for (const candidate of listResolutionCandidates(base, resolutionExtensions)) { - if (await fileExists(candidate)) { - return path.resolve(candidate); - } - } - return null; -} - function readComposerNamespaceDirs(value: unknown, composerDir: string): Map { const result = new Map(); if (!value || typeof value !== "object") { diff --git a/src/util/resolutionCandidates.ts b/src/util/resolutionCandidates.ts index 4a1e8a74..dd2f7d01 100644 --- a/src/util/resolutionCandidates.ts +++ b/src/util/resolutionCandidates.ts @@ -50,7 +50,7 @@ const EXPLICIT_SPECIFIER_EXTENSION_FAMILIES: Record = ".cjs": [".cts", ".cjs"], }; -function getResolutionExtensions(resolutionExtensions?: readonly string[]): string[] { +export function getResolutionExtensions(resolutionExtensions?: readonly string[]): string[] { const extensions = resolutionExtensions === undefined ? DEFAULT_RESOLUTION_EXTENSIONS : resolutionExtensions; return Array.from(new Set(extensions)); } diff --git a/src/util/workspace.ts b/src/util/workspace.ts index 72d5edb2..ee70220c 100644 --- a/src/util/workspace.ts +++ b/src/util/workspace.ts @@ -6,32 +6,30 @@ import path from "node:path"; import fg from "fast-glob"; import { listResolutionCandidates } from "./resolutionCandidates.js"; -export async function fileExists(p: string): Promise { - const cached = fileExistsCache.get(p); +async function pathMatchesCachedStat( + cache: Map, + pathName: string, + matches: (stat: Awaited>) => boolean, +): Promise { + const cached = cache.get(pathName); if (cached !== undefined) return cached; try { - const stat = await fsp.stat(p); - const exists = stat.isFile(); - fileExistsCache.set(p, exists); + const stat = await fsp.stat(pathName); + const exists = matches(stat); + cache.set(pathName, exists); return exists; } catch { - fileExistsCache.set(p, false); + cache.set(pathName, false); return false; } } +export async function fileExists(p: string): Promise { + return await pathMatchesCachedStat(fileExistsCache, p, (stat) => stat.isFile()); +} + export async function directoryExists(p: string): Promise { - const cached = directoryExistsCache.get(p); - if (cached !== undefined) return cached; - try { - const stat = await fsp.stat(p); - const exists = stat.isDirectory(); - directoryExistsCache.set(p, exists); - return exists; - } catch { - directoryExistsCache.set(p, false); - return false; - } + return await pathMatchesCachedStat(directoryExistsCache, p, (stat) => stat.isDirectory()); } async function findWorkspaceRoot(startDir: string): Promise { diff --git a/tests/agent-explain.test.ts b/tests/agent-explain.test.ts index 9a851dcc..61024a51 100644 --- a/tests/agent-explain.test.ts +++ b/tests/agent-explain.test.ts @@ -1,10 +1,10 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { execFileSync } from "node:child_process"; import { describe, expect, it } from "vitest"; import { explainCodegraphTarget } from "../src/agent/explain.js"; import { searchCodegraph } from "../src/agent/search.js"; +import { runGit } from "./helpers/git.js"; async function mkRepo(): Promise { const root = await fs.mkdtemp(path.join(os.tmpdir(), "cg-agent-explain-")); @@ -263,7 +263,3 @@ describe("agent explain", () => { expect(typeof candidate?.confidence).toBe("string"); }); }); - -function runGit(cwd: string, args: string[]): string { - return execFileSync("git", args, { cwd, encoding: "utf8" }); -} diff --git a/tests/agent-search.test.ts b/tests/agent-search.test.ts index d89dc406..ebef259f 100644 --- a/tests/agent-search.test.ts +++ b/tests/agent-search.test.ts @@ -2,11 +2,13 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { describe, expect, it } from "vitest"; -import { createAgentSession, type AgentProjectSnapshot, type AgentSession } from "../src/agent/session.js"; +import { createAgentSession } from "../src/agent/session.js"; import { searchCodegraph, searchCodegraphWithSession } from "../src/agent/search.js"; import type { SymbolEdge, SymbolGraph, SymbolNode } from "../src/graphs.js"; import { SymbolKind, type ModuleIndex, type ProjectIndex, type SymbolDef } from "../src/indexer/types.js"; import type { Edge, Graph, Range } from "../src/types.js"; +import { countingSession } from "./helpers/agent.js"; +import { isSymlinkUnavailable } from "./helpers/filesystem.js"; async function mkRepo(): Promise { const root = await fs.mkdtemp(path.join(os.tmpdir(), "cg-agent-search-")); @@ -35,27 +37,6 @@ async function mkRepo(): Promise { return root; } -function countingSession(session: AgentSession): { session: AgentSession; loads: () => number } { - let cached: Promise | undefined; - let loadCount = 0; - return { - session: { - loadProject: async () => { - if (!cached) { - loadCount += 1; - cached = session.loadProject(); - } - return await cached; - }, - invalidate: () => { - cached = undefined; - session.invalidate(); - }, - }, - loads: () => loadCount, - }; -} - function oneLineRange(index: number): Range { return { start: { line: 1, column: 1, index }, @@ -404,11 +385,3 @@ describe("agent search", () => { expect(response.results).toEqual([]); }); }); - -function isSymlinkUnavailable(error: unknown): boolean { - return ( - error instanceof Error && - "code" in error && - (error.code === "EPERM" || error.code === "EACCES" || error.code === "ENOTSUP") - ); -} diff --git a/tests/artifact-build.test.ts b/tests/artifact-build.test.ts index 71fddd7b..4538649e 100644 --- a/tests/artifact-build.test.ts +++ b/tests/artifact-build.test.ts @@ -3,28 +3,9 @@ import os from "node:os"; import path from "node:path"; import { describe, expect, it } from "vitest"; import { buildCodegraphArtifact, buildCodegraphArtifactWithSession } from "../src/agent/artifact.js"; -import { createAgentSession, type AgentProjectSnapshot, type AgentSession } from "../src/agent/session.js"; - -function countingSession(session: AgentSession): { session: AgentSession; loads: () => number } { - let cached: Promise | undefined; - let loadCount = 0; - return { - session: { - loadProject: async () => { - if (!cached) { - loadCount += 1; - cached = session.loadProject(); - } - return await cached; - }, - invalidate: () => { - cached = undefined; - session.invalidate(); - }, - }, - loads: () => loadCount, - }; -} +import { createAgentSession } from "../src/agent/session.js"; +import { countingSession } from "./helpers/agent.js"; +import { isSymlinkUnavailable } from "./helpers/filesystem.js"; describe("artifact build", () => { it("writes sqlite, graph JSON, optional report, questions, and manifest from real project logic", async () => { @@ -252,11 +233,3 @@ describe("artifact build", () => { expect(graph.graph.files.some((file) => file.includes("linked") || file.includes("secret.ts"))).toBe(false); }); }); - -function isSymlinkUnavailable(error: unknown): boolean { - return ( - error instanceof Error && - "code" in error && - (error.code === "EPERM" || error.code === "EACCES" || error.code === "ENOTSUP") - ); -} diff --git a/tests/cache-invalidation.test.ts b/tests/cache-invalidation.test.ts index 9db3a67a..686f74be 100644 --- a/tests/cache-invalidation.test.ts +++ b/tests/cache-invalidation.test.ts @@ -4,7 +4,6 @@ import os from "node:os"; import fsp from "node:fs/promises"; import fs from "node:fs"; import { DatabaseSync } from "node:sqlite"; -import { spawnSync } from "node:child_process"; import { buildProjectIndex, buildProjectIndexIncremental, type BuildReport } from "../src/index.js"; import * as indexer from "../src/indexer.js"; import { MANIFEST_VERSION, summarizeBuildOptions, writeManifest, type IndexManifest } from "../src/indexer/build-cache.js"; @@ -20,6 +19,7 @@ import { } from "../src/util.js"; import * as util from "../src/util.js"; import * as filePrep from "../src/languages/filePrep.js"; +import { runGit } from "./helpers/git.js"; async function mkTmpDir(prefix: string): Promise { return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); @@ -65,19 +65,6 @@ function createManifest(root: string): IndexManifest { }; } -function runGit(root: string, args: string[]): string { - const result = spawnSync("git", args, { - cwd: root, - env: process.env, - encoding: "utf8", - }); - if (result.error) throw result.error; - if (result.status !== 0) { - throw new Error(`git ${args.join(" ")} failed (${result.status}): ${result.stderr}`); - } - return result.stdout.trim(); -} - describe("Cache invalidation and strict hashing", () => { it("retries transient manifest write contention without warning after recovery", async () => { const root = await mkTmpDir("dg-manifest-retry-"); diff --git a/tests/cli-regressions.test.ts b/tests/cli-regressions.test.ts index 66f06e8d..e1284edc 100644 --- a/tests/cli-regressions.test.ts +++ b/tests/cli-regressions.test.ts @@ -2,12 +2,13 @@ import { describe, it, expect } from "vitest"; import os from "node:os"; import path from "node:path"; import fsp from "node:fs/promises"; -import { execFileSync, spawn } from "node:child_process"; +import { spawn } from "node:child_process"; import { pathToFileURL } from "node:url"; import { textGrep } from "../src/index.js"; import { isCliDiscoveryRelativePathInside, runCli } from "../src/cli.js"; import { getSkillTargetDirForAgent, type SkillInstallAgent } from "../src/cli/skill.js"; import packageJson from "../package.json" with { type: "json" }; +import { runGit as git } from "./helpers/git.js"; const tsxCliPath = path.resolve(process.cwd(), "node_modules", "tsx", "dist", "cli.mjs"); const sourceCliPath = path.resolve(process.cwd(), "src", "cli.ts"); @@ -1423,20 +1424,6 @@ async function mkTmpDir(prefix: string): Promise { return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); } -function git(cwd: string, args: string[]): string { - return execFileSync("git", args, { - cwd, - encoding: "utf8", - env: { - ...process.env, - GIT_AUTHOR_NAME: "Codegraph Test", - GIT_AUTHOR_EMAIL: "codegraph@example.test", - GIT_COMMITTER_NAME: "Codegraph Test", - GIT_COMMITTER_EMAIL: "codegraph@example.test", - }, - }).trim(); -} - function initGitRepo(root: string): void { git(root, ["init"]); git(root, ["symbolic-ref", "HEAD", "refs/heads/main"]); diff --git a/tests/duplicates.test.ts b/tests/duplicates.test.ts index 831128b3..e09354f2 100644 --- a/tests/duplicates.test.ts +++ b/tests/duplicates.test.ts @@ -77,14 +77,80 @@ export function normalizeInvoiceRows(rows: Array<{ amount: number; tax: number } const index = await buildProjectIndex(root); const result = await findDuplicates(index, { minConfidence: "high", limit: 5 }); - expect(result.suggestions.length).toBeGreaterThan(0); - expect(result.suggestions[0]?.cloneType).toBe("exact"); - expect(result.suggestions[0]?.confidence).toBe("high"); - expect(result.suggestions[0]?.left.file).toBe("src/a.ts"); - expect(result.suggestions[0]?.right.file).toBe("src/b.ts"); + expect(result.schemaVersion).toBe(2); + expect(result.groups.length).toBeGreaterThan(0); + expect(result.groups[0]?.cloneType).toBe("exact"); + expect(result.groups[0]?.confidence).toBe("high"); + expect(result.groups[0]?.primaryLeft.file).toBe("src/a.ts"); + expect(result.groups[0]?.primaryRight.file).toBe("src/b.ts"); }); - test("returns bounded suggestions with omission counts", async () => { + test("groups overlapping symbol and chunk variants into one finding", async () => { + const root = await makeTempProject(); + const duplicateSource = ` +export function normalizeInvoiceRows(rows: Array<{ amount: number; tax: number }>) { + const totals: number[] = []; + const labels: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + labels.push(label); + totals.push(rounded); + } + const encoded = totals.map((value, index) => labels[index] + ":" + value.toFixed(2)); + return encoded.filter((value) => value.includes(":")).join(","); +} +`; + + await writeProjectFile(root, "src/a.ts", duplicateSource); + await writeProjectFile(root, "src/b.ts", duplicateSource); + + const index = await buildProjectIndex(root); + const result = await findDuplicates(index, { includeRawPairs: true, minConfidence: "high", limit: 5 }); + + expect(result.groups).toHaveLength(1); + expect(result.groups[0]?.variantCount).toBeGreaterThan(1); + expect(result.groups[0]?.primaryLeft.kind).toBe("symbol"); + expect(result.groups[0]?.primaryRight.kind).toBe("symbol"); + expect(result.suggestions?.length).toBeGreaterThan(result.groups.length); + }); + + test("omits raw unit pairs unless requested", async () => { + const root = await makeTempProject(); + const source = ` +export function summarizePayments(rows: Array<{ amount: number; fee: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.fee; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); +} +`; + + await writeProjectFile(root, "src/a.ts", source); + await writeProjectFile(root, "src/b.ts", source); + + const index = await buildProjectIndex(root); + const defaultResult = await findDuplicates(index, { minConfidence: "high", limit: 5 }); + const rawResult = await findDuplicates(index, { includeRawPairs: true, minConfidence: "high", limit: 5 }); + const defaultGroup = defaultResult.groups[0]; + const rawGroup = rawResult.groups[0]; + + expect(defaultResult.groups.length).toBeGreaterThan(0); + expect(defaultResult.suggestions).toBeUndefined(); + expect(defaultResult.omittedCounts.rawSuggestions).toBeGreaterThan(0); + expect(defaultGroup?.rawPairCount).toBeGreaterThanOrEqual(defaultGroup?.variantCount ?? 0); + expect(defaultGroup?.omittedVariantCount).toBe((defaultGroup?.rawPairCount ?? 0) - (defaultGroup?.variantCount ?? 0)); + expect(rawResult.suggestions?.length).toBeGreaterThan(0); + expect(rawGroup?.rawPairCount).toBe(rawGroup?.variantCount); + expect(rawGroup?.omittedVariantCount).toBe(0); + }); + + test("returns bounded groups with omission counts", async () => { const root = await makeTempProject(); const source = ` export function summarizePayments(rows: Array<{ amount: number; fee: number }>) { @@ -106,7 +172,8 @@ export function summarizePayments(rows: Array<{ amount: number; fee: number }>) const index = await buildProjectIndex(root); const result = await findDuplicates(index, { minConfidence: "high", limit: 1 }); - expect(result.suggestions).toHaveLength(1); + expect(result.groups).toHaveLength(1); + expect(result.omittedCounts.groups).toBeGreaterThan(0); expect(result.omittedCounts.suggestions).toBeGreaterThan(0); expect(result.stats.candidatePairs).toBeGreaterThan(0); expect(result.stats.comparedPairs).toBeGreaterThan(0); @@ -156,8 +223,8 @@ export function scoreAccounts(accounts: Array<{ enabled: boolean; credits: numbe const index = await buildProjectIndex(root); const result = await findDuplicates(index, { minConfidence: "medium", limit: 5 }); - const match = result.suggestions.find( - (suggestion) => suggestion.left.file === "src/accounts.ts" || suggestion.right.file === "src/accounts.ts", + const match = result.groups.find( + (group) => group.primaryLeft.file === "src/accounts.ts" || group.primaryRight.file === "src/accounts.ts", ); expect(match).toBeDefined(); @@ -199,7 +266,7 @@ export function sharedName(input: string): string { minConfidence: "high", }); - expect(result.suggestions).toHaveLength(0); + expect(result.groups).toHaveLength(0); }); test("filters small helpers unless explicitly included", async () => { @@ -213,9 +280,9 @@ export function sharedName(input: string): string { const defaultResult = await findDuplicates(index, { minConfidence: "low" }); const includedResult = await findDuplicates(index, { includeSmall: true, minConfidence: "high" }); - expect(defaultResult.suggestions).toHaveLength(0); + expect(defaultResult.groups).toHaveLength(0); expect(defaultResult.omittedCounts.belowThresholdUnits).toBeGreaterThan(0); - expect(includedResult.suggestions.length).toBeGreaterThan(0); + expect(includedResult.groups.length).toBeGreaterThan(0); }); test("rejects invalid token bounds", async () => { @@ -267,7 +334,7 @@ export function sharedClone(rows) { const index = await buildProjectIndex(root); const result = await findDuplicates(index, { includeSmall: true, minConfidence: "high" }); - expect(result.suggestions).toHaveLength(0); + expect(result.groups).toHaveLength(0); }); test("duplicates CLI detects duplicate JSON text files", async () => { @@ -292,15 +359,15 @@ export function sharedClone(rows) { root, ); const parsed = JSON.parse(result.stdout) as { - suggestions?: Array<{ left?: { file?: string; tokenCount?: number }; right?: { file?: string } }>; + groups?: Array<{ primaryLeft?: { file?: string; tokenCount?: number }; primaryRight?: { file?: string } }>; }; expect(result.exitCode).toBeUndefined(); expect(result.stderr).toBe(""); - expect(parsed.suggestions).toHaveLength(1); - expect(parsed.suggestions?.[0]?.left?.file).toBe("configs/a.json"); - expect(parsed.suggestions?.[0]?.right?.file).toBe("configs/b.json"); - expect(parsed.suggestions?.[0]?.left?.tokenCount).toBeGreaterThan(40); + expect(parsed.groups).toHaveLength(1); + expect(parsed.groups?.[0]?.primaryLeft?.file).toBe("configs/a.json"); + expect(parsed.groups?.[0]?.primaryRight?.file).toBe("configs/b.json"); + expect(parsed.groups?.[0]?.primaryLeft?.tokenCount).toBeGreaterThan(40); }); test("accepts project-relative file filters", async () => { @@ -318,8 +385,8 @@ export function sharedClone(rows) { minConfidence: "high", }); - expect(result.suggestions.length).toBeGreaterThan(0); - expect(result.suggestions[0]?.left.file).toBe("src/a.ts"); + expect(result.groups.length).toBeGreaterThan(0); + expect(result.groups[0]?.primaryLeft.file).toBe("src/a.ts"); }); test("rejects duplicate file filters outside the project root", async () => { @@ -371,13 +438,60 @@ export function secondClone(rows: number[]) { minConfidence: "medium", }); - expect(defaultResult.suggestions).toHaveLength(0); - expect(sameFileResult.suggestions.length).toBeGreaterThan(0); - expect(sameFileResult.suggestions[0]?.left.file).toBe("src/local.ts"); - expect(sameFileResult.suggestions[0]?.right.file).toBe("src/local.ts"); + expect(defaultResult.groups).toHaveLength(0); + expect(sameFileResult.groups.length).toBeGreaterThan(0); + expect(sameFileResult.groups[0]?.primaryLeft.file).toBe("src/local.ts"); + expect(sameFileResult.groups[0]?.primaryRight.file).toBe("src/local.ts"); }); - test("duplicates CLI emits bounded JSON suggestions", async () => { + test("does not collapse same-file inner clones into self-duplicate groups", async () => { + const root = await makeTempProject(); + const source = ` +export class InvoiceNormalizer { + normalizeDomestic(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); + } + + normalizeInternational(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); + } +} +`; + + await writeProjectFile(root, "src/local-class.ts", source); + + const index = await buildProjectIndex(root); + const result = await findDuplicates(index, { + includeSameFile: true, + minConfidence: "high", + limit: 10, + }); + + const sameFileGroups = result.groups.filter((group) => group.primaryLeft.file === group.primaryRight.file); + expect(sameFileGroups.length).toBeGreaterThan(0); + for (const group of sameFileGroups) { + expect({ startLine: group.primaryLeft.startLine, endLine: group.primaryLeft.endLine }).not.toEqual({ + startLine: group.primaryRight.startLine, + endLine: group.primaryRight.endLine, + }); + } + }); + + test("duplicates CLI emits bounded JSON groups", async () => { const root = await makeTempProject(); const source = ` export function summarizeOrders(rows: Array<{ amount: number; tax: number }>) { @@ -396,12 +510,44 @@ export function summarizeOrders(rows: Array<{ amount: number; tax: number }>) { await writeProjectFile(root, "src/orders-b.ts", source); const result = await captureCli(["duplicates", "src", "--min-confidence", "high", "--limit", "1"], root); - const parsed = JSON.parse(result.stdout) as { suggestions?: Array<{ score?: number }> }; + const parsed = JSON.parse(result.stdout) as { groups?: Array<{ score?: number }> }; expect(result.exitCode).toBeUndefined(); expect(result.stderr).toBe(""); - expect(parsed.suggestions).toHaveLength(1); - expect(parsed.suggestions?.[0]?.score).toBeGreaterThan(90); + expect(parsed.groups).toHaveLength(1); + expect(parsed.groups?.[0]?.score).toBeGreaterThan(90); + }); + + test("duplicates CLI includes raw suggestions only with --raw-pairs", async () => { + const root = await makeTempProject(); + const source = ` +export function summarizeOrders(rows: Array<{ amount: number; tax: number }>) { + const output: string[] = []; + for (const row of rows) { + const subtotal = row.amount + row.tax; + const rounded = Math.round(subtotal * 100) / 100; + const label = rounded > 100 ? "large" : "small"; + output.push(label + ":" + rounded.toFixed(2)); + } + return output.filter((value) => value.includes(":")).join(","); +} +`; + + await writeProjectFile(root, "src/orders-a.ts", source); + await writeProjectFile(root, "src/orders-b.ts", source); + + const defaultResult = await captureCli(["duplicates", "src", "--min-confidence", "high", "--limit", "5"], root); + const rawResult = await captureCli( + ["duplicates", "src", "--min-confidence", "high", "--limit", "5", "--raw-pairs"], + root, + ); + const defaultParsed = JSON.parse(defaultResult.stdout) as { suggestions?: unknown[] }; + const rawParsed = JSON.parse(rawResult.stdout) as { suggestions?: unknown[] }; + + expect(defaultResult.exitCode).toBeUndefined(); + expect(rawResult.exitCode).toBeUndefined(); + expect(defaultParsed.suggestions).toBeUndefined(); + expect(rawParsed.suggestions?.length).toBeGreaterThan(0); }); test("duplicates CLI accepts a zero suggestion limit", async () => { @@ -421,14 +567,15 @@ export function sameRows(rows: number[]) { const result = await captureCli(["duplicates", "--root", ".", "src", "--limit", "0", "--include-small"], root); const parsed = JSON.parse(result.stdout) as { - suggestions?: unknown[]; - omittedCounts?: { suggestions?: number; candidatePairs?: number }; + groups?: unknown[]; + omittedCounts?: { groups?: number; suggestions?: number; candidatePairs?: number }; stats?: { candidatePairs?: number }; }; expect(result.exitCode).toBeUndefined(); expect(result.stderr).toBe(""); - expect(parsed.suggestions).toHaveLength(0); + expect(parsed.groups).toHaveLength(0); + expect(parsed.omittedCounts?.groups).toBeGreaterThan(0); expect(parsed.omittedCounts?.suggestions).toBeGreaterThan(0); expect(parsed.omittedCounts?.candidatePairs).toBeUndefined(); expect(parsed.stats?.candidatePairs).toBeGreaterThan(0); @@ -466,8 +613,8 @@ export function sameRows(rows: number[]) { expect(result.omittedCounts.oversizedBuckets).toBeGreaterThan(0); expect( - result.suggestions.some( - (suggestion) => suggestion.left.file === "src/a.txt" && suggestion.right.file === "src/b.txt", + result.groups.some( + (group) => group.primaryLeft.file === "src/a.txt" && group.primaryRight.file === "src/b.txt", ), ).toBeTruthy(); }); @@ -498,7 +645,7 @@ export function sharedOversizedClone(rows) { minConfidence: "high", }); - expect(result.suggestions).toHaveLength(0); + expect(result.groups).toHaveLength(0); expect(result.omittedCounts.oversizedBuckets).toBeGreaterThan(0); }); }); diff --git a/tests/git-diff-semantics.test.ts b/tests/git-diff-semantics.test.ts index 6819a28f..ca6b07a1 100644 --- a/tests/git-diff-semantics.test.ts +++ b/tests/git-diff-semantics.test.ts @@ -2,12 +2,8 @@ import { describe, it, expect } from "vitest"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { execFileSync } from "node:child_process"; import { listChangedFiles, getUnifiedDiff } from "../src/util.js"; - -function git(cwd: string, args: string[]): string { - return execFileSync("git", args, { cwd, encoding: "utf8" }).trim(); -} +import { runGit as git } from "./helpers/git.js"; function makeGitTempDir(prefix: string): Promise { return fs.mkdtemp(path.join(os.tmpdir(), prefix)); diff --git a/tests/graph-delta.test.ts b/tests/graph-delta.test.ts index ac0ce4cc..7b9c0c73 100644 --- a/tests/graph-delta.test.ts +++ b/tests/graph-delta.test.ts @@ -2,9 +2,9 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; import os from "node:os"; import fsp from "node:fs/promises"; -import { spawnSync } from "node:child_process"; import { buildProjectIndex, buildGraphDelta } from "../src/index.js"; import type { IndexManifest } from "../src/indexer/build-cache.js"; +import { runGit } from "./helpers/git.js"; async function mkTmpDir(prefix: string): Promise { return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); @@ -25,19 +25,6 @@ function manifestPathFor(root: string): string { return path.join(root, ".codegraph-cache", "index-v1", "manifest.json"); } -function runGit(root: string, args: string[]): string { - const result = spawnSync("git", args, { - cwd: root, - env: process.env, - encoding: "utf8", - }); - if (result.error) throw result.error; - if (result.status !== 0) { - throw new Error(`git ${args.join(" ")} failed (${result.status}): ${result.stderr}`); - } - return result.stdout.trim(); -} - describe("Graph delta export", () => { it("reports added and removed edges for changed files", async () => { const root = await mkTmpDir("dg-graph-delta-"); diff --git a/tests/helpers/agent.ts b/tests/helpers/agent.ts new file mode 100644 index 00000000..78bc59f2 --- /dev/null +++ b/tests/helpers/agent.ts @@ -0,0 +1,22 @@ +import type { AgentProjectSnapshot, AgentSession } from "../../src/agent/session.js"; + +export function countingSession(session: AgentSession): { session: AgentSession; loads: () => number } { + let cached: Promise | undefined; + let loadCount = 0; + return { + session: { + loadProject: async () => { + if (!cached) { + loadCount += 1; + cached = session.loadProject(); + } + return await cached; + }, + invalidate: () => { + cached = undefined; + session.invalidate(); + }, + }, + loads: () => loadCount, + }; +} diff --git a/tests/helpers/filesystem.ts b/tests/helpers/filesystem.ts new file mode 100644 index 00000000..3a4a7fa8 --- /dev/null +++ b/tests/helpers/filesystem.ts @@ -0,0 +1,7 @@ +export function isSymlinkUnavailable(error: unknown): boolean { + return ( + error instanceof Error && + "code" in error && + (error.code === "EPERM" || error.code === "EACCES" || error.code === "ENOTSUP") + ); +} diff --git a/tests/helpers/git.ts b/tests/helpers/git.ts new file mode 100644 index 00000000..51c3dfd9 --- /dev/null +++ b/tests/helpers/git.ts @@ -0,0 +1,20 @@ +import { spawnSync } from "node:child_process"; + +export function runGit(root: string, args: string[]): string { + const result = spawnSync("git", args, { + cwd: root, + env: { + ...process.env, + GIT_AUTHOR_NAME: process.env.GIT_AUTHOR_NAME ?? "Codegraph Test", + GIT_AUTHOR_EMAIL: process.env.GIT_AUTHOR_EMAIL ?? "codegraph@example.test", + GIT_COMMITTER_NAME: process.env.GIT_COMMITTER_NAME ?? "Codegraph Test", + GIT_COMMITTER_EMAIL: process.env.GIT_COMMITTER_EMAIL ?? "codegraph@example.test", + }, + encoding: "utf8", + }); + if (result.error) throw result.error; + if (result.status !== 0) { + throw new Error(`git ${args.join(" ")} failed (${result.status}): ${result.stderr}`); + } + return result.stdout.trim(); +} diff --git a/tests/impact-cli.test.ts b/tests/impact-cli.test.ts index 4a66e657..b5a5093c 100644 --- a/tests/impact-cli.test.ts +++ b/tests/impact-cli.test.ts @@ -1,9 +1,10 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; -import { spawn, spawnSync } from "node:child_process"; +import { spawn } from "node:child_process"; import os from "node:os"; import fsp from "node:fs/promises"; import { runCli } from "../src/cli.js"; +import { runGit } from "./helpers/git.js"; const tsxCliPath = path.resolve(process.cwd(), "node_modules", "tsx", "dist", "cli.mjs"); const codegraphCliPath = path.resolve(process.cwd(), "src", "cli.ts"); @@ -27,19 +28,6 @@ index 1234567..abcdef0 100644 +export const IMPACT_FLAG = "impact"; `; -function runGit(root: string, args: string[]): string { - const result = spawnSync("git", args, { - cwd: root, - env: process.env, - encoding: "utf8", - }); - if (result.error) throw result.error; - if (result.status !== 0) { - throw new Error(`git ${args.join(" ")} failed (${result.status}): ${result.stderr}`); - } - return result.stdout.trim(); -} - function stdinForImpactCli(opts?: { stdin?: string }): string { if (opts && Object.hasOwn(opts, "stdin")) return opts.stdin ?? ""; return impactDiff; diff --git a/tests/impact-git-provider.test.ts b/tests/impact-git-provider.test.ts index 38b01e73..c6d67d81 100644 --- a/tests/impact-git-provider.test.ts +++ b/tests/impact-git-provider.test.ts @@ -2,8 +2,8 @@ import { afterEach, describe, expect, it } from "vitest"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { execFileSync } from "node:child_process"; import { getDiff } from "../src/impact/providers/base.js"; +import { runGit as git } from "./helpers/git.js"; const gitRepoRoots: string[] = []; @@ -13,20 +13,6 @@ afterEach(() => { } }); -function git(cwd: string, args: string[]): string { - return execFileSync("git", args, { - cwd, - encoding: "utf8", - env: { - ...process.env, - GIT_AUTHOR_NAME: "Codegraph Test", - GIT_AUTHOR_EMAIL: "codegraph@example.test", - GIT_COMMITTER_NAME: "Codegraph Test", - GIT_COMMITTER_EMAIL: "codegraph@example.test", - }, - }).trim(); -} - function writeFile(root: string, relativePath: string, text: string): void { const filePath = path.join(root, relativePath); fs.mkdirSync(path.dirname(filePath), { recursive: true }); diff --git a/tests/impact-streaming.test.ts b/tests/impact-streaming.test.ts index bb88b7b8..bc03ff23 100644 --- a/tests/impact-streaming.test.ts +++ b/tests/impact-streaming.test.ts @@ -2,7 +2,6 @@ import { describe, it, expect } from "vitest"; import path from "node:path"; import os from "node:os"; import fsp from "node:fs/promises"; -import { execFileSync } from "node:child_process"; import { analyzeImpactFromDiff, analyzeImpactStreaming, @@ -11,25 +10,12 @@ import { } from "../src/impact/index.js"; import { impactItemEmissionKey } from "../src/impact/streaming.js"; import { buildProjectIndex } from "../src/index.js"; +import { runGit as git } from "./helpers/git.js"; async function mkTmpDir(prefix: string): Promise { return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); } -function git(root: string, args: string[]): string { - return execFileSync("git", args, { - cwd: root, - encoding: "utf8", - env: { - ...process.env, - GIT_AUTHOR_NAME: "Codegraph Test", - GIT_AUTHOR_EMAIL: "codegraph@example.test", - GIT_COMMITTER_NAME: "Codegraph Test", - GIT_COMMITTER_EMAIL: "codegraph@example.test", - }, - }).trim(); -} - async function firstStreamError(stream: AsyncGenerator): Promise { for await (const chunk of stream) { if (chunk.type === "error") { diff --git a/tests/mcp-server.test.ts b/tests/mcp-server.test.ts index a9abc1dd..f3f53344 100644 --- a/tests/mcp-server.test.ts +++ b/tests/mcp-server.test.ts @@ -3,29 +3,10 @@ import { request as httpRequest } from "node:http"; import os from "node:os"; import path from "node:path"; import { describe, expect, it, vi } from "vitest"; -import { createAgentSession, type AgentProjectSnapshot, type AgentSession } from "../src/agent/session.js"; +import { createAgentSession } from "../src/agent/session.js"; import { createCodegraphMcpHandlers, listCodegraphMcpTools, startCodegraphMcpHttpServer } from "../src/mcp/server.js"; - -function countingSession(session: AgentSession): { session: AgentSession; loads: () => number } { - let cached: Promise | undefined; - let loadCount = 0; - return { - session: { - loadProject: async () => { - if (!cached) { - loadCount += 1; - cached = session.loadProject(); - } - return await cached; - }, - invalidate: () => { - cached = undefined; - session.invalidate(); - }, - }, - loads: () => loadCount, - }; -} +import { countingSession } from "./helpers/agent.js"; +import { isSymlinkUnavailable } from "./helpers/filesystem.js"; type JsonRpcObject = { jsonrpc?: unknown; @@ -720,11 +701,3 @@ describe("codegraph MCP handlers", () => { function normalizeSqlitePath(value: unknown): string { return typeof value === "string" ? value.replace(/\\/g, "/") : ""; } - -function isSymlinkUnavailable(error: unknown): boolean { - return ( - error instanceof Error && - "code" in error && - (error.code === "EPERM" || error.code === "EACCES" || error.code === "ENOTSUP") - ); -} diff --git a/tests/package-metadata.test.ts b/tests/package-metadata.test.ts index 8329cd62..5a25f2c6 100644 --- a/tests/package-metadata.test.ts +++ b/tests/package-metadata.test.ts @@ -314,6 +314,15 @@ describe("package metadata", () => { expect(scripts.prepare).toBe("node ./scripts/prepare-package.mjs"); }); + it("keeps npm test on the fast suite and benchmark coverage opt-in", () => { + const rootPackage = readJson("package.json"); + const scripts = readStringRecord(rootPackage.scripts); + + expect(scripts.test).toBe("npm run test:fast"); + expect(scripts["test:fast"]).toContain("--exclude tests/bench-harness.test.ts"); + expect(scripts["test:bench"]).toContain("tests/bench-harness.test.ts"); + }); + it("exposes opt-in JavaScript and native HTML coverage reporting", () => { const rootPackage = readJson("package.json"); const scripts = readStringRecord(rootPackage.scripts); diff --git a/tests/project-file-discovery.test.ts b/tests/project-file-discovery.test.ts index a6399786..a03656ca 100644 --- a/tests/project-file-discovery.test.ts +++ b/tests/project-file-discovery.test.ts @@ -6,6 +6,7 @@ import { buildProjectIndex, listProjectFiles, discoverProjectFiles } from "../sr import { DEFAULT_PROJECT_MANIFESTS } from "../src/util.js"; import { isRelativePathInside, translateGlobRootIgnoreGlobsForScanRoot } from "../src/util/projectFiles.js"; import { parseDotnetName, parseGoModuleName, parsePomName, parseTomlName } from "../src/util/projectFiles/parsers.js"; +import { isSymlinkUnavailable } from "./helpers/filesystem.js"; const normalize = (value: string) => value.replace(/\\/g, "/"); @@ -687,11 +688,3 @@ describe("project file discovery", () => { expect(discovered.map(normalize)).toContain(normalize(appFile)); }); }); - -function isSymlinkUnavailable(error: unknown): boolean { - return ( - error instanceof Error && - "code" in error && - (error.code === "EPERM" || error.code === "EACCES" || error.code === "ENOTSUP") - ); -} diff --git a/tests/review.test.ts b/tests/review.test.ts index 8684c5da..59c66d0c 100644 --- a/tests/review.test.ts +++ b/tests/review.test.ts @@ -3,30 +3,17 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; import fsp from "node:fs/promises"; -import { spawnSync } from "node:child_process"; import { buildProjectIndex, buildProjectIndexFromFiles, buildReviewReport } from "../src/index.js"; import * as indexerBuild from "../src/indexer/build-index.js"; import * as indexerNavigation from "../src/indexer/navigation.js"; import type { IncrementalBuildOptions, SymbolDef } from "../src/indexer/types.js"; import * as impactMap from "../src/impact/map.js"; +import { runGit } from "./helpers/git.js"; async function mkTmpDir(prefix: string): Promise { return await fsp.mkdtemp(path.join(os.tmpdir(), prefix)); } -function runGit(root: string, args: string[]): string { - const result = spawnSync("git", args, { - cwd: root, - env: process.env, - encoding: "utf8", - }); - if (result.error) throw result.error; - if (result.status !== 0) { - throw new Error(`git ${args.join(" ")} failed (${result.status}): ${result.stderr}`); - } - return result.stdout.trim(); -} - function normalize(file: string): string { return file.replace(/\\/g, "/"); }