diff --git a/CLAUDE.md b/CLAUDE.md index e65ef88..5d06e14 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -59,7 +59,7 @@ make help # List all targets cmd/contextception/ CLI entrypoint cmd/gen-schema/ JSON schema generator internal/ - analyzer/ Core analysis engine (scoring, categorization, cycles) + analyzer/ Core analysis engine (scoring, categorization, risk triage, cycles) change/ PR/branch diff analysis classify/ File role classification cli/ Command handlers (cobra subcommands) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c5308b4..e1b8a58 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,16 +31,30 @@ make lint # Run golangci-lint ``` cmd/contextception/ CLI entrypoint +cmd/gen-schema/ JSON schema generator internal/ - analyzer/ Core analysis engine + analyzer/ Core analysis engine (scoring, categorization, risk triage) change/ PR/branch diff analysis - cli/ Command handlers - config/ Configuration parsing - db/ SQLite database layer + classify/ File role classification + cli/ Command handlers (cobra subcommands) + config/ Configuration parsing (per-repo + global) + db/ SQLite database layer (migrations, store, search) extractor/ Language-specific extractors (python, typescript, golang, java, rust) - git/ Git history signals - indexer/ Incremental indexing + git/ Git history signal extraction + grader/ Internal quality evaluation framework + history/ Historical analysis, usage tracking, and feedback storage + indexer/ Incremental indexing pipeline + mcpserver/ MCP server (tools, stdio transport) + model/ Shared data types resolver/ Module resolution (per-language) + session/ Claude Code session parser (discover, adoption) + update/ Version check, self-update, install method detection + validation/ Fixture-based validation framework + version/ Version injection (set via ldflags) +protocol/ JSON Schema specifications +schema/ Go types for schema generation +integrations/ MCP config examples and slash commands +testdata/ Test fixtures (synthetic repos + expected outputs) ``` ## Adding a New Language diff --git a/README.md b/README.md index 9bd8f9c..510bd63 100644 --- a/README.md +++ b/README.md @@ -172,13 +172,47 @@ $ contextception analyze-change origin/main Returns a PR-level impact report with: +- **Per-file risk scoring:** every changed file gets a risk score (0–100) and tier (SAFE/REVIEW/TEST/CRITICAL) +- **Risk triage:** files grouped by tier with human-readable risk narratives explaining why each file is flagged - **Test gaps:** changed files with no test coverage, flagged before merge +- **Test suggestions:** auto-generated recommendations for high-risk untested files (which test file to create, what to test) - **Coupling detection:** pairs of changed files that depend on each other - **Hidden coupling:** co-change partners *not in your diff* that may need updating -- **Per-file blast radius:** which specific changes carry the most risk +- **Aggregate risk:** overall PR risk score with percentile ranking against historical baselines (after 10+ analyses) - **Aggregated must_read:** merged context across all changed files -Use `--ci --fail-on high` to gate PRs automatically. Results are stored in a local history database, enabling trend tracking with `contextception history`: +### Risk Tiers + +| Tier | Score | Meaning | +|------|-------|---------| +| **SAFE** | 0–20 | New files, well-tested utilities, low coupling | +| **REVIEW** | 21–50 | Moderate risk, standard code review sufficient | +| **TEST** | 51–75 | High risk, targeted testing recommended | +| **CRITICAL** | 76–100 | Maximum risk, regressions likely without careful review | + +Risk scores combine change status, structural factors (importer count, co-change frequency, fragility, mutual dependencies), and test coverage adjustments. + +### Token-optimized output + +Use `--compact` for a text summary optimized for LLM consumption (~60–75% fewer tokens than JSON): + +```bash +$ contextception analyze-change --compact +``` + +### CI integration + +Use `--ci --fail-on` to gate PRs automatically. A risk badge is printed to stderr: + +```bash +# Fail on high blast radius +contextception analyze-change --ci --fail-on high + +# Fail only if risk triage has CRITICAL files +contextception analyze-change --ci --fail-on critical +``` + +Results are stored in a local history database, enabling trend tracking with `contextception history`: ```bash $ contextception history hotspots # Files that repeatedly appear as hotspots @@ -249,7 +283,20 @@ Contextception averages ~1,000 tokens per analysis vs. Repomix's full-repo outpu ## MCP Setup (30 seconds) -Make your AI agent smarter. Add to your `~/.claude.json` (Claude Code) or equivalent MCP config: +Make your AI agent smarter. The `setup` command auto-detects your editor and configures everything: + +```bash +# Claude Code (MCP server + hooks + slash commands) +contextception setup + +# Cursor or Windsurf +contextception setup --editor cursor +contextception setup --editor windsurf +``` + +Use `--dry-run` to preview changes, or `--uninstall` to reverse. + +Or configure manually — add to your `~/.claude.json` (Claude Code) or equivalent MCP config: ```json { @@ -273,11 +320,22 @@ This exposes nine tools to the AI agent: | `get_entrypoints` | Return entrypoint and foundation files for project orientation | | `get_structure` | Return directory structure with file counts and language distribution | | `get_archetypes` | Detect representative files across architectural layers | -| `analyze_change` | Analyze the impact of a git diff / PR (blast radius, test gaps, coupling) | +| `analyze_change` | Analyze the impact of a git diff / PR (risk scoring, triage, test gaps, coupling) | | `rate_context` | Rate how useful a previous `get_context` result was (feedback for accuracy tracking) | Works with **Claude Code**, **Cursor**, **Windsurf**, and any MCP-compatible tool. +### Slash Commands + +Two built-in slash commands for AI-assisted PR review (installed automatically by `contextception setup`): + +| Command | Description | +|---------|-------------| +| `/pr-risk` | Run risk analysis on the current branch and present an actionable, human-friendly review | +| `/pr-fix` | Analyze risk, then build an ordered plan to fix every issue found (test gaps, coupling, fragility) | + +These work by combining contextception's deterministic risk analysis with the LLM's ability to explain and translate — contextception computes the scores, the LLM presents them in plain language. See [`integrations/`](integrations/) for setup details. + --- ## Language Support @@ -320,7 +378,7 @@ contextception session Show contextception adoption across Clau | `--mode plan\|implement\|review` | Shape output for AI workflow stage | | `--token-budget N` | Cap output to fit token limits | | `--compact` | Token-optimized text summary (~60-75% fewer tokens than JSON) | -| `--ci --fail-on high\|medium` | Exit codes for CI pipelines | +| `--ci --fail-on high\|medium\|critical` | Exit codes for CI pipelines | | `--cap N` | Limit must_read entries (overflow to related) | | `--no-external` | Exclude external dependencies | | `--no-update-check` | Disable automatic update version check | diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index fe3cf70..d3fa660 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -119,11 +119,23 @@ The historical cap prevents co-change signals from overwhelming structural evide Analyzes the impact of a git diff (PR or branch): 1. Diffs `base..head` to find changed files -2. Analyzes each changed file independently -3. Detects coupling between changed files (structural edges) -4. Identifies test gaps (changed files with no test coverage) -5. Surfaces hidden coupling (co-change partners not in the diff) -6. Aggregates blast radius across all changed files +2. Analyzes each changed file independently (full per-file AnalysisOutput) +3. Computes per-file risk scores (0--100) with tier classification (SAFE/REVIEW/TEST/CRITICAL) +4. Detects coupling between changed files (structural edges) +5. Identifies test gaps (changed files with no test coverage) +6. Surfaces hidden coupling (co-change partners not in the diff) +7. Aggregates blast radius and risk triage across all changed files +8. Generates test suggestions for high-risk untested files + +### Risk Scoring Engine (`internal/analyzer/risk.go`) + +Per-file risk scoring for change analysis. Formula: `base_score + structural_risk * coverage_multiplier`, clamped to [0, 100]. + +- **Base score**: added=10 (20 with exports), modified=30, deleted=5, renamed=5 +- **Structural risk**: normalized importer count, co-change frequency, fragility (Ce/(Ca+Ce)), mutual deps, cycles +- **Coverage adjustment**: direct tests ×0.7, dependency tests ×0.85, no tests ×1.2 +- **Evidence gating**: same-package siblings filtered unless they have import edges, co-change ≥2, or prefix match +- **Percentile ranking**: stored in `history.sqlite` `risk_scores` table, computed after 10+ records ### Database Layer (`internal/db/`) diff --git a/docs/features.md b/docs/features.md index 675340b..6e88b8f 100644 --- a/docs/features.md +++ b/docs/features.md @@ -98,6 +98,57 @@ Where Ce = files the subject imports (outdegree), Ca = files that import the sub --- +## Risk Triage + +The `analyze-change` command assigns a per-file **risk score** (0--100) and groups files into tiers: + +| Tier | Score Range | Meaning | +|------|-------------|---------| +| `SAFE` | 0--20 | New files, well-tested utilities, low coupling | +| `REVIEW` | 21--50 | Moderate risk, standard code review sufficient | +| `TEST` | 51--75 | High risk, targeted testing recommended | +| `CRITICAL` | 76--100 | Maximum risk, regressions likely without careful review | + +### Risk Score Formula + +The score combines four components: + +1. **Base score** by change status: added=10 (20 with exports), modified=30, deleted=5, renamed=5 +2. **Structural risk** (modified files only): normalized importer count, co-change frequency, fragility, mutual dependencies, circular dependencies +3. **Coverage adjustment**: direct tests ×0.7, dependency tests ×0.85, no tests ×1.2 +4. **Clamp** to [0, 100] + +### Per-file fields + +| Field | Type | Description | +|-------|------|-------------| +| `risk_score` | int | Computed risk score (0--100) | +| `risk_tier` | string | `SAFE`, `REVIEW`, `TEST`, or `CRITICAL` | +| `risk_factors` | []string | Factors contributing to the score | +| `risk_narrative` | string | Human-readable risk explanation | + +### Report-level fields + +| Field | Type | Description | +|-------|------|-------------| +| `risk_triage` | object | Files grouped by tier (critical, test, review, safe) | +| `aggregate_risk.score` | int | Max per-file score across the PR | +| `aggregate_risk.percentile` | int | Percentile vs. historical scores (after 10+ analyses) | +| `aggregate_risk.regression_risk` | string | Summary of regression risk from critical files | +| `aggregate_risk.test_coverage_ratio` | float | Ratio of changed files with direct tests | +| `test_suggestions` | []object | Suggested tests for high-risk untested files | + +### Evidence-Gated Same-Package Filtering + +Same-package siblings (Go, Java, Rust) are only included in `must_read` if they have structural evidence: +- Direct import/call edge +- Co-change frequency >= 2 +- Filename prefix match + +This reduces noise in large packages where most siblings are irrelevant. + +--- + ## Hotspot Detection Identifies files that are both high-churn AND structural bottlenecks: @@ -370,19 +421,24 @@ contextception session Show adoption across Claude Code session | `--signatures` | false | Include code signatures for must_read symbols | | `--stable-threshold` | adaptive | Indegree threshold for the stable flag | | `--ci` | false | CI mode: suppress output, exit code reflects blast radius | -| `--fail-on` | high | Blast radius level that triggers non-zero exit (`high` or `medium`) | +| `--fail-on` | high | Trigger non-zero exit: `high`, `medium`, or `critical` (risk triage) | | `--mode` | (none) | Workflow mode: `plan`, `implement`, or `review` | | `--token-budget` | 0 | Target token budget (auto-adjusts caps) | | `--compact` | false | Token-optimized text summary (~60-75% fewer tokens than JSON) | ### CI mode -When `--ci` is set, output is suppressed and the exit code reflects blast radius: +When `--ci` is set, output is suppressed and the exit code reflects blast radius. A risk badge is also printed to stderr: + +``` +contextception: main..HEAD blast_radius=high files=27 +RISK: 72/100 | 1 CRITICAL | 2 TEST | 5 REVIEW | 19 SAFE +``` | Exit code | Meaning | |-----------|---------| | 0 | Blast radius below threshold | -| 1 | Medium blast radius (with `--fail-on medium`) | +| 1 | Medium blast radius (with `--fail-on medium`) or CRITICAL files (with `--fail-on critical`) | | 2 | High blast radius | ```bash @@ -391,6 +447,9 @@ contextception analyze-change --ci --fail-on high # Fail on medium or high contextception analyze-change --ci --fail-on medium + +# Fail only if risk triage has CRITICAL files +contextception analyze-change --ci --fail-on critical ``` ### Workflow modes diff --git a/integrations/README.md b/integrations/README.md index 268a1fb..2c5f119 100644 --- a/integrations/README.md +++ b/integrations/README.md @@ -38,7 +38,10 @@ contextception setup --editor cursor contextception setup --editor windsurf ``` -Use `--dry-run` to preview changes, or `--uninstall` to reverse. For Claude Code, this also installs hooks that remind the AI to call `get_context` before editing files. +Use `--dry-run` to preview changes, or `--uninstall` to reverse. For Claude Code, this installs: +- MCP server configuration +- PreToolUse hooks that remind the AI to call `get_context` before editing files +- `/pr-risk` and `/pr-fix` slash commands for AI-assisted PR review ## Manual Configuration @@ -151,6 +154,17 @@ All integrations expose the same nine tools: Contextception supports repositories using: Python, TypeScript/JavaScript, Go, Java, Rust. +## Slash Commands + +Two slash commands are included for AI-assisted PR review. These are installed automatically by `contextception setup` for Claude Code. + +| Command | File | Description | +|---------|------|-------------| +| `/pr-risk` | [`claude-code/pr-risk.md`](claude-code/pr-risk.md) | Run risk analysis and present a human-friendly review with verdicts, test coverage, and next steps | +| `/pr-fix` | [`claude-code/pr-fix.md`](claude-code/pr-fix.md) | Analyze risk, then build an ordered fix plan for every issue (test gaps, coupling, fragility) | + +For Cursor/Windsurf, place the command files in `.cursor/rules/` or `.windsurf/rules/` respectively. For other agents, see [`pr-risk-review.md`](pr-risk-review.md) for the full prompt template. + ## Further Reading - [MCP Tutorial](../docs/mcp-tutorial.md) — step-by-step guide to adding context intelligence to any AI agent diff --git a/integrations/claude-code/pr-fix.md b/integrations/claude-code/pr-fix.md new file mode 100644 index 0000000..a58158f --- /dev/null +++ b/integrations/claude-code/pr-fix.md @@ -0,0 +1,107 @@ +Analyze a PR or current branch for risk, then build a plan to fix every issue found. + +Use: `/pr-fix` (current branch vs main) or `/pr-fix 123` (PR #123) + +## Instructions + +### Step 1: Get the diff + +If the user provided a PR number: + +``` +gh pr view --json baseRefName,headRefName,url,title +``` + +Extract the base and head refs, then run: + +``` +contextception analyze-change --json .. +``` + +If no PR number was provided (current branch mode): + +``` +contextception analyze-change --json +``` + +If contextception is not installed, tell the user to install it and stop. + +### Step 2: Present the Risk Assessment + +Follow the same format as /pr-risk: + +1. One-sentence verdict based on aggregate_risk.score +2. Files that need attention (REVIEW/TEST/CRITICAL tiers only) +3. Safe files (one-line count) +4. Test coverage assessment + +But keep this section brief — the plan is the main event. + +### Step 3: Build the Fix Plan + +This is the core value. Analyze every issue found and create a concrete, ordered plan to resolve them. + +For each issue category, generate specific fix tasks: + +**Test gaps** (from test_gaps and test_suggestions): +- For each high-risk file without tests, suggest exactly what test file to create and what to test +- Use the risk_factors and coupling data to know what behaviors matter +- Example: "Create handler_test.go — test the new analytics wiring by verifying RecordUsage is called with correct parameters" + +**Coupling risks** (from coupling where direction is "mutual"): +- For mutual dependencies, suggest whether to break the cycle or add integration tests +- Example: "server.go and tools.go have a mutual dependency — add a test that exercises the full MCP request path through both files" + +**High-fragility files** (from risk_factors containing "fragility"): +- For files with high fragility (many imports, few importers), suggest interface boundaries or reducing imports +- Only suggest this for files scoring in TEST or CRITICAL tiers — don't over-optimize REVIEW files + +**Missing coverage for foundation files** (files with many importers): +- If a file has 5+ importers and no direct tests, prioritize testing it +- Example: "history.go has 10 importers — add migration tests to verify schema changes are backward-compatible" + +### Step 4: Order the Plan + +Sort tasks by impact: +1. Test gaps for CRITICAL/TEST tier files (highest value — prevents regressions) +2. Test gaps for foundation files (high importer count) +3. Coupling fixes (mutual deps, circular deps) +4. Everything else + +Number each task. Keep descriptions actionable — "Create X file that tests Y behavior" not "Consider adding tests." + +### Step 5: Present the Plan + +Format: + +``` +## Fix Plan (N tasks) + +Addressing these will move the PR from [CURRENT LEVEL] to lower risk. + +1. **[File]**: [What to do and why] +2. **[File]**: [What to do and why] +... +``` + +### Step 6: Offer to Execute + +After presenting the plan, ask: + +"Want me to start working through this plan? I'll tackle them in order and check each fix as I go." + +If the user says yes, work through the tasks one at a time: +- Before each task, say what you're about to do +- After each task, run `contextception analyze-change --json` again to verify the score improved +- Report progress: "Task 1/N complete — risk score dropped from X to Y" + +### Rules + +- Never show raw JSON or risk scores to the user +- Translate all technical factors into plain language +- Keep the assessment section under 15 lines — the plan is the focus +- Each plan task should be specific enough to execute without further research +- Don't suggest fixes for SAFE files +- Don't suggest architectural refactors for REVIEW files — save those for CRITICAL/TEST +- If the PR is all SAFE files, say "This PR looks clean — no fixes needed" and stop +- When in PR mode, note the PR title and URL for context diff --git a/integrations/claude-code/pr-risk.md b/integrations/claude-code/pr-risk.md new file mode 100644 index 0000000..a7a2e62 --- /dev/null +++ b/integrations/claude-code/pr-risk.md @@ -0,0 +1,63 @@ +Run a risk analysis on the current branch and present an actionable review. + +## Instructions + +Run this command to get the raw risk data: + +``` +contextception analyze-change --json +``` + +If contextception is not installed or the command fails, tell the user to install it (`brew install kehoej/tap/contextception` or `go install github.com/kehoej/contextception/cmd/contextception@latest`) and stop. + +If the command succeeds, parse the JSON output and present a review following this structure: + +### 1. One-Sentence Verdict + +Start with a single sentence that answers: "Is this PR safe to merge?" + +Use plain language based on the aggregate risk score: +- Score 0-20: "This PR is low risk — safe to merge with standard review." +- Score 21-50: "This PR has moderate risk — a few files deserve closer attention." +- Score 51-75: "This PR is high risk — targeted testing recommended before merging." +- Score 76-100: "This PR has critical risk — careful review required, regressions likely without it." + +### 2. Files That Need Attention + +Only show files in the REVIEW, TEST, or CRITICAL tiers. Skip SAFE files entirely. + +For each file worth discussing, explain in plain language: +- **What it does** (infer from the file path and name) +- **Why it's flagged** (translate risk_factors into human language — don't say "fragility 0.83", say "this file imports many packages but few things depend on it, making it sensitive to upstream changes") +- **What to check** (specific advice: "verify the output format didn't change", "check that the new flag is handled in all code paths", etc.) + +Group related files together. If 5 CLI handlers all have the same risk profile, summarize them as a group instead of listing each one. + +### 3. What's Safe to Skip + +One line: "N files are low risk (new files with tests, documentation updates, etc.) — no special attention needed." + +### 4. Test Coverage Assessment + +If test_coverage_ratio < 0.5, flag it: "Less than half the changed code files have direct tests." +If there are test_gaps, name the most important untested files (not all of them). +If there are test_suggestions, present them as actionable items. + +### 5. Offer Next Steps + +End with concrete options the user can choose: + +- "Want me to look at [highest-risk file] more closely?" +- "Should I check [file with test gap] for missing test coverage?" +- "Want me to review the coupling between [coupled files]?" +- "Should I run the tests to verify nothing broke?" + +Only offer options that make sense for the specific results. Don't offer to check test coverage if coverage is already good. + +### Formatting Rules + +- Never show raw JSON, risk scores, or factor lists to the user +- Never say "risk_score", "risk_tier", "risk_factors", or "fragility" — translate everything into plain language +- Keep the entire review under 40 lines +- Use the file's basename (not full path) when it's unambiguous +- If the PR is all SAFE files, keep the review to 3-4 lines total diff --git a/integrations/pr-risk-review.md b/integrations/pr-risk-review.md new file mode 100644 index 0000000..e240229 --- /dev/null +++ b/integrations/pr-risk-review.md @@ -0,0 +1,128 @@ +# /pr-risk — AI-Powered PR Risk Review + +A slash command / skill that runs contextception's `analyze-change` and presents the results as a conversational, actionable review instead of raw data. + +Works with: Claude Code, Cursor, Windsurf, Codex, or any LLM-powered editor that supports custom prompts or slash commands. + +## Setup + +### Claude Code + +Add to your project's `.claude/commands/pr-risk.md`: + +```markdown +Run a risk analysis on the current branch and present an actionable review. + +## Instructions + +Run this command to get the raw risk data: + +``` +contextception analyze-change --json +``` + +If contextception is not installed, tell the user to install it and stop. + +If the command succeeds, parse the JSON output and present a review following this structure: + +### 1. One-Sentence Verdict + +Start with a single sentence that answers: "Is this PR safe to merge?" + +Use plain language based on the aggregate risk score: +- Score 0-20: "This PR is low risk — safe to merge with standard review." +- Score 21-50: "This PR has moderate risk — a few files deserve closer attention." +- Score 51-75: "This PR is high risk — targeted testing recommended before merging." +- Score 76-100: "This PR has critical risk — careful review required, regressions likely without it." + +### 2. Files That Need Attention + +Only show files in the REVIEW, TEST, or CRITICAL tiers. Skip SAFE files entirely. + +For each file worth discussing, explain in plain language: +- **What it does** (infer from the file path and name) +- **Why it's flagged** (translate risk_factors into human language — don't say "fragility 0.83", say "this file imports many packages but few things depend on it, making it sensitive to upstream changes") +- **What to check** (specific advice: "verify the output format didn't change", "check that the new flag is handled in all code paths", etc.) + +Group related files together. If 5 CLI handlers all have the same risk profile, summarize them as a group instead of listing each one. + +### 3. What's Safe to Skip + +One line: "N files are low risk (new files with tests, documentation updates, etc.) — no special attention needed." + +### 4. Test Coverage Assessment + +If test_coverage_ratio < 0.5, flag it: "Less than half the changed code files have direct tests." +If there are test_gaps, name the most important untested files (not all of them). +If there are test_suggestions, present them as actionable items. + +### 5. Offer Next Steps + +End with concrete options the user can choose: + +- "Want me to look at [highest-risk file] more closely?" +- "Should I check [file with test gap] for missing test coverage?" +- "Want me to review the coupling between [coupled files]?" +- "Should I run the tests to verify nothing broke?" + +Only offer options that make sense for the specific results. Don't offer to check test coverage if coverage is already good. + +### Formatting Rules + +- Never show raw JSON, risk scores, or factor lists to the user +- Never say "risk_score", "risk_tier", "risk_factors", or "fragility" — translate everything +- Keep the entire review under 40 lines +- Use the file's basename (not full path) when it's unambiguous +- If the PR is all SAFE files, keep the review to 3 lines total +``` + +### Cursor / Windsurf + +Add the same content to your `.cursor/rules/pr-risk.md` or `.windsurf/rules/pr-risk.md`. The LLM will pick it up as a custom rule. + +Alternatively, paste the instructions section into your editor's custom instructions or system prompt. + +### Codex / Other Agents + +Add to your agent's system prompt or AGENTS.md: + +``` +When asked to review a PR for risk, run `contextception analyze-change --json` and follow the pr-risk review format from integrations/pr-risk-review.md. +``` + +## Example Output + +Here's what the LLM would present to a user after running `/pr-risk` on a 28-file PR: + +--- + +**This PR has moderate risk — a few files deserve closer attention.** + +**Files to review:** + +The MCP server and CLI analysis handlers are the highest-risk area. `tools.go`, `analyze.go`, and `server.go` all have mutual dependencies and circular coupling with each other. They're sensitive to upstream changes since they import many packages. Check that the new analytics wiring doesn't change the output format that existing MCP consumers expect. + +`history.go` is a foundation file — 10 other files import it. The changes here are relatively safe (low fragility, good test coverage), but because so many things depend on it, verify the migration schema is backward-compatible. + +`root.go`, `analyze_change.go`, and `hook_context.go` are standard CLI wiring changes — lower risk since they have test coverage. + +**19 files are low risk** (new CLI commands with tests, documentation updates) — no special attention needed. + +**Test coverage:** 57% of changed code files have direct tests. `analyze.go` and `tools.go` don't have dedicated test files, though they're covered by `cli_test.go` and `server_test.go` respectively. + +**Want me to:** +- Look at the coupling between `tools.go` and `server.go` more closely? +- Check `history.go`'s schema migration for backward compatibility? +- Run the test suite to verify nothing broke? + +--- + +## How It Works + +The skill is purely a prompt — it tells the LLM how to interpret contextception's JSON output and present it as a human-friendly review. The deterministic analysis (risk scoring, triage, coupling detection) is done by contextception. The LLM's job is translation and presentation. + +``` +contextception (deterministic) → JSON risk data → LLM (interpretation) → human-friendly review +``` + +This keeps the separation clean: contextception never hallucinates about code structure, and the LLM never computes risk scores. Each does what it's good at. diff --git a/internal/analyzer/compact.go b/internal/analyzer/compact.go index e840311..a98a1b8 100644 --- a/internal/analyzer/compact.go +++ b/internal/analyzer/compact.go @@ -2,6 +2,7 @@ package analyzer import ( "fmt" + "path" "sort" "strings" @@ -106,6 +107,11 @@ func FormatCompactChange(report *model.ChangeReport) string { } } + // Risk triage — single-line badge. + if badge := FormatRiskBadge(report); badge != "" { + fmt.Fprintf(&b, "%s\n", badge) + } + // Must read — inline. if len(report.MustRead) > 0 { var mrParts []string @@ -213,6 +219,243 @@ func compactLikelyModify(lm map[string][]model.LikelyModifyEntry) []string { return parts } +// FormatRiskTriage produces a structured risk triage output for a change report. +func FormatRiskTriage(report *model.ChangeReport) string { + if report == nil { + return "" + } + + var b strings.Builder + + fmt.Fprintf(&b, "RISK TRIAGE (%d files changed)\n", report.Summary.TotalFiles) + b.WriteString("════════════════════════════════════════════════════════════\n") + + // Regression risk summary. + if report.AggregateRisk != nil && report.AggregateRisk.RegressionRisk != "" { + fmt.Fprintf(&b, "Regression risk: %s\n", report.AggregateRisk.RegressionRisk) + } + + if report.RiskTriage == nil { + return b.String() + } + + // CRITICAL section — full details. + if len(report.RiskTriage.Critical) > 0 { + fmt.Fprintf(&b, "\nCRITICAL (%d file%s):\n", len(report.RiskTriage.Critical), pluralS(len(report.RiskTriage.Critical))) + for _, file := range report.RiskTriage.Critical { + cf := findChangedFile(report, file) + if cf != nil { + fmt.Fprintf(&b, " %-40s score: %d\n", file, cf.RiskScore) + if cf.RiskNarrative != "" { + fmt.Fprintf(&b, " %s\n", cf.RiskNarrative) + } + } + } + } + + // TEST section — with scores. + if len(report.RiskTriage.Test) > 0 { + fmt.Fprintf(&b, "\nTEST (%d file%s):\n", len(report.RiskTriage.Test), pluralS(len(report.RiskTriage.Test))) + for _, file := range report.RiskTriage.Test { + cf := findChangedFile(report, file) + if cf != nil { + fmt.Fprintf(&b, " %-40s score: %d\n", file, cf.RiskScore) + if cf.RiskNarrative != "" { + fmt.Fprintf(&b, " %s\n", cf.RiskNarrative) + } + } + } + } + + // REVIEW section — listed with scores. + if len(report.RiskTriage.Review) > 0 { + fmt.Fprintf(&b, "\nREVIEW (%d file%s):\n", len(report.RiskTriage.Review), pluralS(len(report.RiskTriage.Review))) + for _, file := range report.RiskTriage.Review { + cf := findChangedFile(report, file) + if cf != nil { + fmt.Fprintf(&b, " %-40s score: %d\n", file, cf.RiskScore) + } + } + } + + // SAFE section — count only. + if len(report.RiskTriage.Safe) > 0 { + fmt.Fprintf(&b, "\nSAFE (%d file%s)\n", len(report.RiskTriage.Safe), pluralS(len(report.RiskTriage.Safe))) + } + + return b.String() +} + +// FormatRiskBadge produces a human-readable risk summary for CI output. +// Designed to be understood without knowledge of the scoring system. +func FormatRiskBadge(report *model.ChangeReport) string { + if report == nil || report.AggregateRisk == nil || report.RiskTriage == nil { + return "" + } + + score := report.AggregateRisk.Score + triage := report.RiskTriage + total := len(triage.Critical) + len(triage.Test) + len(triage.Review) + len(triage.Safe) + + // Risk level label. + var level string + switch { + case score <= 20: + level = "LOW RISK" + case score <= 50: + level = "MODERATE RISK" + case score <= 75: + level = "HIGH RISK" + default: + level = "CRITICAL RISK" + } + + // Build the actionable summary, naming the top-risk files. + var detail string + switch { + case len(triage.Critical) > 0: + names := topFileNames(report, triage.Critical, 3) + detail = fmt.Sprintf("%d file%s need%s careful review: %s", + len(triage.Critical), pluralS(len(triage.Critical)), + verbS(len(triage.Critical)), names) + case len(triage.Test) > 0: + names := topFileNames(report, triage.Test, 3) + detail = fmt.Sprintf("%d file%s should be tested before merging: %s", + len(triage.Test), pluralS(len(triage.Test)), names) + case len(triage.Review) > 0: + names := topFileNames(report, triage.Review, 3) + detail = fmt.Sprintf("%d to review (%s), %d safe", + len(triage.Review), names, len(triage.Safe)) + default: + detail = fmt.Sprintf("all %d files are low risk", total) + } + + return fmt.Sprintf("%s (%d files) — %s", level, total, detail) +} + +// topFileNames returns disambiguated names of the highest-risk files from a tier list, +// sorted by risk score descending. Shows up to maxNames, with "+N more" if truncated. +func topFileNames(report *model.ChangeReport, files []string, maxNames int) string { + // Sort by risk score descending. + type scored struct { + fullPath string + score int + } + var items []scored + for _, f := range files { + cf := findChangedFile(report, f) + s := 0 + if cf != nil { + s = cf.RiskScore + } + items = append(items, scored{fullPath: f, score: s}) + } + sort.Slice(items, func(i, j int) bool { + return items[i].score > items[j].score + }) + + // Collect the top N paths for disambiguation. + var topPaths []string + for i, item := range items { + if i >= maxNames { + break + } + topPaths = append(topPaths, item.fullPath) + } + + names := shortestUniqueNames(topPaths) + result := strings.Join(names, ", ") + if len(files) > maxNames { + result += fmt.Sprintf(" +%d more", len(files)-maxNames) + } + return result +} + +// shortestUniqueNames returns the shortest path suffix for each file that +// disambiguates it from the others. If all basenames are unique, returns basenames. +// If duplicates exist, adds parent directories until names are unique. +// Example: ["schema/change.go", "internal/model/change.go"] → ["schema/change.go", "model/change.go"] +func shortestUniqueNames(paths []string) []string { + if len(paths) == 0 { + return nil + } + + // Start with basenames. + names := make([]string, len(paths)) + for i, p := range paths { + names[i] = path.Base(p) + } + + // Check for duplicates and resolve by adding parent path segments. + for depth := 1; depth < 10; depth++ { + dupes := findDuplicates(names) + if len(dupes) == 0 { + break + } + for i := 0; i < len(names) && i < len(paths); i++ { + if !dupes[names[i]] { + continue + } + // Add one more parent directory segment. + names[i] = pathSuffix(paths[i], depth+1) + } + } + + return names +} + +// findDuplicates returns a set of names that appear more than once. +func findDuplicates(names []string) map[string]bool { + counts := make(map[string]int, len(names)) + for _, n := range names { + counts[n]++ + } + dupes := make(map[string]bool) + for n, c := range counts { + if c > 1 { + dupes[n] = true + } + } + return dupes +} + +// pathSuffix returns the last n segments of a path. +// pathSuffix("internal/model/change.go", 2) → "model/change.go" +func pathSuffix(p string, n int) string { + parts := strings.Split(p, "/") + if n >= len(parts) { + return p + } + return strings.Join(parts[len(parts)-n:], "/") +} + +// verbS returns "s" for count == 1 (third-person singular), "" otherwise. +// "1 file needs" vs "2 files need". +func verbS(n int) string { + if n == 1 { + return "s" + } + return "" +} + +// findChangedFile returns the ChangedFile for a given path, or nil. +func findChangedFile(report *model.ChangeReport, path string) *model.ChangedFile { + for i := range report.ChangedFiles { + if report.ChangedFiles[i].File == path { + return &report.ChangedFiles[i] + } + } + return nil +} + +// pluralS returns "s" for counts != 1, empty string for 1. +func pluralS(n int) string { + if n == 1 { + return "" + } + return "s" +} + // compactWarnings collects warning-worthy conditions. func compactWarnings(output *model.AnalysisOutput) []string { var warnings []string diff --git a/internal/analyzer/compact_test.go b/internal/analyzer/compact_test.go index d93e640..1d060ff 100644 --- a/internal/analyzer/compact_test.go +++ b/internal/analyzer/compact_test.go @@ -156,6 +156,36 @@ func TestFormatCompactChange(t *testing.T) { } } +func TestFormatCompactChange_WithRiskTriage(t *testing.T) { + report := &model.ChangeReport{ + RefRange: "main..HEAD", + ChangedFiles: []model.ChangedFile{ + {File: "pkg/core.go", Status: "modified", RiskScore: 48}, + {File: "pkg/util.go", Status: "modified", RiskScore: 35}, + {File: "pkg/new.go", Status: "added", RiskScore: 10}, + }, + Summary: model.ChangeSummary{TotalFiles: 3, Modified: 2, Added: 1}, + BlastRadius: &model.BlastRadius{Level: "medium"}, + MustRead: []model.MustReadEntry{}, + RiskTriage: &model.RiskTriage{ + Critical: []string{}, + Test: []string{}, + Review: []string{"pkg/core.go", "pkg/util.go"}, + Safe: []string{"pkg/new.go"}, + }, + AggregateRisk: &model.AggregateRisk{Score: 48}, + } + + text := FormatCompactChange(report) + if !strings.Contains(text, "MODERATE RISK") { + t.Errorf("compact output should include risk badge: %s", text) + } + if !strings.Contains(text, "2 to review") { + t.Errorf("compact output should mention files to review: %s", text) + } + t.Logf("compact output:\n%s", text) +} + func TestCompactTokenSavings(t *testing.T) { // Verify compact output is significantly smaller than JSON. output := &model.AnalysisOutput{ @@ -196,3 +226,278 @@ func TestCompactTokenSavings(t *testing.T) { t.Error("compact output is empty") } } + +func TestFormatRiskTriage_Full(t *testing.T) { + report := &model.ChangeReport{ + Summary: model.ChangeSummary{TotalFiles: 10}, + ChangedFiles: []model.ChangedFile{ + {File: "core.go", Status: "modified", RiskScore: 82, RiskTier: "CRITICAL", RiskNarrative: "Modified. 7 importers."}, + {File: "handler.go", Status: "modified", RiskScore: 61, RiskTier: "TEST", RiskNarrative: "Modified. Output format changed."}, + {File: "util.go", Status: "modified", RiskScore: 35, RiskTier: "REVIEW"}, + {File: "new.go", Status: "added", RiskScore: 10, RiskTier: "SAFE"}, + }, + RiskTriage: &model.RiskTriage{ + Critical: []string{"core.go"}, + Test: []string{"handler.go"}, + Review: []string{"util.go"}, + Safe: []string{"new.go"}, + }, + AggregateRisk: &model.AggregateRisk{ + Score: 82, + RegressionRisk: "core.go: 7 importers, 3 untested", + }, + } + + output := FormatRiskTriage(report) + if !strings.Contains(output, "RISK TRIAGE") { + t.Error("missing RISK TRIAGE header") + } + if !strings.Contains(output, "CRITICAL (1 file)") { + t.Errorf("missing CRITICAL section: %s", output) + } + if !strings.Contains(output, "TEST (1 file)") { + t.Errorf("missing TEST section: %s", output) + } + if !strings.Contains(output, "REVIEW (1 file)") { + t.Errorf("missing REVIEW section: %s", output) + } + if !strings.Contains(output, "SAFE (1 file)") { + t.Errorf("missing SAFE section: %s", output) + } + if !strings.Contains(output, "score: 82") { + t.Errorf("missing CRITICAL score: %s", output) + } +} + +func TestFormatRiskTriage_NoCritical(t *testing.T) { + report := &model.ChangeReport{ + Summary: model.ChangeSummary{TotalFiles: 3}, + ChangedFiles: []model.ChangedFile{ + {File: "a.go", Status: "modified", RiskScore: 40, RiskTier: "REVIEW"}, + {File: "b.go", Status: "added", RiskScore: 10, RiskTier: "SAFE"}, + }, + RiskTriage: &model.RiskTriage{ + Critical: []string{}, + Test: []string{}, + Review: []string{"a.go"}, + Safe: []string{"b.go"}, + }, + AggregateRisk: &model.AggregateRisk{Score: 40}, + } + + output := FormatRiskTriage(report) + if strings.Contains(output, "CRITICAL") { + t.Error("should not contain CRITICAL section when no critical files") + } + if !strings.Contains(output, "REVIEW") { + t.Error("should contain REVIEW section") + } +} + +func TestFormatRiskTriage_AllSafe(t *testing.T) { + report := &model.ChangeReport{ + Summary: model.ChangeSummary{TotalFiles: 5}, + ChangedFiles: []model.ChangedFile{ + {File: "a.go", Status: "added", RiskScore: 10, RiskTier: "SAFE"}, + {File: "b.go", Status: "added", RiskScore: 10, RiskTier: "SAFE"}, + }, + RiskTriage: &model.RiskTriage{ + Critical: []string{}, + Test: []string{}, + Review: []string{}, + Safe: []string{"a.go", "b.go"}, + }, + AggregateRisk: &model.AggregateRisk{ + Score: 10, + RegressionRisk: "No modifications to existing code. Low regression risk.", + }, + } + + output := FormatRiskTriage(report) + if !strings.Contains(output, "SAFE (2 files)") { + t.Errorf("should show safe count: %s", output) + } +} + +func TestFormatRiskBadge_Critical(t *testing.T) { + report := &model.ChangeReport{ + ChangedFiles: []model.ChangedFile{ + {File: "pkg/core.go", RiskScore: 85}, + {File: "pkg/handler.go", RiskScore: 60}, + {File: "pkg/cli.go", RiskScore: 55}, + {File: "pkg/util.go", RiskScore: 40}, + {File: "pkg/config.go", RiskScore: 35}, + {File: "pkg/new1.go", RiskScore: 10}, + {File: "pkg/new2.go", RiskScore: 10}, + }, + AggregateRisk: &model.AggregateRisk{Score: 85}, + RiskTriage: &model.RiskTriage{ + Critical: []string{"pkg/core.go"}, + Test: []string{"pkg/handler.go", "pkg/cli.go"}, + Review: []string{"pkg/util.go", "pkg/config.go"}, + Safe: []string{"pkg/new1.go", "pkg/new2.go"}, + }, + } + + badge := FormatRiskBadge(report) + if !strings.Contains(badge, "CRITICAL RISK") { + t.Errorf("badge should say CRITICAL RISK: %s", badge) + } + if !strings.Contains(badge, "careful review") { + t.Errorf("badge should mention careful review: %s", badge) + } + if !strings.Contains(badge, "core.go") { + t.Errorf("badge should name the critical file: %s", badge) + } + t.Logf("badge: %s", badge) +} + +func TestFormatRiskBadge_Moderate(t *testing.T) { + report := &model.ChangeReport{ + ChangedFiles: []model.ChangedFile{ + {File: "pkg/a.go", RiskScore: 45}, + {File: "pkg/b.go", RiskScore: 40}, + {File: "pkg/c.go", RiskScore: 10}, + {File: "pkg/d.go", RiskScore: 10}, + {File: "pkg/e.go", RiskScore: 10}, + }, + AggregateRisk: &model.AggregateRisk{Score: 45}, + RiskTriage: &model.RiskTriage{ + Critical: []string{}, + Test: []string{}, + Review: []string{"pkg/a.go", "pkg/b.go"}, + Safe: []string{"pkg/c.go", "pkg/d.go", "pkg/e.go"}, + }, + } + + badge := FormatRiskBadge(report) + if !strings.Contains(badge, "MODERATE RISK") { + t.Errorf("badge should say MODERATE RISK: %s", badge) + } + if !strings.Contains(badge, "a.go") { + t.Errorf("badge should name review files: %s", badge) + } + if !strings.Contains(badge, "3 safe") { + t.Errorf("badge should mention safe count: %s", badge) + } + t.Logf("badge: %s", badge) +} + +func TestFormatRiskBadge_AllSafe(t *testing.T) { + report := &model.ChangeReport{ + ChangedFiles: []model.ChangedFile{ + {File: "a.go", RiskScore: 10}, + {File: "b.go", RiskScore: 10}, + {File: "c.go", RiskScore: 10}, + }, + AggregateRisk: &model.AggregateRisk{Score: 10}, + RiskTriage: &model.RiskTriage{ + Critical: []string{}, + Test: []string{}, + Review: []string{}, + Safe: []string{"a.go", "b.go", "c.go"}, + }, + } + + badge := FormatRiskBadge(report) + if !strings.Contains(badge, "LOW RISK") { + t.Errorf("badge should say LOW RISK: %s", badge) + } + if !strings.Contains(badge, "all 3 files are low risk") { + t.Errorf("badge should say all files are low risk: %s", badge) + } + t.Logf("badge: %s", badge) +} + +func TestFormatRiskBadge_Nil(t *testing.T) { + if FormatRiskBadge(nil) != "" { + t.Error("nil report should return empty badge") + } + if FormatRiskBadge(&model.ChangeReport{}) != "" { + t.Error("empty report should return empty badge") + } +} + +func TestShortestUniqueNames_UniqueBasenames(t *testing.T) { + paths := []string{"internal/analyzer/risk.go", "internal/cli/setup.go", "schema/analysis.go"} + names := shortestUniqueNames(paths) + if len(names) != 3 { + t.Fatalf("got %d names, want 3", len(names)) + } + // All basenames are unique, so just basenames. + if names[0] != "risk.go" || names[1] != "setup.go" || names[2] != "analysis.go" { + t.Errorf("expected basenames, got %v", names) + } +} + +func TestShortestUniqueNames_DuplicateBasenames(t *testing.T) { + paths := []string{"schema/change.go", "internal/model/change.go"} + names := shortestUniqueNames(paths) + if len(names) != 2 { + t.Fatalf("got %d names, want 2", len(names)) + } + // Both are "change.go" — need parent dir to disambiguate. + if names[0] == names[1] { + t.Errorf("names should be different, got %v", names) + } + if names[0] != "schema/change.go" { + t.Errorf("expected 'schema/change.go', got %q", names[0]) + } + if names[1] != "model/change.go" { + t.Errorf("expected 'model/change.go', got %q", names[1]) + } + t.Logf("disambiguated: %v", names) +} + +func TestShortestUniqueNames_TripleDuplicate(t *testing.T) { + paths := []string{"a/b/change.go", "c/b/change.go", "d/e/change.go"} + names := shortestUniqueNames(paths) + // "b/change.go" appears twice, "e/change.go" is unique at depth 2. + // Need depth 3 for the first two: "a/b/change.go" vs "c/b/change.go". + dupes := findDuplicates(names) + if len(dupes) > 0 { + t.Errorf("still have duplicates: %v", names) + } + t.Logf("disambiguated: %v", names) +} + +func TestShortestUniqueNames_Empty(t *testing.T) { + names := shortestUniqueNames(nil) + if names != nil { + t.Errorf("expected nil, got %v", names) + } +} + +func TestShortestUniqueNames_Single(t *testing.T) { + names := shortestUniqueNames([]string{"internal/foo/bar.go"}) + if len(names) != 1 || names[0] != "bar.go" { + t.Errorf("expected [bar.go], got %v", names) + } +} + +func TestFormatRiskBadge_DuplicateBasenames(t *testing.T) { + report := &model.ChangeReport{ + ChangedFiles: []model.ChangedFile{ + {File: "schema/change.go", RiskScore: 63}, + {File: "internal/model/change.go", RiskScore: 74}, + }, + AggregateRisk: &model.AggregateRisk{Score: 74}, + RiskTriage: &model.RiskTriage{ + Critical: []string{}, + Test: []string{"internal/model/change.go", "schema/change.go"}, + Review: []string{}, + Safe: []string{}, + }, + } + + badge := FormatRiskBadge(report) + // Should NOT say "change.go, change.go". + if strings.Contains(badge, "change.go, change.go") { + t.Errorf("badge has duplicate basenames: %s", badge) + } + // Should disambiguate. + if !strings.Contains(badge, "model/change.go") { + t.Errorf("badge should contain 'model/change.go': %s", badge) + } + t.Logf("badge: %s", badge) +} diff --git a/internal/analyzer/graph.go b/internal/analyzer/graph.go index d59e7d0..7ed1ce5 100644 --- a/internal/analyzer/graph.go +++ b/internal/analyzer/graph.go @@ -528,6 +528,10 @@ func collectCandidates(idx *db.Index, subject string, repoRoot string) ([]candid // Fetch git history signals (non-fatal if unavailable). enrichWithGitSignals(idx, candidates, allPaths, subject) + // Evidence gate: filter same-package siblings without structural evidence. + // Runs after enrichWithGitSignals so CoChangeFreq is populated. + filterSamePackageSiblings(candidates, subject) + // Convert to slice (exclude subject — it's handled separately). result := make([]candidate, 0, len(candidates)) for _, c := range candidates { @@ -866,6 +870,37 @@ func prioritizeByRustPrefix(siblings []string, subject string) []string { return append(withPrefix, without...) } +// filterSamePackageSiblings removes same-package siblings that lack structural evidence. +// A sibling is kept if it has: a direct import/call edge, co-change frequency >= 2, +// or a prefix match. Siblings without evidence are removed from the candidate map. +// This runs after enrichWithGitSignals so CoChangeFreq is populated. +func filterSamePackageSiblings(candidates map[string]*candidate, subject string) { + for p, c := range candidates { + if p == subject || c.Distance == 0 { + continue + } + // Only filter same-package siblings (Go, Java, Rust). + isSamePackage := c.IsSamePackageSibling || c.IsGoSamePackage || c.IsJavaSamePackage || c.IsRustSameModule + if !isSamePackage { + continue + } + // Keep if has direct import/call edge. + if c.IsImport || c.IsImporter { + continue + } + // Keep if co-change frequency >= 2. + if c.CoChangeFreq >= 2 { + continue + } + // Keep if has prefix match. + if c.HasPrefixMatch || c.HasJavaClassPrefix || c.HasRustModulePrefix { + continue + } + // No evidence: remove from candidates. + delete(candidates, p) + } +} + // enrichWithGitSignals populates churn and co-change data on candidates. // Failures are silently ignored (graceful degradation to structural-only). func enrichWithGitSignals(idx *db.Index, candidates map[string]*candidate, allPaths []string, subject string) { diff --git a/internal/analyzer/graph_test.go b/internal/analyzer/graph_test.go new file mode 100644 index 0000000..810888f --- /dev/null +++ b/internal/analyzer/graph_test.go @@ -0,0 +1,177 @@ +package analyzer + +import ( + "testing" + + "github.com/kehoej/contextception/internal/db" +) + +func TestEvidenceGate_DirectImport(t *testing.T) { + subject := "pkg/subject.go" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + "pkg/sibling.go": { + Path: "pkg/sibling.go", Distance: 1, + IsSamePackageSibling: true, IsGoSamePackage: true, + IsImport: true, // has direct import edge + }, + } + filterSamePackageSiblings(candidates, subject) + if _, ok := candidates["pkg/sibling.go"]; !ok { + t.Error("sibling with direct import should be kept") + } +} + +func TestEvidenceGate_CoChange(t *testing.T) { + subject := "pkg/subject.go" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + "pkg/sibling.go": { + Path: "pkg/sibling.go", Distance: 1, + IsSamePackageSibling: true, IsGoSamePackage: true, + CoChangeFreq: 3, // co-change >= 2 + }, + } + filterSamePackageSiblings(candidates, subject) + if _, ok := candidates["pkg/sibling.go"]; !ok { + t.Error("sibling with co-change >= 2 should be kept") + } +} + +func TestEvidenceGate_PrefixMatch(t *testing.T) { + subject := "pkg/subject.go" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + "pkg/subject_helper.go": { + Path: "pkg/subject_helper.go", Distance: 1, + IsSamePackageSibling: true, IsGoSamePackage: true, + HasPrefixMatch: true, // prefix match + }, + } + filterSamePackageSiblings(candidates, subject) + if _, ok := candidates["pkg/subject_helper.go"]; !ok { + t.Error("sibling with prefix match should be kept") + } +} + +func TestEvidenceGate_NoEvidence(t *testing.T) { + subject := "pkg/subject.go" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + "pkg/unrelated.go": { + Path: "pkg/unrelated.go", Distance: 1, + IsSamePackageSibling: true, IsGoSamePackage: true, + // No import, no co-change, no prefix match. + }, + } + filterSamePackageSiblings(candidates, subject) + if _, ok := candidates["pkg/unrelated.go"]; ok { + t.Error("sibling with no evidence should be filtered out") + } +} + +func TestEvidenceGate_LargePackage(t *testing.T) { + subject := "pkg/subject.go" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + } + // Add 25 siblings, only some with evidence. + for i := 0; i < 25; i++ { + name := "pkg/sibling_" + string(rune('a'+i)) + ".go" + c := &candidate{ + Path: name, Distance: 1, + IsSamePackageSibling: true, IsGoSamePackage: true, + } + switch i { + case 0: + c.IsImport = true // evidence + case 1: + c.CoChangeFreq = 5 // evidence + } + // rest have no evidence + candidates[name] = c + } + filterSamePackageSiblings(candidates, subject) + // Should keep subject + 2 with evidence, filter 23 without. + kept := 0 + for p := range candidates { + if p != subject { + kept++ + } + } + if kept != 2 { + t.Errorf("large package: kept=%d, want 2 (only evidence-gated)", kept) + } +} + +func TestEvidenceGate_JavaSamePackage(t *testing.T) { + subject := "src/main/java/com/Foo.java" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + "src/main/java/com/Bar.java": { + Path: "src/main/java/com/Bar.java", Distance: 1, + IsJavaSamePackage: true, + HasJavaClassPrefix: true, // class prefix match = evidence + }, + "src/main/java/com/Baz.java": { + Path: "src/main/java/com/Baz.java", Distance: 1, + IsJavaSamePackage: true, + // no evidence + }, + } + filterSamePackageSiblings(candidates, subject) + if _, ok := candidates["src/main/java/com/Bar.java"]; !ok { + t.Error("Java sibling with class prefix should be kept") + } + if _, ok := candidates["src/main/java/com/Baz.java"]; ok { + t.Error("Java sibling without evidence should be filtered") + } +} + +func TestEvidenceGate_RustSameModule(t *testing.T) { + subject := "src/sync/mutex.rs" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + "src/sync/rwlock.rs": { + Path: "src/sync/rwlock.rs", Distance: 1, + IsRustSameModule: true, + IsImporter: true, // has import edge + }, + "src/sync/barrier.rs": { + Path: "src/sync/barrier.rs", Distance: 1, + IsRustSameModule: true, + // no evidence + }, + } + filterSamePackageSiblings(candidates, subject) + if _, ok := candidates["src/sync/rwlock.rs"]; !ok { + t.Error("Rust sibling with import edge should be kept") + } + if _, ok := candidates["src/sync/barrier.rs"]; ok { + t.Error("Rust sibling without evidence should be filtered") + } +} + +func TestEvidenceGate_NonSamePackageUntouched(t *testing.T) { + subject := "pkg/subject.go" + candidates := map[string]*candidate{ + subject: {Path: subject, Distance: 0}, + "other/dep.go": { + Path: "other/dep.go", Distance: 1, + IsImport: true, + // NOT a same-package sibling — should not be filtered. + }, + "other/consumer.go": { + Path: "other/consumer.go", Distance: 2, + Signals: db.SignalRow{Indegree: 5}, + // NOT a same-package sibling — should not be filtered. + }, + } + filterSamePackageSiblings(candidates, subject) + if _, ok := candidates["other/dep.go"]; !ok { + t.Error("non-same-package dep should not be filtered") + } + if _, ok := candidates["other/consumer.go"]; !ok { + t.Error("non-same-package consumer should not be filtered") + } +} diff --git a/internal/analyzer/risk.go b/internal/analyzer/risk.go new file mode 100644 index 0000000..44ceb07 --- /dev/null +++ b/internal/analyzer/risk.go @@ -0,0 +1,480 @@ +package analyzer + +import ( + "fmt" + "math" + "path" + "strings" + + "github.com/kehoej/contextception/internal/classify" + "github.com/kehoej/contextception/internal/db" + "github.com/kehoej/contextception/internal/model" +) + +// Risk tier thresholds. +const ( + tierSafeMax = 20 + tierReviewMax = 50 + tierTestMax = 75 + riskScoreMax = 100 + riskScoreV1 = 1 // score_version for percentile filtering + narrativeMaxLen = 200 // max characters for risk narrative +) + +// RiskTier constants. +const ( + TierSafe = "SAFE" + TierReview = "REVIEW" + TierTest = "TEST" + TierCritical = "CRITICAL" +) + +// ComputeRiskScore calculates a per-file risk score (0-100). +// +// Formula: +// 1. Base score by status (added/modified/deleted/renamed) +// 2. Structural risk for modified files (importers, co-change, fragility, mutual deps, cycles) +// 3. Coverage adjustment (direct tests reduce, no tests increase) +// 4. Clamp to [0, 100] +func ComputeRiskScore(file model.ChangedFile, analysis *model.AnalysisOutput, idx *db.Index, changedPaths []string, maxIndegree int) (int, []string) { + var factors []string + + // Step 1: Base score by status. + base := baseScore(file, analysis, &factors) + + // Nil guard: if not indexed or no analysis, return base only. + // Non-code files (docs, config, etc.) get reduced scores since they can't + // cause code regressions. + if analysis == nil || !file.Indexed { + if !isCodeFile(file.File) { + base = base / 3 // docs/config: 30 -> 10 (SAFE) + if base < 5 { + base = 5 + } + factors = append(factors, "non-code file") + } + return clampScore(base), factors + } + + // Step 2: Structural risk (modified files only). + structural := 0.0 + if file.Status == "modified" { + structural = structuralRisk(file, analysis, idx, changedPaths, maxIndegree, &factors) + } + + raw := float64(base) + structural + + // Step 3: Coverage adjustment. + multiplier := coverageMultiplier(file, analysis, idx, &factors) + result := raw * multiplier + + return clampScore(int(math.Round(result))), factors +} + +// baseScore returns the base risk score for a file based on its change status. +func baseScore(file model.ChangedFile, analysis *model.AnalysisOutput, factors *[]string) int { + switch file.Status { + case "added": + // Added files with exports are higher risk. + if analysis != nil && hasExports(file.File, analysis) { + *factors = append(*factors, "new file with exports") + return 20 + } + *factors = append(*factors, "new file") + return 10 + case "modified": + *factors = append(*factors, "modified") + return 30 + case "deleted": + *factors = append(*factors, "deleted") + return 5 + case "renamed": + *factors = append(*factors, "renamed") + return 5 + default: + *factors = append(*factors, "modified") + return 30 + } +} + +// hasExports checks if a file has exported symbols (Go exported or TS/JS exports). +func hasExports(filePath string, analysis *model.AnalysisOutput) bool { + if analysis == nil { + return false + } + // Check if this file appears as an import target for other files. + // If it has reverse importers in the analysis, it effectively has exports. + for _, entry := range analysis.MustRead { + if entry.Direction == "imported_by" { + return true + } + } + // Go files with uppercase symbols or TS/JS files that are commonly exported. + ext := path.Ext(filePath) + return ext == ".go" || ext == ".ts" || ext == ".tsx" || ext == ".js" || ext == ".jsx" +} + +// countEffectiveImporters returns the effective importer count for a file. +// For Go files, uses max(direct_reverse_importers, package_importers). +func countEffectiveImporters(file model.ChangedFile, analysis *model.AnalysisOutput, idx *db.Index) int { + count := 0 + if analysis != nil { + for _, entry := range analysis.MustRead { + if entry.Direction == "imported_by" { + count++ + } + } + } + // For Go files: use max(direct, package-level) importer count. + if idx != nil && strings.HasSuffix(file.File, ".go") && !strings.HasSuffix(file.File, "_test.go") { + pkgDir := path.Dir(file.File) + if pkgCount, err := idx.GetPackageImporterCount(pkgDir); err == nil && pkgCount > count { + count = pkgCount + } + } + return count +} + +// structuralRisk computes the structural risk component for modified files. +func structuralRisk(file model.ChangedFile, analysis *model.AnalysisOutput, idx *db.Index, changedPaths []string, maxIndegree int, factors *[]string) float64 { + var risk float64 + + importerCount := countEffectiveImporters(file, analysis, idx) + + var normalizedImporters float64 + if maxIndegree > 0 { + normalizedImporters = float64(importerCount) / float64(maxIndegree) + if normalizedImporters > 1.0 { + normalizedImporters = 1.0 + } + } + importerRisk := normalizedImporters * 3.0 + if importerRisk > 0 { + *factors = append(*factors, fmt.Sprintf("%d importers", importerCount)) + } + risk += importerRisk + + // Co-change with other PR files. + coChangeTotal := 0 + if idx != nil && len(changedPaths) > 1 { + partners, err := idx.GetCoChangePartners(file.File, coChangeWindowDays, 0) + if err == nil { + changedSet := make(map[string]bool, len(changedPaths)) + for _, p := range changedPaths { + changedSet[p] = true + } + for _, p := range partners { + if changedSet[p.Path] && p.Path != file.File { + coChangeTotal += p.Frequency + } + } + } + } + coChangeRisk := float64(coChangeTotal) * 2.0 + if coChangeRisk > 10 { + coChangeRisk = 10 // cap co-change contribution + } + if coChangeTotal > 0 { + *factors = append(*factors, fmt.Sprintf("co-change freq %d", coChangeTotal)) + } + risk += coChangeRisk + + // Fragility: Ce/(Ca+Ce) where Ce=outdegree, Ca=indegree. + if analysis.BlastRadius != nil { + fragility := analysis.BlastRadius.Fragility + if fragility > 0 { + fragilityRisk := fragility * 15.0 + *factors = append(*factors, fmt.Sprintf("fragility %.2f", fragility)) + risk += fragilityRisk + } + } + + // Mutual dependencies. + mutualCount := 0 + for _, entry := range analysis.MustRead { + if entry.Direction == "mutual" { + mutualCount++ + } + } + mutualRisk := float64(mutualCount) * 5.0 + if mutualCount > 0 { + *factors = append(*factors, fmt.Sprintf("%d mutual deps", mutualCount)) + } + risk += mutualRisk + + // Circular dependencies. + cycleCount := len(analysis.CircularDeps) + cycleRisk := float64(cycleCount) * 10.0 + if cycleCount > 0 { + *factors = append(*factors, fmt.Sprintf("%d cycles", cycleCount)) + } + risk += cycleRisk + + return risk +} + +// coverageMultiplier adjusts risk based on test coverage. +// Uses index-based stem matching (not analysis.Tests which may be capped by mode). +func coverageMultiplier(file model.ChangedFile, analysis *model.AnalysisOutput, idx *db.Index, factors *[]string) float64 { + if classify.IsTestFile(file.File) { + return 0.7 // test files themselves are lower risk + } + + // Check for direct tests via index-based stem matching. + hasDirectTest := false + hasDepTest := false + + if idx != nil { + dir := path.Dir(file.File) + ext := path.Ext(file.File) + stem := strings.TrimSuffix(path.Base(file.File), ext) + + // Look for convention-matched test files. + switch { + case strings.HasSuffix(file.File, ".go"): + testFiles, err := idx.GetTestFilesInDir(dir, "_test.go") + if err == nil { + for _, tf := range testFiles { + tfBase := path.Base(tf) + if strings.Contains(tfBase, stem) { + hasDirectTest = true + break + } + } + } + case strings.HasSuffix(file.File, ".py"): + // Python: test_.py + testName := "test_" + stem + ".py" + testFiles, err := idx.GetTestFilesByName("", []string{testName}) + if err == nil && len(testFiles) > 0 { + hasDirectTest = true + } + } + } + + // Fallback: check analysis.Tests when index-based stem matching didn't find tests. + // This catches cross-file test patterns (e.g., cli_test.go testing analyze.go, + // server_test.go testing tools.go) that stem matching misses. + if !hasDirectTest && analysis != nil { + for _, t := range analysis.Tests { + if t.Direct { + hasDirectTest = true + break + } + } + if !hasDirectTest { + for _, t := range analysis.Tests { + if !t.Direct { + hasDepTest = true + break + } + } + } + } + + if hasDirectTest { + *factors = append(*factors, "has direct tests") + return 0.7 + } + if hasDepTest { + *factors = append(*factors, "has dependency tests only") + return 0.85 + } + *factors = append(*factors, "no direct tests") + return 1.2 +} + +// AssignRiskTier maps a risk score to a tier label. +func AssignRiskTier(score int) string { + switch { + case score <= tierSafeMax: + return TierSafe + case score <= tierReviewMax: + return TierReview + case score <= tierTestMax: + return TierTest + default: + return TierCritical + } +} + +// GenerateNarrative produces a human-readable risk explanation for a file. +// idx is optional — when provided, Go files use package-level importer counts. +func GenerateNarrative(file model.ChangedFile, analysis *model.AnalysisOutput, idx *db.Index) string { + // Edge case: empty diff. + if file.File == "" { + return "No changes detected." + } + + // Edge case: all-added PR. + if file.Status == "added" { + return "New file. No modifications to existing code. Low regression risk." + } + + // Edge case: deleted file. + if file.Status == "deleted" { + importerCount := countEffectiveImporters(file, analysis, idx) + if importerCount > 0 { + return fmt.Sprintf("Deleted. %d files imported this. Check for broken imports.", importerCount) + } + return "Deleted. No known importers." + } + + // Modified file: build narrative from signals. + var parts []string + + // Status. + parts = append(parts, "Modified") + + // Importer count (uses package-level for Go files when idx available). + importerCount := countEffectiveImporters(file, analysis, idx) + if importerCount > 0 { + parts = append(parts, fmt.Sprintf("%d files import this directly", importerCount)) + } + + // Test gap signal (skip for test files — they don't need tests for themselves). + if analysis != nil && !classify.IsTestFile(file.File) { + hasTest := false + for _, t := range analysis.Tests { + if t.Direct { + hasTest = true + break + } + } + if !hasTest { + parts = append(parts, "no direct tests") + } else { + parts = append(parts, "has direct tests") + } + } + + // Fragility — recompute using effective importer count for consistency. + // The per-file analysis uses file-level indegree which can be 0 for Go package files, + // producing fragility=1.0 even when the package has many importers. + if analysis != nil && analysis.BlastRadius != nil { + outdegree := 0 + for _, entry := range analysis.MustRead { + if entry.Direction == "imports" { + outdegree++ + } + } + effectiveIndegree := importerCount // uses package-level count from above + total := float64(outdegree + effectiveIndegree) + if total > 0 { + fragility := float64(outdegree) / total + if fragility > 0.01 { // skip near-zero + parts = append(parts, fmt.Sprintf("fragility: %.2f", fragility)) + } + } + } + + // Role. + // Try to determine from must_read entries or blast radius. + if analysis != nil { + for _, entry := range analysis.MustRead { + if entry.Role != "" && entry.Role != "unknown" { + // The subject's role isn't in must_read (those are its deps). + // We'd need classify.ClassifyRole but don't have the signals here. + break + } + } + } + + narrative := strings.Join(parts, ". ") + "." + + // Truncate to max length. + if len(narrative) > narrativeMaxLen { + narrative = narrative[:narrativeMaxLen-3] + "..." + } + + return narrative +} + +// GenerateTestSuggestions creates test suggestions for high-risk untested files. +func GenerateTestSuggestions(changedFiles []model.ChangedFile, analyses map[string]*model.AnalysisOutput) []model.TestSuggestion { + var suggestions []model.TestSuggestion + + for _, file := range changedFiles { + tier := AssignRiskTier(file.RiskScore) + analysis := analyses[file.File] + if analysis == nil { + continue + } + + // Generate suggestions for TEST/CRITICAL tier, or REVIEW tier with high importer count. + if tier == TierSafe { + continue + } + if tier == TierReview { + importerCount := 0 + for _, entry := range analysis.MustRead { + if entry.Direction == "imported_by" { + importerCount++ + } + } + if importerCount < 10 { + continue // REVIEW with few importers: skip + } + } + + // Check if file has direct tests. + hasDirectTest := false + for _, t := range analysis.Tests { + if t.Direct { + hasDirectTest = true + break + } + } + if hasDirectTest { + continue + } + + // Find imported symbols to suggest testing. + for _, entry := range analysis.MustRead { + if entry.Direction != "imports" || len(entry.Symbols) == 0 { + continue + } + symbolList := strings.Join(entry.Symbols, ", ") + suggestions = append(suggestions, model.TestSuggestion{ + File: file.File, + SuggestedTest: fmt.Sprintf("Test %s's use of %s from %s", path.Base(file.File), symbolList, path.Base(entry.File)), + Reason: fmt.Sprintf("%s imports %s but has no direct test", path.Base(file.File), symbolList), + }) + break // one suggestion per file + } + + // If no symbol-specific suggestion, generate a generic one. + if len(suggestions) == 0 || suggestions[len(suggestions)-1].File != file.File { + suggestions = append(suggestions, model.TestSuggestion{ + File: file.File, + SuggestedTest: fmt.Sprintf("Test %s (risk tier: %s)", path.Base(file.File), tier), + Reason: fmt.Sprintf("%s has risk score %d but no direct test coverage", path.Base(file.File), file.RiskScore), + }) + } + } + + return suggestions +} + +// isCodeFile returns true if the file has a recognized code extension. +func isCodeFile(filePath string) bool { + ext := strings.ToLower(path.Ext(filePath)) + switch ext { + case ".go", ".py", ".ts", ".tsx", ".js", ".jsx", ".java", ".rs", + ".rb", ".php", ".c", ".cpp", ".h", ".hpp", ".cs", ".swift", + ".kt", ".kts", ".scala", ".sh", ".bash", ".zsh": + return true + } + return false +} + +// clampScore ensures the score is within [0, riskScoreMax]. +func clampScore(score int) int { + if score < 0 { + return 0 + } + if score > riskScoreMax { + return riskScoreMax + } + return score +} diff --git a/internal/analyzer/risk_test.go b/internal/analyzer/risk_test.go new file mode 100644 index 0000000..a23e0e3 --- /dev/null +++ b/internal/analyzer/risk_test.go @@ -0,0 +1,356 @@ +package analyzer + +import ( + "strings" + "testing" + + "github.com/kehoej/contextception/internal/model" +) + +func TestComputeRiskScore_AddedFile(t *testing.T) { + file := model.ChangedFile{File: "pkg/new.go", Status: "added", Indexed: false} + score, factors := ComputeRiskScore(file, nil, nil, nil, 10) + if score < 5 || score > 25 { + t.Errorf("added file score=%d, want 5-25", score) + } + if len(factors) == 0 { + t.Error("expected at least one risk factor") + } + found := false + for _, f := range factors { + if strings.Contains(f, "new file") { + found = true + } + } + if !found { + t.Errorf("expected 'new file' factor, got %v", factors) + } +} + +func TestComputeRiskScore_ModifiedWithImporters(t *testing.T) { + analysis := &model.AnalysisOutput{ + MustRead: []model.MustReadEntry{ + {File: "a.go", Direction: "imported_by"}, + {File: "b.go", Direction: "imported_by"}, + {File: "c.go", Direction: "imported_by"}, + }, + BlastRadius: &model.BlastRadius{Level: "high", Fragility: 0.7}, + Tests: []model.TestEntry{}, + } + file := model.ChangedFile{File: "pkg/core.go", Status: "modified", Indexed: true} + score, factors := ComputeRiskScore(file, analysis, nil, nil, 10) + // Base 30 + structural risk (importers + fragility) + no-tests multiplier + if score < 40 { + t.Errorf("modified file with 3 importers + fragility: score=%d, want >= 40", score) + } + t.Logf("score=%d, factors=%v", score, factors) +} + +func TestComputeRiskScore_DivByZero_CaPlsCeZero(t *testing.T) { + analysis := &model.AnalysisOutput{ + BlastRadius: &model.BlastRadius{Level: "low", Fragility: 0.0}, + Tests: []model.TestEntry{{File: "test.go", Direct: true}}, + } + file := model.ChangedFile{File: "pkg/leaf.go", Status: "modified", Indexed: true} + score, _ := ComputeRiskScore(file, analysis, nil, nil, 10) + // Should not panic and should produce a reasonable score. + if score < 0 || score > 100 { + t.Errorf("score=%d, want 0-100", score) + } +} + +func TestComputeRiskScore_ClampAt100(t *testing.T) { + // Simulate extreme risk: many importers, high fragility, cycles, mutual deps. + analysis := &model.AnalysisOutput{ + MustRead: []model.MustReadEntry{ + {File: "a.go", Direction: "imported_by"}, + {File: "b.go", Direction: "imported_by"}, + {File: "c.go", Direction: "imported_by"}, + {File: "d.go", Direction: "imported_by"}, + {File: "e.go", Direction: "imported_by"}, + {File: "f.go", Direction: "mutual"}, + {File: "g.go", Direction: "mutual"}, + {File: "h.go", Direction: "mutual"}, + }, + BlastRadius: &model.BlastRadius{Level: "high", Fragility: 0.95}, + CircularDeps: [][]string{{"a", "b", "a"}, {"c", "d", "c"}, {"e", "f", "e"}}, + Tests: []model.TestEntry{}, + } + file := model.ChangedFile{File: "pkg/hub.go", Status: "modified", Indexed: true} + score, _ := ComputeRiskScore(file, analysis, nil, nil, 5) + if score != 100 { + t.Errorf("extreme risk score=%d, want 100 (clamped)", score) + } +} + +func TestComputeRiskScore_NilAnalysis(t *testing.T) { + file := model.ChangedFile{File: "pkg/unknown.go", Status: "modified", Indexed: true} + score, factors := ComputeRiskScore(file, nil, nil, nil, 10) + // Nil analysis: base score only (30 for modified). + if score != 30 { + t.Errorf("nil analysis score=%d, want 30", score) + } + if len(factors) == 0 { + t.Error("expected factors even with nil analysis") + } +} + +func TestComputeRiskScore_MaxIndegreeZero(t *testing.T) { + analysis := &model.AnalysisOutput{ + MustRead: []model.MustReadEntry{ + {File: "a.go", Direction: "imported_by"}, + }, + BlastRadius: &model.BlastRadius{Level: "medium"}, + Tests: []model.TestEntry{}, + } + file := model.ChangedFile{File: "pkg/core.go", Status: "modified", Indexed: true} + // maxIndegree=0 should not panic and normalizedImporters should be 0. + score, _ := ComputeRiskScore(file, analysis, nil, nil, 0) + if score < 0 || score > 100 { + t.Errorf("maxIndegree=0: score=%d, want 0-100", score) + } +} + +func TestComputeRiskScore_DeletedFile(t *testing.T) { + file := model.ChangedFile{File: "pkg/old.go", Status: "deleted", Indexed: false} + score, _ := ComputeRiskScore(file, nil, nil, nil, 10) + if score != 5 { + t.Errorf("deleted file score=%d, want 5", score) + } +} + +func TestComputeRiskScore_RenamedFile(t *testing.T) { + file := model.ChangedFile{File: "pkg/renamed.go", Status: "renamed", Indexed: false} + score, _ := ComputeRiskScore(file, nil, nil, nil, 10) + if score != 5 { + t.Errorf("renamed file score=%d, want 5", score) + } +} + +func TestAssignRiskTier(t *testing.T) { + tests := []struct { + score int + want string + }{ + {0, TierSafe}, + {10, TierSafe}, + {20, TierSafe}, + {21, TierReview}, + {50, TierReview}, + {51, TierTest}, + {75, TierTest}, + {76, TierCritical}, + {100, TierCritical}, + } + for _, tt := range tests { + got := AssignRiskTier(tt.score) + if got != tt.want { + t.Errorf("AssignRiskTier(%d) = %q, want %q", tt.score, got, tt.want) + } + } +} + +func TestGenerateNarrative_ModifiedWithSignals(t *testing.T) { + analysis := &model.AnalysisOutput{ + MustRead: []model.MustReadEntry{ + {File: "a.go", Direction: "imported_by"}, + {File: "b.go", Direction: "imported_by"}, + {File: "c.go", Direction: "imports"}, + {File: "d.go", Direction: "imports"}, + {File: "e.go", Direction: "imports"}, + }, + BlastRadius: &model.BlastRadius{Level: "medium", Fragility: 0.73}, + Tests: []model.TestEntry{}, + } + file := model.ChangedFile{File: "pkg/core.go", Status: "modified"} + narrative := GenerateNarrative(file, analysis, nil) + if !strings.Contains(narrative, "Modified") { + t.Errorf("narrative should contain 'Modified': %q", narrative) + } + if !strings.Contains(narrative, "2 files import") { + t.Errorf("narrative should mention importers: %q", narrative) + } + if !strings.Contains(narrative, "no direct tests") { + t.Errorf("narrative should mention test gap: %q", narrative) + } + if !strings.Contains(narrative, "fragility") { + t.Errorf("narrative should mention fragility: %q", narrative) + } +} + +func TestGenerateNarrative_NoSignals(t *testing.T) { + analysis := &model.AnalysisOutput{ + BlastRadius: &model.BlastRadius{Level: "low"}, + Tests: []model.TestEntry{{File: "test.go", Direct: true}}, + } + file := model.ChangedFile{File: "pkg/leaf.go", Status: "modified"} + narrative := GenerateNarrative(file, analysis, nil) + if !strings.Contains(narrative, "Modified") { + t.Errorf("narrative should contain 'Modified': %q", narrative) + } + if !strings.Contains(narrative, "has direct tests") { + t.Errorf("narrative should mention tests: %q", narrative) + } +} + +func TestGenerateNarrative_DeletedFile(t *testing.T) { + analysis := &model.AnalysisOutput{ + MustRead: []model.MustReadEntry{ + {File: "a.go", Direction: "imported_by"}, + {File: "b.go", Direction: "imported_by"}, + }, + } + file := model.ChangedFile{File: "pkg/old.go", Status: "deleted"} + narrative := GenerateNarrative(file, analysis, nil) + if !strings.Contains(narrative, "Deleted") { + t.Errorf("narrative should say 'Deleted': %q", narrative) + } + if !strings.Contains(narrative, "2 files imported") { + t.Errorf("narrative should mention importers: %q", narrative) + } +} + +func TestGenerateNarrative_AddedFile(t *testing.T) { + file := model.ChangedFile{File: "pkg/new.go", Status: "added"} + narrative := GenerateNarrative(file, nil, nil) + if !strings.Contains(narrative, "New file") { + t.Errorf("narrative should say 'New file': %q", narrative) + } +} + +func TestGenerateNarrative_Empty(t *testing.T) { + file := model.ChangedFile{} + narrative := GenerateNarrative(file, nil, nil) + if narrative != "No changes detected." { + t.Errorf("empty file narrative = %q, want 'No changes detected.'", narrative) + } +} + +func TestGenerateNarrative_Truncation(t *testing.T) { + // Create analysis that would produce a very long narrative. + entries := make([]model.MustReadEntry, 50) + for i := range entries { + entries[i] = model.MustReadEntry{File: "f.go", Direction: "imported_by"} + } + analysis := &model.AnalysisOutput{ + MustRead: entries, + BlastRadius: &model.BlastRadius{Level: "high", Fragility: 0.99}, + Tests: []model.TestEntry{}, + } + file := model.ChangedFile{File: "pkg/core.go", Status: "modified"} + narrative := GenerateNarrative(file, analysis, nil) + if len(narrative) > narrativeMaxLen { + t.Errorf("narrative length=%d, want <= %d", len(narrative), narrativeMaxLen) + } +} + +func TestGenerateTestSuggestions_UntestedImport(t *testing.T) { + changedFiles := []model.ChangedFile{ + {File: "pkg/handler.go", Status: "modified", RiskScore: 70, RiskTier: TierTest}, + } + analyses := map[string]*model.AnalysisOutput{ + "pkg/handler.go": { + MustRead: []model.MustReadEntry{ + {File: "pkg/db.go", Direction: "imports", Symbols: []string{"Query", "Connect"}}, + }, + Tests: []model.TestEntry{}, + }, + } + suggestions := GenerateTestSuggestions(changedFiles, analyses) + if len(suggestions) == 0 { + t.Fatal("expected at least one test suggestion") + } + if suggestions[0].File != "pkg/handler.go" { + t.Errorf("suggestion file = %q, want pkg/handler.go", suggestions[0].File) + } + if !strings.Contains(suggestions[0].SuggestedTest, "Query") { + t.Errorf("suggestion should reference imported symbols: %q", suggestions[0].SuggestedTest) + } +} + +func TestGenerateTestSuggestions_AllTested(t *testing.T) { + changedFiles := []model.ChangedFile{ + {File: "pkg/handler.go", Status: "modified", RiskScore: 70, RiskTier: TierTest}, + } + analyses := map[string]*model.AnalysisOutput{ + "pkg/handler.go": { + Tests: []model.TestEntry{{File: "pkg/handler_test.go", Direct: true}}, + }, + } + suggestions := GenerateTestSuggestions(changedFiles, analyses) + if len(suggestions) != 0 { + t.Errorf("expected no suggestions for tested file, got %d", len(suggestions)) + } +} + +func TestGenerateTestSuggestions_NoImports(t *testing.T) { + changedFiles := []model.ChangedFile{ + {File: "pkg/standalone.go", Status: "modified", RiskScore: 80, RiskTier: TierCritical}, + } + analyses := map[string]*model.AnalysisOutput{ + "pkg/standalone.go": { + MustRead: []model.MustReadEntry{}, // no imports + Tests: []model.TestEntry{}, + }, + } + suggestions := GenerateTestSuggestions(changedFiles, analyses) + // Should generate a generic suggestion. + if len(suggestions) == 0 { + t.Fatal("expected a generic test suggestion") + } + if !strings.Contains(suggestions[0].Reason, "no direct test") { + t.Errorf("reason should mention no tests: %q", suggestions[0].Reason) + } +} + +func TestGenerateTestSuggestions_EmptyAnalyses(t *testing.T) { + changedFiles := []model.ChangedFile{ + {File: "pkg/handler.go", Status: "modified", RiskScore: 80, RiskTier: TierCritical}, + } + suggestions := GenerateTestSuggestions(changedFiles, map[string]*model.AnalysisOutput{}) + if len(suggestions) != 0 { + t.Errorf("expected no suggestions with empty analyses, got %d", len(suggestions)) + } +} + +func TestGenerateTestSuggestions_SafeTier(t *testing.T) { + changedFiles := []model.ChangedFile{ + {File: "pkg/util.go", Status: "modified", RiskScore: 15, RiskTier: TierSafe}, + } + analyses := map[string]*model.AnalysisOutput{ + "pkg/util.go": {Tests: []model.TestEntry{}}, + } + suggestions := GenerateTestSuggestions(changedFiles, analyses) + if len(suggestions) != 0 { + t.Errorf("SAFE tier should not get suggestions, got %d", len(suggestions)) + } +} + +func TestComputeRiskScore_NonCodeFile(t *testing.T) { + file := model.ChangedFile{File: "README.md", Status: "modified", Indexed: false} + score, factors := ComputeRiskScore(file, nil, nil, nil, 10) + if score > 20 { + t.Errorf("non-code unindexed file score=%d, want <= 20 (SAFE tier)", score) + } + found := false + for _, f := range factors { + if f == "non-code file" { + found = true + } + } + if !found { + t.Errorf("expected 'non-code file' factor, got %v", factors) + } +} + +func TestClampScore(t *testing.T) { + if clampScore(-5) != 0 { + t.Error("negative score should clamp to 0") + } + if clampScore(150) != 100 { + t.Error("score > 100 should clamp to 100") + } + if clampScore(50) != 50 { + t.Error("score 50 should stay 50") + } +} diff --git a/internal/change/change.go b/internal/change/change.go index f259253..e3fc52e 100644 --- a/internal/change/change.go +++ b/internal/change/change.go @@ -40,6 +40,20 @@ func BuildReport(idx *db.Index, cfg Config, base, head string) (*model.ChangeRep return nil, fmt.Errorf("running git diff: %w", err) } + // Fallback: if no committed changes found, diff the working tree against base. + // This supports the pre-PR workflow where changes aren't committed yet. + workingTree := false + if len(diffs) == 0 { + diffs, err = GitDiffWorkingTree(cfg.RepoRoot, base) + if err != nil { + return nil, fmt.Errorf("running git diff (working tree): %w", err) + } + if len(diffs) > 0 { + workingTree = true + refRange = base + "..working-tree" + } + } + if len(diffs) == 0 { return &model.ChangeReport{ SchemaVersion: ChangeSchemaVersion, @@ -51,6 +65,7 @@ func BuildReport(idx *db.Index, cfg Config, base, head string) (*model.ChangeRep Tests: []model.TestEntry{}, }, nil } + _ = workingTree // available for future use (e.g., labeling output) // Filter to analyzable files (existing, non-deleted, in the index). var analyzable []string @@ -91,13 +106,15 @@ func BuildReport(idx *db.Index, cfg Config, base, head string) (*model.ChangeRep } } - // Run per-file analysis for individual blast radius. + // Run per-file analysis for individual blast radius and risk scoring. + perFileAnalysis := make(map[string]*model.AnalysisOutput) perFileBlast := make(map[string]*model.BlastRadius) for _, file := range analyzable { single, err := a.Analyze(file) if err != nil { continue } + perFileAnalysis[file] = single perFileBlast[file] = single.BlastRadius } @@ -108,6 +125,30 @@ func BuildReport(idx *db.Index, cfg Config, base, head string) (*model.ChangeRep } } + // Compute per-file risk scores. + maxIndegree, _ := idx.GetMaxIndegree() + changedPaths := make([]string, len(diffs)) + for i, d := range diffs { + changedPaths[i] = d.Path + } + for i := range changedFiles { + pfa := perFileAnalysis[changedFiles[i].File] + score, factors := analyzer.ComputeRiskScore(changedFiles[i], pfa, idx, changedPaths, maxIndegree) + changedFiles[i].RiskScore = score + changedFiles[i].RiskTier = analyzer.AssignRiskTier(score) + changedFiles[i].RiskFactors = factors + changedFiles[i].RiskNarrative = analyzer.GenerateNarrative(changedFiles[i], pfa, idx) + } + + // Build risk triage. + triage := buildRiskTriage(changedFiles) + + // Build aggregate risk. + aggRisk := buildAggregateRisk(changedFiles, perFileAnalysis, triage) + + // Generate test suggestions. + testSuggestions := analyzer.GenerateTestSuggestions(changedFiles, perFileAnalysis) + // Detect coupling between changed files. coupling := DetectCoupling(idx, analyzable) @@ -126,13 +167,16 @@ func BuildReport(idx *db.Index, cfg Config, base, head string) (*model.ChangeRep // Build the change report. report := &model.ChangeReport{ - SchemaVersion: "1.0", - RefRange: refRange, - ChangedFiles: changedFiles, - Summary: summary, - Coupling: coupling, - TestGaps: testGaps, - HiddenCoupling: hiddenCoupling, + SchemaVersion: "1.0", + RefRange: refRange, + ChangedFiles: changedFiles, + Summary: summary, + Coupling: coupling, + TestGaps: testGaps, + HiddenCoupling: hiddenCoupling, + RiskTriage: triage, + AggregateRisk: aggRisk, + TestSuggestions: testSuggestions, } if analysis != nil { @@ -153,6 +197,165 @@ func BuildReport(idx *db.Index, cfg Config, base, head string) (*model.ChangeRep return report, nil } +// buildRiskTriage groups changed files by risk tier. +func buildRiskTriage(files []model.ChangedFile) *model.RiskTriage { + triage := &model.RiskTriage{ + Critical: []string{}, + Test: []string{}, + Review: []string{}, + Safe: []string{}, + } + for _, f := range files { + switch f.RiskTier { + case analyzer.TierCritical: + triage.Critical = append(triage.Critical, f.File) + case analyzer.TierTest: + triage.Test = append(triage.Test, f.File) + case analyzer.TierReview: + triage.Review = append(triage.Review, f.File) + default: + triage.Safe = append(triage.Safe, f.File) + } + } + sort.Strings(triage.Critical) + sort.Strings(triage.Test) + sort.Strings(triage.Review) + sort.Strings(triage.Safe) + return triage +} + +// buildAggregateRisk computes the aggregate risk for the entire PR. +func buildAggregateRisk(files []model.ChangedFile, analyses map[string]*model.AnalysisOutput, triage *model.RiskTriage) *model.AggregateRisk { + maxScore := 0 + testedCount := 0 + totalNonTest := 0 + + for _, f := range files { + if f.RiskScore > maxScore { + maxScore = f.RiskScore + } + if !isTestFile(f.File) { + totalNonTest++ + a := analyses[f.File] + if a != nil { + for _, t := range a.Tests { + if t.Direct { + testedCount++ + break + } + } + } + } + } + + var coverageRatio float64 + if totalNonTest > 0 { + coverageRatio = float64(testedCount) / float64(totalNonTest) + } + + // Generate regression risk summary. + // Triggers for: CRITICAL tier files, or any modified file with 10+ importers. + regressionRisk := "" + var regressionFiles []string + if len(triage.Critical) > 0 { + regressionFiles = triage.Critical + } else { + // Find high-importer files across all tiers (foundation files). + for _, f := range files { + a := analyses[f.File] + if a == nil || f.Status != "modified" { + continue + } + importerCount := 0 + for _, entry := range a.MustRead { + if entry.Direction == "imported_by" { + importerCount++ + } + } + if importerCount >= 10 { + regressionFiles = append(regressionFiles, f.File) + } + } + } + if len(regressionFiles) > 0 { + regressionRisk = buildRegressionSummary(regressionFiles, analyses) + } else if allAdded(files) { + regressionRisk = "No modifications to existing code. Low regression risk." + } + + return &model.AggregateRisk{ + Score: maxScore, + RegressionRisk: regressionRisk, + TestCoverageRatio: coverageRatio, + } +} + +// buildRegressionSummary generates a brief summary of regression risk from critical files. +func buildRegressionSummary(criticalFiles []string, analyses map[string]*model.AnalysisOutput) string { + var parts []string + for _, file := range criticalFiles { + a := analyses[file] + if a == nil { + continue + } + importerCount := 0 + untestedImporters := 0 + for _, entry := range a.MustRead { + if entry.Direction == "imported_by" { + importerCount++ + } + } + // Count untested importers (rough heuristic: importers not in test files). + for _, entry := range a.MustRead { + if entry.Direction == "imported_by" { + hasTest := false + for _, t := range a.Tests { + if t.Direct && strings.Contains(t.File, strings.TrimSuffix(filepath.Base(entry.File), filepath.Ext(entry.File))) { + hasTest = true + break + } + } + if !hasTest { + untestedImporters++ + } + } + } + base := filepath.Base(file) + if untestedImporters > 0 { + parts = append(parts, fmt.Sprintf("%s: %d importers, %d untested", base, importerCount, untestedImporters)) + } else if importerCount > 0 { + parts = append(parts, fmt.Sprintf("%s: %d importers", base, importerCount)) + } + } + if len(parts) == 0 { + return "" + } + summary := strings.Join(parts, "; ") + if len(summary) > 300 { + summary = summary[:297] + "..." + } + return summary +} + +// allAdded returns true if all files in the changeset are added. +func allAdded(files []model.ChangedFile) bool { + for _, f := range files { + if f.Status != "added" { + return false + } + } + return true +} + +// isTestFile is a local helper to avoid circular import with classify. +func isTestFile(path string) bool { + base := filepath.Base(path) + return strings.HasSuffix(base, "_test.go") || + strings.HasPrefix(base, "test_") || + strings.Contains(base, ".test.") || + strings.Contains(base, ".spec.") +} + // DiffEntry represents a file changed in a git diff. type DiffEntry struct { Path string @@ -167,7 +370,23 @@ func GitDiffFiles(repoRoot, base, head string) ([]DiffEntry, error) { if err != nil { return nil, fmt.Errorf("git diff failed: %w", err) } + return parseDiffOutput(out), nil +} +// GitDiffWorkingTree returns files changed in the working tree vs a base ref. +// This includes both staged and unstaged changes — everything that differs from base. +func GitDiffWorkingTree(repoRoot, base string) ([]DiffEntry, error) { + cmd := exec.Command("git", "diff", "--name-status", "--no-renames", base) + cmd.Dir = repoRoot + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("git diff (working tree) failed: %w", err) + } + return parseDiffOutput(out), nil +} + +// parseDiffOutput parses git diff --name-status output into DiffEntry slice. +func parseDiffOutput(out []byte) []DiffEntry { var entries []DiffEntry for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { if line == "" { @@ -185,8 +404,7 @@ func GitDiffFiles(repoRoot, base, head string) ([]DiffEntry, error) { entries = append(entries, DiffEntry{Path: path, Status: status}) } - - return entries, nil + return entries } // ParseGitStatus converts git's single-letter status code to a human-readable string. @@ -289,13 +507,24 @@ func DetectTestGaps(analyzable []string, analysis *model.AnalysisOutput) []strin } // HasTestForFile checks if any test file appears to test the given file. +// Uses stem matching first, then falls back to same-directory matching for Go files +// (where any _test.go in the same package tests all files in that package). func HasTestForFile(file string, analysis *model.AnalysisOutput) bool { if analysis == nil { return false } base := strings.TrimSuffix(filepath.Base(file), filepath.Ext(file)) + dir := filepath.Dir(file) for _, t := range analysis.Tests { - if t.Direct && strings.Contains(filepath.Base(t.File), base) { + if !t.Direct { + continue + } + // Stem match: test filename contains the source stem. + if strings.Contains(filepath.Base(t.File), base) { + return true + } + // Same-directory match for Go: any _test.go in the same package covers all files. + if strings.HasSuffix(file, ".go") && filepath.Dir(t.File) == dir { return true } } diff --git a/internal/cli/analyze.go b/internal/cli/analyze.go index 7fd184f..b5c7b36 100644 --- a/internal/cli/analyze.go +++ b/internal/cli/analyze.go @@ -122,7 +122,7 @@ func runAnalyze(files []string) error { level = output.BlastRadius.Level } fmt.Fprintf(os.Stderr, "contextception: %s blast_radius=%s\n", output.Subject, level) - handleCIExit(level) + handleCIExit(level, nil) return nil } diff --git a/internal/cli/analyze_change.go b/internal/cli/analyze_change.go index c773a01..6fa9ea8 100644 --- a/internal/cli/analyze_change.go +++ b/internal/cli/analyze_change.go @@ -12,6 +12,7 @@ import ( "github.com/kehoej/contextception/internal/change" "github.com/kehoej/contextception/internal/db" "github.com/kehoej/contextception/internal/history" + "github.com/kehoej/contextception/internal/indexer" "github.com/kehoej/contextception/internal/model" "github.com/spf13/cobra" ) @@ -59,6 +60,14 @@ func runAnalyzeChange(refRange string) error { } defer idx.Close() + // Auto-reindex to ensure the index reflects current files on disk. + // This is especially important for pre-PR workflows where new files + // (tests, source) exist on disk but aren't committed yet. + if err := autoReindex(idx); err != nil { + // Non-fatal: proceed with stale index rather than failing. + fmt.Fprintf(os.Stderr, "Warning: auto-reindex failed: %v\n", err) + } + // Determine the ref range. if refRange == "" { base, err := detectDefaultBranch() @@ -125,6 +134,26 @@ func runAnalyzeChange(refRange string) error { branch, _ := gitCurrentBranch() hist.RecordRun(report, commitSHA, branch) + // Record risk scores for percentile tracking. + var riskEntries []history.RiskScoreEntry + for _, cf := range report.ChangedFiles { + if cf.RiskScore > 0 { + riskEntries = append(riskEntries, history.RiskScoreEntry{ + FilePath: cf.File, + RiskScore: cf.RiskScore, + RefRange: refRange, + }) + } + } + _ = hist.StoreRiskScores(riskEntries) + + // Compute percentile for aggregate risk (best-effort). + if report.AggregateRisk != nil { + if pct, err := hist.ComputePercentile(report.AggregateRisk.Score); err == nil && pct > 0 { + report.AggregateRisk.Percentile = pct + } + } + // Record usage analytics. entry := history.UsageEntryFromChangeReport("cli", nil, report, durationMs, mode, tokenBudget) _, _ = hist.RecordUsage(entry) @@ -135,7 +164,12 @@ func runAnalyzeChange(refRange string) error { level := report.BlastRadius.Level fmt.Fprintf(os.Stderr, "contextception: %s blast_radius=%s files=%d\n", refRange, level, len(report.ChangedFiles)) - handleCIExit(level) + // Append risk badge. + badge := analyzer.FormatRiskBadge(report) + if badge != "" { + fmt.Fprintln(os.Stderr, badge) + } + handleCIExit(level, report) return nil } @@ -355,6 +389,21 @@ func gitCurrentBranch() (string, error) { return strings.TrimSpace(string(out)), nil } +// autoReindex runs an incremental index update to ensure new/modified files +// on disk are reflected in the analysis. This is silent — no output unless verbose. +func autoReindex(idx *db.Index) error { + ix, err := indexer.NewIndexer(indexer.Config{ + RepoRoot: repoRoot, + Index: idx, + Verbose: verbose, + Config: repoCfg, + }) + if err != nil { + return err + } + return ix.Run() +} + // formatBlastLevel returns a formatted blast radius level with visual indicator. func formatBlastLevel(level string) string { switch level { diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index a68abb4..495a92d 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -3,6 +3,7 @@ package cli import ( "bytes" "os" + "os/exec" "path/filepath" "strings" "testing" @@ -525,6 +526,210 @@ func TestAnalyzeChangeCmd_TooManyArgs(t *testing.T) { } } +// buildTestRepoWithGit creates a git repo with an initial commit, modifies a file, +// commits the change, and returns the repo root and the base commit SHA. +func buildTestRepoWithGit(t *testing.T) (root string, baseSHA string) { + t.Helper() + root = createTestRepo(t) + + // Initialize git and create initial commit. + for _, args := range [][]string{ + {"git", "init", "-b", "main"}, + {"git", "config", "user.email", "test@test.com"}, + {"git", "config", "user.name", "Test"}, + {"git", "add", "."}, + {"git", "commit", "-m", "initial"}, + } { + cmd := exec.Command(args[0], args[1:]...) + cmd.Dir = root + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + + // Get base SHA. + cmd := exec.Command("git", "rev-parse", "HEAD") + cmd.Dir = root + out, err := cmd.Output() + if err != nil { + t.Fatal(err) + } + baseSHA = strings.TrimSpace(string(out)) + + // Index. + idxPath := db.IndexPath(root) + idx, err := db.OpenIndex(idxPath) + if err != nil { + t.Fatal(err) + } + if _, err := idx.MigrateToLatest(); err != nil { + idx.Close() + t.Fatal(err) + } + ix, err := indexer.NewIndexer(indexer.Config{RepoRoot: root, Index: idx}) + if err != nil { + idx.Close() + t.Fatal(err) + } + if err := ix.Run(); err != nil { + idx.Close() + t.Fatal(err) + } + idx.Close() + + // Modify a file and commit. + if err := os.WriteFile(filepath.Join(root, "myapp/main.py"), + []byte("from .models import User\nfrom .utils import helper\nimport os\n# modified\n"), 0o644); err != nil { + t.Fatal(err) + } + for _, args := range [][]string{ + {"git", "add", "myapp/main.py"}, + {"git", "commit", "-m", "modify main.py"}, + } { + cmd := exec.Command(args[0], args[1:]...) + cmd.Dir = root + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + + return root, baseSHA +} + +func TestAnalyzeChangeCmd_JSONWithRiskTriage(t *testing.T) { + root, baseSHA := buildTestRepoWithGit(t) + + // Capture stdout. + old := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + err := executeCmd("--repo", root, "analyze-change", baseSHA+"..HEAD", "--json") + + w.Close() + os.Stdout = old + + if err != nil { + t.Fatalf("analyze-change --json: %v", err) + } + + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + // Verify JSON contains risk_triage. + if !strings.Contains(output, "risk_triage") { + t.Errorf("JSON output should contain risk_triage: %.200s...", output) + } + if !strings.Contains(output, "aggregate_risk") { + t.Errorf("JSON output should contain aggregate_risk: %.200s...", output) + } + if !strings.Contains(output, "risk_score") { + t.Errorf("JSON output should contain per-file risk_score: %.200s...", output) + } +} + +func TestAnalyzeChangeCmd_CompactWithRiskBadge(t *testing.T) { + root, baseSHA := buildTestRepoWithGit(t) + + // Capture stdout. + old := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + err := executeCmd("--repo", root, "analyze-change", baseSHA+"..HEAD", "--compact") + + w.Close() + os.Stdout = old + + if err != nil { + t.Fatalf("analyze-change --compact: %v", err) + } + + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + // Compact output should include the risk badge. + if !strings.Contains(output, "RISK") { + t.Errorf("compact output should contain risk badge: %s", output) + } +} + +func TestAnalyzeChangeCmd_WorkingTreeFallback(t *testing.T) { + root := createTestRepo(t) + + // Initialize git and commit. + for _, args := range [][]string{ + {"git", "init", "-b", "main"}, + {"git", "config", "user.email", "test@test.com"}, + {"git", "config", "user.name", "Test"}, + {"git", "add", "."}, + {"git", "commit", "-m", "initial"}, + } { + cmd := exec.Command(args[0], args[1:]...) + cmd.Dir = root + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + + // Index. + idxPath := db.IndexPath(root) + idx, err := db.OpenIndex(idxPath) + if err != nil { + t.Fatal(err) + } + if _, err := idx.MigrateToLatest(); err != nil { + idx.Close() + t.Fatal(err) + } + ix, err := indexer.NewIndexer(indexer.Config{RepoRoot: root, Index: idx}) + if err != nil { + idx.Close() + t.Fatal(err) + } + if err := ix.Run(); err != nil { + idx.Close() + t.Fatal(err) + } + idx.Close() + + // Modify a file WITHOUT committing. + if err := os.WriteFile(filepath.Join(root, "myapp/main.py"), + []byte("from .models import User\nfrom .utils import helper\nimport os\n# uncommitted change\n"), 0o644); err != nil { + t.Fatal(err) + } + + // Capture stdout. + old := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + // Run analyze-change with HEAD..HEAD — committed diff is empty, should fall back to working tree. + baseSHA := "HEAD" + err = executeCmd("--repo", root, "analyze-change", baseSHA+"..HEAD", "--json") + + w.Close() + os.Stdout = old + + if err != nil { + t.Fatalf("analyze-change with working tree: %v", err) + } + + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + // Should detect the uncommitted change via working tree fallback. + if !strings.Contains(output, "working-tree") { + t.Errorf("expected 'working-tree' in ref_range for uncommitted changes: %.200s...", output) + } + if !strings.Contains(output, "myapp/main.py") { + t.Errorf("should detect uncommitted myapp/main.py change: %.200s...", output) + } +} + // --- Unit tests for formatPretty --- func TestFormatPretty_MinimalOutput(t *testing.T) { diff --git a/internal/cli/root.go b/internal/cli/root.go index 2336845..6b1496d 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/kehoej/contextception/internal/config" + "github.com/kehoej/contextception/internal/model" "github.com/kehoej/contextception/internal/update" "github.com/kehoej/contextception/internal/version" "github.com/spf13/cobra" @@ -57,6 +58,11 @@ func NewRootCmd() *cobra.Command { } } + // Check if setup needs re-running after an update. + if msg := CheckSetupFreshness(); msg != "" { + fmt.Fprintln(os.Stderr, msg) + } + // Skip repo root detection for commands that don't need it. if cmd.Name() == "update" || cmd.Name() == "setup" || cmd.Name() == "hook-check" || cmd.Name() == "hook-context" || cmd.Name() == "contextception" { return nil @@ -164,7 +170,7 @@ func registerAnalysisFlags(cmd *cobra.Command) { cmd.Flags().IntVar(&maxTests, "max-tests", 0, "max test entries (default 5, 0 = default)") cmd.Flags().BoolVar(&signatures, "signatures", false, "include code signatures for must_read symbols") cmd.Flags().BoolVar(&ciMode, "ci", false, "CI mode: suppress output, exit code reflects blast radius") - cmd.Flags().StringVar(&failOn, "fail-on", "high", "blast radius level that triggers non-zero exit (high, medium)") + cmd.Flags().StringVar(&failOn, "fail-on", "high", "blast radius level that triggers non-zero exit (high, medium, critical)") cmd.Flags().StringVar(&mode, "mode", "", "workflow mode: plan, implement, review (adjusts caps)") cmd.Flags().IntVar(&tokenBudget, "token-budget", 0, "target token budget for output (adjusts caps automatically)") cmd.Flags().BoolVar(&compactOutput, "compact", false, "token-optimized text summary (~60-75% fewer tokens than JSON)") @@ -172,11 +178,16 @@ func registerAnalysisFlags(cmd *cobra.Command) { // handleCIExit checks the blast radius level and exits with an appropriate code // in CI mode. Returns true if the caller should return (CI mode handled the output). -func handleCIExit(level string) bool { +// The optional report parameter enables --fail-on=critical to check risk triage. +func handleCIExit(level string, report *model.ChangeReport) bool { if !ciMode { return false } switch failOn { + case "critical": + if report != nil && report.RiskTriage != nil && len(report.RiskTriage.Critical) > 0 { + os.Exit(1) + } case "medium": if level == "high" { os.Exit(2) diff --git a/internal/cli/setup.go b/internal/cli/setup.go index 6df58e5..17a6d0f 100644 --- a/internal/cli/setup.go +++ b/internal/cli/setup.go @@ -7,6 +7,7 @@ import ( "path/filepath" "strings" + "github.com/kehoej/contextception/internal/version" "github.com/spf13/cobra" "github.com/tidwall/gjson" "github.com/tidwall/pretty" @@ -23,10 +24,11 @@ func newSetupCmd() *cobra.Command { cmd := &cobra.Command{ Use: "setup", Short: "Configure contextception for your AI code editor", - Long: `Automatically configure the MCP server and Claude Code hooks for contextception. + Long: `Automatically configure the MCP server, hooks, and slash commands for contextception. -Supports Claude Code (default), Cursor, and Windsurf. For Claude Code, also -installs PreToolUse hooks that remind the AI to call get_context before editing files. +Supports Claude Code, Cursor, and Windsurf. Use --editor=auto (default) to +auto-detect installed editors. For Claude Code, also installs PreToolUse hooks +and the /pr-risk slash command. Run with --uninstall to reverse setup and remove all configuration.`, RunE: func(cmd *cobra.Command, args []string) error { @@ -34,7 +36,7 @@ Run with --uninstall to reverse setup and remove all configuration.`, }, } - cmd.Flags().StringVar(&setupEditor, "editor", "claude", "target editor: claude, cursor, windsurf") + cmd.Flags().StringVar(&setupEditor, "editor", "auto", "target editor: auto, claude, cursor, windsurf") cmd.Flags().BoolVar(&setupDryRun, "dry-run", false, "show what would change without writing files") cmd.Flags().BoolVar(&setupUninstall, "uninstall", false, "remove contextception configuration") @@ -49,6 +51,26 @@ func runSetup(editor string, dryRun bool, uninstall bool) error { return fmt.Errorf("could not determine home directory: %w", err) } + // Auto-detect: find all installed editors and set up each one. + if editor == "auto" { + editors := detectInstalledEditors(home) + if len(editors) == 0 { + return fmt.Errorf("no supported editors detected (looked for Claude Code, Cursor, Windsurf)") + } + fmt.Printf("Detected: %s\n\n", strings.Join(editorNames(editors), ", ")) + for _, ed := range editors { + if err := runSetupForEditor(ed, home, dryRun, uninstall); err != nil { + fmt.Fprintf(os.Stderr, " Warning: %s setup failed: %v\n", editorDisplayName(ed), err) + } + fmt.Println() + } + return nil + } + + return runSetupForEditor(editor, home, dryRun, uninstall) +} + +func runSetupForEditor(editor, home string, dryRun, uninstall bool) error { // Validate editor. mcpPath, hookPath, err := editorPaths(home, editor) if err != nil { @@ -75,7 +97,44 @@ func runSetup(editor string, dryRun bool, uninstall bool) error { return runInstall(editor, mcpPath, hookPath, dryRun) } +// detectInstalledEditors checks for the presence of supported editors. +func detectInstalledEditors(home string) []string { + var found []string + + // Claude Code: ~/.claude/ directory exists. + if dirExists(filepath.Join(home, ".claude")) { + found = append(found, "claude") + } + + // Cursor: ~/.cursor/ directory exists. + if dirExists(filepath.Join(home, ".cursor")) { + found = append(found, "cursor") + } + + // Windsurf: ~/.codeium/windsurf/ directory exists. + if dirExists(filepath.Join(home, ".codeium", "windsurf")) { + found = append(found, "windsurf") + } + + return found +} + +func dirExists(path string) bool { + info, err := os.Stat(path) + return err == nil && info.IsDir() +} + +func editorNames(editors []string) []string { + names := make([]string, len(editors)) + for i, ed := range editors { + names[i] = editorDisplayName(ed) + } + return names +} + func runInstall(editor, mcpPath, hookPath string, dryRun bool) error { + home, _ := os.UserHomeDir() + changed, err := ensureMCPConfig(mcpPath, editor, dryRun) if err != nil { return fmt.Errorf("MCP config: %w", err) @@ -90,6 +149,27 @@ func runInstall(editor, mcpPath, hookPath string, dryRun bool) error { printStatus(changed, dryRun, "PreToolUse hooks", hookPath) } + // Install slash commands (version-stamped so they auto-update on upgrade). + ver := version.Version + for name, content := range slashCommands { + cmdPath := slashCommandDir(home, editor) + if cmdPath == "" { + break + } + fullPath := filepath.Join(cmdPath, name+".md") + stamped := versionStamp(content, ver) + changed, err = ensureSlashCommand(fullPath, stamped, dryRun) + if err != nil { + return fmt.Errorf("slash command %s: %w", name, err) + } + printStatus(changed, dryRun, "/"+name+" command", fullPath) + } + + // Record the version that was set up (for freshness checks). + if !dryRun { + writeSetupVersion(ver) + } + if dryRun { fmt.Println("\nDry run complete. No files were modified.") } else { @@ -99,6 +179,8 @@ func runInstall(editor, mcpPath, hookPath string, dryRun bool) error { } func runUninstall(editor, mcpPath, hookPath string, dryRun bool) error { + home, _ := os.UserHomeDir() + changed, err := removeMCPConfig(mcpPath, dryRun) if err != nil { return fmt.Errorf("MCP config: %w", err) @@ -113,6 +195,20 @@ func runUninstall(editor, mcpPath, hookPath string, dryRun bool) error { printRemoveStatus(changed, dryRun, "PreToolUse hooks", hookPath) } + // Remove slash commands. + for name := range slashCommands { + cmdDir := slashCommandDir(home, editor) + if cmdDir == "" { + break + } + fullPath := filepath.Join(cmdDir, name+".md") + changed, err = removeSlashCommand(fullPath, dryRun) + if err != nil { + return fmt.Errorf("slash command %s: %w", name, err) + } + printRemoveStatus(changed, dryRun, "/"+name+" command", fullPath) + } + if dryRun { fmt.Println("\nDry run complete. No files were modified.") } else { @@ -445,6 +541,304 @@ func printStatus(changed, dryRun bool, label, path string) { } } +// slashCommandDir returns the directory where slash command files are installed. +// Returns "" if the editor doesn't support slash commands. +func slashCommandDir(home, editor string) string { + switch editor { + case "claude": + return filepath.Join(home, ".claude", "commands") + case "cursor": + return filepath.Join(home, ".cursor", "rules") + case "windsurf": + return filepath.Join(home, ".windsurf", "rules") + default: + return "" + } +} + +// slashCommandPath returns the full path for a named command (used by tests). +func slashCommandPath(home, editor string) string { + dir := slashCommandDir(home, editor) + if dir == "" { + return "" + } + return filepath.Join(dir, "pr-risk.md") +} + +// versionStamp returns the content with a version comment prepended. +// This ensures command files update when the binary version changes. +func versionStamp(content, ver string) string { + if ver == "" || ver == "dev" { + return content + } + return "\n" + content +} + +// ensureSlashCommand writes a command file if it doesn't exist or has changed. +func ensureSlashCommand(cmdPath, content string, dryRun bool) (bool, error) { + existing, err := os.ReadFile(cmdPath) + if err == nil && string(existing) == content { + return false, nil // already installed and up to date + } + + if dryRun { + return true, nil + } + + dir := filepath.Dir(cmdPath) + if err := os.MkdirAll(dir, 0o755); err != nil { + return false, fmt.Errorf("creating directory %s: %w", dir, err) + } + return true, os.WriteFile(cmdPath, []byte(content), 0o644) +} + +// removeSlashCommand removes the /pr-risk command file. +func removeSlashCommand(cmdPath string, dryRun bool) (bool, error) { + if _, err := os.Stat(cmdPath); os.IsNotExist(err) { + return false, nil + } + + // Only remove if it's ours (contains "contextception"). + data, err := os.ReadFile(cmdPath) + if err != nil { + return false, err + } + if !strings.Contains(string(data), "contextception") { + return false, nil // not our file + } + + if dryRun { + return true, nil + } + return true, os.Remove(cmdPath) +} + +// slashCommands maps command names to their embedded content. +// Each is installed as a Claude Code command, Cursor rule, or Windsurf rule. +var slashCommands = map[string]string{ + "pr-risk": prRiskCommand, + "pr-fix": prFixCommand, +} + +// prRiskCommand is the embedded /pr-risk slash command content. +const prRiskCommand = `Run a risk analysis on the current branch and present an actionable review. + +## Instructions + +Run this command to get the raw risk data: + +` + "```" + ` +contextception analyze-change --json +` + "```" + ` + +If contextception is not installed or the command fails, tell the user to install it and stop. + +Parse the JSON output and present a review following this structure: + +### 1. One-Sentence Verdict + +Start with a single sentence answering: "Is this PR safe to merge?" + +Based on the aggregate_risk.score: +- 0-20: "This PR is low risk — safe to merge with standard review." +- 21-50: "This PR has moderate risk — a few files deserve closer attention." +- 51-75: "This PR is high risk — targeted testing recommended before merging." +- 76-100: "This PR has critical risk — careful review required, regressions likely without it." + +### 2. Files That Need Attention + +Only show files in REVIEW, TEST, or CRITICAL tiers. Skip SAFE files entirely. + +For each file worth discussing, explain in plain language: +- What it does (infer from path/name) +- Why it's flagged (translate risk_factors — don't say "fragility 0.83", say "this file imports many packages but few things depend on it, making it sensitive to upstream changes") +- What to check (specific advice) + +Group related files. If 5 CLI handlers share the same risk profile, summarize as a group. + +### 3. What's Safe to Skip + +One line: "N files are low risk (new files with tests, documentation, etc.) — no special attention needed." + +### 4. Test Coverage + +If test_coverage_ratio < 0.5, flag it. +Name the most important untested files from test_gaps (not all of them). +Present test_suggestions as actionable items. + +### 5. Offer Next Steps + +End with 2-3 concrete options: +- "Want me to look at [highest-risk file] more closely?" +- "Should I check [file with test gap] for missing test coverage?" +- "Should I run the tests to verify nothing broke?" + +Only offer options that make sense for the specific results. + +### Rules + +- Never show raw JSON, risk scores, or factor lists +- Never say "risk_score", "risk_tier", "risk_factors", or "fragility" +- Keep the entire review under 40 lines +- Use basenames when unambiguous +- If the PR is all SAFE files, keep the review to 3 lines +` + +// prFixCommand is the embedded /pr-fix slash command content. +const prFixCommand = `Analyze a PR or current branch for risk, then build a plan to fix every issue found. + +Use: /pr-fix (current branch vs main) or /pr-fix 123 (open PR #123) + +## Instructions + +### Step 1: Get the diff + +If the user provided a PR number as an argument: + +` + "```" + ` +gh pr view --json baseRefName,headRefName,url,title +` + "```" + ` + +Extract the base and head refs, then: + +` + "```" + ` +contextception analyze-change --json .. +` + "```" + ` + +If no PR number was provided (pre-PR mode on current branch): + +` + "```" + ` +contextception analyze-change --json +` + "```" + ` + +If contextception is not installed, tell the user to install it and stop. + +### Step 2: Brief Risk Assessment + +Present a short assessment (under 15 lines) following the /pr-risk format: +1. One-sentence verdict +2. Files needing attention (names + plain-language reasons) +3. Safe file count +4. Test coverage note + +Keep this brief — the plan is the main event. + +### Step 3: Build the Fix Plan + +Analyze every issue and create a concrete, ordered plan to resolve them. + +For each category, generate specific tasks: + +**Test gaps** (from test_gaps and test_suggestions): +- For each high-risk untested file, name the test file to create and what to test +- Example: "Create handler_test.go — test the analytics wiring by verifying RecordUsage is called correctly" + +**Coupling risks** (from coupling where direction is "mutual" or circular_deps): +- Suggest integration tests or dependency direction fixes +- Example: "server.go and tools.go have a mutual dependency — add a test covering the full MCP request path" + +**High-fragility files** (risk_factors containing "fragility" in TEST/CRITICAL tiers): +- Suggest reducing imports or adding interface boundaries +- Only for TEST/CRITICAL — don't over-optimize REVIEW files + +**Foundation file coverage** (files with 5+ importers and no direct tests): +- Prioritize testing these — a bug here affects many consumers +- Example: "history.go has 10 importers — add migration tests for backward compatibility" + +### Step 4: Order by Impact + +1. Tests for CRITICAL/TEST tier files (highest regression prevention value) +2. Tests for foundation files (high importer count) +3. Coupling fixes (mutual/circular deps) +4. Everything else + +### Step 5: Present the Plan + +` + "```" + ` +## Fix Plan (N tasks) + +Addressing these will bring the PR from [CURRENT LEVEL] toward lower risk. + +1. **[File]**: [What to do and why] +2. **[File]**: [What to do and why] +... +` + "```" + ` + +### Step 6: Execute on Request + +After presenting the plan, ask: + +"Want me to start working through this plan? I'll tackle them in order." + +If yes, work through tasks one at a time: +- Before each: say what you're about to do +- After each: re-run contextception analyze-change --json and verify improvement +- Report: "Task 1/N complete — risk moved from [LEVEL] toward [LEVEL]" + +### Rules + +- Never show raw JSON or numeric scores +- Translate all technical factors into plain language +- Each task must be specific enough to execute immediately +- Don't suggest fixes for SAFE files +- Don't suggest architectural refactors for REVIEW files +- If all files are SAFE, say "This PR looks clean — no fixes needed" and stop +- In PR mode, mention the PR title and link for context +` + +// setupVersionFile returns the path to the file that tracks which version last ran setup. +func setupVersionFile() string { + configDir, err := os.UserConfigDir() + if err != nil { + return "" + } + return filepath.Join(configDir, "contextception", "setup-version") +} + +// writeSetupVersion records the current binary version as the last setup version. +func writeSetupVersion(ver string) { + path := setupVersionFile() + if path == "" { + return + } + os.MkdirAll(filepath.Dir(path), 0o755) //nolint:errcheck + os.WriteFile(path, []byte(ver), 0o644) //nolint:errcheck +} + +// readSetupVersion returns the version that last ran setup, or "" if unknown. +func readSetupVersion() string { + path := setupVersionFile() + if path == "" { + return "" + } + data, err := os.ReadFile(path) + if err != nil { + return "" + } + return strings.TrimSpace(string(data)) +} + +// CheckSetupFreshness compares the current binary version to the last setup version. +// Returns a user-facing message if setup should be re-run, or "" if up to date. +func CheckSetupFreshness() string { + current := version.Version + if current == "" || current == "dev" { + return "" + } + last := readSetupVersion() + if last == "" { + // Never run setup — don't nag, they may not use editor integrations. + return "" + } + if last == current { + return "" + } + return fmt.Sprintf( + "contextception updated (%s → %s). Run 'contextception setup' to update editor commands.", + last, current, + ) +} + func printRemoveStatus(changed, dryRun bool, label, path string) { display := tildeDisplay(path) if dryRun { diff --git a/internal/cli/setup_test.go b/internal/cli/setup_test.go index 11e9e9d..68a1573 100644 --- a/internal/cli/setup_test.go +++ b/internal/cli/setup_test.go @@ -603,3 +603,145 @@ func TestSetup_UnsupportedEditor(t *testing.T) { t.Fatalf("unexpected error: %v", err) } } + +func TestSetup_SlashCommand_Install(t *testing.T) { + home := t.TempDir() + cmdPath := filepath.Join(home, ".claude", "commands", "pr-risk.md") + + changed, err := ensureSlashCommand(cmdPath, prRiskCommand, false) + if err != nil { + t.Fatal(err) + } + if !changed { + t.Fatal("expected changed=true for fresh install") + } + + // Verify file was written. + data, err := os.ReadFile(cmdPath) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(data), "contextception analyze-change") { + t.Fatal("command file should contain contextception analyze-change") + } + if !strings.Contains(string(data), "One-Sentence Verdict") { + t.Fatal("command file should contain review structure") + } +} + +func TestSetup_SlashCommand_Idempotent(t *testing.T) { + home := t.TempDir() + cmdPath := filepath.Join(home, ".claude", "commands", "pr-risk.md") + + // First install. + _, err := ensureSlashCommand(cmdPath, prRiskCommand, false) + if err != nil { + t.Fatal(err) + } + + // Second install should be no-op. + changed, err := ensureSlashCommand(cmdPath, prRiskCommand, false) + if err != nil { + t.Fatal(err) + } + if changed { + t.Fatal("expected changed=false on second install") + } +} + +func TestSetup_SlashCommand_Remove(t *testing.T) { + home := t.TempDir() + cmdPath := filepath.Join(home, ".claude", "commands", "pr-risk.md") + + // Install first. + _, err := ensureSlashCommand(cmdPath, prRiskCommand, false) + if err != nil { + t.Fatal(err) + } + + // Remove. + changed, err := removeSlashCommand(cmdPath, false) + if err != nil { + t.Fatal(err) + } + if !changed { + t.Fatal("expected changed=true for removal") + } + + // File should be gone. + if _, err := os.Stat(cmdPath); !os.IsNotExist(err) { + t.Fatal("command file should be removed") + } +} + +func TestSetup_SlashCommand_RemoveNonExistent(t *testing.T) { + home := t.TempDir() + cmdPath := filepath.Join(home, ".claude", "commands", "pr-risk.md") + + changed, err := removeSlashCommand(cmdPath, false) + if err != nil { + t.Fatal(err) + } + if changed { + t.Fatal("expected changed=false when file doesn't exist") + } +} + +func TestSetup_SlashCommand_DryRun(t *testing.T) { + home := t.TempDir() + cmdPath := filepath.Join(home, ".claude", "commands", "pr-risk.md") + + changed, err := ensureSlashCommand(cmdPath, prRiskCommand, true) + if err != nil { + t.Fatal(err) + } + if !changed { + t.Fatal("dry-run should report changed=true") + } + + // File should NOT be written. + if _, err := os.Stat(cmdPath); !os.IsNotExist(err) { + t.Fatal("dry-run should not write the file") + } +} + +func TestSetup_DetectEditors(t *testing.T) { + home := t.TempDir() + + // No editors installed. + editors := detectInstalledEditors(home) + if len(editors) != 0 { + t.Fatalf("expected 0 editors, got %v", editors) + } + + // Install Claude Code. + os.MkdirAll(filepath.Join(home, ".claude"), 0o755) + editors = detectInstalledEditors(home) + if len(editors) != 1 || editors[0] != "claude" { + t.Fatalf("expected [claude], got %v", editors) + } + + // Also install Cursor. + os.MkdirAll(filepath.Join(home, ".cursor"), 0o755) + editors = detectInstalledEditors(home) + if len(editors) != 2 { + t.Fatalf("expected 2 editors, got %v", editors) + } +} + +func TestSetup_SlashCommandPaths(t *testing.T) { + tests := []struct { + editor string + expect string + }{ + {"claude", ".claude/commands/pr-risk.md"}, + {"cursor", ".cursor/rules/pr-risk.md"}, + {"windsurf", ".windsurf/rules/pr-risk.md"}, + } + for _, tt := range tests { + got := slashCommandPath("/home/test", tt.editor) + if !strings.HasSuffix(got, tt.expect) { + t.Errorf("editor=%s: got %q, want suffix %q", tt.editor, got, tt.expect) + } + } +} diff --git a/internal/cli/update.go b/internal/cli/update.go index 50f490d..710e69c 100644 --- a/internal/cli/update.go +++ b/internal/cli/update.go @@ -2,6 +2,7 @@ package cli import ( "fmt" + "os" "github.com/kehoej/contextception/internal/update" "github.com/kehoej/contextception/internal/version" @@ -52,6 +53,13 @@ func newUpdateCmd() *cobra.Command { } fmt.Printf("Updated contextception from %s to %s\n", currentVersion, latestVersion) + + // Auto-run setup to install updated slash commands. + fmt.Println("\nUpdating editor integrations...") + if err := runSetup("auto", false, false); err != nil { + fmt.Fprintf(os.Stderr, "Warning: auto-setup failed: %v\n", err) + fmt.Fprintln(os.Stderr, "Run 'contextception setup' manually to update editor commands.") + } return nil }, } diff --git a/internal/history/history.go b/internal/history/history.go index bce0217..a4b316e 100644 --- a/internal/history/history.go +++ b/internal/history/history.go @@ -146,6 +146,18 @@ func migrate(db *sql.DB) error { CREATE INDEX IF NOT EXISTS idx_feedback_file ON feedback(file_path); CREATE INDEX IF NOT EXISTS idx_feedback_usefulness ON feedback(usefulness); + + CREATE TABLE IF NOT EXISTS risk_scores ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + file_path TEXT NOT NULL, + risk_score INTEGER NOT NULL, + score_version INTEGER NOT NULL DEFAULT 1, + ref_range TEXT + ); + + CREATE INDEX IF NOT EXISTS idx_risk_scores_version ON risk_scores(score_version); + CREATE INDEX IF NOT EXISTS idx_risk_scores_file ON risk_scores(file_path); ` _, err := db.Exec(schema) return err diff --git a/internal/history/percentile.go b/internal/history/percentile.go new file mode 100644 index 0000000..296fc8c --- /dev/null +++ b/internal/history/percentile.go @@ -0,0 +1,85 @@ +package history + +import "fmt" + +// CurrentScoreVersion is the current risk score formula version. +// Bump this when the scoring formula changes to invalidate old percentiles. +const CurrentScoreVersion = 1 + +// minRecordsForPercentile is the minimum number of records needed to compute percentiles. +const minRecordsForPercentile = 10 + +// StoreRiskScore records a per-file risk score in the history database. +func (s *Store) StoreRiskScore(filePath string, riskScore int, refRange string) error { + _, err := s.db.Exec( + `INSERT INTO risk_scores (file_path, risk_score, score_version, ref_range) VALUES (?, ?, ?, ?)`, + filePath, riskScore, CurrentScoreVersion, refRange, + ) + if err != nil { + return fmt.Errorf("storing risk score: %w", err) + } + return nil +} + +// StoreRiskScores records multiple per-file risk scores in a single transaction. +func (s *Store) StoreRiskScores(scores []RiskScoreEntry) error { + if len(scores) == 0 { + return nil + } + tx, err := s.db.Begin() + if err != nil { + return fmt.Errorf("beginning transaction: %w", err) + } + defer tx.Rollback() + + stmt, err := tx.Prepare( + `INSERT INTO risk_scores (file_path, risk_score, score_version, ref_range) VALUES (?, ?, ?, ?)`, + ) + if err != nil { + return fmt.Errorf("preparing statement: %w", err) + } + defer stmt.Close() + + for _, entry := range scores { + if _, err := stmt.Exec(entry.FilePath, entry.RiskScore, CurrentScoreVersion, entry.RefRange); err != nil { + return fmt.Errorf("inserting risk score for %s: %w", entry.FilePath, err) + } + } + + return tx.Commit() +} + +// RiskScoreEntry holds data for a single risk score record. +type RiskScoreEntry struct { + FilePath string + RiskScore int + RefRange string +} + +// ComputePercentile returns the percentile ranking for a given score. +// Returns 0 if fewer than minRecordsForPercentile records exist with the current score version. +func (s *Store) ComputePercentile(score int) (int, error) { + var count int + err := s.db.QueryRow( + `SELECT COUNT(*) FROM risk_scores WHERE score_version = ?`, + CurrentScoreVersion, + ).Scan(&count) + if err != nil { + return 0, fmt.Errorf("counting risk scores: %w", err) + } + if count < minRecordsForPercentile { + return 0, nil + } + + var belowCount int + err = s.db.QueryRow( + `SELECT COUNT(*) FROM risk_scores WHERE score_version = ? AND risk_score < ?`, + CurrentScoreVersion, score, + ).Scan(&belowCount) + if err != nil { + return 0, fmt.Errorf("computing percentile: %w", err) + } + + percentile := (belowCount * 100) / count + return percentile, nil +} diff --git a/internal/history/percentile_test.go b/internal/history/percentile_test.go new file mode 100644 index 0000000..5dfffe6 --- /dev/null +++ b/internal/history/percentile_test.go @@ -0,0 +1,219 @@ +package history + +import ( + "os" + "path/filepath" + "testing" +) + +func openTestStore(t *testing.T) *Store { + t.Helper() + dir := t.TempDir() + // Create .contextception directory. + ccDir := filepath.Join(dir, ".contextception") + if err := os.MkdirAll(ccDir, 0o755); err != nil { + t.Fatalf("creating dir: %v", err) + } + store, err := Open(dir) + if err != nil { + t.Fatalf("opening store: %v", err) + } + t.Cleanup(func() { store.Close() }) + return store +} + +func TestStoreRiskScore_Insert(t *testing.T) { + store := openTestStore(t) + err := store.StoreRiskScore("pkg/core.go", 72, "main..HEAD") + if err != nil { + t.Fatalf("StoreRiskScore: %v", err) + } + + // Verify it was inserted. + var count int + err = store.db.QueryRow("SELECT COUNT(*) FROM risk_scores").Scan(&count) + if err != nil { + t.Fatalf("query: %v", err) + } + if count != 1 { + t.Errorf("count=%d, want 1", count) + } +} + +func TestStoreRiskScores_Batch(t *testing.T) { + store := openTestStore(t) + entries := []RiskScoreEntry{ + {FilePath: "a.go", RiskScore: 50, RefRange: "main..HEAD"}, + {FilePath: "b.go", RiskScore: 30, RefRange: "main..HEAD"}, + {FilePath: "c.go", RiskScore: 80, RefRange: "main..HEAD"}, + } + err := store.StoreRiskScores(entries) + if err != nil { + t.Fatalf("StoreRiskScores: %v", err) + } + + var count int + err = store.db.QueryRow("SELECT COUNT(*) FROM risk_scores").Scan(&count) + if err != nil { + t.Fatalf("query: %v", err) + } + if count != 3 { + t.Errorf("count=%d, want 3", count) + } +} + +func TestStoreRiskScores_Empty(t *testing.T) { + store := openTestStore(t) + err := store.StoreRiskScores(nil) + if err != nil { + t.Fatalf("StoreRiskScores with nil: %v", err) + } +} + +func TestComputePercentile_SufficientData(t *testing.T) { + store := openTestStore(t) + + // Insert 15 records with various scores. + for i := 0; i < 15; i++ { + score := (i + 1) * 6 // 6, 12, 18, 24, ..., 90 + err := store.StoreRiskScore("file.go", score, "ref") + if err != nil { + t.Fatalf("insert: %v", err) + } + } + + pct, err := store.ComputePercentile(72) + if err != nil { + t.Fatalf("ComputePercentile: %v", err) + } + // 72 is higher than scores 6,12,18,24,30,36,42,48,54,60,66 (11 of 15) = 73% + if pct < 50 || pct > 90 { + t.Errorf("percentile=%d, want 50-90 for score 72 out of 15 records", pct) + } + t.Logf("percentile=%d", pct) +} + +func TestComputePercentile_InsufficientData(t *testing.T) { + store := openTestStore(t) + + // Insert only 5 records. + for i := 0; i < 5; i++ { + err := store.StoreRiskScore("file.go", (i+1)*20, "ref") + if err != nil { + t.Fatalf("insert: %v", err) + } + } + + pct, err := store.ComputePercentile(50) + if err != nil { + t.Fatalf("ComputePercentile: %v", err) + } + if pct != 0 { + t.Errorf("percentile=%d, want 0 (insufficient data)", pct) + } +} + +func TestComputePercentile_EmptyTable(t *testing.T) { + store := openTestStore(t) + + pct, err := store.ComputePercentile(50) + if err != nil { + t.Fatalf("ComputePercentile: %v", err) + } + if pct != 0 { + t.Errorf("percentile=%d, want 0 (empty table)", pct) + } +} + +func TestComputePercentile_ScoreVersionFilter(t *testing.T) { + store := openTestStore(t) + + // Insert 10 records with version 1 (current). + for i := 0; i < 10; i++ { + err := store.StoreRiskScore("file.go", (i+1)*10, "ref") + if err != nil { + t.Fatalf("insert v1: %v", err) + } + } + + // Insert 10 records with a different version (simulate old formula). + for i := 0; i < 10; i++ { + _, err := store.db.Exec( + `INSERT INTO risk_scores (file_path, risk_score, score_version, ref_range) VALUES (?, ?, ?, ?)`, + "file.go", (i+1)*10, 999, "ref", + ) + if err != nil { + t.Fatalf("insert v999: %v", err) + } + } + + // Percentile should only consider current version. + pct, err := store.ComputePercentile(50) + if err != nil { + t.Fatalf("ComputePercentile: %v", err) + } + // Score 50 is above 10,20,30,40 (4 of 10) = 40% + if pct != 40 { + t.Errorf("percentile=%d, want 40", pct) + } +} + +func TestRiskScoresTable_Migration(t *testing.T) { + store := openTestStore(t) + + // Verify the table exists by querying its schema. + var tableName string + err := store.db.QueryRow( + `SELECT name FROM sqlite_master WHERE type='table' AND name='risk_scores'`, + ).Scan(&tableName) + if err != nil { + t.Fatalf("risk_scores table not found: %v", err) + } + if tableName != "risk_scores" { + t.Errorf("table name: got %q, want 'risk_scores'", tableName) + } + + // Verify all expected columns exist via insert+query. + _, err = store.db.Exec( + `INSERT INTO risk_scores (file_path, risk_score, score_version, ref_range) + VALUES ('test.go', 50, 1, 'main..HEAD')`, + ) + if err != nil { + t.Fatalf("insert: %v", err) + } + + var filePath string + var riskScore, scoreVersion int + var refRange string + var createdAt string + err = store.db.QueryRow( + `SELECT file_path, risk_score, score_version, ref_range, created_at FROM risk_scores WHERE id=1`, + ).Scan(&filePath, &riskScore, &scoreVersion, &refRange, &createdAt) + if err != nil { + t.Fatalf("query: %v", err) + } + if filePath != "test.go" { + t.Errorf("file_path: got %q, want 'test.go'", filePath) + } + if riskScore != 50 { + t.Errorf("risk_score: got %d, want 50", riskScore) + } + if scoreVersion != 1 { + t.Errorf("score_version: got %d, want 1", scoreVersion) + } + if createdAt == "" { + t.Error("created_at should be auto-populated") + } + + // Verify indexes exist. + var indexCount int + err = store.db.QueryRow( + `SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name LIKE 'idx_risk_scores%'`, + ).Scan(&indexCount) + if err != nil { + t.Fatalf("index query: %v", err) + } + if indexCount < 2 { + t.Errorf("expected at least 2 risk_scores indexes, got %d", indexCount) + } +} diff --git a/internal/mcpserver/server_test.go b/internal/mcpserver/server_test.go index 7ac394b..d87d891 100644 --- a/internal/mcpserver/server_test.go +++ b/internal/mcpserver/server_test.go @@ -1288,11 +1288,41 @@ func TestAnalyzeChangeMCP(t *testing.T) { for _, f := range report.ChangedFiles { if f.File == "myapp/main.py" { found = true + // Verify risk fields are populated. + if f.RiskScore == 0 { + t.Error("expected non-zero risk_score for modified myapp/main.py") + } + if f.RiskTier == "" { + t.Error("expected non-empty risk_tier") + } + if len(f.RiskFactors) == 0 { + t.Error("expected non-empty risk_factors") + } } } if !found { t.Error("expected myapp/main.py in changed_files") } + + // Verify risk triage is populated. + if report.RiskTriage == nil { + t.Error("expected risk_triage to be present") + } + if report.AggregateRisk == nil { + t.Error("expected aggregate_risk to be present") + } else if report.AggregateRisk.Score == 0 { + t.Error("expected non-zero aggregate risk score") + } + + // Verify MCP sorts changed files by risk score descending. + for i := 1; i < len(report.ChangedFiles); i++ { + prev := report.ChangedFiles[i-1].RiskScore + curr := report.ChangedFiles[i].RiskScore + if prev < curr { + t.Errorf("changed_files not sorted by risk: file[%d]=%d < file[%d]=%d", + i-1, prev, i, curr) + } + } } func TestAnalyzeChangeAutoDetect(t *testing.T) { diff --git a/internal/mcpserver/tools.go b/internal/mcpserver/tools.go index 60d7953..2ca9871 100644 --- a/internal/mcpserver/tools.go +++ b/internal/mcpserver/tools.go @@ -649,12 +649,40 @@ func (cs *ContextServer) handleAnalyzeChange(ctx context.Context, req *mcp.CallT } durationMs := time.Since(start).Milliseconds() - // Record usage analytics (best-effort). + // Record usage analytics and risk scores (best-effort). if hist := cs.ensureHistory(); hist != nil { + // Store risk scores for percentile tracking. + var riskEntries []history.RiskScoreEntry + for _, cf := range report.ChangedFiles { + if cf.RiskScore > 0 { + riskEntries = append(riskEntries, history.RiskScoreEntry{ + FilePath: cf.File, + RiskScore: cf.RiskScore, + RefRange: base + ".." + head, + }) + } + } + _ = hist.StoreRiskScores(riskEntries) + + // Compute percentile for aggregate risk. + if report.AggregateRisk != nil { + if pct, err := hist.ComputePercentile(report.AggregateRisk.Score); err == nil && pct > 0 { + report.AggregateRisk.Percentile = pct + } + } + entry := history.UsageEntryFromChangeReport("mcp", nil, report, durationMs, "", 0) _, _ = hist.RecordUsage(entry) } + // Sort changed files by risk score descending, then alphabetically. + sort.Slice(report.ChangedFiles, func(i, j int) bool { + if report.ChangedFiles[i].RiskScore != report.ChangedFiles[j].RiskScore { + return report.ChangedFiles[i].RiskScore > report.ChangedFiles[j].RiskScore + } + return report.ChangedFiles[i].File < report.ChangedFiles[j].File + }) + return jsonResult(report), nil, nil } diff --git a/internal/model/change.go b/internal/model/change.go index d4c5a92..19c49fe 100644 --- a/internal/model/change.go +++ b/internal/model/change.go @@ -8,3 +8,6 @@ type ChangedFile = schema.ChangedFile type ChangeSummary = schema.ChangeSummary type CouplingPair = schema.CouplingPair type HiddenCouplingEntry = schema.HiddenCouplingEntry +type RiskTriage = schema.RiskTriage +type AggregateRisk = schema.AggregateRisk +type TestSuggestion = schema.TestSuggestion diff --git a/internal/model/change_test.go b/internal/model/change_test.go new file mode 100644 index 0000000..bca5c98 --- /dev/null +++ b/internal/model/change_test.go @@ -0,0 +1,58 @@ +package model + +import ( + "testing" + + "github.com/kehoej/contextception/schema" +) + +// TestTypeAliases verifies that model type aliases resolve to the correct schema types. +func TestTypeAliases(t *testing.T) { + // Verify new risk-related aliases are assignable from schema types. + // The explicit type on the left is intentional — it's a compile-time check + // that the alias resolves correctly. If the type doesn't match, this won't build. + var rt RiskTriage = schema.RiskTriage{Critical: []string{"a.go"}} //nolint:staticcheck + if len(rt.Critical) != 1 { + t.Errorf("RiskTriage.Critical: got %d, want 1", len(rt.Critical)) + } + + var ar AggregateRisk = schema.AggregateRisk{Score: 80} //nolint:staticcheck + if ar.Score != 80 { + t.Errorf("AggregateRisk.Score: got %d, want 80", ar.Score) + } + + var ts TestSuggestion = schema.TestSuggestion{File: "b.go"} //nolint:staticcheck + if ts.File != "b.go" { + t.Errorf("TestSuggestion.File: got %q, want b.go", ts.File) + } + + // Verify ChangedFile risk fields are accessible via model alias. + cf := ChangedFile{ + RiskScore: 42, + RiskTier: "REVIEW", + } + if cf.RiskScore != 42 { + t.Errorf("RiskScore: got %d, want 42", cf.RiskScore) + } + if cf.RiskTier != "REVIEW" { + t.Errorf("RiskTier: got %q, want REVIEW", cf.RiskTier) + } + + // Verify ChangeReport risk fields are accessible. + report := ChangeReport{ + RiskTriage: &RiskTriage{Critical: []string{"a.go"}}, + AggregateRisk: &AggregateRisk{Score: 80}, + TestSuggestions: []TestSuggestion{ + {File: "b.go", SuggestedTest: "test", Reason: "reason"}, + }, + } + if len(report.RiskTriage.Critical) != 1 { + t.Errorf("RiskTriage.Critical: got %d items, want 1", len(report.RiskTriage.Critical)) + } + if report.AggregateRisk.Score != 80 { + t.Errorf("AggregateRisk.Score: got %d, want 80", report.AggregateRisk.Score) + } + if len(report.TestSuggestions) != 1 { + t.Errorf("TestSuggestions: got %d items, want 1", len(report.TestSuggestions)) + } +} diff --git a/protocol/change-schema.json b/protocol/change-schema.json index cdfd9c9..abb0330 100644 --- a/protocol/change-schema.json +++ b/protocol/change-schema.json @@ -4,6 +4,27 @@ "title": "ChangeReport", "description": "Contextception change report (schema v1.0). Describes the impact analysis for a branch diff.", "$defs": { + "AggregateRisk": { + "type": "object", + "properties": { + "percentile": { + "type": "integer" + }, + "regression_risk": { + "type": "string" + }, + "score": { + "type": "integer" + }, + "test_coverage_ratio": { + "type": "number" + } + }, + "required": [ + "score", + "test_coverage_ratio" + ] + }, "BlastRadius": { "type": "object", "properties": { @@ -25,6 +46,16 @@ "ChangeReport": { "type": "object", "properties": { + "aggregate_risk": { + "oneOf": [ + { + "$ref": "#/$defs/AggregateRisk" + }, + { + "type": "null" + } + ] + }, "blast_radius": { "$ref": "#/$defs/BlastRadius" }, @@ -79,6 +110,16 @@ "ref_range": { "type": "string" }, + "risk_triage": { + "oneOf": [ + { + "$ref": "#/$defs/RiskTriage" + }, + { + "type": "null" + } + ] + }, "schema_version": { "type": "string" }, @@ -101,6 +142,12 @@ "type": "string" } }, + "test_suggestions": { + "type": "array", + "items": { + "$ref": "#/$defs/TestSuggestion" + } + }, "tests": { "type": "array", "items": { @@ -177,6 +224,21 @@ "indexed": { "type": "boolean" }, + "risk_factors": { + "type": "array", + "items": { + "type": "string" + } + }, + "risk_narrative": { + "type": "string" + }, + "risk_score": { + "type": "integer" + }, + "risk_tier": { + "type": "string" + }, "status": { "type": "string" } @@ -310,6 +372,41 @@ "file" ] }, + "RiskTriage": { + "type": "object", + "properties": { + "critical": { + "type": "array", + "items": { + "type": "string" + } + }, + "review": { + "type": "array", + "items": { + "type": "string" + } + }, + "safe": { + "type": "array", + "items": { + "type": "string" + } + }, + "test": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "critical", + "test", + "review", + "safe" + ] + }, "TestEntry": { "type": "object", "properties": { @@ -324,6 +421,25 @@ "file", "direct" ] + }, + "TestSuggestion": { + "type": "object", + "properties": { + "file": { + "type": "string" + }, + "reason": { + "type": "string" + }, + "suggested_test": { + "type": "string" + } + }, + "required": [ + "file", + "suggested_test", + "reason" + ] } }, "$ref": "#/$defs/ChangeReport" diff --git a/schema/change.go b/schema/change.go index 2ce0715..a602180 100644 --- a/schema/change.go +++ b/schema/change.go @@ -40,10 +40,42 @@ type ChangeReport struct { // Hidden coupling: co-change partners not in the diff. HiddenCoupling []HiddenCouplingEntry `json:"hidden_coupling,omitempty"` + // Risk triage: files grouped by risk tier. + RiskTriage *RiskTriage `json:"risk_triage,omitempty"` + + // Aggregate risk score across all changed files. + AggregateRisk *AggregateRisk `json:"aggregate_risk,omitempty"` + + // Test suggestions for high-risk untested files. + TestSuggestions []TestSuggestion `json:"test_suggestions,omitempty"` + // Index stats at time of analysis. Stats *IndexStats `json:"stats,omitempty"` } +// RiskTriage groups files by risk tier. +type RiskTriage struct { + Critical []string `json:"critical"` + Test []string `json:"test"` + Review []string `json:"review"` + Safe []string `json:"safe"` +} + +// AggregateRisk summarizes overall PR risk. +type AggregateRisk struct { + Score int `json:"score"` + Percentile int `json:"percentile,omitempty"` + RegressionRisk string `json:"regression_risk,omitempty"` + TestCoverageRatio float64 `json:"test_coverage_ratio"` +} + +// TestSuggestion suggests a test for a file missing coverage. +type TestSuggestion struct { + File string `json:"file"` + SuggestedTest string `json:"suggested_test"` + Reason string `json:"reason"` +} + // ChangedFile represents a single file in the diff with its individual analysis. type ChangedFile struct { File string `json:"file"` @@ -54,6 +86,12 @@ type ChangedFile struct { // Whether this file is indexed (new files may not be). Indexed bool `json:"indexed"` + + // Per-file risk scoring. + RiskScore int `json:"risk_score,omitempty"` + RiskTier string `json:"risk_tier,omitempty"` + RiskFactors []string `json:"risk_factors,omitempty"` + RiskNarrative string `json:"risk_narrative,omitempty"` } // ChangeSummary provides aggregate statistics about the change. diff --git a/schema/change_test.go b/schema/change_test.go new file mode 100644 index 0000000..3b3fec6 --- /dev/null +++ b/schema/change_test.go @@ -0,0 +1,196 @@ +package schema + +import ( + "encoding/json" + "testing" +) + +func TestRiskTriage_JSONRoundTrip(t *testing.T) { + original := &RiskTriage{ + Critical: []string{"core.go"}, + Test: []string{"handler.go"}, + Review: []string{"util.go", "config.go"}, + Safe: []string{"readme.md"}, + } + + data, err := json.Marshal(original) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var decoded RiskTriage + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if len(decoded.Critical) != 1 || decoded.Critical[0] != "core.go" { + t.Errorf("critical: got %v, want [core.go]", decoded.Critical) + } + if len(decoded.Test) != 1 || decoded.Test[0] != "handler.go" { + t.Errorf("test: got %v, want [handler.go]", decoded.Test) + } + if len(decoded.Review) != 2 { + t.Errorf("review: got %d items, want 2", len(decoded.Review)) + } + if len(decoded.Safe) != 1 { + t.Errorf("safe: got %d items, want 1", len(decoded.Safe)) + } + + // Verify JSON field names are correct. + var raw map[string]any + json.Unmarshal(data, &raw) + for _, key := range []string{"critical", "test", "review", "safe"} { + if _, ok := raw[key]; !ok { + t.Errorf("missing JSON key %q", key) + } + } +} + +func TestAggregateRisk_JSONRoundTrip(t *testing.T) { + original := &AggregateRisk{ + Score: 72, + Percentile: 85, + RegressionRisk: "history.go: 10 importers, 3 untested", + TestCoverageRatio: 0.57, + } + + data, err := json.Marshal(original) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var decoded AggregateRisk + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if decoded.Score != 72 { + t.Errorf("score: got %d, want 72", decoded.Score) + } + if decoded.Percentile != 85 { + t.Errorf("percentile: got %d, want 85", decoded.Percentile) + } + if decoded.RegressionRisk != original.RegressionRisk { + t.Errorf("regression_risk: got %q, want %q", decoded.RegressionRisk, original.RegressionRisk) + } + if decoded.TestCoverageRatio != 0.57 { + t.Errorf("test_coverage_ratio: got %f, want 0.57", decoded.TestCoverageRatio) + } +} + +func TestAggregateRisk_OmitemptyPercentile(t *testing.T) { + // Percentile=0 should be omitted from JSON. + original := &AggregateRisk{Score: 30, TestCoverageRatio: 0.5} + data, err := json.Marshal(original) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var raw map[string]any + json.Unmarshal(data, &raw) + if _, ok := raw["percentile"]; ok { + t.Error("percentile=0 should be omitted via omitempty") + } + // regression_risk="" should also be omitted. + if _, ok := raw["regression_risk"]; ok { + t.Error("empty regression_risk should be omitted via omitempty") + } + // score and test_coverage_ratio should always be present. + if _, ok := raw["score"]; !ok { + t.Error("score should always be present") + } + if _, ok := raw["test_coverage_ratio"]; !ok { + t.Error("test_coverage_ratio should always be present") + } +} + +func TestTestSuggestion_JSONRoundTrip(t *testing.T) { + original := &TestSuggestion{ + File: "pkg/handler.go", + SuggestedTest: "Test handler.go's use of Query from db.go", + Reason: "handler.go imports Query but has no direct test", + } + + data, err := json.Marshal(original) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var decoded TestSuggestion + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if decoded.File != original.File { + t.Errorf("file: got %q, want %q", decoded.File, original.File) + } + if decoded.SuggestedTest != original.SuggestedTest { + t.Errorf("suggested_test: got %q, want %q", decoded.SuggestedTest, original.SuggestedTest) + } + if decoded.Reason != original.Reason { + t.Errorf("reason: got %q, want %q", decoded.Reason, original.Reason) + } + + // Verify JSON field names. + var raw map[string]any + json.Unmarshal(data, &raw) + for _, key := range []string{"file", "suggested_test", "reason"} { + if _, ok := raw[key]; !ok { + t.Errorf("missing JSON key %q", key) + } + } +} + +func TestChangedFile_RiskFieldsOmitempty(t *testing.T) { + // A changed file with no risk data should omit risk fields. + cf := ChangedFile{File: "readme.md", Status: "modified", Indexed: false} + data, err := json.Marshal(cf) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var raw map[string]any + json.Unmarshal(data, &raw) + for _, key := range []string{"risk_score", "risk_tier", "risk_factors", "risk_narrative"} { + if _, ok := raw[key]; ok { + t.Errorf("zero-value %q should be omitted via omitempty", key) + } + } + + // A changed file with risk data should include them. + cf2 := ChangedFile{ + File: "core.go", Status: "modified", Indexed: true, + RiskScore: 72, RiskTier: "TEST", + RiskFactors: []string{"modified", "5 importers"}, + RiskNarrative: "Modified. 5 importers.", + } + data2, _ := json.Marshal(cf2) + var raw2 map[string]any + json.Unmarshal(data2, &raw2) + for _, key := range []string{"risk_score", "risk_tier", "risk_factors", "risk_narrative"} { + if _, ok := raw2[key]; !ok { + t.Errorf("non-zero %q should be present", key) + } + } +} + +func TestChangeReport_RiskTriageOmitempty(t *testing.T) { + // Report with nil risk_triage should omit the field. + report := ChangeReport{SchemaVersion: "1.0", RefRange: "a..b"} + data, err := json.Marshal(report) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + var raw map[string]any + json.Unmarshal(data, &raw) + if _, ok := raw["risk_triage"]; ok { + t.Error("nil risk_triage should be omitted") + } + if _, ok := raw["aggregate_risk"]; ok { + t.Error("nil aggregate_risk should be omitted") + } + if _, ok := raw["test_suggestions"]; ok { + t.Error("nil test_suggestions should be omitted") + } +}