diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1c68258..0a26f40 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,5 +36,18 @@ jobs: - name: Run ruff run: python -m ruff check src/ tests/ - - name: Run tests - run: python -m pytest tests/ -v --tb=short + - name: Run tests (with coverage on ubuntu x py3.11) + run: | + if [ "${{ matrix.os }}" = "ubuntu-latest" ] && [ "${{ matrix.python-version }}" = "3.11" ]; then + python -m pytest tests/ -v --tb=short --cov --cov-report=term-missing --cov-report=xml + else + python -m pytest tests/ -v --tb=short + fi + shell: bash + + - name: Upload coverage artifact + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' + uses: actions/upload-artifact@v4 + with: + name: coverage-attune-author + path: coverage.xml diff --git a/docs/specs/regen-pipeline/design.md b/docs/specs/regen-pipeline/design.md new file mode 100644 index 0000000..718abf3 --- /dev/null +++ b/docs/specs/regen-pipeline/design.md @@ -0,0 +1,224 @@ +# Spec: Regen Pipeline — Design + +## Phase 2: Design + +**Status**: in-review + +--- + +### Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ attune-gui React UI │ +│ CorpusSetup → path input + Browse button + Load button │ +│ App → DashboardSummaryBar + "Regen all stale" button │ +│ StaleBadge → per-row refresh (unchanged) │ +└────────────────────────┬─────────────────────────────────────────┘ + │ HTTP / WebSocket +┌────────────────────────▼─────────────────────────────────────────┐ +│ attune-gui FastAPI sidecar │ +│ GET /api/config ← read current corpus root │ +│ POST /api/config ← set corpus root + reload │ +│ GET /api/browse/directory ← native dir picker (macOS) │ +│ POST /api/templates/refresh-all ← bulk regen, returns job IDs │ +│ (existing endpoints unchanged) │ +└──────────┬──────────────────────────────┬────────────────────────┘ + │ library call │ library call +┌──────────▼───────────┐ 
┌──────────────▼────────────────────────┐ +│ attune-rag │ │ attune-author │ +│ DirectoryCorpus │ │ regen_template(path, corpus_root) │ +│ (unchanged) │ │ _regen → polish + summary + write │ +└──────────────────────┘ └───────────────────────────────────────┘ +``` + +--- + +### API changes + +#### New: `GET /api/config` + +``` +Response 200: +{ + "corpus_root": "/abs/path/to/templates" | null +} +``` + +Returns the currently loaded corpus root. `null` if no corpus is loaded. + +--- + +#### New: `POST /api/config` + +``` +Request: { "corpus_root": "/abs/path/to/templates" } +Response 200: +{ + "corpus_root": "/abs/path/to/templates", + "template_count": 26 +} +``` + +Validates the path exists, calls `load_corpus(corpus_root)`, returns the count. +Returns 422 if the path does not exist or is not a directory. + +--- + +#### New: `GET /api/browse/directory` + +Opens a native macOS Finder directory-picker dialog (via `tkinter.filedialog`) in a +thread, waits for the user to select a folder, and returns the chosen path. + +``` +Response 200: { "path": "/abs/path/to/templates" } +Response 204: {} ← user cancelled the dialog +``` + +This endpoint blocks until the dialog closes (typically < 5s). The frontend fires it +on Browse button click and populates the path input with the result. + +--- + +#### New: `POST /api/templates/refresh-all` + +Creates refresh jobs for every template whose current staleness is `"stale"` or +`"warning"`. Returns all job IDs immediately (202); the client connects to each WS +individually using the existing `WS /ws/refresh/{job_id}` endpoint. + +``` +Response 202: +{ + "jobs": [ + { "job_id": "uuid", "path": "concepts/auth.md", "status": "pending" }, + ... + ], + "total": 8 +} +``` + +--- + +### attune-author: `regen_template` signature change + +```python +def regen_template( + template_path: str, + corpus_root: str | Path | None = None, +) -> None: +``` + +Resolution order for `corpus_root`: +1. Explicit parameter +2. 
`ATTUNE_CORPUS_ROOT` environment variable +3. `.env` file in the current working directory (`python-dotenv` loads it at call time) +4. `RuntimeError` — cannot proceed + +`_regen` implementation flow: + +``` +1. _resolve_corpus_root(corpus_root) → Path +2. load .env (python-dotenv) if present +3. check ANTHROPIC_API_KEY — raise RuntimeError if missing +4. full_path = corpus_root / template_path — raise FileNotFoundError if missing +5. post = frontmatter.load(full_path) +6. client = anthropic.Anthropic() +7. polish_response = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=4096, + system=[{"type":"text","text":SYSTEM_POLISH,"cache_control":{"type":"ephemeral"}}], + messages=[{"role":"user","content": post.content}] + ) +8. improved = polish_response.content[0].text +9. summary_response = client.messages.create( + model="claude-haiku-4-5-20251001", + max_tokens=128, + messages=[{"role":"user","content": f"One sentence summary:\n\n{improved}"}] + ) +10. post.content = improved +11. post.metadata["summary"] = summary_response.content[0].text.strip() +12. atomic_write(full_path, frontmatter.dumps(post)) ← temp → rename +13. _patch_summaries_json(corpus_root, template_path, post.metadata["summary"]) +``` + +`atomic_write`: write to `full_path.with_suffix(".tmp")`, then `os.replace()`. +`_patch_summaries_json`: load `corpus_root/summaries.json` if present, update the +matching key, write back. No-op if file absent. + +--- + +### Data model changes + +**New: `ConfigState`** (in `attune_gui/config.py`, module-level singleton) + +```python +class ConfigState(BaseModel): + corpus_root: Path | None = None +``` + +Replaces the current implicit `_corpus` in `corpus_adapter.py`. Both modules share it. + +**No changes** to `TemplateEntry`, `JobState`, or `summaries.json` schema. 
+ +--- + +### UI/UX + +#### Corpus setup screen (shown when `corpus_root is None`) + +``` +┌──────────────────────────────────────────────────────────────┐ +│ Attune Template Dashboard │ +│ │ +│ No corpus loaded. │ +│ ┌─────────────────────────────────────┐ [Browse] [Load] │ +│ │ /path/to/templates │ │ +│ └─────────────────────────────────────┘ │ +│ ← inline error if path invalid │ +└──────────────────────────────────────────────────────────────┘ +``` + +- **Browse** fires `GET /api/browse/directory`; populates the text input with returned path. + Disabled + shows spinner while the dialog is open. +- **Load** fires `POST /api/config`; on success replaces the setup screen with the + template list. On 422, shows the error message inline below the input. + +#### "Regen all stale" button (shown when `summary.stale > 0 || summary.warning > 0`) + +Placed in `DashboardSummaryBar`, after the counts: + +``` +● 3 stale · ● 5 warning · 26 total [Regen all stale] +``` + +Click flow: +1. Button fires `POST /api/templates/refresh-all`. +2. For each returned job, connects a WebSocket as normal. +3. Button shows "Regenerating 8…" with a running count of completed jobs. +4. Button re-enables when all jobs reach `done` or `error`. +5. Rows update badge-by-badge via existing `onDone` / `onError` logic. + +--- + +### Cross-layer impact + +| Order | Layer | Change | +|-------|-------|--------| +| 1 | attune-author | `regen_template` signature + `_regen` implementation; add `python-dotenv` dep | +| 2 | attune-gui sidecar | `config.py` module; 3 new routes; `_run_regen` passes corpus root; sidecar startup auto-loads from env | +| 3 | attune-gui UI | `CorpusSetup` component; `DashboardSummaryBar` gains Regen-all button; `App` checks corpus state on mount | + +attune-rag and attune-help: no changes. + +--- + +### Tradeoffs & alternatives + +| Option | Pros | Cons | Chosen? 
| +|--------|------|------|---------| +| tkinter dir picker via sidecar endpoint | Native macOS dialog, no Electron | Blocks sidecar thread; must run in thread pool; won't work headless | **Yes** | +| Web File System Access API | Pure browser, no sidecar change | Returns `FileSystemDirectoryHandle`, not a path string — useless for sidecar | No | +| Startup flag only (`--corpus`) | Simplest | No in-app reconfiguration; fails the UX requirement | No | +| Bulk regen via individual POST per badge | Reuses existing flow | N clicks, no single "regen all" affordance | No | +| Bulk regen `refresh-all` endpoint | Single click, server manages job creation | Slightly more server code | **Yes** | +| python-dotenv for API key | Dev-friendly; key lives in `.env` alongside code | Adds a dep to attune-author | **Yes** | diff --git a/docs/specs/regen-pipeline/requirements.md b/docs/specs/regen-pipeline/requirements.md new file mode 100644 index 0000000..f94af8d --- /dev/null +++ b/docs/specs/regen-pipeline/requirements.md @@ -0,0 +1,75 @@ +# Spec: Regen Pipeline + +> Extends the staleness-badge feature. Completes task 18 (smoke test) by implementing +> `attune_author.regen._regen` and wiring corpus root through the sidecar + UI. + +--- + +## Phase 1: Requirements + +**Status**: approved + +### Problem statement + +The staleness-badge feature (tasks 1–17) is complete. Task 18 requires `_regen` to +be callable end-to-end: clicking a stale badge in the dashboard must regenerate the +template file on disk and clear the badge to "fresh". Currently `_regen` raises +`NotImplementedError`, so the smoke test cannot run. + +Additionally, the sidecar has no way to know where templates live on disk, and the +dashboard has no UI for pointing it to the right corpus root. 
+ +### Scope + +**In scope:** + +- Implement `attune_author.regen._regen(template_path, corpus_root)`: + - Load the existing template file + - Call Claude (Sonnet) to polish the Markdown content + - Call Claude (Haiku) to generate a fresh one-sentence summary + - Write the result back atomically (temp file → rename) + - Update the `summary` field in the file's YAML frontmatter + - Patch the matching entry in `summaries.json` (if present in corpus root) +- Add `corpus_root: str | Path | None = None` to `regen_template` public signature + (env var `ATTUNE_CORPUS_ROOT` as fallback) +- Sidecar: auto-load corpus from `ATTUNE_CORPUS_ROOT` at startup; expose + `GET /api/config` and `POST /api/config` to read/set the corpus root at runtime +- Sidecar WS handler: pass corpus root to `regen_template` +- Dashboard UI: if corpus root is not loaded on startup, show a text input + "Load" + button at the top of the template list; once set, templates appear normally +- Native directory picker (Browse button next to the text input and "Load" button) +- Bulk regen (regenerating all stale templates at once) + +**Out of scope:** + +- Rollback history / undo +- Embedding freshness into a separate freshness-score field (staleness is mtime-based) +- Updating `summaries.json` entries for templates other than the one being regenned + +### User stories + +1. As a developer, I click a stale badge and the template is polished by Claude and + saved to disk, so my corpus stays current without manual editing. +2. As a developer running the dashboard for the first time, I can type my corpus root + path in a field and click "Load" so templates appear without needing to set an env var. +3. As a developer who sets `ATTUNE_CORPUS_ROOT` before starting the sidecar, templates + load immediately on first open — no setup screen. 
+ +### Edge cases & open questions + +| Question / Edge case | Resolution | +|---|---| +| Template file has no existing `summary` in frontmatter | Create the field; don't fail | +| `summaries.json` does not exist in corpus root | Skip the update; don't create the file | +| Claude API key is not set (neither in the environment nor in the project's `.env` file) | Fail with `RuntimeError("ANTHROPIC_API_KEY not set")`; sidecar emits error frame | +| Template file is missing from corpus root | Fail with `FileNotFoundError`; sidecar emits error frame | +| Polish call returns content that drops YAML-looking lines | Replace only `post.content`; frontmatter metadata is never touched by the LLM | +| User types a non-existent path in the corpus root field | Sidecar returns 422; UI shows inline error | +| Atomic write: process killed between temp write and rename | Temp file left behind (acceptable); original intact | + +### Affected layers + +- [ ] attune-rag — no changes +- [x] attune-gui (sidecar + UI) +- [x] attune-author +- [ ] attune-help — no changes diff --git a/docs/specs/regen-pipeline/tasks.md b/docs/specs/regen-pipeline/tasks.md new file mode 100644 index 0000000..9cc0f78 --- /dev/null +++ b/docs/specs/regen-pipeline/tasks.md @@ -0,0 +1,48 @@ +# Spec: Regen Pipeline — Tasks + +## Phase 3: Tasks + +**Status**: complete + +> Shipped: `attune-author regenerate` CLI lives in `src/attune_author/cli.py:507` (handler) with the parser registered around line 154. Core logic in `maintenance.py` and `maintenance_batch.py`. CHANGELOG documents the batch variant. 
+ +### Implementation order + +| # | Task | Layer | Status | Notes | +|---|------|-------|--------|-------| +| 1 | Add `python-dotenv` to attune-author deps | attune-author | done | `pyproject.toml` required + ai extras | +| 2 | Add `_resolve_corpus_root(corpus_root)` helper | attune-author | done | param → `ATTUNE_CORPUS_ROOT` env → `.env` file → `RuntimeError` | +| 3 | Add `atomic_write(path, text)` helper | attune-author | done | Write to `path.with_suffix(".tmp")`, then `os.replace()` | +| 4 | Add `_patch_summaries_json(corpus_root, template_path, summary)` helper | attune-author | done | Load, update key, write back; no-op if file absent | +| 5 | Update `regen_template` signature to `(template_path, corpus_root=None)` | attune-author | done | Call `_resolve_corpus_root`; pass root to `_regen` | +| 6 | Implement `_regen(template_path, corpus_root)` | attune-author | done | Load .env, check API key, load template, Sonnet polish, Haiku summary, atomic write, patch summaries.json | +| 7 | Update `test_regen.py` for new signature | attune-author | done | Fix `assert_called_once_with` args | +| 8 | Add tests for `_resolve_corpus_root` | attune-author | done | param wins; falls back to env var; falls back to .env; raises when all missing | +| 9 | Add tests for `_regen` | attune-author | done | Mock `anthropic.Anthropic`; verify atomic write creates correct file; verify summaries.json patched; verify no-op when summaries.json absent | +| 10 | Add `attune_gui/config.py` — `ConfigState` singleton + `get_config()` / `set_corpus_root()` | attune-gui | done | Module-level `_config: ConfigState`; `set_corpus_root` calls `load_corpus` and stores root | +| 11 | Refactor `corpus_adapter.py` to use `config.get_config().corpus_root` | attune-gui | done | Remove module-level `_corpus`; delegate root tracking to config module | +| 12 | Add `attune_gui/routes/config.py` — `GET /api/config` and `POST /api/config` | attune-gui | done | POST validates path exists + is dir (422 
otherwise); calls `set_corpus_root`; returns count | +| 13 | Add `GET /api/browse/directory` to config routes | attune-gui | done | osascript subprocess (replaced tkinter — crashes on macOS from non-main thread) | +| 14 | Wire config router into `main.py`; auto-load `ATTUNE_CORPUS_ROOT` on startup | attune-gui | done | Call `set_corpus_root` in FastAPI `lifespan` if env var is set | +| 15 | Update `_run_regen` in `ws.py` to pass `corpus_root` from config | attune-gui | done | `from attune_gui.config import get_config; root = get_config().corpus_root` | +| 16 | Add `POST /api/templates/refresh-all` to templates routes | attune-gui | done | Create jobs for all entries whose staleness is stale or warning; return 202 + job list | +| 17 | Add tests for `GET /api/config` and `POST /api/config` | attune-gui | done | GET returns null when unset; POST valid path returns count; POST missing path returns 422 | +| 18 | Add test for `GET /api/browse/directory` | attune-gui | done | Mocks osascript subprocess; asserts 200 with path and 204 on cancel | +| 19 | Add test for `POST /api/templates/refresh-all` | attune-gui | done | Mock `get_entries` with mixed staleness; assert only stale + warning get jobs; 202 response shape | +| 20 | Build `CorpusSetup` React component | attune-gui UI | done | Props: `onLoaded(corpusRoot)`; text input + Browse button + Load button; inline error on 422 | +| 21 | Update `App.jsx` — check `GET /api/config` on mount; show `CorpusSetup` when `corpus_root` is null | attune-gui UI | done | Replace template-list render with `<CorpusSetup />` until corpus is set | +| 22 | Update `DashboardSummaryBar` — add "Regen all stale" button | attune-gui UI | done | Shown when `stale + warning > 0`; fires `POST /api/templates/refresh-all`; shows running count | +| 23 | Update `App.jsx` — handle bulk regen response (connect WS per job, update badge per `done`/`error`) | attune-gui UI | done | Reuse `handleDone` / `handleError`; track count of completed jobs in summary bar button | 
+| 24 | Manual smoke test | attune-gui UI | done | Corpus loaded, stale badge clicked → spinner → fresh, "Regen all stale" confirmed working end-to-end | + +### Testing strategy + +- **attune-author (tasks 7–9)**: pytest unit tests. Mock `anthropic.Anthropic` to avoid real API calls. Use `tmp_path` for file I/O assertions. +- **attune-gui sidecar (tasks 17–19)**: pytest + FastAPI `TestClient`. Mock the `osascript` subprocess call, mock `get_entries`, mock `set_corpus_root` where needed. +- **attune-gui UI (tasks 20–23)**: No automated test suite yet. Task 24 (manual smoke) is the v1 acceptance gate. + +### Rollback plan + +- **attune-author (tasks 1–9)**: New helpers + signature change with default param — fully backwards compatible. Revert commits. +- **attune-gui sidecar (tasks 10–19)**: `config.py` is new; routes are additive; `_run_regen` change is small. Reverting does not break the existing staleness-badge feature. +- **attune-gui UI (tasks 20–23)**: `CorpusSetup` is a new component; `App.jsx` falls back gracefully if `GET /api/config` 404s. Revert commits. diff --git a/pyproject.toml b/pyproject.toml index 7d58974..af71e3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,9 @@ plugin = [ rich = ["rich>=13.0.0"] dev = [ "pytest>=7.0", + "pytest-asyncio>=0.21,<2.0", # required for @pytest.mark.asyncio in tests/test_mcp_handlers_integration.py "pytest-cov>=4.0", + "syrupy>=4.0", "ruff>=0.4.0", "black>=24.0", "mcp>=0.9.0", @@ -102,9 +104,24 @@ select = ["E", "F", "W", "I", "UP", "BLE"] [tool.pytest.ini_options] testpaths = ["tests"] +addopts = "-ra" +asyncio_mode = "auto" # pytest-asyncio: auto-collect @pytest.mark.asyncio functions markers = [ "live: opt-in tests that hit the live Anthropic API. 
Skipped by default; require ANTHROPIC_API_KEY and any test-specific env flags.", ] [tool.coverage.run] source = ["attune_author"] +branch = true +omit = ["*/tests/*", "*/conftest.py"] + +[tool.coverage.report] +show_missing = true +skip_covered = false +fail_under = 85 +exclude_lines = [ + "pragma: no cover", + "raise NotImplementedError", + "if TYPE_CHECKING:", + "if __name__ == .__main__.:", +] diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..94468c0 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,78 @@ +# attune-author tests + +attune-author is the **reference layer for LLM mocking** in the attune +product family. The patterns documented below are what attune-rag, +attune-gui, and attune-help's `tests/README.md` files point at. + +## Running locally + +```bash +# Install dev deps (includes pytest-cov + syrupy) +pip install -e ".[dev]" + +# Full suite (~76s — slowest of the four layers) +pytest + +# With coverage (matches CI's ubuntu x py3.11 cell) +pytest --cov --cov-report=term-missing + +# Update golden snapshots after a deliberate template change +pytest tests/test_generated_templates_golden.py --snapshot-update + +# Opt in to live API tests (require ANTHROPIC_API_KEY) +pytest -m live +``` + +## LLM mocking standard + +Three autouse fixtures in `conftest.py` form the reference pattern: + +1. `_lenient_polish_by_default` — sets + `ATTUNE_AUTHOR_STRICT_POLISH=false` and strips `ANTHROPIC_API_KEY` + so a misconfigured test never reaches the network. +2. `_reset_rag_pipeline` — clears the module-level RagPipeline singleton + between tests, so a leaked patch from one test doesn't poison + subsequent tests. +3. Per-test patches use `unittest.mock.patch("anthropic.Anthropic")` at + the **import boundary**, never at the call site. + +The `live` marker gates real-API tests (`pytest -m live`), so they +never run by default. 
+ +## Test layout + +| File | Purpose | +|------|---------| +| `test_generator.py` | `generate_feature_templates` happy paths, depth selection, frontmatter contract | +| `test_generated_templates_golden.py` | **NEW** syrupy snapshots of rendered concept/task/reference output (deterministic via timestamp + hash stripping) | +| `test_mcp_handlers_integration.py` | **NEW** AttuneAuthorHandlers full request → orchestration → response lifecycle | +| `test_parallel_polish_errors.py` | **NEW** `_parallel_polish` error injection (PolishError, TimeoutError, all-fail cascade) | +| `test_polish_*.py` | polish.py system-prompt selection, retry logic, source-summary assembly | +| `test_staleness*.py` | hash + frontmatter footer parsing; semantic-hash regen detection | +| `test_rag_hook.py` | `ground_polish_context` — singleton lifecycle, lazy import path | + +## Snapshot policy + +`test_generated_templates_golden.py` uses **syrupy** to pin generated +template output. Snapshots live in `tests/__snapshots__/`. To update +after a deliberate template change: + +```bash +pytest tests/test_generated_templates_golden.py --snapshot-update +``` + +Snapshot diffs surface in PR review like a normal text file. The +helper `_stable()` strips timestamps and `source_hash` values before +comparison so reruns stay deterministic without `--snapshot-update`. + +## What's tested vs. not + +After pass 1, the highest-value remaining gaps are: + +- `mcp/server.py` (~67%) — server lifecycle paths +- `cli.py` (~80%) — CLI error paths and help output +- Native-citations end-to-end (still gated on the weekly + `rag-gate.yml`, which spends real Anthropic credits — pass 2 will + audit + tighten that gate). + +Pass 2 will revisit thresholds and target areas above. 
diff --git a/tests/__snapshots__/test_generated_templates_golden.ambr b/tests/__snapshots__/test_generated_templates_golden.ambr new file mode 100644 index 0000000..2aef7e4 --- /dev/null +++ b/tests/__snapshots__/test_generated_templates_golden.ambr @@ -0,0 +1,126 @@ +# serializer version: 1 +# name: test_concept_template_matches_snapshot + ''' + --- + type: concept + name: auth-concept + feature: auth + depth: concept + generated_at: + source_hash: + status: generated + --- + + # Auth + + ## How it works + + Authentication and authorization. + + The main entry points are: + + - **`authenticate()`** — Authenticate a user. + + Under the hood, this feature spans 2 source + files covering: + + - Login handler. + + ## What connects to it + + This feature relates to: security. + + Other parts of the codebase call into + auth through these functions: + + | Function | Purpose | File | + |----------|---------|------| + | `authenticate()` | Authenticate a user. | `src/auth/login.py` | + + ''' +# --- +# name: test_reference_template_matches_snapshot + ''' + --- + type: reference + name: auth-reference + feature: auth + depth: reference + generated_at: + source_hash: + status: generated + --- + + # Auth reference + + + ## Functions + + | Function | Description | File | + |----------|-------------|------| + | `authenticate()` | Authenticate a user. | `src/auth/login.py` | + + + ## Source files + + - `src/auth/**` + + ## Tags + + `security` + + ''' +# --- +# name: test_task_template_matches_snapshot + ''' + --- + type: task + name: auth-task + feature: auth + depth: task + generated_at: + source_hash: + status: generated + --- + + # Work with auth + + Use auth when you need to authentication and authorization. + + ## Prerequisites + + - Access to the project source code + - Familiarity with the files under src/auth/** + + ## Steps + + 1. **Understand the current behavior.** + Read the entry points to see what auth + does today before making changes. 
+ The primary functions are: + - `authenticate()` in `src/auth/login.py` — Authenticate a user. + 2. **Locate the right function to change.** + Each function has a single responsibility. Read its + docstring, parameters, and return type to confirm it + owns the behavior you need to modify. + + 3. **Make your change.** + Follow existing patterns in the file — naming + conventions, error handling style, and logging. + + 4. **Run the related tests.** + This catches regressions before they reach other + developers. Target with `pytest -k "auth"`. + + ## Key files + + - `src/auth/**` + + ## Common modifications + + Functions you are most likely to modify: + + - `authenticate()` in `src/auth/login.py` + + ''' +# --- diff --git a/tests/test_generated_templates_golden.py b/tests/test_generated_templates_golden.py new file mode 100644 index 0000000..0b19fd3 --- /dev/null +++ b/tests/test_generated_templates_golden.py @@ -0,0 +1,137 @@ +"""Golden snapshots for generated templates. + +Pins the rendered output of ``generate_feature_templates`` for the three +core depths (concept / task / reference). Schema drift in the Jinja2 +templates or in the frontmatter contract gets caught at PR time rather +than weeks later via the cross-repo workflow. + +Snapshots live in ``tests/__snapshots__/`` (syrupy default). Updating a +snapshot requires the deliberate ``pytest --snapshot-update`` run, so +unintentional output changes show up in PR review as snapshot diffs. + +The polish pass is disabled across the suite by the autouse +``_lenient_polish_by_default`` fixture in conftest.py, so the snapshots +capture the deterministic Jinja2-only output (no LLM calls). +""" + +from __future__ import annotations + +import re +from pathlib import Path + +import pytest + +from attune_author.generator import generate_feature_templates +from attune_author.manifest import Feature + +# syrupy's SnapshotAssertion is provided via the ``snapshot`` fixture +# pulled in automatically when syrupy is installed. 
+ +# Volatile frontmatter fields: timestamps + content-hash. Stripped before +# comparing so snapshots stay deterministic across runs. +_TIMESTAMP_RE = re.compile( + r"(\w+_at: )\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:[+-]\d{2}:?\d{2}|Z)?" +) +_HASH_RE = re.compile(r"(source_hash: )[0-9a-f]+") + + +def _stable(text: str) -> str: + """Replace timestamps + hashes with placeholders for snapshot stability.""" + text = _TIMESTAMP_RE.sub(r"\1", text) + text = _HASH_RE.sub(r"\1", text) + return text + + +@pytest.fixture +def auth_feature() -> Feature: + return Feature( + name="auth", + description="Authentication and authorization", + files=["src/auth/**"], + tags=["security"], + ) + + +def _read_template(result, kind: str) -> str: + """Load a generated template by kind from a GenerationResult.""" + matches = [t for t in result.templates if t.depth == kind] + assert matches, ( + f"no {kind} template generated; got depths: " f"{[t.depth for t in result.templates]}" + ) + return matches[0].path.read_text(encoding="utf-8") + + +# --------------------------------------------------------------------------- +# Per-depth golden snapshots +# --------------------------------------------------------------------------- + + +def test_concept_template_matches_snapshot( + help_dir: Path, project_root: Path, auth_feature: Feature, snapshot +) -> None: + """Concept template — high-level conceptual overview.""" + result = generate_feature_templates( + feature=auth_feature, + help_dir=help_dir, + project_root=project_root, + depths=["concept"], + use_rag=False, + ) + rendered = _stable(_read_template(result, "concept")) + assert rendered == snapshot + + +def test_task_template_matches_snapshot( + help_dir: Path, project_root: Path, auth_feature: Feature, snapshot +) -> None: + """Task template — step-by-step procedure.""" + result = generate_feature_templates( + feature=auth_feature, + help_dir=help_dir, + project_root=project_root, + depths=["task"], + use_rag=False, + ) + 
rendered = _stable(_read_template(result, "task")) + assert rendered == snapshot + + +def test_reference_template_matches_snapshot( + help_dir: Path, project_root: Path, auth_feature: Feature, snapshot +) -> None: + """Reference template — exhaustive API/option detail.""" + result = generate_feature_templates( + feature=auth_feature, + help_dir=help_dir, + project_root=project_root, + depths=["reference"], + use_rag=False, + ) + rendered = _stable(_read_template(result, "reference")) + assert rendered == snapshot + + +# --------------------------------------------------------------------------- +# Frontmatter schema invariants — independent of snapshot exact-match +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("kind", ["concept", "task", "reference"]) +def test_generated_templates_have_required_frontmatter_fields( + help_dir: Path, project_root: Path, auth_feature: Feature, kind: str +) -> None: + """All core templates must carry name + feature + source_hash, regardless + of the snapshot exact-match diff. Catches schema regressions even if a + snapshot is updated by mistake.""" + result = generate_feature_templates( + feature=auth_feature, + help_dir=help_dir, + project_root=project_root, + depths=[kind], + use_rag=False, + ) + rendered = _read_template(result, kind) + assert rendered.startswith("---\n"), "missing frontmatter open fence" + head, _, _ = rendered[4:].partition("\n---\n") + for required in ("name:", "feature: auth", "source_hash:"): + assert required in head, f"{kind} template frontmatter missing {required!r}; got:\n{head}" diff --git a/tests/test_mcp_handlers_integration.py b/tests/test_mcp_handlers_integration.py new file mode 100644 index 0000000..dfb22cc --- /dev/null +++ b/tests/test_mcp_handlers_integration.py @@ -0,0 +1,195 @@ +"""MCP handler integration tests. 
+ +Targets the uncovered lines in ``src/attune_author/mcp/handlers.py`` +(audit reported ~29 uncovered) by exercising the handler methods through +their full request → orchestration → response lifecycle. The LLM call is +mocked at the boundary so no API tokens are spent; every other layer +(manifest loading, staleness checks, manifest saving) runs for real. +""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from attune_author.mcp.handlers import AttuneAuthorHandlers, _PathValidationError + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def handlers(tmp_path: Path) -> AttuneAuthorHandlers: + return AttuneAuthorHandlers(workspace_root=str(tmp_path)) + + +# --------------------------------------------------------------------------- +# _validated_paths — input contract +# --------------------------------------------------------------------------- + + +def test_validated_paths_accepts_in_bounds_path( + handlers: AttuneAuthorHandlers, tmp_path: Path +) -> None: + sub = tmp_path / "ok" + sub.mkdir() + paths = handlers._validated_paths({"project_root": str(sub)}, {"project_root": "."}) + assert paths["project_root"] == sub.resolve() + + +def test_validated_paths_uses_default_when_missing( + handlers: AttuneAuthorHandlers, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Default ``"."`` resolves against cwd, validated against workspace_root.""" + monkeypatch.chdir(tmp_path) + paths = handlers._validated_paths({}, {"project_root": "."}) + assert paths["project_root"] == tmp_path.resolve() + + +def test_validated_paths_raises_on_traversal( + handlers: AttuneAuthorHandlers, +) -> None: + with pytest.raises(_PathValidationError): + handlers._validated_paths({"project_root": "../../etc"}, {"project_root": "."}) + + +# 
--------------------------------------------------------------------------- +# author_init — bootstrap path +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_author_init_returns_already_initialized_when_manifest_exists( + handlers: AttuneAuthorHandlers, tmp_path: Path +) -> None: + help_dir = tmp_path / ".help" + help_dir.mkdir() + (help_dir / "features.yaml").write_text("version: 1\nfeatures: {}\n") + result = await handlers.author_init({"project_root": str(tmp_path)}) + assert result["success"] is True + assert result.get("already_initialized") is True + + +@pytest.mark.asyncio +async def test_author_init_returns_zero_when_nothing_discovered( + handlers: AttuneAuthorHandlers, tmp_path: Path +) -> None: + """Empty project — scan_project returns no proposals.""" + with patch("attune_author.bootstrap.scan_project", return_value=[]): + result = await handlers.author_init({"project_root": str(tmp_path)}) + assert result["success"] is True + assert result["discovered"] == 0 + + +@pytest.mark.asyncio +async def test_author_init_writes_manifest_when_features_discovered( + handlers: AttuneAuthorHandlers, tmp_path: Path +) -> None: + """When scan_project returns proposals, manifest is saved + counts surfaced.""" + fake_proposal = MagicMock( + name="auth", + description="Auth module", + confidence=0.9, + files=["src/auth/**"], + ) + fake_proposal.name = "auth" + fake_proposal.description = "Auth module" + + with ( + patch("attune_author.bootstrap.scan_project", return_value=[fake_proposal]), + patch( + "attune_author.bootstrap.proposals_to_manifest", + return_value=MagicMock(), + ), + patch( + "attune_author.manifest.save_manifest", + return_value=tmp_path / ".help" / "features.yaml", + ), + ): + result = await handlers.author_init({"project_root": str(tmp_path)}) + + assert result["success"] is True + assert result["discovered"] == 1 + assert "manifest_path" in result + assert 
isinstance(result["features"], list) + + +@pytest.mark.asyncio +async def test_author_init_returns_path_validation_error( + handlers: AttuneAuthorHandlers, +) -> None: + """Malformed project_root surfaces as a structured error envelope.""" + result = await handlers.author_init({"project_root": "../../etc"}) + assert result["success"] is False + assert "error" in result + + +# --------------------------------------------------------------------------- +# author_status — reports stale features +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_author_status_path_validation_error_is_structured( + handlers: AttuneAuthorHandlers, +) -> None: + result = await handlers.author_status({"help_dir": "../../"}) + assert result["success"] is False + + +@pytest.mark.asyncio +async def test_author_status_returns_envelope_when_manifest_present( + handlers: AttuneAuthorHandlers, tmp_path: Path +) -> None: + """With a real (empty) manifest on disk, handler must return a structured + envelope rather than crashing — even if downstream maintenance returns + success=False, the envelope shape itself is the contract.""" + help_dir = tmp_path / ".help" + help_dir.mkdir() + (help_dir / "features.yaml").write_text("version: 1\nfeatures: {}\n") + result = await handlers.author_status( + {"help_dir": str(help_dir), "project_root": str(tmp_path)} + ) + assert isinstance(result, dict) + assert "success" in result + + +# --------------------------------------------------------------------------- +# author_generate — propagates LLM errors via envelope, doesn't crash +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_author_generate_path_validation_returns_structured_error( + handlers: AttuneAuthorHandlers, +) -> None: + result = await handlers.author_generate({"feature_name": "auth", "help_dir": "../../"}) + assert result["success"] is False + assert "error" 
in result + + +# --------------------------------------------------------------------------- +# author_lookup — read-only path +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_author_lookup_path_validation_returns_structured_error( + handlers: AttuneAuthorHandlers, +) -> None: + result = await handlers.author_lookup({"help_dir": "../../"}) + assert result["success"] is False + + +# --------------------------------------------------------------------------- +# Construction +# --------------------------------------------------------------------------- + + +def test_handlers_holds_workspace_root_reference( + handlers: AttuneAuthorHandlers, tmp_path: Path +) -> None: + assert handlers._workspace_root == str(tmp_path) diff --git a/tests/test_parallel_polish_errors.py b/tests/test_parallel_polish_errors.py new file mode 100644 index 0000000..3c2f6b7 --- /dev/null +++ b/tests/test_parallel_polish_errors.py @@ -0,0 +1,177 @@ +"""Error-path tests for ``_parallel_polish``. + +The audit identified the parallel-polish error paths as a real coverage +gap. ``_parallel_polish`` runs ``_maybe_polish`` across a ThreadPoolExecutor +and propagates the first exception via ``Future.result()``. These tests +inject failures into ``_maybe_polish`` and assert the cascade behavior +without spending API tokens. 
+""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +import pytest + +from attune_author.generator import _parallel_polish +from attune_author.manifest import Feature +from attune_author.polish import PolishError + + +@pytest.fixture +def feature() -> Feature: + return Feature( + name="auth", + description="Auth module", + files=["src/auth/**"], + tags=["security"], + ) + + +def _pending(tmp_path: Path, n: int) -> list[tuple[str, str, Path]]: + """Build a list of ``(depth, content, out_path)`` tuples for testing.""" + return [(f"depth_{i}", f"content_{i}", tmp_path / f"out_{i}.md") for i in range(n)] + + +# --------------------------------------------------------------------------- +# Happy path — establishes the baseline behavior +# --------------------------------------------------------------------------- + + +def test_parallel_polish_returns_dict_keyed_by_depth(tmp_path: Path, feature: Feature) -> None: + pending = _pending(tmp_path, 3) + with patch( + "attune_author.generator._maybe_polish", + side_effect=lambda content, *a, **kw: f"polished:{content}", + ): + results = _parallel_polish( + pending=pending, feature=feature, source_info=object(), use_rag=False + ) + assert set(results) == {"depth_0", "depth_1", "depth_2"} + for depth, (polished, out_path) in results.items(): + idx = depth.split("_")[1] + assert polished == f"polished:content_{idx}" + assert out_path == tmp_path / f"out_{idx}.md" + + +def test_parallel_polish_handles_single_pending_item(tmp_path: Path, feature: Feature) -> None: + pending = _pending(tmp_path, 1) + with patch( + "attune_author.generator._maybe_polish", + side_effect=lambda content, *a, **kw: content.upper(), + ): + results = _parallel_polish( + pending=pending, feature=feature, source_info=object(), use_rag=False + ) + assert len(results) == 1 + + +# --------------------------------------------------------------------------- +# Error injection — first failure propagates via 
Future.result() +# --------------------------------------------------------------------------- + + +def test_parallel_polish_propagates_polish_error(tmp_path: Path, feature: Feature) -> None: + """One worker raises PolishError → ``_parallel_polish`` propagates it.""" + pending = _pending(tmp_path, 3) + + def _flaky(content: str, *args, **kwargs) -> str: + if "content_1" in content: + raise PolishError("polish failed for depth_1") + return f"polished:{content}" + + with patch("attune_author.generator._maybe_polish", side_effect=_flaky): + with pytest.raises(PolishError, match="polish failed for depth_1"): + _parallel_polish( + pending=pending, + feature=feature, + source_info=object(), + use_rag=False, + ) + + +def test_parallel_polish_propagates_runtime_error(tmp_path: Path, feature: Feature) -> None: + """A non-PolishError exception (e.g. timeout) also bubbles up.""" + pending = _pending(tmp_path, 2) + with patch( + "attune_author.generator._maybe_polish", + side_effect=TimeoutError("anthropic API timeout"), + ): + with pytest.raises(TimeoutError, match="anthropic API timeout"): + _parallel_polish( + pending=pending, + feature=feature, + source_info=object(), + use_rag=False, + ) + + +def test_parallel_polish_all_workers_fail_propagates_one(tmp_path: Path, feature: Feature) -> None: + """When every worker fails, the first-completed error is what surfaces. 
+ The exact exception identity isn't pinned — futures complete in + nondeterministic order — but *some* exception must propagate.""" + pending = _pending(tmp_path, 4) + call_counter = {"n": 0} + + def _all_fail(content: str, *args, **kwargs) -> str: + call_counter["n"] += 1 + raise RuntimeError(f"failed call #{call_counter['n']}") + + with patch("attune_author.generator._maybe_polish", side_effect=_all_fail): + with pytest.raises(RuntimeError, match="failed call"): + _parallel_polish( + pending=pending, + feature=feature, + source_info=object(), + use_rag=False, + ) + + +# --------------------------------------------------------------------------- +# Workers / concurrency +# --------------------------------------------------------------------------- + + +def test_parallel_polish_uses_at_most_max_workers(tmp_path: Path, feature: Feature) -> None: + """With more pending items than workers, all items still complete.""" + pending = _pending(tmp_path, 8) + with patch( + "attune_author.generator._maybe_polish", + side_effect=lambda content, *a, **kw: content, + ): + results = _parallel_polish( + pending=pending, feature=feature, source_info=object(), use_rag=False + ) + assert len(results) == 8 + + +def test_parallel_polish_passes_use_rag_through(tmp_path: Path, feature: Feature) -> None: + """use_rag must reach _maybe_polish unchanged so RAG grounding can be opted out.""" + pending = _pending(tmp_path, 1) + seen: list[bool] = [] + + def _capture(content, *args, use_rag: bool = False, **kwargs) -> str: + seen.append(use_rag) + return content + + with patch("attune_author.generator._maybe_polish", side_effect=_capture): + _parallel_polish(pending=pending, feature=feature, source_info=object(), use_rag=True) + assert seen == [True] + + +def test_parallel_polish_passes_template_type_through(tmp_path: Path, feature: Feature) -> None: + """Each worker invokes _maybe_polish with template_type set to its item's depth.""" + pending = [ + ("concept", "c", tmp_path / "c.md"), + ("reference", "r", 
tmp_path / "r.md"), + ] + seen: dict[str, str] = {} + + def _capture(content, *args, template_type: str = "", **kwargs) -> str: + seen[content] = template_type + return content + + with patch("attune_author.generator._maybe_polish", side_effect=_capture): + _parallel_polish(pending=pending, feature=feature, source_info=object(), use_rag=False) + assert seen == {"c": "concept", "r": "reference"}