From 4abba7e81d660087c28b0f3ad479b1a28e5ae3a0 Mon Sep 17 00:00:00 2001 From: GeneAI Date: Mon, 4 May 2026 15:42:51 -0400 Subject: [PATCH 1/2] fix(prefixes): add `gui` mapping for `guide` template kind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The schema in attune-rag has carried `guide` in the type enum since v1, but `attune-help`'s prefix maps had no entry for it. A new `type: guide` template would have been silently dropped from cross-linking (`_find_template_file` would return None for any `gui-*` id; `_COMPOUND_PREFIXES` would not match a `gui-` template during retrieval ranking). Found while resolving an open question in the template-corpus-tidy spec — the spec asked which prefix `quickstart` should use, and the answer was already-correct (`qui`). The investigation surfaced the `guide` gap as a sibling problem. Changes: - `templates._PREFIX_MAP` gains `"gui": "guides"`. Map reordered alphabetically; behaviour unchanged. - `progression._COMPOUND_PREFIXES` gains `"gui-"`. Order tweaked so the base prefixes are alphabetical (compounds for ref/tas/con stay first per the longest-first contract documented in the comment above the list). - New `tests/test_template_prefixes.py` (4 tests) pins both maps against the expected enum and explicitly asserts `qui` and `gui`. Future schema-enum additions will fail this test until both maps are updated, surfacing the kind of silent drop this commit fixes. The schema enum lives in attune-rag and the prefix maps live here. The new test mirrors the enum as a literal set with a comment pointing at the schema; introducing a runtime dep on attune-rag would violate ADR-002 (attune-help ships with zero required deps beyond python-frontmatter). 250 attune-help tests pass; ruff clean. Co-Authored-By: Claude Opus 4.7 --- src/attune_help/progression.py | 8 ++- src/attune_help/templates.py | 18 ++++-- tests/test_template_prefixes.py | 97 +++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 tests/test_template_prefixes.py diff --git a/src/attune_help/progression.py b/src/attune_help/progression.py index 0e0b704..fdb7ec6 100644 --- a/src/attune_help/progression.py +++ b/src/attune_help/progression.py @@ -74,6 +74,7 @@ def _record_topic( # Ordered longest-first so compound prefixes match before # their shorter components (e.g. "ref-skill-" before "ref-"). +# Base prefixes here must stay in sync with `templates._PREFIX_MAP`. _COMPOUND_PREFIXES = [ "ref-skill-", "ref-tool-", @@ -83,14 +84,15 @@ def _record_topic( "tas-", "con-tool-", "con-", + "com-", "err-", - "war-", - "tip-", "faq-", + "gui-", "not-", "qui-", + "tip-", "tro-", - "com-", + "war-", ] diff --git a/src/attune_help/templates.py b/src/attune_help/templates.py index 95f8867..20ad46c 100644 --- a/src/attune_help/templates.py +++ b/src/attune_help/templates.py @@ -78,18 +78,24 @@ class PopulatedTemplate: # Template file resolution # ------------------------------------------------------------------ +# Maps the 3-letter `{prefix}-{slug}` template-id prefix to the +# directory the matching template lives in. Mirrors the enum in +# `attune-rag` at `src/attune_rag/editor/template_schema.json`. Adding +# a kind to that enum implies adding both an entry here AND the +# corresponding subdirectory under `templates/`. _PREFIX_MAP = { + "com": "comparisons", + "con": "concepts", "err": "errors", - "war": "warnings", - "tip": "tips", - "ref": "references", - "tas": "tasks", "faq": "faqs", + "gui": "guides", "not": "notes", "qui": "quickstarts", - "con": "concepts", + "ref": "references", + "tas": "tasks", + "tip": "tips", "tro": "troubleshooting", - "com": "comparisons", + "war": "warnings", } diff --git a/tests/test_template_prefixes.py b/tests/test_template_prefixes.py new file mode 100644 index 0000000..52c14ef --- /dev/null +++ b/tests/test_template_prefixes.py @@ -0,0 +1,97 @@ +"""Tests for the template-id prefix maps. + +attune-help resolves a `{prefix}-{slug}` template id to a file via +``templates._PREFIX_MAP`` (used by ``_find_template_file``) and +``progression._COMPOUND_PREFIXES`` (used by retrieval ranking). + +These two maps must stay in sync with each other AND with the +template-type enum in ``attune-rag/src/attune_rag/editor/template_schema.json`` +— the schema is the source of truth for which template kinds exist. +The schema can't be imported here without making attune-rag a +dependency (which would violate ADR-002), so we hard-code the +expected set with a comment pointing at the source. Update this list +in lockstep with the schema enum. +""" + +from __future__ import annotations + +from attune_help.progression import _COMPOUND_PREFIXES +from attune_help.templates import _PREFIX_MAP + +# Mirror of `attune-rag/src/attune_rag/editor/template_schema.json` +# `properties.type.enum`. If the enum changes there, change this list +# AND add the matching prefix below. +_EXPECTED_KINDS = { + "comparison", + "concept", + "error", + "faq", + "guide", + "note", + "quickstart", + "reference", + "task", + "tip", + "troubleshooting", + "warning", +} + +# Expected mapping: kind → (prefix, directory). Only the prefix and +# directory live in code; the kind is what the schema declares. +_EXPECTED_PREFIXES: dict[str, tuple[str, str]] = { + "comparison": ("com", "comparisons"), + "concept": ("con", "concepts"), + "error": ("err", "errors"), + "faq": ("faq", "faqs"), + "guide": ("gui", "guides"), + "note": ("not", "notes"), + "quickstart": ("qui", "quickstarts"), + "reference": ("ref", "references"), + "task": ("tas", "tasks"), + "tip": ("tip", "tips"), + "troubleshooting": ("tro", "troubleshooting"), + "warning": ("war", "warnings"), +} + + +def test_prefix_map_covers_every_schema_kind() -> None: + """Every kind in the schema enum has a prefix → dir mapping.""" + assert set(_EXPECTED_PREFIXES.keys()) == _EXPECTED_KINDS + + actual = dict(_PREFIX_MAP) + expected = {prefix: directory for prefix, directory in _EXPECTED_PREFIXES.values()} + assert actual == expected, ( + f"_PREFIX_MAP drift — missing prefixes={set(expected) - set(actual)}, " + f"extra prefixes={set(actual) - set(expected)}" + ) + + +def test_compound_prefixes_include_every_base_prefix() -> None: + """Every prefix in _PREFIX_MAP is reachable in retrieval ranking. + + `_COMPOUND_PREFIXES` is ordered longest-first; we only check + presence, not exact order — order is locked by other tests in + test_progression. + """ + base_prefixes = {f"{p}-" for p in _PREFIX_MAP} + compound_set = set(_COMPOUND_PREFIXES) + missing = base_prefixes - compound_set + assert not missing, f"_COMPOUND_PREFIXES is missing base prefixes: {missing}" + + +def test_quickstart_resolves_to_qui_prefix() -> None: + """Pin: ``quickstart`` → ``qui-`` (not ``qst-`` or anything else).""" + assert _PREFIX_MAP.get("qui") == "quickstarts" + assert "qui-" in _COMPOUND_PREFIXES + + +def test_guide_resolves_to_gui_prefix() -> None: + """Pin: ``guide`` → ``gui-``. + + Added when the schema enum was reconciled with the corpus + (attune-rag commit 06ebcec). No template uses ``type: guide`` yet + in the corpus, but the cross-linker would silently drop any new + one without this mapping. + """ + assert _PREFIX_MAP.get("gui") == "guides" + assert "gui-" in _COMPOUND_PREFIXES From 85b4b3bc127a009d73e01a3be5a213462289d946 Mon Sep 17 00:00:00 2001 From: GeneAI Date: Thu, 7 May 2026 21:11:21 -0400 Subject: [PATCH 2/2] perf(templates): mtime-aware caches for cross_links + parsed templates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related fixes from code-review 2026-05-07: 1. ``_load_cross_links`` cached cross_links.json by directory path alone — no mtime check. Long-running processes served stale data after a regen until callers remembered to invoke ``invalidate_cross_links_cache``. Switch the cache key to include ``st_mtime_ns`` so file rewrites invalidate themselves. 2. ``_parse_template_file`` had no cache at all. Every ``populate()`` call re-read and re-parsed the .md (frontmatter + section split). Add a module-level ``(path, mtime_ns) -> parsed_dict`` cache with a thread lock. Same self-invalidating mtime model as #1. Also expose ``invalidate_template_cache()`` for symmetry with the existing ``invalidate_cross_links_cache()`` — useful for tests and hard-reset scenarios; not needed for normal file edits. Tests: 8 new in test_cross_links_cache.py covering first-call load, mtime-stable cache hit, mtime-change reload, missing-file no-cache, manual invalidation. Tests use ``os.utime`` to bump mtime explicitly because macOS HFS+ stores 1s-resolution mtimes and naive rewrites in the same second can otherwise look unchanged. 258 passed. Co-Authored-By: Claude Opus 4.7 --- src/attune_help/templates.py | 81 ++++++++++++--- tests/test_cross_links_cache.py | 168 ++++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+), 13 deletions(-) create mode 100644 tests/test_cross_links_cache.py diff --git a/src/attune_help/templates.py b/src/attune_help/templates.py index 20ad46c..0b5d1a3 100644 --- a/src/attune_help/templates.py +++ b/src/attune_help/templates.py @@ -20,6 +20,13 @@ _CROSS_LINKS_CACHE: dict[str, dict[str, Any]] = {} _CROSS_LINKS_LOCK = threading.Lock() +# Parsed-template cache: keyed by ``(str(filepath), mtime_ns)`` so an +# edit-and-reload picks up the new file automatically. Without this, +# every ``populate()`` call re-parsed the markdown + frontmatter, which +# made template lookups O(file-size) per request in long-lived processes. +_PARSED_TEMPLATE_CACHE: dict[tuple[str, int], dict[str, Any]] = {} +_PARSED_TEMPLATE_LOCK = threading.Lock() + def invalidate_cross_links_cache() -> None: """Clear the cross-links cache so the next lookup re-reads disk. @@ -31,6 +38,17 @@ def invalidate_cross_links_cache() -> None: _CROSS_LINKS_CACHE.clear() +def invalidate_template_cache() -> None: + """Clear the parsed-template cache. + + Normally not needed — the cache key includes mtime so file edits + invalidate themselves. Useful for tests that mock filesystems or + for processes that need a hard reset across all templates. + """ + with _PARSED_TEMPLATE_LOCK: + _PARSED_TEMPLATE_CACHE.clear() + + @dataclass(frozen=True) class TemplateContext: """Runtime parameters for template population.""" @@ -138,12 +156,32 @@ def _find_template_file( def _parse_template_file(filepath: Path) -> dict[str, Any]: """Parse a template file into structured data. + Cached by ``(filepath, mtime_ns)`` so repeated ``populate()`` calls + on the same template don't re-read and re-parse the file. The mtime + component means filesystem edits invalidate the cache automatically; + no explicit ``invalidate_template_cache()`` call is needed in normal + operation. + Args: filepath: Path to the template .md file. Returns: Dict with frontmatter fields and parsed sections. """ + try: + mtime_ns = filepath.stat().st_mtime_ns + except OSError: + # Path no longer readable; fall through to the parser so the + # caller sees the same exception they would have seen before. + mtime_ns = -1 + + cache_key = (str(filepath), mtime_ns) + if mtime_ns != -1: + with _PARSED_TEMPLATE_LOCK: + cached = _PARSED_TEMPLATE_CACHE.get(cache_key) + if cached is not None: + return cached + import frontmatter as fm post = fm.load(str(filepath)) @@ -176,7 +214,7 @@ def _parse_template_file(filepath: Path) -> dict[str, Any]: else: tags = list(tags_raw) - return { + parsed: dict[str, Any] = { "type": post.get("type", ""), "subtype": post.get("subtype", ""), "name": post.get("name", filepath.stem), @@ -188,6 +226,10 @@ def _parse_template_file(filepath: Path) -> dict[str, Any]: "sections": sections, "body": post.content, } + if mtime_ns != -1: + with _PARSED_TEMPLATE_LOCK: + _PARSED_TEMPLATE_CACHE[cache_key] = parsed + return parsed # ------------------------------------------------------------------ @@ -196,7 +238,14 @@ def _parse_template_file(filepath: Path) -> dict[str, Any]: def _load_cross_links(generated_dir: Path) -> dict[str, Any]: - """Load cross-links index with caching. + """Load cross-links index with mtime-aware caching. + + Cached entries store the file's mtime alongside the parsed data. + When ``cross_links.json`` is rewritten (e.g. by a regen run in a + long-lived process), its mtime changes and the next call reloads + automatically. Without this, callers had to remember to invoke + :func:`invalidate_cross_links_cache` after every regen, and + forgetting it served stale results until process restart. Args: generated_dir: Path to generated/ directory. @@ -205,26 +254,32 @@ def _load_cross_links(generated_dir: Path) -> dict[str, Any]: Parsed cross_links.json, or empty dict. """ cache_key = str(generated_dir) - with _CROSS_LINKS_LOCK: - if cache_key in _CROSS_LINKS_CACHE: - return _CROSS_LINKS_CACHE[cache_key] + index_path = generated_dir / "cross_links.json" + try: + mtime_ns = index_path.stat().st_mtime_ns + except FileNotFoundError: + # File doesn't exist — return empty without caching so a later + # regen that creates the file is picked up on the next call. + return {} + except OSError as e: + logger.warning("Cannot stat cross_links.json: %s", e) + return {} - index_path = generated_dir / "cross_links.json" - if not index_path.exists(): - return {} + with _CROSS_LINKS_LOCK: + cached = _CROSS_LINKS_CACHE.get(cache_key) + if cached is not None and cached.get("__mtime_ns") == mtime_ns: + return cached["__data"] try: - data = json.loads( - index_path.read_text(encoding="utf-8"), - ) - _CROSS_LINKS_CACHE[cache_key] = data - return data + data = json.loads(index_path.read_text(encoding="utf-8")) except (json.JSONDecodeError, OSError) as e: logger.warning( "Failed to load cross_links.json: %s", e, ) return {} + _CROSS_LINKS_CACHE[cache_key] = {"__mtime_ns": mtime_ns, "__data": data} + return data def _resolve_related( diff --git a/tests/test_cross_links_cache.py b/tests/test_cross_links_cache.py new file mode 100644 index 0000000..2ce8045 --- /dev/null +++ b/tests/test_cross_links_cache.py @@ -0,0 +1,168 @@ +"""mtime-aware caching of cross_links.json. + +Verifies: + +- First call hits disk and caches. +- Subsequent calls with unchanged mtime hit cache. +- Rewriting the file (mtime change) triggers a reload. +- Missing file returns empty without poisoning the cache. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path + +import pytest + +from attune_help.templates import ( + _CROSS_LINKS_CACHE, + _load_cross_links, + invalidate_cross_links_cache, +) + + +@pytest.fixture(autouse=True) +def _clear_cache() -> None: + """Ensure tests don't share cache state.""" + invalidate_cross_links_cache() + yield + invalidate_cross_links_cache() + + +def _bump_mtime(path: Path) -> None: + """Force mtime forward by at least one nanosecond. + + macOS HFS+ stores 1s-resolution mtimes, so naive rewrites in the + same second don't change ``st_mtime_ns``. This explicitly sets a + later mtime so the cache invalidation path is exercised. + """ + now_ns = path.stat().st_mtime_ns + later = (now_ns + 1_000_000_000, now_ns + 1_000_000_000) + os.utime(path, ns=later) + + +def test_first_call_loads_from_disk(tmp_path: Path) -> None: + (tmp_path / "cross_links.json").write_text(json.dumps({"foo": ["bar"]})) + result = _load_cross_links(tmp_path) + assert result == {"foo": ["bar"]} + + +def test_second_call_with_unchanged_file_hits_cache(tmp_path: Path, monkeypatch) -> None: + """When mtime hasn't changed, the second call must not re-read the file.""" + cl = tmp_path / "cross_links.json" + cl.write_text(json.dumps({"foo": ["bar"]})) + _load_cross_links(tmp_path) # prime the cache + + read_calls = 0 + real_read = Path.read_text + + def counting_read(self, *args, **kwargs): + nonlocal read_calls + if self == cl: + read_calls += 1 + return real_read(self, *args, **kwargs) + + monkeypatch.setattr(Path, "read_text", counting_read) + _load_cross_links(tmp_path) + _load_cross_links(tmp_path) + assert read_calls == 0, "cache hit should not re-read the file" + + +def test_mtime_change_triggers_reload(tmp_path: Path) -> None: + cl = tmp_path / "cross_links.json" + cl.write_text(json.dumps({"v": 1})) + first = _load_cross_links(tmp_path) + assert first == {"v": 1} + + cl.write_text(json.dumps({"v": 2})) + _bump_mtime(cl) + second = _load_cross_links(tmp_path) + assert second == {"v": 2} + + +def test_missing_file_returns_empty_without_caching(tmp_path: Path) -> None: + """Missing file returns empty AND doesn't seed the cache; a later + creation of the file is picked up on the next call. + """ + assert _load_cross_links(tmp_path) == {} + assert str(tmp_path) not in _CROSS_LINKS_CACHE + + # Now create the file — the next call must pick it up + (tmp_path / "cross_links.json").write_text(json.dumps({"new": ["entry"]})) + assert _load_cross_links(tmp_path) == {"new": ["entry"]} + + +def test_invalidate_clears_cache(tmp_path: Path) -> None: + cl = tmp_path / "cross_links.json" + cl.write_text(json.dumps({"x": 1})) + _load_cross_links(tmp_path) + assert str(tmp_path) in _CROSS_LINKS_CACHE + invalidate_cross_links_cache() + assert str(tmp_path) not in _CROSS_LINKS_CACHE + + +# --------------------------------------------------------------------------- +# Parsed-template (path, mtime) cache +# --------------------------------------------------------------------------- + + +def test_parsed_template_cached_by_mtime(tmp_path: Path, monkeypatch) -> None: + """``_parse_template_file`` must reuse cached output when mtime is stable.""" + from attune_help import templates + + templates.invalidate_template_cache() + + f = tmp_path / "concept.md" + f.write_text("---\ntype: concept\n---\n# Title\n\n## Section\n\nbody\n") + + parse_calls = 0 + real_load = None + + import frontmatter as fm + + real_load = fm.load + + def counting_load(*args, **kwargs): + nonlocal parse_calls + parse_calls += 1 + return real_load(*args, **kwargs) + + monkeypatch.setattr(fm, "load", counting_load) + + a = templates._parse_template_file(f) + b = templates._parse_template_file(f) + c = templates._parse_template_file(f) + + assert a == b == c + assert parse_calls == 1, "second/third call should hit cache, not re-parse" + + +def test_parsed_template_reloads_after_mtime_change(tmp_path: Path) -> None: + from attune_help import templates + + templates.invalidate_template_cache() + f = tmp_path / "concept.md" + f.write_text("---\ntype: concept\n---\n# v1\n") + + first = templates._parse_template_file(f) + assert first["title"] == "v1" + + f.write_text("---\ntype: concept\n---\n# v2\n") + _bump_mtime(f) + + second = templates._parse_template_file(f) + assert second["title"] == "v2" + + +def test_invalidate_template_cache_clears_entries(tmp_path: Path) -> None: + from attune_help import templates + + f = tmp_path / "x.md" + f.write_text("---\ntype: concept\n---\n# A\n") + templates._parse_template_file(f) + assert any(str(f) == k[0] for k in templates._PARSED_TEMPLATE_CACHE) + + templates.invalidate_template_cache() + assert templates._PARSED_TEMPLATE_CACHE == {}