Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/attune_help/progression.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def _record_topic(

# Ordered longest-first so compound prefixes match before
# their shorter components (e.g. "ref-skill-" before "ref-").
# Base prefixes here must stay in sync with `templates._PREFIX_MAP`.
_COMPOUND_PREFIXES = [
"ref-skill-",
"ref-tool-",
Expand All @@ -83,14 +84,15 @@ def _record_topic(
"tas-",
"con-tool-",
"con-",
"com-",
"err-",
"war-",
"tip-",
"faq-",
"gui-",
"not-",
"qui-",
"tip-",
"tro-",
"com-",
"war-",
]


Expand Down
99 changes: 80 additions & 19 deletions src/attune_help/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@
_CROSS_LINKS_CACHE: dict[str, dict[str, Any]] = {}
_CROSS_LINKS_LOCK = threading.Lock()

# Parsed-template cache: keyed by ``(str(filepath), mtime_ns)`` so an
# edit-and-reload picks up the new file automatically. Without this,
# every ``populate()`` call re-parsed the markdown + frontmatter, which
# made template lookups O(file-size) per request in long-lived processes.
_PARSED_TEMPLATE_CACHE: dict[tuple[str, int], dict[str, Any]] = {}
_PARSED_TEMPLATE_LOCK = threading.Lock()


def invalidate_cross_links_cache() -> None:
"""Clear the cross-links cache so the next lookup re-reads disk.
Expand All @@ -31,6 +38,17 @@ def invalidate_cross_links_cache() -> None:
_CROSS_LINKS_CACHE.clear()


def invalidate_template_cache() -> None:
    """Drop every entry from the parsed-template cache.

    Ordinarily unnecessary: each cache key embeds the file's mtime, so
    on-disk edits invalidate themselves. Provided for tests that mock
    the filesystem and for processes that want a hard reset across all
    templates at once.
    """
    with _PARSED_TEMPLATE_LOCK:
        _PARSED_TEMPLATE_CACHE.clear()


@dataclass(frozen=True)
class TemplateContext:
"""Runtime parameters for template population."""
Expand Down Expand Up @@ -78,18 +96,24 @@ class PopulatedTemplate:
# Template file resolution
# ------------------------------------------------------------------

# Maps the 3-letter `{prefix}-{slug}` template-id prefix to the
# directory the matching template lives in. Mirrors the enum in
# `attune-rag` at `src/attune_rag/editor/template_schema.json`. Adding
# a kind to that enum implies adding both an entry here AND the
# corresponding subdirectory under `templates/`.
_PREFIX_MAP = {
    # One entry per template kind; keys sorted alphabetically so drift
    # against the companion prefix list is easy to spot in review.
    # (Previous revision carried duplicate keys from a bad merge; in a
    # dict literal the later duplicate silently shadows the earlier one.)
    "com": "comparisons",
    "con": "concepts",
    "err": "errors",
    "faq": "faqs",
    "gui": "guides",
    "not": "notes",
    "qui": "quickstarts",
    "ref": "references",
    "tas": "tasks",
    "tip": "tips",
    "tro": "troubleshooting",
    "war": "warnings",
}


Expand Down Expand Up @@ -132,12 +156,32 @@ def _find_template_file(
def _parse_template_file(filepath: Path) -> dict[str, Any]:
"""Parse a template file into structured data.

Cached by ``(filepath, mtime_ns)`` so repeated ``populate()`` calls
on the same template don't re-read and re-parse the file. The mtime
component means filesystem edits invalidate the cache automatically;
no explicit ``invalidate_template_cache()`` call is needed in normal
operation.

Args:
filepath: Path to the template .md file.

Returns:
Dict with frontmatter fields and parsed sections.
"""
try:
mtime_ns = filepath.stat().st_mtime_ns
except OSError:
# Path no longer readable; fall through to the parser so the
# caller sees the same exception they would have seen before.
mtime_ns = -1

cache_key = (str(filepath), mtime_ns)
if mtime_ns != -1:
with _PARSED_TEMPLATE_LOCK:
cached = _PARSED_TEMPLATE_CACHE.get(cache_key)
if cached is not None:
return cached

import frontmatter as fm

post = fm.load(str(filepath))
Expand Down Expand Up @@ -170,7 +214,7 @@ def _parse_template_file(filepath: Path) -> dict[str, Any]:
else:
tags = list(tags_raw)

return {
parsed: dict[str, Any] = {
"type": post.get("type", ""),
"subtype": post.get("subtype", ""),
"name": post.get("name", filepath.stem),
Expand All @@ -182,6 +226,10 @@ def _parse_template_file(filepath: Path) -> dict[str, Any]:
"sections": sections,
"body": post.content,
}
if mtime_ns != -1:
with _PARSED_TEMPLATE_LOCK:
_PARSED_TEMPLATE_CACHE[cache_key] = parsed
return parsed


# ------------------------------------------------------------------
Expand All @@ -190,7 +238,14 @@ def _parse_template_file(filepath: Path) -> dict[str, Any]:


def _load_cross_links(generated_dir: Path) -> dict[str, Any]:
"""Load cross-links index with caching.
"""Load cross-links index with mtime-aware caching.

Cached entries store the file's mtime alongside the parsed data.
When ``cross_links.json`` is rewritten (e.g. by a regen run in a
long-lived process), its mtime changes and the next call reloads
automatically. Without this, callers had to remember to invoke
:func:`invalidate_cross_links_cache` after every regen, and
forgetting it served stale results until process restart.

Args:
generated_dir: Path to generated/ directory.
Expand All @@ -199,26 +254,32 @@ def _load_cross_links(generated_dir: Path) -> dict[str, Any]:
Parsed cross_links.json, or empty dict.
"""
cache_key = str(generated_dir)
with _CROSS_LINKS_LOCK:
if cache_key in _CROSS_LINKS_CACHE:
return _CROSS_LINKS_CACHE[cache_key]
index_path = generated_dir / "cross_links.json"
try:
mtime_ns = index_path.stat().st_mtime_ns
except FileNotFoundError:
# File doesn't exist — return empty without caching so a later
# regen that creates the file is picked up on the next call.
return {}
except OSError as e:
logger.warning("Cannot stat cross_links.json: %s", e)
return {}

index_path = generated_dir / "cross_links.json"
if not index_path.exists():
return {}
with _CROSS_LINKS_LOCK:
cached = _CROSS_LINKS_CACHE.get(cache_key)
if cached is not None and cached.get("__mtime_ns") == mtime_ns:
return cached["__data"]

try:
data = json.loads(
index_path.read_text(encoding="utf-8"),
)
_CROSS_LINKS_CACHE[cache_key] = data
return data
data = json.loads(index_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError) as e:
logger.warning(
"Failed to load cross_links.json: %s",
e,
)
return {}
_CROSS_LINKS_CACHE[cache_key] = {"__mtime_ns": mtime_ns, "__data": data}
return data


def _resolve_related(
Expand Down
168 changes: 168 additions & 0 deletions tests/test_cross_links_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""mtime-aware caching of cross_links.json.

Verifies:

- First call hits disk and caches.
- Subsequent calls with unchanged mtime hit cache.
- Rewriting the file (mtime change) triggers a reload.
- Missing file returns empty without poisoning the cache.
"""

from __future__ import annotations

import json
import os
from collections.abc import Iterator
from pathlib import Path

import pytest

from attune_help.templates import (
    _CROSS_LINKS_CACHE,
    _load_cross_links,
    invalidate_cross_links_cache,
)


@pytest.fixture(autouse=True)
def _clear_cache() -> Iterator[None]:
    """Ensure tests don't share cache state.

    Runs the invalidation both before and after each test so a failing
    test can't poison its successors. Annotated as ``Iterator[None]``
    because a yield-fixture is a generator, not a plain ``None`` return.
    """
    invalidate_cross_links_cache()
    yield
    invalidate_cross_links_cache()


def _bump_mtime(path: Path) -> None:
    """Advance *path*'s mtime by one full second.

    Filesystems with coarse timestamp resolution (e.g. 1s on macOS
    HFS+) can leave ``st_mtime_ns`` unchanged when a file is rewritten
    within the same second, so the cache-invalidation path would never
    fire. Setting an explicitly later mtime guarantees it is exercised.
    """
    bumped_ns = path.stat().st_mtime_ns + 1_000_000_000
    os.utime(path, ns=(bumped_ns, bumped_ns))


def test_first_call_loads_from_disk(tmp_path: Path) -> None:
    """A cold cache must read and parse the index straight from disk."""
    payload = {"foo": ["bar"]}
    (tmp_path / "cross_links.json").write_text(json.dumps(payload))
    assert _load_cross_links(tmp_path) == payload


def test_second_call_with_unchanged_file_hits_cache(tmp_path: Path, monkeypatch) -> None:
    """When mtime hasn't changed, the second call must not re-read the file."""
    cl = tmp_path / "cross_links.json"
    cl.write_text(json.dumps({"foo": ["bar"]}))
    _load_cross_links(tmp_path)  # first call primes the cache

    # Record every read of the index file made after priming.
    reads: list[Path] = []
    original_read_text = Path.read_text

    def tracking_read(self, *args, **kwargs):
        if self == cl:
            reads.append(self)
        return original_read_text(self, *args, **kwargs)

    monkeypatch.setattr(Path, "read_text", tracking_read)
    _load_cross_links(tmp_path)
    _load_cross_links(tmp_path)
    assert len(reads) == 0, "cache hit should not re-read the file"


def test_mtime_change_triggers_reload(tmp_path: Path) -> None:
    """Rewriting the index (with a later mtime) must invalidate the cache."""
    index = tmp_path / "cross_links.json"

    index.write_text(json.dumps({"v": 1}))
    assert _load_cross_links(tmp_path) == {"v": 1}

    index.write_text(json.dumps({"v": 2}))
    _bump_mtime(index)
    assert _load_cross_links(tmp_path) == {"v": 2}


def test_missing_file_returns_empty_without_caching(tmp_path: Path) -> None:
    """An absent index yields {} and must not seed the cache, so a file
    created later is picked up on the very next call.
    """
    empty = _load_cross_links(tmp_path)
    assert empty == {}
    assert str(tmp_path) not in _CROSS_LINKS_CACHE

    # Create the file afterwards — the next lookup must see it.
    payload = {"new": ["entry"]}
    (tmp_path / "cross_links.json").write_text(json.dumps(payload))
    assert _load_cross_links(tmp_path) == payload


def test_invalidate_clears_cache(tmp_path: Path) -> None:
    """Explicit invalidation removes the entry for this directory."""
    index = tmp_path / "cross_links.json"
    index.write_text(json.dumps({"x": 1}))
    _load_cross_links(tmp_path)

    key = str(tmp_path)
    assert key in _CROSS_LINKS_CACHE
    invalidate_cross_links_cache()
    assert key not in _CROSS_LINKS_CACHE


# ---------------------------------------------------------------------------
# Parsed-template (path, mtime) cache
# ---------------------------------------------------------------------------


def test_parsed_template_cached_by_mtime(tmp_path: Path, monkeypatch) -> None:
    """``_parse_template_file`` must reuse cached output when mtime is stable."""
    # Function-scope imports keep the third-party dependency local to
    # the tests that need it, matching the sibling tests below.
    import frontmatter as fm

    from attune_help import templates

    templates.invalidate_template_cache()

    f = tmp_path / "concept.md"
    f.write_text("---\ntype: concept\n---\n# Title\n\n## Section\n\nbody\n")

    # Count parser invocations: the parser under test imports
    # ``frontmatter`` itself, so patching the module attribute is seen
    # by the second and third calls too.
    parse_calls = 0
    real_load = fm.load

    def counting_load(*args, **kwargs):
        nonlocal parse_calls
        parse_calls += 1
        return real_load(*args, **kwargs)

    monkeypatch.setattr(fm, "load", counting_load)

    a = templates._parse_template_file(f)
    b = templates._parse_template_file(f)
    c = templates._parse_template_file(f)

    assert a == b == c
    assert parse_calls == 1, "second/third call should hit cache, not re-parse"


def test_parsed_template_reloads_after_mtime_change(tmp_path: Path) -> None:
    """An on-disk edit (with a later mtime) must yield freshly parsed data."""
    from attune_help import templates

    templates.invalidate_template_cache()
    md = tmp_path / "concept.md"

    md.write_text("---\ntype: concept\n---\n# v1\n")
    assert templates._parse_template_file(md)["title"] == "v1"

    md.write_text("---\ntype: concept\n---\n# v2\n")
    _bump_mtime(md)
    assert templates._parse_template_file(md)["title"] == "v2"


def test_invalidate_template_cache_clears_entries(tmp_path: Path) -> None:
    """``invalidate_template_cache`` empties the parsed-template cache."""
    from attune_help import templates

    # Start from a clean slate: the autouse fixture only clears the
    # cross-links cache, so without this reset the test is order-
    # dependent on siblings that leave template entries behind.
    templates.invalidate_template_cache()

    f = tmp_path / "x.md"
    f.write_text("---\ntype: concept\n---\n# A\n")
    templates._parse_template_file(f)
    assert any(str(f) == k[0] for k in templates._PARSED_TEMPLATE_CACHE)

    templates.invalidate_template_cache()
    assert templates._PARSED_TEMPLATE_CACHE == {}
Loading
Loading