From abf7f4fe58f8691f82156d6dd32261bd2142c3e0 Mon Sep 17 00:00:00 2001 From: GeneAI Date: Thu, 7 May 2026 22:16:39 -0400 Subject: [PATCH] feat(rag): typed HelpCorpusAdapter protocol; localize dynamic import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase A of specs/architecture-realignment (workspace-level spec 2026-05-08). Closes finding #4 from the 2026-05-07 code-review: ``importlib.import_module("attune_help")`` lived at the module body of attune_rag.corpus.attune_help, hiding the rag→help dependency from static analysis. Changes: - New ``attune_rag/corpus/help_adapter.py`` defines the ``HelpCorpusAdapter`` Protocol (``templates_root``, ``version``). Smaller surface than the design.md draft (no ``iter_entries``); iteration stays in DirectoryCorpus where it already lives. Honest-to-the-code wins. - ``AttuneHelpCorpus.__init__`` now takes an adapter explicitly. ``AttuneHelpCorpus.from_attune_help()`` is the convenience factory that does the dynamic import inside one classmethod call instead of the module body. Static analyzers see the rag→help boundary as a runtime-only dependency in this single function. - Module-level ``_BundledAdapter`` frozen dataclass implements the protocol for the bundled-help case. Lifted out of the factory body because Python's class-body scoping rules don't see enclosing-function locals (one test made me discover this the hard way). Two new tests: - ``test_corpus_works_with_injected_adapter`` exercises the protocol path end-to-end; verifies the adapter is the only import surface needed for a working corpus. - ``test_invalid_templates_root_raises`` verifies adapter validation fails at construction with a clear error. Existing 7 tests updated to use ``.from_attune_help()``. Full suite: 308 passed, 3 xpassed. 
Co-Authored-By: Claude Opus 4.7 --- src/attune_rag/corpus/attune_help.py | 77 ++++++++++++++++++++------- src/attune_rag/corpus/help_adapter.py | 49 +++++++++++++++++ src/attune_rag/pipeline.py | 2 +- tests/unit/test_corpus_attune_help.py | 52 +++++++++++++++--- 4 files changed, 153 insertions(+), 27 deletions(-) create mode 100644 src/attune_rag/corpus/help_adapter.py diff --git a/src/attune_rag/corpus/attune_help.py b/src/attune_rag/corpus/attune_help.py index 64c4c4c..7517abe 100644 --- a/src/attune_rag/corpus/attune_help.py +++ b/src/attune_rag/corpus/attune_help.py @@ -32,38 +32,47 @@ import json from collections.abc import Iterable -from importlib import import_module -from importlib.resources import as_file, files +from dataclasses import dataclass from pathlib import Path from .base import RetrievalEntry from .directory import DirectoryCorpus +from .help_adapter import HelpCorpusAdapter _OVERRIDES_PATH = Path(__file__).parent / "summaries_override.json" +@dataclass(frozen=True) +class _BundledAdapter: + """Default :class:`HelpCorpusAdapter` for the bundled attune-help. + + Module-level so it doesn't fall foul of Python's class-body scoping + rules (a class declared inside a function can't reference the + function's locals). + """ + + templates_root: Path + version: str + + class AttuneHelpCorpus: - """Loads the bundled ``attune_help`` templates as a corpus.""" + """Loads an attune-help-shaped corpus of templates. - def __init__(self) -> None: - try: - attune_help = import_module("attune_help") - except ImportError as exc: - raise RuntimeError( - "AttuneHelpCorpus requires the [attune-help] extra. " - "Install with: pip install 'attune-rag[attune-help]'" - ) from exc + Takes a :class:`HelpCorpusAdapter` so attune-rag never imports + attune-help at module level. Use :meth:`from_attune_help` for the + common case where you have attune-help installed and want the + bundled templates without writing your own adapter. 
+ """ - templates = files("attune_help").joinpath("templates") - with as_file(templates) as templates_path: - root = Path(templates_path) - if not root.is_dir(): + def __init__(self, adapter: HelpCorpusAdapter) -> None: + if not adapter.templates_root.is_dir(): raise RuntimeError( - f"attune_help templates directory not found at {root}. " - "The attune-help package layout may have changed." + f"templates_root is not a directory: {adapter.templates_root}. " + "The corpus adapter may be misconfigured or the package " + "layout may have changed." ) - self._version = getattr(attune_help, "__version__", "unknown") + self._version = adapter.version overrides: dict[str, str | None] = {} if _OVERRIDES_PATH.is_file(): try: @@ -75,11 +84,41 @@ def __init__(self) -> None: # treats a missing file as an empty map, so this is safe # to pass unconditionally. self._inner = DirectoryCorpus( - root=root, + root=adapter.templates_root, summaries_file="summaries_by_path.json", extra_summaries=overrides, ) + @classmethod + def from_attune_help(cls) -> AttuneHelpCorpus: + """Construct using the bundled attune-help package as adapter. + + Localizes the dynamic import to one factory call instead of the + module body. Static analyzers see the rag→help boundary as a + runtime-only dependency in this single function. Callers that + want to avoid the implicit dep entirely (testing, alternate + corpora) should construct the adapter themselves and pass it + to ``__init__``. + """ + from importlib import import_module + from importlib.resources import as_file, files + + try: + attune_help = import_module("attune_help") + except ImportError as exc: + raise RuntimeError( + "AttuneHelpCorpus.from_attune_help() requires the " + "[attune-help] extra. 
Install with: " + "pip install 'attune-rag[attune-help]'" + ) from exc + + templates = files("attune_help").joinpath("templates") + with as_file(templates) as templates_path: + root = Path(templates_path) + + version = getattr(attune_help, "__version__", "unknown") + return cls(_BundledAdapter(templates_root=root, version=version)) + def entries(self) -> Iterable[RetrievalEntry]: return self._inner.entries() diff --git a/src/attune_rag/corpus/help_adapter.py b/src/attune_rag/corpus/help_adapter.py new file mode 100644 index 0000000..40fddea --- /dev/null +++ b/src/attune_rag/corpus/help_adapter.py @@ -0,0 +1,49 @@ +"""Typed protocol for plugging an attune-help (or similar) corpus into rag. + +This module exists to remove the module-level +``importlib.import_module("attune_help")`` from +:mod:`attune_rag.corpus.attune_help`. With the protocol in place, +attune-rag never imports attune-help — the consumer hands an adapter +in. Static analyzers can see the real dependency graph (rag does not +depend on help; help depends on rag's protocol). + +The protocol is intentionally minimal: the adapter only needs to point +to the templates directory and report a version string. attune-rag's +``AttuneHelpCorpus`` does the corpus work via the existing +``DirectoryCorpus`` against that root. + +The simpler protocol differs from the shape proposed in +``specs/architecture-realignment/design.md`` (which suggested +``iter_entries`` on the adapter). Iteration is already handled by +``DirectoryCorpus``; pushing it into the adapter would duplicate that +logic in attune-help. Honest-to-the-code wins. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Protocol, runtime_checkable + + +@runtime_checkable +class HelpCorpusAdapter(Protocol): + """Implemented by attune-help (or any other consumer that wants + its bundled corpus exposed to attune-rag's keyword retriever). 
+ + Implementations are typically a small dataclass-like object that + points at a directory of markdown templates and exposes a version + string. attune-help ships such an adapter as + ``attune_help.adapters.rag.AttuneHelpAdapter``; downstream callers + can pass any object that satisfies this protocol. + """ + + @property + def templates_root(self) -> Path: + """Filesystem path to the directory of ``*.md`` templates.""" + ... + + @property + def version(self) -> str: + """Stable version string for the help corpus (e.g. package + ``__version__``). Used for cache-busting and provenance.""" + ... diff --git a/src/attune_rag/pipeline.py b/src/attune_rag/pipeline.py index 2538c52..218e5b7 100644 --- a/src/attune_rag/pipeline.py +++ b/src/attune_rag/pipeline.py @@ -108,7 +108,7 @@ def _default_corpus() -> CorpusProtocol: "Either pass a corpus= (e.g. DirectoryCorpus) or install " "'attune-rag[attune-help]'." ) from exc - return AttuneHelpCorpus() + return AttuneHelpCorpus.from_attune_help() def run( self, diff --git a/tests/unit/test_corpus_attune_help.py b/tests/unit/test_corpus_attune_help.py index 742ac04..bd1d6b5 100644 --- a/tests/unit/test_corpus_attune_help.py +++ b/tests/unit/test_corpus_attune_help.py @@ -14,7 +14,7 @@ def test_loads_bundled_corpus() -> None: - corpus = AttuneHelpCorpus() + corpus = AttuneHelpCorpus.from_attune_help() entries = list(corpus.entries()) # attune-help v0.5.x ships >=500 templates; assert a floor # that still catches regressions without being brittle. 
@@ -22,7 +22,7 @@ def test_loads_bundled_corpus() -> None: def test_has_expected_categories() -> None: - corpus = AttuneHelpCorpus() + corpus = AttuneHelpCorpus.from_attune_help() categories = {e.category for e in corpus.entries()} expected = { "concepts", @@ -38,13 +38,13 @@ def test_has_expected_categories() -> None: def test_name_and_version() -> None: import attune_help - corpus = AttuneHelpCorpus() + corpus = AttuneHelpCorpus.from_attune_help() assert corpus.name == "attune-help" assert corpus.version == attune_help.__version__ def test_get_returns_by_path() -> None: - corpus = AttuneHelpCorpus() + corpus = AttuneHelpCorpus.from_attune_help() some = next(iter(corpus.entries())) fetched = corpus.get(some.path) assert fetched is not None @@ -52,7 +52,7 @@ def test_get_returns_by_path() -> None: def test_get_unknown_returns_none() -> None: - corpus = AttuneHelpCorpus() + corpus = AttuneHelpCorpus.from_attune_help() assert corpus.get("does/not/exist.md") is None @@ -66,7 +66,7 @@ def test_path_keyed_summaries_load_from_attune_help_0_7_0() -> None: so some — not necessarily all — entries populate summaries. 
""" - corpus = AttuneHelpCorpus() + corpus = AttuneHelpCorpus.from_attune_help() entries = list(corpus.entries()) with_summary = sum(1 for e in entries if e.summary) # attune-help 0.7.0 ships 124 polished path-keyed @@ -95,7 +95,45 @@ def test_raises_helpful_error_when_attune_help_missing( try: with pytest.raises(RuntimeError, match=r"\[attune-help\] extra"): - AttuneHelpCorpus() + AttuneHelpCorpus.from_attune_help() finally: sys.modules.pop("attune_help", None) sys.modules.update(saved) + + +# --------------------------------------------------------------------------- +# HelpCorpusAdapter — protocol path (no attune-help required) +# --------------------------------------------------------------------------- + + +def test_corpus_works_with_injected_adapter(tmp_path) -> None: + """The protocol path lets callers wire any directory of templates + in without ever importing attune-help. Doubles as a contract test + for the HelpCorpusAdapter shape. + """ + from attune_rag.corpus.attune_help import _BundledAdapter + + # Minimal templates dir + (tmp_path / "concepts").mkdir() + (tmp_path / "concepts" / "alpha.md").write_text("# alpha\nbody\n") + + adapter = _BundledAdapter(templates_root=tmp_path, version="test-v1") + corpus = AttuneHelpCorpus(adapter=adapter) + entries = list(corpus.entries()) + + assert len(entries) == 1 + assert entries[0].path == "concepts/alpha.md" + assert corpus.version == "test-v1" + + +def test_invalid_templates_root_raises() -> None: + """Adapter pointing at a non-directory must fail loudly at construction.""" + from pathlib import Path as _P + + from attune_rag.corpus.attune_help import _BundledAdapter + + bad_adapter = _BundledAdapter( + templates_root=_P("/this/path/does/not/exist"), version="x" + ) + with pytest.raises(RuntimeError, match="templates_root is not a directory"): + AttuneHelpCorpus(adapter=bad_adapter)