Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""add rationale_cache

Revision ID: b4de974b9f54
Revises: 4ebdddf127cf
Create Date: 2026-05-23 19:59:51.000000

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# Revision identifiers, used by Alembic.
# ``revision`` is this migration's own ID and ``down_revision`` is the
# migration it applies on top of; both mirror the "Revision ID" /
# "Revises" lines in the module docstring above.
revision: str = 'b4de974b9f54'
down_revision: Union[str, Sequence[str], None] = '4ebdddf127cf'
# No branch labels and no cross-revision dependencies are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema: create the ``rationale_cache`` table.

    The table is keyed by the composite primary key
    ``(path, line_start, line_end, provider, model)``. The fingerprint,
    timestamp and rationale text columns are required; token counts, the
    ``actual_*`` columns and ``qualified_name`` are nullable.
    """
    primary_key = ('path', 'line_start', 'line_end', 'provider', 'model')

    columns = [
        # Composite primary-key columns.
        sa.Column('path', sa.Text(), nullable=False),
        sa.Column('line_start', sa.Integer(), nullable=False),
        sa.Column('line_end', sa.Integer(), nullable=False),
        sa.Column('provider', sa.Text(), nullable=False),
        sa.Column('model', sa.Text(), nullable=False),
        # Cache bookkeeping.
        sa.Column('evidence_fingerprint', sa.Text(), nullable=False),
        sa.Column('cached_at', sa.Text(), nullable=False),
        # Rationale payload, all stored as text.
        sa.Column('purpose', sa.Text(), nullable=False),
        sa.Column('why', sa.Text(), nullable=False),
        sa.Column('constraints', sa.Text(), nullable=False),
        sa.Column('tradeoffs', sa.Text(), nullable=False),
        sa.Column('risks', sa.Text(), nullable=False),
        # Optional metadata.
        sa.Column('input_tokens', sa.Integer(), nullable=True),
        sa.Column('output_tokens', sa.Integer(), nullable=True),
        sa.Column('actual_provider', sa.Text(), nullable=True),
        sa.Column('actual_model', sa.Text(), nullable=True),
        sa.Column('qualified_name', sa.Text(), nullable=True),
    ]

    op.create_table(
        'rationale_cache',
        *columns,
        sa.PrimaryKeyConstraint(*primary_key),
    )


def downgrade() -> None:
    """Downgrade schema: drop the ``rationale_cache`` table."""
    table = 'rationale_cache'
    op.drop_table(table)
19 changes: 6 additions & 13 deletions src/whygraph/db/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,10 @@
* Float columns use :class:`sqlalchemy.REAL` for the same reason.
* Several columns are typed ``str`` even though they hold JSON-encoded
Python lists (e.g. ``Commit.parent_shas``, ``PullRequest.labels``,
``PullRequest.commit_titles``). Callers encode/decode with ``json`` at
the boundary. Moving to a proper JSON column type is a follow-up that
needs a real Alembic migration.

Current models cover the five tables whose auto-derived snake_case name
does *not* collide with the hand-rolled tables owned by
:mod:`whygraph.scan.db`: ``Author``, ``Commit``, ``Issue``,
``PullRequest``, ``PRIssueLink``. The remaining two scan-owned tables
(``rationale_cache``, ``scan_state``) intentionally have no SQLModel
yet — their natural snake_case names collide with scan/db.py, so they
will get models as part of the eventual scanner-side migration to ORM
rather than today.
``PullRequest.commit_titles``, ``RationaleCache.constraints``).
Callers encode/decode with ``json`` at the boundary. Moving to a
proper JSON column type is a follow-up that needs a real Alembic
migration.
"""

from __future__ import annotations
Expand All @@ -41,5 +33,6 @@
from whygraph.db.models.issue import Issue
from whygraph.db.models.pr_issue_link import PRIssueLink
from whygraph.db.models.pull_request import PullRequest
from whygraph.db.models.rationale_cache import RationaleCache

__all__ = ["Author", "Commit", "Issue", "PRIssueLink", "PullRequest"]
__all__ = ["Author", "Commit", "Issue", "PRIssueLink", "PullRequest", "RationaleCache"]
62 changes: 62 additions & 0 deletions src/whygraph/db/models/rationale_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""SQLModel for the ``rationale_cache`` table.

One row per cached LLM-generated rationale, keyed by target plus the
``(provider, model)`` identity of the LLM that produced it. Lookups
happen *after* evidence collection so that a change in the blamed-commit
set — a new commit landing on those lines — invalidates the cache via
the ``evidence_fingerprint`` column without needing TTLs.

Notes
-----
The list-shaped rationale fields (``constraints``, ``tradeoffs``,
``risks``) are stored as JSON-encoded strings, matching the convention
already used by :attr:`Commit.parent_shas`, :attr:`PullRequest.labels`,
and :attr:`Issue.labels`. Callers encode/decode at the boundary
(:mod:`whygraph.mcp.rationale_cache`).

``model`` is part of the composite PK; when
:attr:`whygraph.core.config.RationaleConfig.model` is ``None`` the cache
key uses the literal string ``"default"``. The LLM-reported model
identity lands in the separate ``actual_model`` column so rows keyed
under ``"default"`` retain provenance.
"""

from __future__ import annotations

from sqlalchemy import Text
from sqlmodel import Field

from whygraph.db.base import WhygraphTable


class RationaleCache(WhygraphTable, table=True):
    """One cached :class:`whygraph.analyze.Rationale` per (target, LLM) pair.

    Rows are keyed by ``(path, line_start, line_end, provider, model)``,
    so results from two different LLMs for the same target can sit side
    by side. ``qualified_name`` is deliberately left out of the key: it
    is observational only — a path/line target may carry no symbol, and
    keying on it would split the cache between symbol and line-range
    lookups of the same lines.
    """

    # --- composite primary key: target location + LLM identity ---
    path: str = Field(sa_type=Text, primary_key=True)
    line_start: int = Field(primary_key=True)
    line_end: int = Field(primary_key=True)
    provider: str = Field(sa_type=Text, primary_key=True)
    model: str = Field(sa_type=Text, primary_key=True)

    # --- cache bookkeeping ---
    evidence_fingerprint: str = Field(sa_type=Text)
    cached_at: str = Field(sa_type=Text)

    # --- rationale payload ---
    purpose: str = Field(sa_type=Text)
    why: str = Field(sa_type=Text)
    # The three list-shaped fields are stored as JSON-encoded list[str].
    constraints: str = Field(sa_type=Text)
    tradeoffs: str = Field(sa_type=Text)
    risks: str = Field(sa_type=Text)

    # --- optional token counts ---
    input_tokens: int | None = Field(default=None)
    output_tokens: int | None = Field(default=None)

    # --- optional provenance / observational columns ---
    actual_provider: str | None = Field(sa_type=Text, default=None)
    actual_model: str | None = Field(sa_type=Text, default=None)
    qualified_name: str | None = Field(sa_type=Text, default=None)
57 changes: 41 additions & 16 deletions src/whygraph/mcp/rationale.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
from whygraph.services.codegraph import CodeGraph, CodeGraphError, SymbolContext
from whygraph.services.llm import LlmError

from whygraph.analyze import CommitEvidence, Rationale

from .errors import WhyGraphError
from .targets import Target, repo_root, resolve_target, target_dict
from .evidence import collect_evidence
from .rationale_cache import lookup_cached, store_cached

_TOOL_DESCRIPTION = (
"Generate a structured rationale card (purpose / why / constraints / "
Expand Down Expand Up @@ -48,6 +51,31 @@ def _symbol_context(target: Target) -> SymbolContext | None:
return None


def _format_response(
    target: Target,
    rationale: Rationale,
    evidence: list[CommitEvidence],
    cached_at: str,
) -> dict:
    """Build the MCP response dict for a rationale, fresh or cached.

    Parameters
    ----------
    target:
        The resolved target; serialized through :func:`target_dict`.
    rationale:
        Source of the card fields (``purpose``, ``why``, ``constraints``,
        ``tradeoffs``, ``risks``, ``model``, ``provider``).
    evidence:
        Evidence items the rationale was built from; only counted here.
    cached_at:
        Cache timestamp string, passed through unchanged.

    Returns
    -------
    dict
        The card fields plus ``target``, ``cached_at`` and an
        ``evidence_count`` breakdown of commits, PRs and issues.
    """
    payload: dict = {"target": target_dict(target)}

    # Copy the list-shaped fields so the payload owns plain lists.
    payload.update(
        {
            "purpose": rationale.purpose,
            "why": rationale.why,
            "constraints": [*rationale.constraints],
            "tradeoffs": [*rationale.tradeoffs],
            "risks": [*rationale.risks],
            "model": rationale.model,
            "provider": rationale.provider,
            "cached_at": cached_at,
        }
    )

    commit_total = len(evidence)
    pr_total = sum(len(entry.pull_requests) for entry in evidence)
    issue_total = sum(len(entry.issues) for entry in evidence)
    payload["evidence_count"] = {
        "commits": commit_total,
        "prs": pr_total,
        "issues": issue_total,
    }
    return payload


def whygraph_rationale_brief(
path: str | None = None,
line_start: int | None = None,
Expand All @@ -57,6 +85,10 @@ def whygraph_rationale_brief(
"""MCP tool — a rationale card for a chunk of code.

See :data:`_TOOL_DESCRIPTION` for the agent-facing summary.

A previously generated card is returned from the SQLite-backed cache
(see :mod:`whygraph.mcp.rationale_cache`) when the same target,
provider, model, and evidence fingerprint are all unchanged.
"""
target = resolve_target(
path=path,
Expand All @@ -71,27 +103,20 @@ def whygraph_rationale_brief(
"scanned commit. Run `whygraph scan` to populate the database."
)

config = get_config().rationale
cached = lookup_cached(target, evidence, config.provider, config.model)
if cached is not None:
rationale, cached_at = cached
return _format_response(target, rationale, evidence, cached_at)

try:
generator = RationaleGenerator.from_config(get_config().rationale)
generator = RationaleGenerator.from_config(config)
rationale = generator.generate(evidence, symbol_context=_symbol_context(target))
except (AnalyzeError, LlmError) as exc:
raise WhyGraphError.wrap("rationale generation failed", exc)

return {
"target": target_dict(target),
"purpose": rationale.purpose,
"why": rationale.why,
"constraints": list(rationale.constraints),
"tradeoffs": list(rationale.tradeoffs),
"risks": list(rationale.risks),
"model": rationale.model,
"provider": rationale.provider,
"evidence_count": {
"commits": len(evidence),
"prs": sum(len(item.pull_requests) for item in evidence),
"issues": sum(len(item.issues) for item in evidence),
},
}
cached_at = store_cached(target, evidence, rationale, config.provider, config.model)
return _format_response(target, rationale, evidence, cached_at)


def register(mcp: FastMCP) -> None:
Expand Down
Loading