Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 157 additions & 3 deletions neuralmind/context_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- Reduction ratio: 30-50x typical
"""

import os
from dataclasses import dataclass, field
from pathlib import Path

Expand Down Expand Up @@ -90,6 +91,16 @@ class ContextSelector:
# Chars per token estimate
CHARS_PER_TOKEN = 4

# Synapse-driven recall (see _apply_synapse_boost / get_l2_context):
# number of top hits used to seed spreading activation, how strongly
# learned co-activation nudges relevance, the cap on neighbors pulled
# into L3 that vector search missed, and the minimum activation an
# absent neighbor needs before it's worth pulling in.
SYNAPSE_SEED_K = 3
SYNAPSE_BOOST_WEIGHT = 0.3
SYNAPSE_PULL_IN_MAX = 2
SYNAPSE_PULL_IN_MIN_ENERGY = 0.15

def __init__(self, embedder, project_path: str = None, enable_reranking: bool = True):
"""
Initialize context selector.
Expand Down Expand Up @@ -119,6 +130,12 @@ def __init__(self, embedder, project_path: str = None, enable_reranking: bool =
self._reranker: SemanticReranker | None = None
self._context_modules: list[str] = []

# Optional seed-based synapse recall, injected by NeuralMind.build().
# Signature: (seed_node_ids: list[str]) -> list[tuple[node_id, energy]].
# Left None here so a selector built without a synapse store (or on a
# cold graph) behaves exactly as it did before this layer existed.
self.synapse_recall = None

# Cache for layer content
self._l0_cache: str | None = None
self._l1_cache: str | None = None
Expand Down Expand Up @@ -342,9 +359,20 @@ def get_l2_context(self, query: str, max_communities: int = 3) -> tuple[str, lis
if comm >= 0:
community_scores[comm] = community_scores.get(comm, 0) + score

# Pull communities the agent has historically co-activated with these
# hits into contention, even when this query's vector matches alone
# wouldn't have surfaced them. Reinforcement records community_<id>
# pseudo-nodes, so spreading activation can return them directly.
# Budget-neutral: a co-activated community can win a slot by
# outscoring a vector one, but it can't grow how many we load — the
# cap stays at what vector search alone would have surfaced.
vector_community_count = len(community_scores)
self._boost_communities_from_synapses(search_results, community_scores)
community_budget = min(max_communities, vector_community_count)

# Get top communities
top_communities = sorted(community_scores.items(), key=lambda x: x[1], reverse=True)[
:max_communities
:community_budget
]

if not top_communities:
Expand Down Expand Up @@ -375,6 +403,123 @@ def get_l2_context(self, query: str, max_communities: int = 3) -> tuple[str, lis
context = self._truncate_to_tokens("\n".join(parts), self.L2_MAX_TOKENS)
return context, loaded_communities

def _synapse_disabled(self) -> bool:
    """Report whether synapse-driven recall should be skipped.

    Recall is off when no ``synapse_recall`` callable has been injected, or
    when the ``NEURALMIND_SYNAPSE_INJECT`` environment variable is exactly
    the string ``"0"`` (the kill switch).
    """
    if not self.synapse_recall:
        return True
    kill_switch = os.environ.get("NEURALMIND_SYNAPSE_INJECT")
    return kill_switch == "0"

def _recall_energy(self, seeds: list[str]) -> dict[str, float]:
    """Run spreading activation from ``seeds`` and map node id to energy.

    Returns an empty dict when ``seeds`` is empty, or when calling the
    injected ``synapse_recall`` (or converting its result to a dict) raises.
    """
    activation: dict[str, float] = {}
    if seeds:
        try:
            activation = dict(self.synapse_recall(seeds))
        except Exception:
            # Best-effort layer: a failed recall means "no learned signal".
            activation = {}
    return activation

def _boost_communities_from_synapses(
    self, search_results: list[dict], community_scores: dict[int, float]
) -> None:
    """Add co-activated communities' energy into ``community_scores``.

    Seeds spreading activation from the ids of the first ``SYNAPSE_SEED_K``
    search results, then, for every recalled ``community_<id>`` pseudo-node,
    adds ``energy * SYNAPSE_BOOST_WEIGHT`` to that community's score.

    Mutates ``community_scores`` in place and returns ``None``. No-op when
    recall is disabled or recall yields nothing, so cold-start L2 selection
    is unchanged.

    Args:
        search_results: Vector-search hits; only entries with a truthy
            ``"id"`` are used as seeds.
        community_scores: Mapping of community id to accumulated score.
    """
    if self._synapse_disabled():
        return

    prefix = "community_"
    seeds = [r["id"] for r in search_results[: self.SYNAPSE_SEED_K] if r.get("id")]
    for node_id, energy in self._recall_energy(seeds).items():
        if not node_id.startswith(prefix):
            continue
        try:
            comm = int(node_id[len(prefix) :])
        except ValueError:
            # Suffix is not an integer, so this is not a community pseudo-node.
            continue
        if comm < 0:
            # The caller only scores communities with a non-negative id, so a
            # negative id must never be boosted into contention.
            continue
        community_scores[comm] = (
            community_scores.get(comm, 0.0) + energy * self.SYNAPSE_BOOST_WEIGHT
        )

def _apply_synapse_boost(self, results: list[dict]) -> list[dict]:
"""Re-rank L3 hits using learned synapse co-activation.

Budget-neutral: never grows the result count. Seeds spreading
activation from the top hits, then (a) boosts and reorders results
the graph activates and (b) swaps the weakest vector hits for
strongly co-activated neighbors vector search missed — surfacing
nodes the agent keeps using together without spending extra tokens.

No-op (returns ``results`` unchanged) when recall isn't wired, the
kill switch is set, or the graph is cold — so cold-start behavior is
byte-identical to a build without a synapse store.
"""
if self._synapse_disabled():
return results

# Seeds are the ids of the first SYNAPSE_SEED_K results; entries without a
# truthy id are skipped.
seeds = [r["id"] for r in results[: self.SYNAPSE_SEED_K] if r.get("id")]
energy = self._recall_energy(seeds)
if not energy:
return results

# Work on shallow copies: _fetch_search caches and reuses these dicts,
# so mutating score in place would compound across calls and corrupt
# the cached vector scores. Copies keep the boost idempotent.
results = [dict(r) for r in results]
seed_set = set(seeds)
present = {r.get("id") for r in results}

# (a) Boost results already present that the graph co-activates,
# then reorder by score. Token-neutral (same nodes).
boosted = False
for r in results:
nid = r.get("id")
# Seeds are never boosted; only non-seed results that received recalled
# energy have their score raised.
if nid in seed_set or nid not in energy:
continue
boost = self.SYNAPSE_BOOST_WEIGHT * energy[nid]
r["score"] = r.get("score", 0.0) + boost
r["_synapse_boost"] = boost
boosted = True
Comment on lines +472 to +479
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in eb22466. _apply_synapse_boost now operates on shallow copies of the result dicts, so it never mutates the objects _fetch_search caches — the boost is idempotent and the cached vector scores stay clean. Added a regression test (test_boost_does_not_mutate_cached_results) asserting a repeated call is identical and the cached dict keeps its original score.


Generated by Claude Code

# Re-sort only when at least one score changed, so an unboosted list keeps
# its incoming order.
if boosted:
results = sorted(results, key=lambda r: r.get("score", 0.0), reverse=True)

# (b) Swap the weakest vector hits for the strongest absent neighbors.
# Displacement keeps the result count fixed, so the token budget
# is unchanged — we trade the least-relevant hits, not add to them.
# Requires the embedder to support id lookup; if it doesn't (e.g. a
# backend without get_nodes_by_ids), degrade to boost-only.
get_nodes_by_ids = getattr(self.embedder, "get_nodes_by_ids", None)
if not callable(get_nodes_by_ids):
return results

# Pull-in candidates: recalled ids not already in the results, not a
# community_ pseudo-node, and at or above SYNAPSE_PULL_IN_MIN_ENERGY;
# strongest first, capped at SYNAPSE_PULL_IN_MAX.
candidates = sorted(
(
(nid, e)
for nid, e in energy.items()
if nid not in present
and not nid.startswith("community_")
and e >= self.SYNAPSE_PULL_IN_MIN_ENERGY
),
key=lambda x: x[1],
reverse=True,
)[: self.SYNAPSE_PULL_IN_MAX]
if not candidates:
return results

# Keep at least one vector hit; only displace as many as we can fetch.
Comment on lines +496 to +506
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already addressed in eb22466 (this review was generated against the earlier commit). The pull-in now does exactly your option (2): get_nodes_by_ids = getattr(self.embedder, "get_nodes_by_ids", None) and returns boost-only if it isn't callable, so a backend without id lookup degrades gracefully instead of raising. Covered by test_pull_in_degrades_without_id_lookup.


Generated by Claude Code

num_swap = min(len(candidates), max(0, len(results) - 1))
if num_swap <= 0:
return results
energy_by_id = dict(candidates[:num_swap])
fetched = get_nodes_by_ids(list(energy_by_id))
if not fetched:
return results

# Drop as many tail entries as nodes were actually fetched, so the count
# stays fixed even when fewer ids resolve than were requested.
kept = results[: len(results) - len(fetched)]
for node in fetched:
# A recalled node's score is set to its synapse boost alone; no vector
# score is read for it here.
boost = self.SYNAPSE_BOOST_WEIGHT * energy_by_id.get(node.get("id"), 0.0)
node["score"] = boost
node["_synapse_boost"] = boost
node["_synapse_recalled"] = True
return kept + fetched

def get_l3_search(self, query: str, n: int = 4) -> tuple[str, int]:
"""
Layer 3: Deep semantic search results.
Expand All @@ -394,18 +539,27 @@ def get_l3_search(self, query: str, n: int = 4) -> tuple[str, int]:
if reranker.enabled:
results = reranker.rerank(results, context_modules=self._context_modules)

# Fold in the live synapse graph: results the agent has historically
# co-activated with this query's top hits get a relevance nudge, so
# learned association — not just vector similarity — shapes ranking.
results = self._apply_synapse_boost(results)

parts = ["## Search Results", ""]

for i, result in enumerate(results, 1):
meta = result.get("metadata", {})
score = result.get("score", 0)
boost = result.get("_reranker_boost", 0.0)
synapse = result.get("_synapse_boost", 0.0)

# Show boost in label if applied
# Show boosts in label if applied
boost_label = f" (+{boost:.2f} boost)" if boost > 0 else ""
synapse_label = f" (+{synapse:.2f} synapse)" if synapse > 0 else ""
recalled_label = " [recalled]" if result.get("_synapse_recalled") else ""

parts.append(
f"{i}. **{meta.get('label', 'unknown')}** (score: {score:.2f}{boost_label})"
f"{i}. **{meta.get('label', 'unknown')}**{recalled_label} "
f"(score: {score:.2f}{boost_label}{synapse_label})"
)
parts.append(f" Type: {meta.get('file_type', 'unknown')}")
parts.append(f" File: {meta.get('source_file', 'unknown')}")
Expand Down
20 changes: 20 additions & 0 deletions neuralmind/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ def build(self, force: bool = False) -> dict:
self.selector = ContextSelector(
self.embedder, str(self.project_path), enable_reranking=self.enable_reranking
)
# Let L3 retrieval consult the live synapse graph (seed-based spread,
# no extra embedder round trip — the seeds are hits already fetched).
self.selector.synapse_recall = self._recall_for_selection

# Get final stats
final_stats = self.embedder.get_stats()
Expand Down Expand Up @@ -772,6 +775,23 @@ def graph_data(self, synapse_min_weight: float = 0.05, synapse_limit: int = 2000
},
}

def _recall_for_selection(
    self, seed_ids: list[str], depth: int = 2, top_k: int = 8
) -> list[tuple[str, float]]:
    """Spread activation from already-fetched node ids for the selector.

    Delegates to ``self.synapses.spread(seed_ids, depth=..., top_k=...)``
    and returns its result. Returns an empty list when there is no synapse
    store, when ``seed_ids`` is empty, or when ``spread`` raises.
    """
    neighbors: list[tuple[str, float]] = []
    synapse_store = self.synapses
    if synapse_store is not None and seed_ids:
        try:
            neighbors = synapse_store.spread(seed_ids, depth=depth, top_k=top_k)
        except Exception:
            # Best-effort: selection proceeds without learned neighbors.
            neighbors = []
    return neighbors

def synaptic_neighbors(
self, query: str, depth: int = 2, top_k: int = 10
) -> list[tuple[str, float]]:
Expand Down
27 changes: 27 additions & 0 deletions neuralmind/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,33 @@ def get_file_edges(self, source_file: str, node_ids: set[str] | None = None) ->
)
]

def get_nodes_by_ids(self, node_ids: list[str]) -> list[dict]:
    """Fetch indexed nodes by id, shaped like ``search`` results.

    Used to pull synapse-recalled neighbors into L3 even when vector
    search didn't surface them. ``score`` is omitted (callers supply their
    own relevance for appended nodes).

    Args:
        node_ids: Ids to look up in ``self.collection``.

    Returns:
        One ``{"id", "document", "metadata"}`` dict per id the collection
        returned. ``document`` is always a string and ``metadata`` always
        a dict, so consumers can call ``.get`` on the metadata safely.
        Returns ``[]`` when ``node_ids`` is empty or the lookup raises.
    """
    if not node_ids:
        return []
    try:
        fetched = self.collection.get(ids=list(node_ids), include=["documents", "metadatas"])
    except Exception:
        # Best-effort lookup: a failing backend degrades to "nothing recalled".
        return []

    ids = fetched.get("ids") or []
    docs = fetched.get("documents") or []
    metas = fetched.get("metadatas") or []

    nodes = []
    for i, node_id in enumerate(ids):
        document = docs[i] if i < len(docs) else None
        metadata = metas[i] if i < len(metas) else None
        nodes.append(
            {
                "id": node_id,
                # An individual entry may be None (e.g. a record stored
                # without metadata); normalise so callers never see None.
                "document": document or "",
                "metadata": metadata or {},
            }
        )
    return nodes

def get_community_summary(self, community_id: int, max_nodes: int = 20) -> dict:
"""
Get a summary of nodes in a community for context injection.
Expand Down
Loading
Loading