diff --git a/README.md b/README.md
index 816f5f3..9ed20c5 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,18 @@ cat error.log | grep "Timeout" | ctx task "Explain this failure cascade"
 
 ---
 
+### Context Archaeology
+
+CoreTex can scan a local vault/project for hidden leverage patterns and write an evidence-backed brief:
+
+```bash
+ctx archaeology ./notes --goal "Find the next useful artifact"
+```
+
+See [`docs/Context-Archaeology.md`](docs/Context-Archaeology.md).
+
+---
+
 ## 🚀 Installation (Zero-Debt & Frictionless)
 
 CoreTex features a unified, self-healing installation pipeline that automates all prerequisite matching, virtual environments, and secure container structures out of the box. Clone the repository and run the setup utility matching your host environment:
diff --git a/System/cli.py b/System/cli.py
index b12d89a..1f6e7c5 100644
--- a/System/cli.py
+++ b/System/cli.py
@@ -271,6 +271,13 @@ def compile(*args, **kwargs):
     return _compile(*args, **kwargs)
 
 
+def archaeology(*args, **kwargs):
+    """Forward to ``System.cli_cognitive.archaeology``, imported at call time."""
+    from System.cli_cognitive import archaeology as _archaeology
+
+    return _archaeology(*args, **kwargs)
+
+
 def absorb(*args, **kwargs):
     from System.cli_cognitive import absorb as _absorb
 
@@ -346,6 +353,29 @@ def run_compile():
     compile()
 
 
+@cognitive_app.command(name="archaeology")
+@app.command(name="archaeology")
+def run_archaeology(
+    path: Path = typer.Argument(
+        ...,
+        # Let Typer reject a missing path with a usage error, not a traceback.
+        exists=True,
+        help="Path to a local vault, project folder, or note file to inspect.",
+    ),
+    goal: str = typer.Option(
+        None, "--goal", "-g", help="Optional objective to bias the archaeology brief."
+    ),
+    output: Path = typer.Option(
+        Path("Professional/context-archaeology-brief.md"),
+        "--output",
+        "-o",
+        help="Markdown file where the generated brief should be written.",
+    ),
+):
+    """🗿 Scans local context for hidden leverage patterns and writes an actionable brief."""
+    archaeology(path=path, goal=goal, output=output)
+
+
 @cognitive_app.command(name="absorb")
 @app.command(name="absorb")
 def run_absorb(
diff --git a/System/cli_cognitive.py b/System/cli_cognitive.py
index 176660f..67a1828 100644
--- a/System/cli_cognitive.py
+++ b/System/cli_cognitive.py
@@ -184,6 +184,34 @@ def compile():
     )
 
 
+def archaeology(
+    path: Path = typer.Argument(
+        ...,
+        exists=True,
+        help="Path to a local vault, project folder, or note file to inspect.",
+    ),
+    goal: Optional[str] = typer.Option(
+        None, "--goal", "-g", help="Optional objective to bias the archaeology brief."
+    ),
+    output: Path = typer.Option(
+        Path("Professional/context-archaeology-brief.md"),
+        "--output",
+        "-o",
+        help="Markdown file where the generated brief should be written.",
+    ),
+):
+    """🗿 Scans local context for hidden leverage patterns and writes an actionable brief."""
+    from System.tools.context_archaeology import analyze_context, write_report
+
+    report = analyze_context(path, goal=goal)
+    written = write_report(report, output)
+    # Echo the same Markdown that was just written to disk.
+    console.print(report.to_markdown())
+    console.print(
+        f"[bold green]✅ Context archaeology brief written to {written}[/bold green]"
+    )
+
+
 def absorb(
     path: Path = typer.Argument(
         ..., help="Path to the folder, codebase, or file to absorb into memory."
diff --git a/System/tests/tools/test_context_archaeology.py b/System/tests/tools/test_context_archaeology.py
new file mode 100644
index 0000000..13d206b
--- /dev/null
+++ b/System/tests/tools/test_context_archaeology.py
@@ -0,0 +1,61 @@
+from System.tools.context_archaeology import analyze_context, write_report
+
+
+def test_context_archaeology_finds_hidden_leverage(tmp_path):
+    vault = tmp_path / "vault"
+    vault.mkdir()
+    (vault / "launch.md").write_text(
+        "Show HN launch needs a public demo artifact and reviewer trust.",
+        encoding="utf-8",
+    )
+    (vault / "meeting.md").write_text(
+        "Client meeting decisions need owner action follow-up and a brief.",
+        encoding="utf-8",
+    )
+
+    report = analyze_context(vault, goal="Show HN")
+
+    assert report.files_scanned == 2
+    assert report.top_themes
+    assert "artifact" in report.to_markdown()
+    assert "Show HN" in report.to_markdown()
+
+
+def test_context_archaeology_writes_markdown_report(tmp_path):
+    vault = tmp_path / "vault"
+    vault.mkdir()
+    (vault / "wedge.md").write_text(
+        "A repeatable leverage opportunity should become a playbook artifact.",
+        encoding="utf-8",
+    )
+    output = tmp_path / "brief.md"
+
+    written = write_report(analyze_context(vault), output)
+
+    assert written == output
+    assert "CoreTex Context Archaeology Brief" in output.read_text(encoding="utf-8")
+
+
+def test_context_archaeology_ignores_only_parts_below_source(tmp_path):
+    vault = tmp_path / "node_modules" / "vault"
+    (vault / ".git").mkdir(parents=True)
+    (vault / "plan.md").write_text("Launch the public demo.", encoding="utf-8")
+    (vault / ".git" / "notes.md").write_text("launch launch", encoding="utf-8")
+
+    report = analyze_context(vault)
+
+    assert report.files_scanned == 1
+    assert report.evidence["launch"][0].path == "plan.md"
+
+
+def test_context_archaeology_skips_previous_brief(tmp_path):
+    vault = tmp_path / "vault"
+    vault.mkdir()
+    (vault / "wedge.md").write_text("A repeatable wedge.", encoding="utf-8")
+
+    first = analyze_context(vault)
+    write_report(first, vault / "brief.md")
+    second = analyze_context(vault)
+
+    assert second.files_scanned == first.files_scanned
+    assert second.top_themes == first.top_themes
diff --git a/System/tools/context_archaeology.py b/System/tools/context_archaeology.py
new file mode 100644
index 0000000..2fc55f2
--- /dev/null
+++ b/System/tools/context_archaeology.py
@@ -0,0 +1,220 @@
+"""Local context archaeology for messy project/vault folders.
+
+This module is intentionally deterministic and provider-free. It gives CoreTex a
+native command that can inspect scattered notes, find recurring leverage themes,
+and write a concrete next-move brief without shelling out to grep/cat/sed.
+"""
+
+from __future__ import annotations
+
+from collections import Counter, defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
+
+TEXT_SUFFIXES = {".md", ".txt", ".rst", ".log"}
+IGNORE_PARTS = {".git", ".venv", "node_modules", "__pycache__", ".obsidian"}
+
+# Heading that opens every generated brief. Files starting with it are skipped
+# during a scan so a rerun never counts its own output as evidence.
+REPORT_HEADING = "# CoreTex Context Archaeology Brief"
+
+THEME_LEXICON: dict[str, set[str]] = {
+    "launch": {"launch", "show hn", "public", "demo", "reviewer", "audience"},
+    "trust": {"setup", "diagnostic", "quiet", "safe", "risk", "leak", "sandbox"},
+    "follow_through": {"follow-up", "action", "owner", "decision", "client", "meeting"},
+    "artifact": {"artifact", "brief", "summary", "checklist", "note", "playbook"},
+    "leverage": {"leverage", "opportunity", "wedge", "arena", "reusable", "repeatable"},
+}
+
+
+@dataclass(frozen=True)
+class EvidenceHit:
+    """Lexicon terms of one theme that were found in one scanned file."""
+
+    path: str
+    terms: tuple[str, ...]
+
+
+@dataclass(frozen=True)
+class ArchaeologyReport:
+    """Outcome of one scan, renderable as a Markdown brief."""
+
+    source: Path
+    files_scanned: int
+    bytes_scanned: int
+    top_themes: list[tuple[str, int]]
+    evidence: dict[str, list[EvidenceHit]]
+    synthesis: str
+    next_moves: list[str]
+
+    def to_markdown(self) -> str:
+        """Render the report as a Markdown document."""
+        signal_lines = [
+            f"- **{theme.replace('_', ' ')}** — score {score}"
+            for theme, score in self.top_themes
+        ]
+        evidence_lines: list[str] = []
+        for theme, _score in self.top_themes:
+            hits = self.evidence.get(theme, [])
+            if not hits:
+                continue
+            # Show at most four files per theme.
+            rendered = "; ".join(
+                f"`{hit.path}` ({', '.join(hit.terms)})" for hit in hits[:4]
+            )
+            evidence_lines.append(f"- **{theme.replace('_', ' ')}**: {rendered}")
+
+        return (
+            f"{REPORT_HEADING}\n\n"
+            f"Source: `{self.source}`\n\n"
+            f"Scanned {self.files_scanned} files / {self.bytes_scanned} bytes.\n\n"
+            "## Hidden pattern\n\n"
+            f"{self.synthesis}\n\n"
+            "## Strongest signals\n\n"
+            # Placeholders keep the sections non-empty when nothing matched.
+            + ("\n".join(signal_lines) or "_No signals detected._")
+            + "\n\n## Evidence trail\n\n"
+            + ("\n".join(evidence_lines) or "_No evidence collected._")
+            + "\n\n## Next moves\n\n"
+            + "\n".join(f"- {move}" for move in self.next_moves)
+            + "\n"
+        )
+
+
+def iter_text_files(source: Path) -> Iterable[Path]:
+    """Yield scannable text files for ``source`` in sorted order.
+
+    A file ``source`` is yielded only when its suffix is in ``TEXT_SUFFIXES``.
+    A directory is walked recursively, skipping anything inside a directory
+    named in ``IGNORE_PARTS``.
+    """
+    if source.is_file():
+        if source.suffix.lower() in TEXT_SUFFIXES:
+            yield source
+        return
+
+    for path in sorted(source.rglob("*")):
+        if not path.is_file():
+            continue
+        # Match ignore names only against components below the scan root; a
+        # vault that itself lives under e.g. ``node_modules`` must still scan.
+        if any(part in IGNORE_PARTS for part in path.relative_to(source).parts):
+            continue
+        if path.suffix.lower() in TEXT_SUFFIXES:
+            yield path
+
+
+def _relative(path: Path, source: Path) -> str:
+    """Return ``path`` as a POSIX string relative to the scan root.
+
+    The root is ``source`` when it is a directory, otherwise its parent. Paths
+    outside the root are returned unchanged (still POSIX-formatted).
+    """
+    base = source if source.is_dir() else source.parent
+    try:
+        return path.relative_to(base).as_posix()
+    except ValueError:
+        return path.as_posix()
+
+
+def analyze_context(source: Path, goal: str | None = None) -> ArchaeologyReport:
+    """Scan ``source`` and score its text files against ``THEME_LEXICON``.
+
+    Args:
+        source: File or directory to inspect; ``~`` is expanded and the path
+            is resolved before scanning.
+        goal: Optional objective echoed in the synthesis and next moves.
+
+    Returns:
+        The populated report. Whitespace-only files and previously generated
+        briefs are not counted.
+
+    Raises:
+        FileNotFoundError: If ``source`` does not exist.
+    """
+    source = source.expanduser().resolve()
+    if not source.exists():
+        raise FileNotFoundError(source)
+
+    scores: Counter[str] = Counter()
+    evidence: dict[str, list[EvidenceHit]] = defaultdict(list)
+    files_scanned = 0
+    bytes_scanned = 0
+
+    for path in iter_text_files(source):
+        # Read the bytes once so the size is what is on disk, not a re-encoding
+        # of the lossily decoded text.
+        raw = path.read_bytes()
+        text = raw.decode("utf-8", errors="replace")
+        stripped = text.strip()
+        if not stripped:
+            continue
+        if stripped.startswith(REPORT_HEADING):
+            # An earlier brief inside the tree would feed its own wording
+            # ("artifact", "summary", ...) back into the scores.
+            continue
+        files_scanned += 1
+        bytes_scanned += len(raw)
+        normalized = text.lower()
+        for theme, terms in THEME_LEXICON.items():
+            # Plain substring match, so "decision" also matches "decisions".
+            hits = tuple(sorted(term for term in terms if term in normalized))
+            if hits:
+                scores[theme] += len(hits)
+                evidence[theme].append(EvidenceHit(_relative(path, source), hits))
+
+    top_themes = scores.most_common(5)
+    if not top_themes:
+        synthesis = "No strong recurring pattern surfaced from the scanned text yet."
+        next_moves = [
+            "Add more notes or broaden the source folder, then rerun archaeology."
+        ]
+    else:
+        labels = [theme.replace("_", " ") for theme, _score in top_themes[:3]]
+        goal_clause = f" for `{goal}`" if goal else ""
+        synthesis = (
+            f"The strongest hidden pattern{goal_clause} is the overlap between "
+            f"{', '.join(labels)}. CoreTex should turn that overlap into a concrete "
+            "artifact instead of stopping at summary."
+        )
+        next_moves = _recommend_moves([theme for theme, _score in top_themes], goal)
+
+    return ArchaeologyReport(
+        source=source,
+        files_scanned=files_scanned,
+        bytes_scanned=bytes_scanned,
+        top_themes=top_themes,
+        evidence=dict(evidence),
+        synthesis=synthesis,
+        next_moves=next_moves,
+    )
+
+
+def _recommend_moves(themes: list[str], goal: str | None) -> list[str]:
+    """Map the detected themes (and optional goal) to suggested next moves."""
+    moves: list[str] = []
+    if "follow_through" in themes and "artifact" in themes:
+        moves.append("Produce the owner/action/decision brief hiding in the notes.")
+    if "launch" in themes and "trust" in themes:
+        moves.append(
+            "Lead with trust-before-magic: show the evidence trail before the synthesis."
+        )
+    if "leverage" in themes:
+        moves.append("Name the repeatable wedge and turn it into the next build bet.")
+    if goal:
+        moves.append(f"Package the result as a directly usable artifact for: {goal}.")
+    moves.append(
+        "Rerun after new notes land; this should become a living context radar."
+    )
+    return moves
+
+
+def write_report(report: ArchaeologyReport, output: Path) -> Path:
+    """Write the Markdown brief to ``output``, creating parent folders.
+
+    Returns ``output`` unchanged.
+    """
+    output.parent.mkdir(parents=True, exist_ok=True)
+    output.write_text(report.to_markdown(), encoding="utf-8")
+    return output
diff --git a/docs/Context-Archaeology.md b/docs/Context-Archaeology.md
new file mode 100644
index 0000000..c3a756a
--- /dev/null
+++ b/docs/Context-Archaeology.md
@@ -0,0 +1,36 @@
+# Context Archaeology
+
+`ctx archaeology` scans a local project, vault, or note folder for hidden leverage
+patterns and writes a brief with an evidence trail and next moves.
+
+This is a provider-free CoreTex reflex: it does not call an LLM, shell out to
+`grep`, or require external services. It is meant to be the deterministic first
+step before a heavier `task` or `daydream` loop.
+ +## Usage + +```bash +ctx archaeology ./path/to/vault --goal "Launch planning" +``` + +Optional output path: + +```bash +ctx archaeology ./path/to/vault \ + --goal "Find the next commercial wedge" \ + --output Professional/context-archaeology-brief.md +``` + +## What it produces + +- files/bytes scanned, +- strongest recurring themes, +- evidence by source file, +- a short hidden-pattern synthesis, +- concrete next moves. + +## Why it exists + +A lot of useful work hides between notes rather than inside any single file. +Context Archaeology gives CoreTex a native way to surface those overlaps before +asking an LLM to reason over them. diff --git a/examples/context-archaeology-vault/client-meeting.md b/examples/context-archaeology-vault/client-meeting.md new file mode 100644 index 0000000..b339185 --- /dev/null +++ b/examples/context-archaeology-vault/client-meeting.md @@ -0,0 +1,2 @@ +# Client meeting +The meeting produced decisions, owners, and follow-up actions, but they are scattered. A client note would be valuable. diff --git a/examples/context-archaeology-vault/launch.md b/examples/context-archaeology-vault/launch.md new file mode 100644 index 0000000..31d35f9 --- /dev/null +++ b/examples/context-archaeology-vault/launch.md @@ -0,0 +1,2 @@ +# Launch +Show HN reviewers need a concrete demo artifact quickly. The public launch should show evidence, not magic. diff --git a/examples/context-archaeology-vault/product-wedge.md b/examples/context-archaeology-vault/product-wedge.md new file mode 100644 index 0000000..b16eb7c --- /dev/null +++ b/examples/context-archaeology-vault/product-wedge.md @@ -0,0 +1,2 @@ +# Product wedge +The opportunity is repeatable leverage: turn messy local context into a reusable artifact or playbook.