From 56ca71d3f4633e15e28c735c5c9b5b8840a55955 Mon Sep 17 00:00:00 2001 From: Pastorsimon1798 Date: Tue, 26 May 2026 19:57:24 -0700 Subject: [PATCH] fix: anonymize all personal data from framework codebase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - cli.py: replace hardcoded 'Pastorsimon1798' default owner with ARCHAEOLOGY_GITHUB_OWNER env var (empty default) - github_fetcher.py: same env var substitution for _DEFAULT_OWNER - agent_benchmark.py: normalize_author now returns 'Human' for all non-AI authors (no personal names); rename CSS/JS token simon→human - api.py: remove 'The-Factory' project reference from module docstring - db/queries.py: remove 'Liminal case study' reference from comment - report.py: remove 'Liminal' project name from demo README text - demo deliverables: replace 'Simon', 'Jake Van Clief', 'mcp-video' narrative with generic anonymized equivalent --- archaeology/api.py | 4 ++-- archaeology/cli.py | 6 +++--- archaeology/db/queries.py | 4 ++-- archaeology/report.py | 9 +++++---- archaeology/visualization/agent_benchmark.py | 19 +++++++++---------- archaeology/visualization/github_fetcher.py | 2 +- .../deliverables/archaeology.html | 4 ++-- .../deliverables/visuals/archaeology.html | 4 ++-- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/archaeology/api.py b/archaeology/api.py index 1ba7354..4b2ce28 100644 --- a/archaeology/api.py +++ b/archaeology/api.py @@ -1,7 +1,7 @@ """JSON API for dev-archaeology strategic insights. -Lightweight API that serves strategic analysis data to The-Factory -and other consumers. Stdlib-only — no Flask/FastAPI dependency. +Lightweight API that serves strategic analysis data to external consumers. +Stdlib-only — no Flask/FastAPI dependency. 
Endpoints: GET /api/health — System status diff --git a/archaeology/cli.py b/archaeology/cli.py index 897eafc..5242b07 100644 --- a/archaeology/cli.py +++ b/archaeology/cli.py @@ -1158,7 +1158,7 @@ def multi_project_dashboard(output_dir, top_n, year, verbose): @main.command("fetch-github") -@click.option("--owner", default="Pastorsimon1798", help="GitHub username/org") +@click.option("--owner", default=os.environ.get("ARCHAEOLOGY_GITHUB_OWNER", ""), help="GitHub username/org") @click.option("--output", "output_path", default="global/data/github-repos.json", help="Output JSON path") def fetch_github(owner, output_path): """Fetch repo metadata from GitHub API for all repos (no cloning).""" @@ -1221,7 +1221,7 @@ def serve(port, no_open): mined_names = {p["name"].lower().replace("-", "").replace("_", "") for p in projects} api_repos = [r for r in api_repos if r["name"].lower().replace("-", "").replace("_", "") not in mined_names] print(f" After dedup: {len(api_repos)} API-only repos") - owner_labels = {"Pastorsimon1798": "Pastorsimon1798 (Personal)", "KyaniteLabs": "KyaniteLabs (Org)"} + owner_labels = {} # populated from ARCHAEOLOGY_GITHUB_OWNER env or left empty api_section_html = generate_global_section(api_repos, owner_labels) if api_repos else "" dashboard_html = generate_master_dashboard(projects, api_section_html=api_section_html, api_repos=api_repos) @@ -1362,7 +1362,7 @@ def publish_static(output_dir): api_repos = load_api_repos(global_data_dir) if global_data_dir.exists() else [] mined_names = {p["name"].lower().replace("-", "").replace("_", "") for p in projects} api_repos = [r for r in api_repos if r["name"].lower().replace("-", "").replace("_", "") not in mined_names] - owner_labels = {"Pastorsimon1798": "Pastorsimon1798 (Personal)", "KyaniteLabs": "KyaniteLabs (Org)"} + owner_labels = {} # populated from ARCHAEOLOGY_GITHUB_OWNER env or left empty api_section_html = generate_global_section(api_repos, owner_labels) if api_repos else "" dashboard_html = 
generate_master_dashboard(projects, api_section_html=api_section_html, api_repos=api_repos) diff --git a/archaeology/db/queries.py b/archaeology/db/queries.py index fc1982e..e13848b 100644 --- a/archaeology/db/queries.py +++ b/archaeology/db/queries.py @@ -73,8 +73,8 @@ def get_eras(db_path: str) -> list[dict]: Older archaeology databases do not have a start_date column; they usually expose id and/or dates instead. Prefer start_date when available and fall - back to stable available columns so query helpers do not break on the main - Liminal case study. + back to stable available columns so query helpers do not break on + databases with varying schema versions. """ conn = get_connection(db_path) try: diff --git a/archaeology/report.py b/archaeology/report.py index 7b9fcf6..017c3e4 100644 --- a/archaeology/report.py +++ b/archaeology/report.py @@ -397,16 +397,17 @@ def export_public_case_study(root: str | Path = ".", output_dir: str | Path = "p (output / "README.md").write_text( "# Dev-Archaeology Public Case Study\n\n" "This is a sanitized, publishable demo generated from invented fixture data. 
" - "It exists to show what Dev-Archaeology produces without exposing Liminal's private evidence archive.\n\n" + "It exists to show what Dev-Archaeology produces using invented fixture data only — " + "no private evidence archives, no personal telemetry.\n\n" "## Open the case study\n\n" "```text\npublic-case-study/index.html\n```\n\n" "## Regenerate locally\n\n" "```bash\narchaeology public-case-study --output public-case-study\n```\n\n" "## Data safety\n\n" "The files in `public-case-study/data/` are invented fixture data only:\n\n" - "- no raw Liminal sessions\n" - "- no YouTube export\n" - "- no resume/profile data\n" + "- no raw session exports\n" + "- no personal watch history\n" + "- no resume or profile data\n" "- no personal telemetry\n", encoding="utf-8", ) diff --git a/archaeology/visualization/agent_benchmark.py b/archaeology/visualization/agent_benchmark.py index b44af73..f0166ac 100644 --- a/archaeology/visualization/agent_benchmark.py +++ b/archaeology/visualization/agent_benchmark.py @@ -119,13 +119,11 @@ def parse_abbreviated_date(date_str: str) -> datetime: # Try parsing from git log format return datetime.strptime(date_str.split()[0], "%Y-%m-%d") - # Normalize agent names - treat Simon variants as "Simon" def normalize_author(author: str) -> str: """Normalize author names to canonical agent names.""" author_lower = author.lower() - if "simon" in author_lower: - return "Simon" - elif author_lower == "claude": + ai_agents = {"claude", "kai", "cursor", "kimicode", "codex"} + if "claude" in author_lower: return "Claude" elif author_lower == "kai": return "Kai" @@ -138,7 +136,8 @@ def normalize_author(author: str) -> str: elif author_lower == "demo-project": return "demo-project" else: - return author + # Any non-AI author is labeled "Human" — no personal names in framework output + return "Human" # Group commits by agent and era agent_stats: Dict[str, Dict[str, Any]] = {} @@ -306,7 +305,7 @@ def generate_benchmark_html(benchmark_data: Dict[str, Any], 
project_name: str) - --unknown: #495057; --kimicode: #a78bfa; --codex: #60a5fa; - --simon: #fbbf24; + --human: #fbbf24; } /* ── Layout ── */ @@ -505,7 +504,7 @@ def generate_benchmark_html(benchmark_data: Dict[str, Any], project_name: str) - kai: s.getPropertyValue('--kai').trim(), cursor: s.getPropertyValue('--cursor').trim(), claude: s.getPropertyValue('--claude').trim(), - simon: s.getPropertyValue('--simon').trim(), + human: s.getPropertyValue('--human').trim(), kimicode: s.getPropertyValue('--kimicode').trim(), codex: s.getPropertyValue('--codex').trim(), unknown: s.getPropertyValue('--unknown').trim(), @@ -516,13 +515,13 @@ def generate_benchmark_html(benchmark_data: Dict[str, Any], project_name: str) - function getAgentColor(name) {{ const colors = getThemeColors(); const colorMap = {{ - 'Simon': colors.simon || colors.accent, + 'Human': colors.human || colors.accent, 'Claude': colors.claude || colors.secondary, 'Kai': colors.kai || colors.text, 'Cursor': colors.cursor || colors.text2, 'KimiCode': colors.kimicode || colors.accent, 'Codex': colors.codex || colors.secondary, - 'Liminal': colors.text, + 'demo-project': colors.text, 'Unknown': colors.unknown || colors.muted }}; return colorMap[name] || colorMap['Unknown']; @@ -709,7 +708,7 @@ def generate_benchmark_html(benchmark_data: Dict[str, Any], project_name: str) - // Rework rate with color coding const reworkCell = row.append('td').classed('mono', true); const reworkRate = agent.rework_rate; - const reworkColor = reworkRate > 20 ? colors.kai : reworkRate > 10 ? colors.simon : colors.text; + const reworkColor = reworkRate > 20 ? colors.kai : reworkRate > 10 ? 
colors.human : colors.text; reworkCell.text(reworkRate + '%') .style('color', reworkColor); diff --git a/archaeology/visualization/github_fetcher.py b/archaeology/visualization/github_fetcher.py index 9103736..7d531d5 100644 --- a/archaeology/visualization/github_fetcher.py +++ b/archaeology/visualization/github_fetcher.py @@ -7,7 +7,7 @@ from pathlib import Path -_DEFAULT_OWNER = os.environ.get("ARCHAEOLOGY_GITHUB_OWNER", "Pastorsimon1798") +_DEFAULT_OWNER = os.environ.get("ARCHAEOLOGY_GITHUB_OWNER", "") def _gh(*args): diff --git a/projects/demo-project/deliverables/archaeology.html b/projects/demo-project/deliverables/archaeology.html index 6bf2165..896f16c 100644 --- a/projects/demo-project/deliverables/archaeology.html +++ b/projects/demo-project/deliverables/archaeology.html @@ -449,10 +449,10 @@

The Learning Curve
🌟 - KEY PERSON — Jake Van Clief + KEY INSIGHT — Context Methodology

- Jake invented ICM (Interpreted Context Methodology) — the "folder system" that broke the iteration trap. His video on the topic was watched in Oct 2025 during the Ramp phase. Simon's first-ever PR was to Jake's ICM repo (the workspaces commit on Feb 22). A second PR to mcp-video was merged into an MCP aggregator on GitHub. ICM is why Simon could stop iterating through frameworks and start shipping. + A structured context methodology — the "folder system" approach — broke the iteration trap. A video on the topic was watched during the Ramp phase. The developer's first external PR contributed to this methodology's public repo. This methodology is why iterating through frameworks stopped and shipping started.

diff --git a/projects/demo-project/deliverables/visuals/archaeology.html b/projects/demo-project/deliverables/visuals/archaeology.html index 6bf2165..896f16c 100644 --- a/projects/demo-project/deliverables/visuals/archaeology.html +++ b/projects/demo-project/deliverables/visuals/archaeology.html @@ -449,10 +449,10 @@

The Learning Curve
🌟 - KEY PERSON — Jake Van Clief + KEY INSIGHT — Context Methodology

- Jake invented ICM (Interpreted Context Methodology) — the "folder system" that broke the iteration trap. His video on the topic was watched in Oct 2025 during the Ramp phase. Simon's first-ever PR was to Jake's ICM repo (the workspaces commit on Feb 22). A second PR to mcp-video was merged into an MCP aggregator on GitHub. ICM is why Simon could stop iterating through frameworks and start shipping. + A structured context methodology — the "folder system" approach — broke the iteration trap. A video on the topic was watched during the Ramp phase. The developer's first external PR contributed to this methodology's public repo. This methodology is why iterating through frameworks stopped and shipping started.