From b038a2b0b85277f1d795491bd8b512dc57a48769 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Thu, 21 May 2026 17:41:16 -0700 Subject: [PATCH] feat(tests): cross-harness portability lint Adds tests/lint-cross-harness.sh, a POSIX shell + awk static lint that scans skills/**/*.md for four classes of runtime-specific tokens that would weaken the cross-harness purity invariant established by PR #1486: 1. bare-harness-name: Claude Code, Cursor, OpenCode, Codex CLI/App, Gemini CLI, GitHub Copilot CLI, Copilot CLI, Factory Droid in generic prose 2. model-id: claude-(opus|sonnet|haiku)-N-M, gpt-N.M, gemini-N.M-tier, oN-mini 3. runtime-tool: ExitPlanMode, TodoWrite, WebFetch, Task tool, Skill tool, mcp__server__tool 4. hardcoded-path: /Users/<name>/ macOS personal paths A line is allowed under any of these rules (first match wins): - Two or more distinct harness families named on the same line (Claude, Codex, Cursor, OpenCode, Gemini, Copilot, Factory Droid, Aider, Cline, Windsurf, Hermes, Hyperagent, Antigravity, Kiro, Qwen, Kimi). Casual cross-runtime prose like '~/.claude/skills for Claude Code, ~/.agents/skills/ for Codex' passes without annotation. - A section heading whose text matches In <runtime> / For <runtime> / <runtime>:. - An inline bold-prose marker at line start: **In <runtime>:**, etc. - A skills/*/references/<runtime>-tools.md file (bare-harness + runtime-tool only; model IDs and hardcoded paths always flag). - The internal exception list at tests/lint-cross-harness.exceptions (path:line:reason). Used for intentional single-agent references that the two-agents rule does not naturally cover (graphviz workflow diagrams, subagent dispatch prompt headers, source-attributed Anthropic excerpts). No skill content is modified. The two-agents rule and sidecar exception list together let the lint maintain its own allowlist instead of polluting skill files with inline annotations, which would add tokens to every loaded skill context at runtime. 
CI workflow .github/workflows/lint.yml runs on PR and push to main/dev, blocks merge on any violation. docs/cross-harness-lint.md documents the rules, allowlist mechanism, and sidecar format. --- .github/workflows/lint.yml | 15 ++ docs/cross-harness-lint.md | 49 ++++++ tests/lint-cross-harness.exceptions | 74 +++++++++ tests/lint-cross-harness.sh | 233 ++++++++++++++++++++++++++++ 4 files changed, 371 insertions(+) create mode 100644 .github/workflows/lint.yml create mode 100644 docs/cross-harness-lint.md create mode 100644 tests/lint-cross-harness.exceptions create mode 100755 tests/lint-cross-harness.sh diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000..fe80b41d83 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,15 @@ +name: lint + +on: + pull_request: + branches: [main, dev] + push: + branches: [main, dev] + +jobs: + cross-harness-lint: + name: cross-harness-lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: bash tests/lint-cross-harness.sh diff --git a/docs/cross-harness-lint.md b/docs/cross-harness-lint.md new file mode 100644 index 0000000000..1ae65a53d4 --- /dev/null +++ b/docs/cross-harness-lint.md @@ -0,0 +1,49 @@ +# Cross-Harness Lint + +`tests/lint-cross-harness.sh` checks `skills/**/*.md` for runtime-specific language that would weaken the cross-harness purity invariant introduced by PR #1486. It is a static Markdown lint only: it does not change skill content, rewrite files, or validate runtime behavior. + +## Banned Patterns + +- Bare harness names in generic prose: `Claude Code`, `Cursor`, `OpenCode`, `Codex CLI`, `Codex App`, `Gemini CLI`, `GitHub Copilot CLI`, and `Factory Droid` are violations outside runtime-specific sections. Example violation: `Use Claude Code for this step.` Allowed section example: `## In Claude Code`. 
+- Model identifiers: concrete model names such as `claude-opus-4-7`, `gpt-5.5`, `gemini-2.5-pro`, and `o4-mini` are always violations unless covered by a local allowlist. +- Runtime-specific tool names: names such as `ExitPlanMode`, `TodoWrite`, `WebFetch`, `Task tool`, `Skill tool`, and `mcp__server__tool` are violations outside runtime-specific sections or `references/<runtime>-tools.md`. Allowed section example: `### For Cursor`. +- Hardcoded user paths: macOS personal home paths such as `/Users/jesse/` are always violations unless covered by a local allowlist. + +Lines are allowed under any of these rules. The lint stops at the first rule that applies; tokens are not re-checked after a line passes one rule. + +- **Two-or-more agents on the same line.** Any line that names two or more distinct harness families (Claude, Codex, Cursor, OpenCode, Gemini, Copilot, Factory Droid, Aider, Cline, Windsurf, Hermes, Hyperagent, Antigravity, Kiro, Qwen, Kimi) is intentional cross-runtime prose. Example that passes: `~/.claude/skills for Claude Code, ~/.agents/skills/ for Codex`. +- **A per-runtime section header.** A Markdown heading whose text matches `In <runtime>`, `For <runtime>`, or `<runtime>:` opens a section that continues until the next heading of equal or lesser depth. Tokens inside the section do not flag. +- **An inline bold-prose runtime marker** at the start of a line: `**In <runtime>:** ...`, `**For <runtime>:** ...`, or `**<runtime>:** ...`. The marker covers that line only. This is the pattern used in `skills/using-superpowers/SKILL.md`. +- **A `skills/*/references/<runtime>-tools.md` file.** Bare-harness and runtime-tool checks are suppressed for these files. Model identifiers and hardcoded `/Users/<name>/` paths are still flagged regardless of file or section context. +- **The internal exception list** at `tests/lint-cross-harness.exceptions`. One `path:line[:reason]` per line, `#` for comment lines. 
Use this when a single-agent reference is intentional but the two-agents rule does not naturally apply (graphviz nodes, dispatch prompt headers, source-attributed excerpts). + +## Internal Exception List + +`tests/lint-cross-harness.exceptions` carries the lint's own list of allowed single-agent references so skill content stays free of inline annotations. Format: + +``` +# Comments start with hash +skills/path/to/file.md:LINE:reason + +skills/another/file.md:42:reason describing why this single-agent reference is intentional +``` + +The reason field is free-form text after the second colon and is for humans reading the file. The lint does not validate or enforce it. + +## Running Locally + +Run the lint from the repository root: + +```sh +bash tests/lint-cross-harness.sh +``` + +Exit code `0` means the lint passed, `1` means violations or invalid allowlist comments were found, and `2` means the lint could not run because of an internal or usage error. + +## Updating the Lint + +Bare harness names and runtime tool names are intentionally enumerated in the script. Adding a new harness or tool means adding it to those lists so future regressions are detected. Cross-runtime documentation is handled by the two-agents-on-a-line rule and the per-runtime section markers; the internal exception list at `tests/lint-cross-harness.exceptions` covers single-agent references that are intentional. + +## CI Integration + +`.github/workflows/lint.yml` runs the lint on every PR and push to `main`/`dev`. The lint exits non-zero on any violation; CI blocks merge. Skill content is not annotated; cross-runtime intent is recognised by the rules above and by the sidecar exception list. 
diff --git a/tests/lint-cross-harness.exceptions b/tests/lint-cross-harness.exceptions new file mode 100644 index 0000000000..bb8b8e6317 --- /dev/null +++ b/tests/lint-cross-harness.exceptions @@ -0,0 +1,74 @@ +# tests/lint-cross-harness.exceptions +# +# Internal exception list for tests/lint-cross-harness.sh. +# +# Format: path:line[:reason] +# - path is the file path relative to the repo root +# - line is the line number where the lint flags a violation +# - reason is optional free-form text (everything after the second colon) +# +# Add an entry here when a single-agent reference is intentional and the +# 2-agents-on-a-line rule does not naturally apply. Reason fields are for +# humans reading the file; the lint does not enforce them. + +# Frontmatter discovery description: Skill tool naming is load-bearing for +# Claude Code's skill discovery; other harnesses map via using-superpowers. +skills/using-superpowers/SKILL.md:3:frontmatter description references Skill tool by name for discovery + +# using-superpowers and subagent-driven-development workflow diagrams are +# graphviz nodes that depict the Claude Code subagent dispatch flow. The +# graphviz format does not host multi-runtime annotations naturally; other +# harnesses translate via references/<runtime>-tools.md. 
+skills/using-superpowers/SKILL.md:55:graphviz node label in Claude Code workflow diagram +skills/using-superpowers/SKILL.md:58:graphviz node label in Claude Code workflow diagram +skills/using-superpowers/SKILL.md:68:graphviz node label in Claude Code workflow diagram +skills/using-superpowers/SKILL.md:70:graphviz node label in Claude Code workflow diagram +skills/using-superpowers/SKILL.md:72:graphviz node label in Claude Code workflow diagram +skills/using-superpowers/SKILL.md:74:graphviz node label in Claude Code workflow diagram +skills/subagent-driven-development/SKILL.md:60:graphviz node label in subagent flow diagram +skills/subagent-driven-development/SKILL.md:63:graphviz node label in subagent flow diagram +skills/subagent-driven-development/SKILL.md:68:graphviz node label in subagent flow diagram +skills/subagent-driven-development/SKILL.md:81:graphviz node label in subagent flow diagram +skills/subagent-driven-development/SKILL.md:82:graphviz node label in subagent flow diagram +skills/subagent-driven-development/SKILL.md:135:flowchart task-creation reference + +# Subagent dispatch prompt headers. The "Task tool (general-purpose):" line +# is the literal header agents render when dispatching a subagent on Claude +# Code. Non-CC platforms wire this through references/. 
+skills/brainstorming/spec-document-reviewer-prompt.md:10:subagent dispatch prompt header +skills/writing-plans/plan-document-reviewer-prompt.md:10:subagent dispatch prompt header +skills/requesting-code-review/code-reviewer.md:8:subagent dispatch prompt header +skills/requesting-code-review/SKILL.md:34:references the Task tool dispatch by name +skills/subagent-driven-development/spec-reviewer-prompt.md:8:subagent dispatch prompt header +skills/subagent-driven-development/implementer-prompt.md:6:subagent dispatch prompt header +skills/subagent-driven-development/code-quality-reviewer-prompt.md:10:subagent dispatch prompt header + +# executing-plans workflow uses TodoWrite as the canonical task ledger. +skills/executing-plans/SKILL.md:22:workflow step references TodoWrite as the canonical ledger + +# brainstorming visual companion ships a setup section per OS, scoped to +# Claude Code at this time. Other harnesses can land in adjacent sections +# when their visual companion equivalent ships. +skills/brainstorming/visual-companion.md:52:setup header for Claude Code on macOS/Linux +skills/brainstorming/visual-companion.md:58:setup header for Claude Code on Windows + +# dispatching-parallel-agents has a TypeScript example showing how Task() is +# dispatched from Claude Code's environment. The code comment is part of the +# example's framing. +skills/dispatching-parallel-agents/SKILL.md:69:TypeScript example block depicting Claude Code Task() dispatch + +# anthropic-best-practices.md is a quoted excerpt from Anthropic's +# documentation. The text is reproduced verbatim and is source-attributed. +skills/writing-skills/anthropic-best-practices.md:1143:source-attributed Anthropic documentation excerpt +skills/writing-skills/anthropic-best-practices.md:1144:source-attributed Anthropic documentation excerpt + +# persuasion-principles.md uses TodoWrite in example prose to illustrate +# persuasion patterns. 
The examples are intentional and translate naturally +# to other harnesses with equivalent todo mechanisms. +skills/writing-skills/persuasion-principles.md:36:example illustrating persuasion via TodoWrite +skills/writing-skills/persuasion-principles.md:83:positive-example illustrating TodoWrite usage +skills/writing-skills/persuasion-principles.md:84:negative-example illustrating TodoWrite usage + +# writing-skills creation checklist uses TodoWrite as the canonical todo +# mechanism for the skill-creation flow on Claude Code. +skills/writing-skills/SKILL.md:598:writing-skills checklist references TodoWrite as the ledger diff --git a/tests/lint-cross-harness.sh b/tests/lint-cross-harness.sh new file mode 100755 index 0000000000..8ab88f8974 --- /dev/null +++ b/tests/lint-cross-harness.sh @@ -0,0 +1,233 @@ +#!/bin/sh + +set -u + +if [ "$#" -ne 0 ]; then + echo "usage: tests/lint-cross-harness.sh" >&2 + exit 2 +fi + +if [ ! -d skills ]; then + echo "lint-cross-harness: missing skills/ directory" >&2 + exit 2 +fi + +if ! find skills -name '*.md' -print | grep -E -q .; then + echo "Lint complete: 0 violations." + exit 0 +fi + +EXCEPTIONS_FILE="$(dirname "$0")/lint-cross-harness.exceptions" + +find skills -name '*.md' -exec awk -v exceptions_file="$EXCEPTIONS_FILE" ' + BEGIN { + split("Claude Code\nCursor\nOpenCode\nCodex CLI\nCodex App\nGemini CLI\nGitHub Copilot CLI\nFactory Droid", harness, "\n") + split("ExitPlanMode\nTodoWrite\nWebFetch\nTask tool\nSkill tool", tool, "\n") + section_re = "claude code|cursor|opencode|codex|codex cli|codex app|gemini cli|github copilot cli|github copilot|copilot cli|copilot|factory droid" + # Load internal exceptions from sidecar file. Format: path:line[:reason] + # Comment lines start with #. Empty lines ignored. Loaded once at startup. 
+ if (exceptions_file != "" && (getline _line < exceptions_file) > 0) { + do { + if (_line !~ /^[ \t]*#/ && _line !~ /^[ \t]*$/) { + _colon1 = index(_line, ":") + if (_colon1 > 0) { + _path = substr(_line, 1, _colon1 - 1) + _rest = substr(_line, _colon1 + 1) + _colon2 = index(_rest, ":") + _line_no = (_colon2 > 0) ? substr(_rest, 1, _colon2 - 1) + 0 : _rest + 0 + exception_set[_path ":" _line_no] = 1 + } + } + } while ((getline _line < exceptions_file) > 0) + close(exceptions_file) + } + } + + function trim(s) { sub(/^[ \t\r\n]+/, "", s); sub(/[ \t\r\n]+$/, "", s); return s } + function hit_file() { if (!file_hit[file]) { file_hit[file] = 1; hit_files++ } } + function report(class, line_no, token) { + if (allowed(token, line_no)) return + printf "VIOLATION [%s] %s:%d\n %s\n", class, file, line_no, trim(line[line_no]) + violations++; hit_file() + } + function allow_error(line_no) { + printf "ALLOWLIST ERROR %s:%d: invalid allowlist syntax or missing non-empty reason=\n", file, line_no + violations++; hit_file() + } + + function reset_file( i) { + for (i in line) delete line[i] + for (i in blank) delete blank[i] + for (i in runtime) delete runtime[i] + for (i in allow_line) delete allow_line[i] + for (i in allow_token) delete allow_token[i] + file = FILENAME + ref_tools = (file ~ /\/references\/[A-Za-z0-9_-]+-tools\.md$/) + allow_count = runtime_depth = line_count = 0 + } + + function finish_file( i) { + if (file == "") return + for (i = 1; i <= line_count; i++) scan_line(i) + } + + function heading_depth(s, h) { h = s; sub(/[ \t].*$/, "", h); return length(h) } + function heading_text(s, t) { t = s; sub(/^#+[ \t]*/, "", t); sub(/[ \t]*#+[ \t]*$/, "", t); return trim(t) } + function runtime_heading(t, lower) { + lower = tolower(t) + return lower ~ ("^(in|for)[ \t]+(" section_re ")([ \t:,-]|$)") || lower ~ ("^(" section_re "):") + } + + # Inline per-runtime marker: a line that begins with **In :**, + # **For :**, or **:** (bold prose). 
The whole line is + # treated as runtime-specific. Used heavily in using-superpowers SKILL.md. + function runtime_inline(text, lower) { + if (text !~ /^[ \t]*\*\*/) return 0 + lower = tolower(text) + return lower ~ ("^[ \t]*\\*\\*(in|for)[ \t]+(" section_re ")([ \t:,-]|\\*\\*)") || \ + lower ~ ("^[ \t]*\\*\\*(" section_re ")[ \t]*:\\*\\*") + } + + function read_allow(n, text, token) { + if (text !~ /^[ \t]*[ \t]*$/) { + allow_error(n); return + } + token = text + sub(/^.*allow[ \t]+"/, "", token) + sub(/"[ \t]+reason=.*$/, "", token) + allow_count++ + allow_line[allow_count] = n + allow_token[allow_count] = token + } + + function allow_end(i, n) { + for (n = allow_line[i] + 1; n <= line_count; n++) if (blank[n]) return n - 1 + return line_count + } + + function allowed(token, n, i) { + for (i = 1; i <= allow_count; i++) + if (allow_token[i] == token && n > allow_line[i] && n <= allow_end(i)) return 1 + return 0 + } + + function scan_re(class, n, regex, token_regex, text, start, len, raw, token) { + text = line[n] + while (match(text, regex)) { + start = RSTART + len = RLENGTH + raw = substr(text, start, len) + token = raw + if (match(raw, token_regex)) token = substr(raw, RSTART, RLENGTH) + report(class, n, token) + text = substr(text, start + len) + } + } + + function multi_agent_line(text, i, count, seen) { + # Allow lines that mention two or more distinct harness families. Casual + # cross-runtime references like "~/.claude/skills for Claude Code, + # ~/.agents/skills/ for Codex" are intentional and should not flag. + # Detection uses a broader family list than the strict bare-harness ban + # list so suffix-less mentions ("Codex", "Gemini") still count toward + # the 2-agents rule. + count = 0 + delete seen + # Detect by family. Each family is matched by a regex covering the + # canonical name and common suffix variants. Word boundaries via + # whitespace/punctuation keep "Codex" from matching "Codexual" etc. 
+ if (text ~ /(^|[^A-Za-z])Claude([^A-Za-z]|$)/) { if (!("claude" in seen)) { seen["claude"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Codex([^A-Za-z]|$)/) { if (!("codex" in seen)) { seen["codex"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Cursor([^A-Za-z]|$)/) { if (!("cursor" in seen)) { seen["cursor"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])OpenCode([^A-Za-z]|$)/) { if (!("opencode" in seen)) { seen["opencode"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Gemini([^A-Za-z]|$)/) { if (!("gemini" in seen)) { seen["gemini"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Copilot([^A-Za-z]|$)/) { if (!("copilot" in seen)) { seen["copilot"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Factory Droid([^A-Za-z]|$)/) { if (!("droid" in seen)) { seen["droid"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Aider([^A-Za-z]|$)/) { if (!("aider" in seen)) { seen["aider"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Cline([^A-Za-z]|$)/) { if (!("cline" in seen)) { seen["cline"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Windsurf([^A-Za-z]|$)/) { if (!("windsurf" in seen)) { seen["windsurf"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Hermes([^A-Za-z]|$)/) { if (!("hermes" in seen)) { seen["hermes"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Hyperagent([^A-Za-z]|$)/) { if (!("hyper" in seen)) { seen["hyper"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Antigravity([^A-Za-z]|$)/) { if (!("antigrav" in seen)) { seen["antigrav"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Kiro([^A-Za-z]|$)/) { if (!("kiro" in seen)) { seen["kiro"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Qwen([^A-Za-z]|$)/) { if (!("qwen" in seen)) { seen["qwen"]=1; count++ } } + if (text ~ /(^|[^A-Za-z])Kimi([^A-Za-z]|$)/) { if (!("kimi" in seen)) { seen["kimi"]=1; count++ } } + return (count >= 2) + } + + function in_exceptions(file_path, line_no, normalized) { + # Strip any "./" prefix so the sidecar list matches what skills//:N + # would report. 
+ normalized = file_path + sub(/^\.\//, "", normalized) + return ((normalized ":" line_no) in exception_set) + } + + function scan_line(n, i, in_runtime_context) { + if (line[n] ~ /^[ \t]*