diff --git a/.github/scripts/authorize-claude-workflow.sh b/.github/scripts/authorize-claude-workflow.sh new file mode 100644 index 0000000..5498182 --- /dev/null +++ b/.github/scripts/authorize-claude-workflow.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# Decide whether the trigger (PR author, comment author, labeler) is +# authorized to spawn a Claude workflow on this repo. Driven by the +# `authorize` job in claude-review.yml / claude-mention.yml / +# claude-issue-to-pr.yml. +# +# Trust set: every `@HarperFast/` handle in this repo's +# `.github/CODEOWNERS`. Same set as the people we trust to review code, +# aligned by construction. Falls back to `@HarperFast/developers` if +# CODEOWNERS is missing, empty, unparseable, or contains no HarperFast +# handles. External-org handles in CODEOWNERS are deliberately ignored +# — only HarperFast members are admitted. +# +# Inputs: +# USERS_TO_CHECK — newline-separated logins; ALL must pass. +# Empty / whitespace-only entries are skipped. +# ADMIT_CLAUDE_BOT — "true" admits `claude[bot]` without a team +# check (used by claude-review for AI-authored +# PRs from the issue-to-PR pipeline). Anything +# else requires team membership for every user. +# DEFAULT_TOKEN — token for the CODEOWNERS read (typically +# $GITHUB_TOKEN; needs `contents: read`). +# ORG_TOKEN — token for `orgs/.../teams/.../memberships/...` +# (App-installation token with `Members: Read`, +# scoped to this `authorize` job only). +# GITHUB_REPOSITORY — owner/repo (auto-set by GitHub Actions). +# GITHUB_OUTPUT — output file path. +# +# Outputs (to $GITHUB_OUTPUT): +# authorized=true|false +set -uo pipefail + +# Resolve the trust set from CODEOWNERS. The default token reads the +# workflow repo's own .github/CODEOWNERS via the contents API. +# Anything missing / empty / unparseable / containing no HarperFast +# handles falls back to the default team. +CODEOWNERS=$(GH_TOKEN="$DEFAULT_TOKEN" gh api \ + "repos/${GITHUB_REPOSITORY}/contents/.github/CODEOWNERS" \ + --jq '.content' 2>/dev/null | base64 -d 2>/dev/null || true) +TEAMS=$(printf '%s' "$CODEOWNERS" | grep -oE '@HarperFast/[a-zA-Z0-9_-]+' | sort -u | sed 's|@HarperFast/||' || true) + +if [ -z "$TEAMS" ]; then + echo "::notice::No @HarperFast/ handles found in .github/CODEOWNERS (missing, empty, or only external orgs). Defaulting to developers." + TEAMS="developers" +fi + +# Fail closed if USERS_TO_CHECK is empty or whitespace-only. The +# main loop below skips empty entries with `[ -z "$user" ] && continue` +# and would otherwise fall through to `authorized=true` if there was +# nothing to check. An authorize job that forgot to set USERS_TO_CHECK +# (or a malicious change that removed it) must NOT silently admit +# every event — refuse here. +if [ -z "${USERS_TO_CHECK//[[:space:]]/}" ]; then + echo "::error::USERS_TO_CHECK is empty or whitespace-only — denying by default. The authorize job must explicitly pass at least one login (PR author, commenter, labeler, etc.)." + echo "authorized=false" >> "$GITHUB_OUTPUT" + exit 0 +fi + +echo "Trust set (HarperFast teams from CODEOWNERS):" +for t in $TEAMS; do echo " - @HarperFast/$t"; done + +# is_authorized +# Admits claude[bot] iff ADMIT_CLAUDE_BOT=true; otherwise tries each +# team in the trust set in order. Returns 0 on the first hit. +is_authorized() { + local user="$1" + + if [ "${ADMIT_CLAUDE_BOT:-false}" = "true" ] && [ "$user" = "claude[bot]" ]; then + echo " → admitted: claude[bot]" + return 0 + fi + + for team in $TEAMS; do + # /orgs/{org}/teams/{team_slug}/memberships/{username} + # returns 200 for active members, 404 otherwise. + if GH_TOKEN="$ORG_TOKEN" gh api "orgs/HarperFast/teams/${team}/memberships/${user}" --silent >/dev/null 2>&1; then + echo " → admitted via @HarperFast/${team} membership" + return 0 + fi + done + + echo " → not a member of any HarperFast team in the trust set" + return 1 +} + +while IFS= read -r raw_user; do + user="$(printf '%s' "$raw_user" | awk '{$1=$1;print}')" + [ -z "$user" ] && continue + echo "Checking: $user" + if ! is_authorized "$user"; then + echo "User '$user' not authorized. Skipping the gated job." + echo "authorized=false" >> "$GITHUB_OUTPUT" + exit 0 + fi +done <<< "${USERS_TO_CHECK:-}" + +echo "authorized=true" >> "$GITHUB_OUTPUT" diff --git a/.github/scripts/compose-review-scope.sh b/.github/scripts/compose-review-scope.sh new file mode 100644 index 0000000..87c02ce --- /dev/null +++ b/.github/scripts/compose-review-scope.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Compose the layered review scope from individual layer files into a +# single markdown blob, and emit it as the `composed` output via +# $GITHUB_OUTPUT. Driven by claude-review.yml's "Compose review scope +# from layers" step. +# +# Inputs: +# LAYERS — newline-separated layer names (e.g. "universal\nharper/v5") +# GITHUB_OUTPUT — path to the GitHub Actions output file +# +# Layer files live at .ai-review-prompts/.md (the path the +# `Clone review prompts` step checks out into). Missing layers emit +# a workflow warning and continue; an empty composed result fails +# the step (no review scope = no review discipline). +set -euo pipefail + +OUT=/tmp/composed-scope.md +: > "$OUT" +while IFS= read -r raw_layer; do + # Trim whitespace around each layer name. + layer="$(printf '%s' "$raw_layer" | awk '{$1=$1;print}')" + [ -z "$layer" ] && continue + file=".ai-review-prompts/${layer}.md" + if [ ! -f "$file" ]; then + echo "::warning::Review layer '$layer' not found at $file; skipping." + continue + fi + { + cat "$file" + printf '\n\n' + } >> "$OUT" +done <<< "${LAYERS:-}" + +BYTES=$(wc -c < "$OUT") +echo "Composed ${BYTES} bytes from review layers" +if [ "$BYTES" -eq 0 ]; then + echo "::error::Composed review scope is empty — all layers missing or unreadable." + exit 1 +fi + +# Random heredoc delimiter — collision-proof against any content a +# future layer file might include. $GITHUB_OUTPUT uses heredoc +# syntax; a fixed marker could be forged (or coincidentally appear) +# in layer content and corrupt the output. +DELIM="EOF_$(openssl rand -hex 16)" +{ + echo "composed<<${DELIM}" + cat "$OUT" + echo "${DELIM}" +} >> "$GITHUB_OUTPUT" diff --git a/.github/scripts/find-prior-review-comment.sh b/.github/scripts/find-prior-review-comment.sh new file mode 100644 index 0000000..1ace423 --- /dev/null +++ b/.github/scripts/find-prior-review-comment.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Find the prior `claude-review:v1`-marker'd top-level review comment +# on a PR (if any) and write its integer database ID to +# $GITHUB_OUTPUT under key `id`. Empty when no prior exists. +# +# Why marker-based lookup: `--edit-last` filters by authenticated +# identity (`claude[bot]`) only — so after a `@claude` mention, the +# most recent claude[bot] comment is the mention response, and +# `--edit-last` clobbers it. Every review comment starts with +# ``; mention responses never carry the +# marker, so this lookup targets only the review comment. +# +# Inputs: +# GH_TOKEN — token with `pull-requests: read` +# GITHUB_REPOSITORY — owner/repo (auto-set by GitHub Actions) +# PR_NUMBER — pull request number +# GITHUB_OUTPUT — output file path +set -uo pipefail + +EXISTING_ID=$(gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + --jq '[.[] | select(.user.login == "claude[bot]") | select(.body | startswith(""))] | last | .id // empty') + +if [ -n "$EXISTING_ID" ]; then + echo "Prior review comment: $EXISTING_ID" +else + echo "No prior review comment found — agent will post fresh." +fi +echo "id=${EXISTING_ID}" >> "$GITHUB_OUTPUT" diff --git a/.github/scripts/log-review-to-ai-review-log.sh b/.github/scripts/log-review-to-ai-review-log.sh new file mode 100644 index 0000000..2aac0db --- /dev/null +++ b/.github/scripts/log-review-to-ai-review-log.sh @@ -0,0 +1,178 @@ +#!/usr/bin/env bash +# Log this run's PR review to the central HarperFast/ai-review-log +# tracker — finds the per-PR issue by stable title prefix and +# appends a comment, or creates a new issue if none exists. Driven +# by claude-review.yml's "Log review to ai-review-log" step. +# +# Best-effort: never fails the job. A missing `AI_REVIEW_LOG_TOKEN` +# secret, an absent claude review comment, or a stale comment all +# exit cleanly with a notice/warning rather than failing. +# +# Inputs: +# GH_TOKEN — token with `pull-requests: read` +# AI_REVIEW_LOG_TOKEN — fine-grained PAT scoped to ai-review-log +# with `issues: write` (optional — missing +# skips logging with a warning) +# PR_NUMBER — pull request number +# PR_URL — html URL of the PR +# REVIEW_STATUS — outcome of the Claude review step +# (success / failure / cancelled / etc.) +# REPO_SHORT — short repo name (e.g. "harper") +# GITHUB_REPOSITORY — owner/repo of the PR's repo +# GITHUB_RUN_ID — current Actions run ID (for staleness +# guard) +# RUNNER_TEMP — runner temp dir (where the agent's +# optional run-notes file lives) +set -uo pipefail + +if [ -z "${AI_REVIEW_LOG_TOKEN:-}" ]; then + echo "::warning::AI_REVIEW_LOG_TOKEN secret not set; skipping log entry." + exit 0 +fi + +# When this workflow job started. Used to filter out stale Claude +# review comments from previous runs so a cancelled in-flight run +# (e.g. from a force-push) doesn't re-log a prior run's content as +# a fresh finding. +JOB_STARTED=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" --jq '.run_started_at // empty') + +# Fetch the marker'd review comment via raw API. We can't use +# `gh pr view --json comments` because (a) it doesn't expose +# `updated_at` (which we need below for the staleness guard now +# that comments are edited in place), and (b) we need the marker +# filter to ignore `@claude` mention responses that share the +# `claude[bot]` identity. +CLAUDE_JSON=$(gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + --jq '[.[] | select(.user.login == "claude[bot]") | select(.body | startswith(""))] | last // empty') + +if [ -z "$CLAUDE_JSON" ] || [ "$CLAUDE_JSON" = "null" ]; then + echo "No marker'd Claude review comment found on PR #$PR_NUMBER (review_status=$REVIEW_STATUS); skipping log." + exit 0 +fi + +CLAUDE_BODY=$(printf '%s' "$CLAUDE_JSON" | jq -r '.body // empty') +# Prefer updated_at (reflects the most recent edit) over created_at +# (frozen at original post time) — comments are now edited in place +# across runs. +CLAUDE_AT=$(printf '%s' "$CLAUDE_JSON" | jq -r '.updated_at // .created_at // empty') + +if [ -z "$CLAUDE_BODY" ]; then + echo "Claude review comment had empty body; skipping log." + exit 0 +fi + +# ISO-8601 lexicographic compare — both are UTC timestamps in the +# same shape, so string comparison is sound. +if [ -n "$JOB_STARTED" ] && [ -n "$CLAUDE_AT" ] && [ "$CLAUDE_AT" \< "$JOB_STARTED" ]; then + echo "::notice::Latest Claude review comment update ($CLAUDE_AT) predates this job's start ($JOB_STARTED); skipping to avoid re-logging stale content." + exit 0 +fi + +# Title: count findings (lines starting with `### `). The +# "no blockers" branch matches the sentinel phrase anywhere in the +# body — the concise prompt's `Reviewed; no blockers found.` doesn't +# start with "no blockers", so an anchored regex would miss it. +# Anywhere-match is safe because the phrase is a deliberate output +# from the prompt. +if printf '%s' "$CLAUDE_BODY" | grep -qi 'no blockers found'; then + COUNT_PART="no blockers" +else + FINDING_COUNT=$(printf '%s\n' "$CLAUDE_BODY" | grep -c '^### [0-9]' || true) + COUNT_PART="${FINDING_COUNT} finding(s) — triage pending" +fi + +if [ "$REVIEW_STATUS" = "success" ]; then + TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART" +else + TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART (review $REVIEW_STATUS — may be incomplete)" +fi + +BODY=$(printf '**Source:** %s\n**Repo:** %s\n**PR:** #%s\n**Model:** claude-sonnet-4-6\n**Phase:** baseline\n**Review job status:** %s\n**Date:** %s\n\n---\n\n%s\n' \ + "$PR_URL" "$REPO_SHORT" "$PR_NUMBER" "$REVIEW_STATUS" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$CLAUDE_BODY") + +# Structured run notes from the agent (optional). This is the +# channel that keeps verbose context off the PR — the agent writes +# to a fixed path under $RUNNER_TEMP, and we append here so the log +# issue gets the full picture while the PR comment stays concise. +# Absent file is fine; means the run had nothing structured to +# capture. +NOTES_FILE="${RUNNER_TEMP:-/tmp}/claude-review-notes.md" +if [ -f "$NOTES_FILE" ]; then + NOTES_CONTENT=$(cat "$NOTES_FILE") + BODY=$(printf '%s\n\n---\n\n%s\n' "$BODY" "$NOTES_CONTENT") + echo "Appended $(wc -c < "$NOTES_FILE") bytes of run notes from $NOTES_FILE" +else + echo "No run notes file at $NOTES_FILE — skipping notes append" +fi + +# One ai-review-log issue per PR. Stable prefix `[] PR #:` +# lets us look up an existing issue for this PR across runs even +# though the count/status portion past the colon changes per run. +# List API (not search) is used because search is eventually- +# consistent — a same-day second review run might fire before the +# first issue is indexed. +TITLE_PREFIX="[$REPO_SHORT] PR #$PR_NUMBER:" + +EXISTING_NUMBER=$(curl -sS \ + -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/HarperFast/ai-review-log/issues?labels=repo:$REPO_SHORT&state=all&per_page=100&sort=created&direction=desc" \ + | jq -r --arg prefix "$TITLE_PREFIX" \ + '[.[] | select(.title | startswith($prefix))] | first | .number // empty') + +if [ -n "$EXISTING_NUMBER" ] && [ "$EXISTING_NUMBER" != "null" ]; then + # Existing issue: append a comment, refresh the title to reflect + # this run's status. Title refresh is best-effort — we still + # report success on the comment alone. + COMMENT_PAYLOAD=$(jq -nc --arg body "$BODY" '{body: $body}') + HTTP_C=$(curl -sS -o /tmp/ai-log-comment-resp.json -w '%{http_code}' -X POST \ + -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/HarperFast/ai-review-log/issues/$EXISTING_NUMBER/comments" \ + -d "$COMMENT_PAYLOAD") + + PATCH_PAYLOAD=$(jq -nc --arg title "$TITLE" '{title: $title}') + HTTP_T=$(curl -sS -o /tmp/ai-log-patch-resp.json -w '%{http_code}' -X PATCH \ + -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/HarperFast/ai-review-log/issues/$EXISTING_NUMBER" \ + -d "$PATCH_PAYLOAD") + + if [ "$HTTP_C" -ge 200 ] && [ "$HTTP_C" -lt 300 ]; then + COMMENT_URL=$(jq -r '.html_url' /tmp/ai-log-comment-resp.json) + echo "Logged review as comment on existing issue: $COMMENT_URL" + else + echo "::warning::ai-review-log comment POST failed (HTTP $HTTP_C):" + cat /tmp/ai-log-comment-resp.json + fi + + if [ "$HTTP_T" -lt 200 ] || [ "$HTTP_T" -ge 300 ]; then + echo "::warning::ai-review-log title PATCH failed (HTTP $HTTP_T):" + cat /tmp/ai-log-patch-resp.json + fi +else + # No existing issue for this PR — create one. + CREATE_PAYLOAD=$(jq -nc \ + --arg title "$TITLE" \ + --arg repo_label "repo:$REPO_SHORT" \ + --arg body "$BODY" \ + '{title: $title, body: $body, labels: [$repo_label, "verdict:pending", "phase:baseline"]}') + + HTTP=$(curl -sS -o /tmp/ai-log-resp.json -w '%{http_code}' -X POST \ + -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/HarperFast/ai-review-log/issues \ + -d "$CREATE_PAYLOAD") + + if [ "$HTTP" -ge 200 ] && [ "$HTTP" -lt 300 ]; then + ISSUE_URL=$(jq -r '.html_url' /tmp/ai-log-resp.json) + echo "Logged review to new issue: $ISSUE_URL" + else + echo "::warning::ai-review-log POST failed (HTTP $HTTP):" + cat /tmp/ai-log-resp.json + fi +fi diff --git a/.github/scripts/parse-claude-mention.sh b/.github/scripts/parse-claude-mention.sh new file mode 100644 index 0000000..30149fa --- /dev/null +++ b/.github/scripts/parse-claude-mention.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Decide whether to proceed with an `@claude` mention and which model +# to use, based on the comment body. Driven by claude-mention.yml's +# "Parse mention" step. +# +# Rules (the precision gate; the job-level `if:` is a cheap +# pre-filter that only checks substring containment): +# 1. `@claude` must be the FIRST non-whitespace token (word- +# boundary after) — rules out `@claudette`, inline prose +# mentions ("saw @claude's fix"), and quoted replies +# (`> @claude ...`) where the reply is addressing a human. +# 2. Case-insensitive word-boundary `deep` anywhere in the body +# escalates to Opus. Sonnet is the default. +# +# Inputs: +# BODY — comment body (verbatim) +# GITHUB_OUTPUT — output file path +# +# Outputs (to $GITHUB_OUTPUT): +# proceed=true|false +# model=claude-opus-4-7|claude-sonnet-4-6 (only when proceed=true) +set -uo pipefail + +if ! printf '%s' "$BODY" | grep -Pqz '\A\s*@claude\b'; then + echo "Comment does not start with @claude; skipping." + echo "proceed=false" >> "$GITHUB_OUTPUT" + exit 0 +fi + +if printf '%s' "$BODY" | grep -Piq '\bdeep\b'; then + echo "model=claude-opus-4-7" >> "$GITHUB_OUTPUT" + echo "Selected claude-opus-4-7 (deep requested)" +else + echo "model=claude-sonnet-4-6" >> "$GITHUB_OUTPUT" + echo "Selected claude-sonnet-4-6 (default)" +fi +echo "proceed=true" >> "$GITHUB_OUTPUT" diff --git a/.github/scripts/validate-auth-gate-invariants.sh b/.github/scripts/validate-auth-gate-invariants.sh new file mode 100644 index 0000000..b57e1a8 --- /dev/null +++ b/.github/scripts/validate-auth-gate-invariants.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +# Validate that the AI workflow auth gate structure is preserved +# across all `claude-*.yml` workflows. STRUCTURAL lint, not a semantic +# test — catches the obvious attacks (delete the authorize job, drop +# the `needs:` dependency, broaden permissions, change the +# if-expression to a tautology). Subtle attacks (e.g., modifying the +# bash logic inside the auth check to admit everyone) are out of +# scope for this validator and are caught by CODEOWNERS review on +# `.github/` changes. +# +# Defense in depth: branch-protection on `main` should make this +# workflow's job a REQUIRED status check. +# +# Inputs (none — runs in the workflow checkout). Validates: +# .github/workflows/claude-*.yml +# +# Exit code: +# 0 all workflows pass +# 1 any check failed (errors emitted as ::error::) +set -uo pipefail + +fail() { + echo "::error::$1" + exit 1 +} + +# yq is pre-installed on ubuntu-latest runners. +command -v yq >/dev/null || fail "yq not available on runner" + +shopt -s nullglob +files=(.github/workflows/claude-*.yml) +if [ "${#files[@]}" -eq 0 ]; then + echo "No claude-*.yml workflows found; nothing to validate." + exit 0 +fi + +for f in "${files[@]}"; do + echo "" + echo "=== Validating $f ===" + + # 1. The authorize job exists. + yq -e '.jobs.authorize' "$f" >/dev/null \ + || fail "$f: missing 'authorize' job" + + # 2. authorize.outputs.authorized is wired to some step output. + output_expr=$(yq -r '.jobs.authorize.outputs.authorized // ""' "$f") + [ -n "$output_expr" ] \ + || fail "$f: authorize job has no outputs.authorized" + echo "$output_expr" | grep -q 'steps\..*\.outputs\.authorized' \ + || fail "$f: authorize.outputs.authorized must come from a step output (got: $output_expr)" + + # 3. authorize uses actions/create-github-app-token (pinned to a SHA). + app_token_step=$(yq -r '.jobs.authorize.steps[] | select(.uses != null) | .uses' "$f" | grep '^actions/create-github-app-token@' || true) + [ -n "$app_token_step" ] \ + || fail "$f: authorize doesn't use actions/create-github-app-token" + echo "$app_token_step" | grep -qE '@[0-9a-f]{40}( |$)' \ + || fail "$f: actions/create-github-app-token must be pinned to a 40-char SHA (got: $app_token_step)" + + # 4. authorize.permissions doesn't grant any write-level scope. + write_perms=$(yq -r '.jobs.authorize.permissions | (.[] // "") | select(. == "write")' "$f" 2>/dev/null || true) + [ -z "$write_perms" ] \ + || fail "$f: authorize.permissions grants 'write' on at least one scope — auth job must be read-only" + + # 5. Required secrets are referenced (the auth check can't work without them). + grep -q 'HARPERFAST_AI_CLIENT_ID' "$f" \ + || fail "$f: HARPERFAST_AI_CLIENT_ID secret not referenced" + grep -q 'HARPERFAST_AI_APP_PRIVATE_KEY' "$f" \ + || fail "$f: HARPERFAST_AI_APP_PRIVATE_KEY secret not referenced" + + # 6. The authorize job sets USERS_TO_CHECK on at least one of its + # steps. The auth script (`authorize-claude-workflow.sh`) fails + # closed if USERS_TO_CHECK is empty, but the workflow still + # shouldn't ship without it — make the omission a structural + # error rather than a silent runtime denial. Defense in depth + # against a PR that drops the env var thinking the script will + # "do the right thing". + # + # NOTE: yq on ubuntu-latest is mikefarah/yq (Go), not jq. It does + # NOT support jq's `empty` keyword, and an earlier version of this + # check using `// empty` lexer-erred silently (`2>/dev/null` ate it) + # and produced a false fail on workflows that DID set the env var. + # `select(. != null)` is the idiomatic yq filter for "skip steps + # without this env var"; `head -1` collapses the per-step stream to + # a single value (or empty). + users_to_check=$(yq -r '.jobs.authorize.steps[].env.USERS_TO_CHECK | select(. != null)' "$f" 2>/dev/null | head -1) + [ -n "$users_to_check" ] \ + || fail "$f: authorize job has no step setting USERS_TO_CHECK env var — the auth script needs at least one login to check (PR author, commenter, labeler, etc.)" + + # 7. Every non-authorize job has `needs: authorize` and a strict + # if-expression of exactly: needs.authorize.outputs.authorized == 'true' + # (whitespace normalized). Stricter than substring match — + # rules out tautologies like `... || true`. + other_jobs=$(yq -r '.jobs | keys | .[]' "$f" | grep -v '^authorize$' || true) + [ -n "$other_jobs" ] \ + || fail "$f: no non-authorize job found — workflow has nothing gated" + + for j in $other_jobs; do + needs=$(yq -r ".jobs.${j}.needs // \"\"" "$f") + [ "$needs" = "authorize" ] \ + || fail "$f: job '$j' must have 'needs: authorize' (got: $needs)" + + if_expr=$(yq -r ".jobs.${j}.if // \"\"" "$f") + # Normalize whitespace and quotes for the comparison. + normalized=$(echo "$if_expr" | tr -s ' ' | tr -d "\n") + expected="needs.authorize.outputs.authorized == 'true'" + [ "$normalized" = "$expected" ] \ + || fail "$f: job '$j' if: must be exactly \"$expected\" — no compound expressions, no tautologies (got: $if_expr)" + done + + echo " ✓ $f passed" +done + +echo "" +echo "All claude-*.yml workflows pass auth gate invariants." diff --git a/.github/workflows/auth-gate-invariants.yml b/.github/workflows/auth-gate-invariants.yml new file mode 100644 index 0000000..77d0b8b --- /dev/null +++ b/.github/workflows/auth-gate-invariants.yml @@ -0,0 +1,36 @@ +name: Auth gate invariants + +# Validates that the AI workflow auth gate structure is preserved. +# Runs on any PR that touches a `claude-*.yml` workflow file or this +# validator itself. +# +# This is a STRUCTURAL lint, not a semantic test. It catches the +# obvious attacks (delete the authorize job, drop the `needs:` +# dependency, broaden permissions, change the if-expression to a +# tautology). Subtle attacks (e.g., modifying the bash logic inside +# the auth check to admit everyone) are caught by CODEOWNERS review, +# which requires both @HarperFast/developers and @HarperFast/devops +# approval on `.github/` changes. +# +# Defense in depth — make this workflow a REQUIRED status check on +# `main` via branch protection so PRs can't merge without it passing. + +on: + pull_request: + paths: + - '.github/workflows/claude-*.yml' + - '.github/workflows/auth-gate-invariants.yml' + - '.github/scripts/validate-auth-gate-invariants.sh' + +jobs: + validate: + runs-on: ubuntu-latest + timeout-minutes: 2 + permissions: + contents: read + steps: + - name: Checkout + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + + - name: Validate auth gate structure + run: bash .github/scripts/validate-auth-gate-invariants.sh diff --git a/.github/workflows/claude-issue-to-pr.yml b/.github/workflows/claude-issue-to-pr.yml index 383213b..5f4400c 100644 --- a/.github/workflows/claude-issue-to-pr.yml +++ b/.github/workflows/claude-issue-to-pr.yml @@ -20,18 +20,65 @@ concurrency: cancel-in-progress: false jobs: + authorize: + # Single source of truth for "is this label trigger allowed to + # spawn Claude on this repo?". The `work` job below has ONE `if:` + # that depends on this — no step-level guards, no individual user + # list. + # + # Cheap pre-filter at job level: explicit whitelist of allowed + # labels (NOT `startsWith('claude-fix:')`, which would match typoed + # variants like `claude-fix:typos`). + # + # We check the LABELER (`github.actor`), not the issue author. The + # labeler must already have at least triage permission to apply a + # label; a maintainer labeling an external-author issue is a + # legitimate way to invoke the agent on community reports. + # + # Trust set: the @HarperFast teams listed in this repo's + # `.github/CODEOWNERS`. Defaults to `@HarperFast/developers` if the + # file is missing, empty, or has no HarperFast handles. External- + # org handles are deliberately ignored. + # + # Required (organization-level) secrets: + # - HARPERFAST_AI_CLIENT_ID (the App's Client ID, like Iv23li…) + # - HARPERFAST_AI_APP_PRIVATE_KEY (.pem file contents) + if: contains(fromJSON('["claude-fix:typo","claude-fix:docs","claude-fix:deps","claude-fix:bug"]'), github.event.label.name) + runs-on: ubuntu-latest + timeout-minutes: 1 + permissions: + contents: read # for the .github/CODEOWNERS fetch + outputs: + authorized: ${{ steps.check.outputs.authorized }} + steps: + - name: Mint org-read token + id: app-token + uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1 + with: + client-id: ${{ secrets.HARPERFAST_AI_CLIENT_ID }} + private-key: ${{ secrets.HARPERFAST_AI_APP_PRIVATE_KEY }} + owner: HarperFast + + - name: Checkout (for CODEOWNERS read) + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + with: + sparse-checkout: | + .github/CODEOWNERS + .github/scripts/authorize-claude-workflow.sh + sparse-checkout-cone-mode: false + + - name: Check labeler authorization + id: check + env: + DEFAULT_TOKEN: ${{ github.token }} + ORG_TOKEN: ${{ steps.app-token.outputs.token }} + ADMIT_CLAUDE_BOT: 'false' + USERS_TO_CHECK: ${{ github.actor }} + run: bash .github/scripts/authorize-claude-workflow.sh + work: - # Only trigger for `claude-fix:*` labels AND when the issue was - # opened by a HarperFast org member or collaborator. Labels added - # to issues opened by outside contributors are ignored to keep - # the trigger surface tight during calibration. - # Explicit whitelist of allowed labels — `startsWith('claude-fix:')` - # would match typoed variants (`claude-fix:typos`, `claude-fix:foo`) - # and the agent would waste turns trying to interpret them. - if: >- - contains(fromJSON('["claude-fix:typo","claude-fix:docs","claude-fix:deps","claude-fix:bug"]'), github.event.label.name) && - contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), - github.event.issue.author_association) + needs: authorize + if: needs.authorize.outputs.authorized == 'true' runs-on: ubuntu-latest timeout-minutes: 25 permissions: diff --git a/.github/workflows/claude-mention.yml b/.github/workflows/claude-mention.yml index f6a1719..24cf8ef 100644 --- a/.github/workflows/claude-mention.yml +++ b/.github/workflows/claude-mention.yml @@ -21,16 +21,61 @@ concurrency: cancel-in-progress: false jobs: + authorize: + # Single source of truth for "is this commenter allowed to trigger + # Claude on this repo?". The `work` job below has ONE `if:` that + # depends on this — no step-level guards, no individual user list. + # + # Cheap pre-filter at job level: comment must mention `@claude`. + # The first-non-whitespace-token precision check is in the work + # job's `Parse mention` step. + # + # Trust set: the @HarperFast teams listed in this repo's + # `.github/CODEOWNERS`. Defaults to `@HarperFast/developers` if the + # file is missing, empty, or has no HarperFast handles. External- + # org handles are deliberately ignored — we only admit HarperFast + # members. + # + # Required (organization-level) secrets: + # - HARPERFAST_AI_CLIENT_ID (the App's Client ID, like Iv23li…) + # - HARPERFAST_AI_APP_PRIVATE_KEY (.pem file contents) + if: contains(github.event.comment.body, '@claude') + runs-on: ubuntu-latest + timeout-minutes: 1 + permissions: + # contents: read for the .github/CODEOWNERS fetch via default token. + contents: read + outputs: + authorized: ${{ steps.check.outputs.authorized }} + steps: + - name: Mint org-read token + id: app-token + uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1 + with: + client-id: ${{ secrets.HARPERFAST_AI_CLIENT_ID }} + private-key: ${{ secrets.HARPERFAST_AI_APP_PRIVATE_KEY }} + owner: HarperFast + + - name: Checkout (for CODEOWNERS read) + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + with: + sparse-checkout: | + .github/CODEOWNERS + .github/scripts/authorize-claude-workflow.sh + sparse-checkout-cone-mode: false + + - name: Check commenter authorization + id: check + env: + DEFAULT_TOKEN: ${{ github.token }} + ORG_TOKEN: ${{ steps.app-token.outputs.token }} + ADMIT_CLAUDE_BOT: 'false' + USERS_TO_CHECK: ${{ github.event.comment.user.login }} + run: bash .github/scripts/authorize-claude-workflow.sh + work: - # Belt-and-suspenders gate: - # 1. Comment must contain the trigger phrase. - # 2. Commenter must be HarperFast org OWNER / MEMBER or a repo - # COLLABORATOR (the action also performs its own write-access - # check on the actor as a fallback). - if: >- - contains(github.event.comment.body, '@claude') && - contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), - github.event.comment.author_association) + needs: authorize + if: needs.authorize.outputs.authorized == 'true' runs-on: ubuntu-latest timeout-minutes: 20 permissions: @@ -63,23 +108,7 @@ jobs: id: mention env: BODY: ${{ github.event.comment.body }} - run: | - set -uo pipefail - - if ! printf '%s' "$BODY" | grep -Pqz '\A\s*@claude\b'; then - echo "Comment does not start with @claude; skipping." - echo "proceed=false" >> "$GITHUB_OUTPUT" - exit 0 - fi - - if printf '%s' "$BODY" | grep -Piq '\bdeep\b'; then - echo "model=claude-opus-4-7" >> "$GITHUB_OUTPUT" - echo "Selected claude-opus-4-7 (deep requested)" - else - echo "model=claude-sonnet-4-6" >> "$GITHUB_OUTPUT" - echo "Selected claude-sonnet-4-6 (default)" - fi - echo "proceed=true" >> "$GITHUB_OUTPUT" + run: bash .github/scripts/parse-claude-mention.sh - name: Clone shared Harper skills # Pinned to a SHA (not `main`) so agent behavior is reproducible diff --git a/.github/workflows/claude-review.yml b/.github/workflows/claude-review.yml index ff1fd5a..abe78b3 100644 --- a/.github/workflows/claude-review.yml +++ b/.github/workflows/claude-review.yml @@ -9,32 +9,92 @@ concurrency: cancel-in-progress: true jobs: + authorize: + # Single source of truth for "is this PR allowed to trigger Claude + # on this repo?". The `review` job below has ONE `if:` that depends + # on this — no step-level guards, no individual user list to + # maintain. + # + # Trust set: the @HarperFast teams listed in this repo's + # `.github/CODEOWNERS`. Same set as the people we trust to review + # code; alignment by construction. If CODEOWNERS is missing, + # empty, or has no `@HarperFast/` handles, we fall back to + # `@HarperFast/developers` as the default. External-org handles in + # CODEOWNERS (e.g. `@SomeOtherOrg/x`) are deliberately ignored — + # we only admit HarperFast members. `claude[bot]` is admitted + # explicitly so AI-authored PRs from the issue-to-PR pipeline get + # reviewed. + # + # We check TWO identities; both must be authorized: + # * `pull_request.user.login` — the PR's original author. + # * `github.actor` — whoever triggered THIS specific event (the + # pusher on `synchronize`, etc.). A non-trusted user pushing + # to a trusted user's PR branch changes the actor without + # changing the PR author; we want to refuse those events. + # + # Team-membership reads need an `Organization: Members: Read` + # token, which the default `GITHUB_TOKEN` lacks. We mint an + # installation token from an org-wide HarperFast GitHub App + # (Members: Read scope only) and use it ONLY in this job — the + # work job uses the default `GITHUB_TOKEN`, so the org-read + # capability never reaches the agent step. + # + # Required (organization-level) secrets: + # - HARPERFAST_AI_CLIENT_ID (the App's Client ID, like Iv23li…) + # - HARPERFAST_AI_APP_PRIVATE_KEY (.pem file contents) + runs-on: ubuntu-latest + timeout-minutes: 1 + permissions: + # `contents: read` so we can fetch this repo's .github/CODEOWNERS + # via the default token. Team-membership reads use the App token + # below, which is contained to this job. + contents: read + outputs: + authorized: ${{ steps.check.outputs.authorized }} + steps: + - name: Mint org-read token + id: app-token + uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1 + with: + client-id: ${{ secrets.HARPERFAST_AI_CLIENT_ID }} + private-key: ${{ secrets.HARPERFAST_AI_APP_PRIVATE_KEY }} + owner: HarperFast + + - name: Checkout (for CODEOWNERS read) + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + with: + sparse-checkout: | + .github/CODEOWNERS + .github/scripts/authorize-claude-workflow.sh + sparse-checkout-cone-mode: false + + - name: Check authorization (PR author + event actor) + id: check + env: + DEFAULT_TOKEN: ${{ github.token }} + ORG_TOKEN: ${{ steps.app-token.outputs.token }} + ADMIT_CLAUDE_BOT: 'true' + USERS_TO_CHECK: | + ${{ github.event.pull_request.user.login }} + ${{ github.actor }} + run: bash .github/scripts/authorize-claude-workflow.sh + review: - # Review PRs authored by HarperFast org members / collaborators AND - # PRs opened by our own issue-to-PR bot (claude[bot]) — AI-authored PRs - # need review MOST, not least. External human PRs are not auto-reviewed; - # a maintainer can opt one in via an `@claude` mention (handled by a - # separate workflow). - if: >- - contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), - github.event.pull_request.author_association) - || github.event.pull_request.user.login == 'claude[bot]' + needs: authorize + if: needs.authorize.outputs.authorized == 'true' runs-on: ubuntu-latest - # Bumped from 10 to 15. We've observed the Claude review step stall on a - # single long-running API call inside the 10-min window on substantial - # diffs, producing a "cancelled" status with partial output. 15 gives - # headroom without letting a runaway loop burn forever (max-turns=24 is - # the real cost ceiling). + # 15 gives headroom for substantial diffs without letting a runaway loop + # burn forever (claude-code-action's --max-turns is the real cost ceiling). timeout-minutes: 15 permissions: contents: read pull-requests: write id-token: write # required by claude-code-action even with API-key auth env: - # Layered review scope — sourced from HarperFast/ai-review-prompts - # (public). Order matters: most-general first, most-specific last. - # Composed into a single prompt block by the "Compose review scope - # from layers" step. + # Layered review scope — sourced from HarperFast/ai-review-prompts. + # Order matters: most-general first, most-specific last. Composed + # into a single prompt block by the "Compose review scope from + # layers" step. REVIEW_LAYERS: | universal harper/common @@ -57,7 +117,7 @@ jobs: - name: Clone shared Harper skills # Pinned to a specific SHA (not `main`) so review behavior is # reproducible across runs — updates to the skills repo require - # an explicit pin bump in this workflow. + # an explicit pin bump here. uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 with: repository: HarperFast/skills @@ -77,68 +137,36 @@ jobs: id: scope env: LAYERS: ${{ env.REVIEW_LAYERS }} - run: | - set -euo pipefail - OUT=/tmp/composed-scope.md - : > "$OUT" - while IFS= read -r raw_layer; do - # Trim whitespace around each layer name - layer="$(printf '%s' "$raw_layer" | awk '{$1=$1;print}')" - [ -z "$layer" ] && continue - file=".ai-review-prompts/${layer}.md" - if [ ! -f "$file" ]; then - echo "::warning::Review layer '$layer' not found at $file; skipping." - continue - fi - { - cat "$file" - printf '\n\n' - } >> "$OUT" - done <<< "$LAYERS" - - BYTES=$(wc -c < "$OUT") - echo "Composed ${BYTES} bytes from review layers" - if [ "$BYTES" -eq 0 ]; then - echo "::error::Composed review scope is empty — all layers missing or unreadable." - exit 1 - fi - - # Random heredoc delimiter — collision-proof against any content - # a future layer file might include. $GITHUB_OUTPUT uses heredoc - # syntax; a fixed marker could be forged (or coincidentally - # appear) in layer content and corrupt the output. - DELIM="EOF_$(openssl rand -hex 16)" - { - echo "composed<<${DELIM}" - cat "$OUT" - echo "${DELIM}" - } >> "$GITHUB_OUTPUT" + run: bash .github/scripts/compose-review-scope.sh + + - name: Find prior review comment + id: prior_review + env: + GH_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: bash .github/scripts/find-prior-review-comment.sh - name: Claude review id: claude-review uses: anthropics/claude-code-action@c3d45e8e941e1b2ad7b278c57482d9c5bf1f35b3 # v1.0.99 with: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - # Allow our issue-to-PR bot's PRs to be reviewed. The job-level - # `if:` gate above lets the workflow start, but claude-code-action - # has its own bot-actor check that refuses with "Workflow initiated - # by non-human actor" unless the bot is on this allowlist. We - # specifically allow `claude` (slug match — no `[bot]` suffix here) - # rather than `*`, so any OTHER bot that somehow triggers this - # workflow is still refused. + # Admit the issue-to-PR bot's PRs. Job-level `if:` gate above lets + # the workflow start; claude-code-action has its own bot-actor gate + # that refuses unless the bot is on this allowlist. allowed_bots: claude - show_full_output: true # TEMP: keep on during calibration so tool denials are visible; revert once reviews run cleanly + show_full_output: true # TEMP: keep on during calibration so tool denials are visible claude_args: | --model claude-sonnet-4-6 - --max-turns 24 - # This workflow is READ-ONLY by design — the agent reviews - # and comments, it does not modify the repo. Git subcommands - # are scoped individually to strictly read-only operations. - # (The claude-mention and claude-issue-to-pr workflows DO - # grant broader git access because they are authoring - # workflows. Their guarantee against destructive git ops - # comes from branch protection on main / release_* / v*.x, - # not from this allowlist.) + --max-turns 48 + # This workflow is READ-ONLY by design — the agent reviews and + # comments, it does not modify the repo. Git subcommands are + # scoped individually to strictly read-only operations. + # (The claude-mention and claude-issue-to-pr workflows DO grant + # broader git access because they authoring workflows. Their + # guarantee against destructive git ops comes from branch + # protection on main / release_* / v*.x, not from this + # allowlist.) # `Write` is granted but the prompt restricts it to a # single path (`$RUNNER_TEMP/claude-review-notes.md`) — the # channel for structured run notes that flow into the @@ -147,10 +175,26 @@ jobs: # path-bound is enforced via prompt; deviations are # contained because the runner is ephemeral and has no # write credentials beyond `gh pr comment`. - --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr view:*),Read,Grep,Glob,Write,Bash(git diff:*),Bash(git log:*),Bash(git blame:*),Bash(git show:*)" + # `Bash(gh api:*)` is needed for editing a prior review + # comment by integer database ID (the only way to target a + # specific comment instead of "the most recent claude[bot] + # comment", which `--edit-last` defaults to and which + # collides with `@claude` mention responses). The job's + # token (`pull-requests: write` / `contents: read` / + # `id-token: write`) is repo-scoped, not PR-scoped — a + # successful prompt injection could PATCH any top-level + # PR/issue comment in this repo, read repo contents, or + # fetch an OIDC token. The actual containment is the + # prompt's injection guard (the `Note:` paragraph treating + # PR-checked-out `CLAUDE.md` / `AGENTS.md` as untrusted + # for review-discipline overrides, plus the explicit tool + # discipline in `## Tools`), the runner's ephemerality, + # and branch-protection rules on protected refs. + --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr view:*),Bash(gh api:*),Read,Grep,Glob,Write,Bash(git diff:*),Bash(git log:*),Bash(git blame:*),Bash(git show:*)" prompt: | REPO: ${{ github.repository }} PR NUMBER: ${{ github.event.pull_request.number }} + PRIOR_REVIEW_COMMENT_ID: ${{ steps.prior_review.outputs.id }} The PR branch is already checked out in the current working directory. @@ -171,24 +215,43 @@ jobs: ## Scope to what changed Before reading widely, start by identifying the files the PR - actually touched (`git diff --name-only origin/main...HEAD`) - and focus your review there. Only expand scope when a - specific finding demands it. Grepping across unrelated - directories on a large repo burns turns without producing - signal. + actually touched (`git diff --name-only ...HEAD`) and + focus your review there. Only expand scope when a specific + finding demands it — e.g. a public API consumer you want to + verify, or a test file relevant to the changed code. Grepping + across unrelated directories on a repo this size burns turns + without producing signal. ## Tools - For file inspection use the `Read`, `Grep`, and `Glob` tools. + **Turn budget:** you have 48 turns total per review. Plan + accordingly: 1–2 turns to read agent context files, 1 turn + to identify changed files, ~1 turn per changed file via + `Read`, the rest for targeted searches and posting + findings. Most reviews finish well under that. Running + out of turns means no review gets posted at all — the + log step skips when no marker'd review comment exists. + + **The single biggest turn-burner is repo-wide search.** + Use the dedicated `Grep` tool with a tight `path` parameter + scoping to the changed files or the specific subdirectory + you need. Do NOT do `grep -rn …` (or `Bash(grep …)`) over + the whole repo — that pattern returned ~20 hits per call + in a recent harper run that timed out before posting + anything. For file inspection use `Read`, `Grep`, and + `Glob` tools. + Do NOT use `cat`, `head`, `tail`, `grep`, `ls`, or `find` - via Bash — those commands are not allowed and waste turns. + via Bash. The Bash allowlist below excludes them on + purpose, and the equivalent dedicated tools are faster. Do NOT run `bun test`, `npm test`, or any other script - that executes PR code — the PR's tests are already checked - separately. + that executes PR code — the PR's tests are already + checked separately. The allowed Bash commands are: - - `git diff ...HEAD` — the PR diff, local (no API - round-trip). `` is typically `origin/main`. + - `git diff ...HEAD` — the PR diff, same bytes as + `gh pr diff` but local, no API round-trip. `` is + typically `origin/main`. - `git log`, `git show` — history context. Use these to understand WHY a line is the way it is before flagging it. "This load-bearing check was added 3 years ago in @@ -202,10 +265,11 @@ jobs: blockers). - `gh pr view` — PR metadata (title, body, author, labels). Already run at start; re-invoke if needed. - - `gh pr comment` — post the top-level summary comment. + - `gh pr comment` — post the final review comment. Git subcommands are scoped individually on purpose — no - write operations are permitted. + write operations are permitted. Trying to call anything + not listed here will be denied. You MAY use the `Write` tool, but ONLY to write to `${{ runner.temp }}/claude-review-notes.md`. No other @@ -262,19 +326,47 @@ jobs: fresh on each run; GitHub auto-marks superseded ones "outdated". - **Top-level PR comment** — a concise summary that - anchors the inline findings. Cap at ~5 lines. Edit - in place across runs so the PR never accumulates stacked - review comments: - `gh pr comment --edit-last --create-if-none --body ""`. - Use `--body-file` if the body is large enough to hit - shell argument limits. Don't wrap prior reviews in - `
` here — the related ai-review-log issue + anchors the inline findings. Cap at ~5 lines. **There + is at most one review comment per PR; edit the same + comment in place across runs.** Don't wrap prior reviews + in `
` here — the related ai-review-log issue keeps history across runs. + **Posting mechanism (read carefully).** `--edit-last` + is NOT safe — it would clobber a `@claude` mention + response that happens to be the most recent + `claude[bot]` comment. Instead, use the marker-based + flow: + + 1. **Every review comment body MUST begin with the + literal first line ``, + followed by a newline, followed by your review + content.** This sentinel is what distinguishes the + review comment from mention responses (which lack + it). Future runs find and edit this comment via the + marker. + + 2. **If `PRIOR_REVIEW_COMMENT_ID` (set above) is + non-empty**, edit that exact comment by integer + database ID: + ``` + gh api -X PATCH "repos/${{ github.repository }}/issues/comments/$PRIOR_REVIEW_COMMENT_ID" -f body="$BODY" + ``` + (where `$BODY` starts with the marker line). + + 3. **If empty**, post fresh: + ``` + gh pr comment ${{ github.event.pull_request.number }} --body "$BODY" + ``` + (where `$BODY` starts with the marker line). + + Use `--body-file` (or `gh api … -F body=@`) for + large bodies that hit shell argument limits. + **For "no blockers" runs, the PR comment is one sentence.** This overrides the universal scope's - `summary is welcome during calibration` allowance — this - workflow has a log surface (the next step threads each + `summary is welcome during calibration` allowance — Harper + has a log surface (the next workflow step threads each run's PR comment into a per-PR `HarperFast/ai-review-log` issue), so the verbose tracing belongs there, not on the PR. Concrete shape: @@ -350,7 +442,9 @@ jobs: the log step is a no-op when the file is absent. - name: Log review to ai-review-log - # Best-effort — never fail the job if logging fails + # Best-effort — never fail the job if logging fails. Feeds the + # central HarperFast/ai-review-log issue tracker that aggregates + # findings across repos for calibration / weekly sweep. if: always() env: GH_TOKEN: ${{ github.token }} @@ -358,156 +452,5 @@ jobs: PR_NUMBER: ${{ github.event.pull_request.number }} PR_URL: ${{ github.event.pull_request.html_url }} REVIEW_STATUS: ${{ steps.claude-review.outcome }} - # Short repo name for log title/label (e.g. "oauth" in - # "HarperFast/oauth"). Interpolated so this workflow can be - # copied to other HarperFast repos without source edits. REPO_SHORT: ${{ github.event.repository.name }} - run: | - set -uo pipefail - - if [ -z "${AI_REVIEW_LOG_TOKEN:-}" ]; then - echo "::warning::AI_REVIEW_LOG_TOKEN secret not set; skipping log entry." - exit 0 - fi - - # When this workflow job started. Used to filter out stale Claude - # comments from previous runs so a cancelled in-flight run (e.g. - # from a force-push) doesn't re-log the prior run's comment as a - # fresh finding. - JOB_STARTED=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" --jq '.run_started_at // empty') - - # Fetch Claude's latest comment and its createdAt timestamp. - CLAUDE_JSON=$(gh pr view "$PR_NUMBER" --json comments \ - --jq '[.comments[] | select(.author.login == "claude")] | last // empty') - - if [ -z "$CLAUDE_JSON" ] || [ "$CLAUDE_JSON" = "null" ]; then - echo "No Claude comment found on PR #$PR_NUMBER (review_status=$REVIEW_STATUS); skipping log." - exit 0 - fi - - CLAUDE_BODY=$(printf '%s' "$CLAUDE_JSON" | jq -r '.body // empty') - CLAUDE_AT=$(printf '%s' "$CLAUDE_JSON" | jq -r '.createdAt // empty') - - if [ -z "$CLAUDE_BODY" ]; then - echo "Claude comment had empty body; skipping log." - exit 0 - fi - - # ISO-8601 lexicographic compare — both are UTC timestamps in the - # same shape, so string comparison is sound. - if [ -n "$JOB_STARTED" ] && [ -n "$CLAUDE_AT" ] && [ "$CLAUDE_AT" \< "$JOB_STARTED" ]; then - echo "::notice::Latest Claude comment ($CLAUDE_AT) predates this job's start ($JOB_STARTED); skipping to avoid re-logging a stale comment." - exit 0 - fi - - # Title: count findings (lines starting with `### `). "No blockers" case has none. - # Match the phrase anywhere in the body — the new concise prompt asks for - # `Reviewed; no blockers found.`, which doesn't start with "no blockers" - # and so wouldn't match an anchored regex. Anywhere-match is safe because - # `no blockers found` is a deliberate sentinel phrase from the prompt. - if printf '%s' "$CLAUDE_BODY" | grep -qi 'no blockers found'; then - COUNT_PART="no blockers" - else - FINDING_COUNT=$(printf '%s\n' "$CLAUDE_BODY" | grep -c '^### [0-9]' || true) - COUNT_PART="${FINDING_COUNT} finding(s) — triage pending" - fi - - # Decorate the title with the review job status when the review - # didn't complete cleanly (e.g., max_turns). Findings posted before - # the cutoff are still logged but flagged as potentially incomplete. - if [ "$REVIEW_STATUS" = "success" ]; then - TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART" - else - TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART (review $REVIEW_STATUS — may be incomplete)" - fi - - BODY=$(printf '**Source:** %s\n**Repo:** %s\n**PR:** #%s\n**Model:** claude-sonnet-4-6\n**Phase:** baseline\n**Review job status:** %s\n**Date:** %s\n\n---\n\n%s\n' \ - "$PR_URL" "$REPO_SHORT" "$PR_NUMBER" "$REVIEW_STATUS" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$CLAUDE_BODY") - - # Structured run notes from the agent (optional). This is - # the channel that keeps verbose context off the PR — the - # agent writes to a fixed path under $RUNNER_TEMP, and we - # append here so the log issue gets the full picture while - # the PR comment stays concise. Absent file is fine; means - # the run had nothing structured to capture. - NOTES_FILE="${RUNNER_TEMP:-/tmp}/claude-review-notes.md" - if [ -f "$NOTES_FILE" ]; then - NOTES_CONTENT=$(cat "$NOTES_FILE") - BODY=$(printf '%s\n\n---\n\n%s\n' "$BODY" "$NOTES_CONTENT") - echo "Appended $(wc -c < "$NOTES_FILE") bytes of run notes from $NOTES_FILE" - else - echo "No run notes file at $NOTES_FILE — skipping notes append" - fi - - # One ai-review-log issue per PR. Stable prefix - # `[] PR #:` lets us look up an existing issue for - # this PR across runs even though the count/status portion - # past the colon changes per run. List API (not search) is - # used because search is eventually-consistent — a same-day - # second review run might fire before the first issue is - # indexed. - TITLE_PREFIX="[$REPO_SHORT] PR #$PR_NUMBER:" - - EXISTING_NUMBER=$(curl -sS \ - -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "https://api.github.com/repos/HarperFast/ai-review-log/issues?labels=repo:$REPO_SHORT&state=all&per_page=100&sort=created&direction=desc" \ - | jq -r --arg prefix "$TITLE_PREFIX" \ - '[.[] | select(.title | startswith($prefix))] | first | .number // empty') - - if [ -n "$EXISTING_NUMBER" ] && [ "$EXISTING_NUMBER" != "null" ]; then - # Existing issue: append a comment, refresh the title to - # reflect this run's status. Title refresh is best-effort — - # we still report success on the comment alone. - COMMENT_PAYLOAD=$(jq -nc --arg body "$BODY" '{body: $body}') - HTTP_C=$(curl -sS -o /tmp/ai-log-comment-resp.json -w '%{http_code}' -X POST \ - -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "https://api.github.com/repos/HarperFast/ai-review-log/issues/$EXISTING_NUMBER/comments" \ - -d "$COMMENT_PAYLOAD") - - PATCH_PAYLOAD=$(jq -nc --arg title "$TITLE" '{title: $title}') - HTTP_T=$(curl -sS -o /tmp/ai-log-patch-resp.json -w '%{http_code}' -X PATCH \ - -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "https://api.github.com/repos/HarperFast/ai-review-log/issues/$EXISTING_NUMBER" \ - -d "$PATCH_PAYLOAD") - - if [ "$HTTP_C" -ge 200 ] && [ "$HTTP_C" -lt 300 ]; then - COMMENT_URL=$(jq -r '.html_url' /tmp/ai-log-comment-resp.json) - echo "Logged review as comment on existing issue: $COMMENT_URL" - else - echo "::warning::ai-review-log comment POST failed (HTTP $HTTP_C):" - cat /tmp/ai-log-comment-resp.json - fi - - if [ "$HTTP_T" -lt 200 ] || [ "$HTTP_T" -ge 300 ]; then - echo "::warning::ai-review-log title PATCH failed (HTTP $HTTP_T):" - cat /tmp/ai-log-patch-resp.json - fi - else - # No existing issue for this PR — create one. - CREATE_PAYLOAD=$(jq -nc \ - --arg title "$TITLE" \ - --arg repo_label "repo:$REPO_SHORT" \ - --arg body "$BODY" \ - '{title: $title, body: $body, labels: [$repo_label, "verdict:pending", "phase:baseline"]}') - - HTTP=$(curl -sS -o /tmp/ai-log-resp.json -w '%{http_code}' -X POST \ - -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/HarperFast/ai-review-log/issues \ - -d "$CREATE_PAYLOAD") - - if [ "$HTTP" -ge 200 ] && [ "$HTTP" -lt 300 ]; then - ISSUE_URL=$(jq -r '.html_url' /tmp/ai-log-resp.json) - echo "Logged review to new issue: $ISSUE_URL" - else - echo "::warning::ai-review-log POST failed (HTTP $HTTP):" - cat /tmp/ai-log-resp.json - fi - fi + run: bash .github/scripts/log-review-to-ai-review-log.sh