HarperFast · heskew · May 2, 2026 · May 1, 2026 · May 1, 2026 · May 2, 2026
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+# Decide whether the trigger (PR author, comment author, labeler) is
+# authorized to spawn a Claude workflow on this repo. Driven by the
+# `authorize` job in claude-review.yml / claude-mention.yml /
+# claude-issue-to-pr.yml.
+#
+# Trust set: every `@HarperFast/<team>` handle in this repo's
+# `.github/CODEOWNERS`. Same set as the people we trust to review code,
+# aligned by construction. Falls back to `@HarperFast/developers` if
+# CODEOWNERS is missing, empty, unparseable, or contains no HarperFast
+# handles. External-org handles in CODEOWNERS are deliberately ignored
+# — only HarperFast members are admitted.
+#
+# Inputs:
+#   USERS_TO_CHECK     — newline-separated logins; ALL must pass.
+#                        Empty / whitespace-only entries are skipped.
+#   ADMIT_CLAUDE_BOT   — "true" admits `claude[bot]` without a team
+#                        check (used by claude-review for AI-authored
+#                        PRs from the issue-to-PR pipeline). Anything
+#                        else requires team membership for every user.
+#   DEFAULT_TOKEN      — token for the CODEOWNERS read (typically
+#                        $GITHUB_TOKEN; needs `contents: read`).
+#   ORG_TOKEN          — token for `orgs/.../teams/.../memberships/...`
+#                        (App-installation token with `Members: Read`,
+#                        scoped to this `authorize` job only).
+#   GITHUB_REPOSITORY  — owner/repo (auto-set by GitHub Actions).
+#   GITHUB_OUTPUT      — output file path.
+#
+# Outputs (to $GITHUB_OUTPUT):
+#   authorized=true|false
+set -uo pipefail
+
+# Resolve the trust set from CODEOWNERS. The default token reads the
+# workflow repo's own .github/CODEOWNERS via the contents API.
+# Anything missing / empty / unparseable / containing no HarperFast
+# handles falls back to the default team.
+CODEOWNERS=$(GH_TOKEN="$DEFAULT_TOKEN" gh api \
+  "repos/${GITHUB_REPOSITORY}/contents/.github/CODEOWNERS" \
+  --jq '.content' 2>/dev/null | base64 -d 2>/dev/null || true)
+TEAMS=$(printf '%s' "$CODEOWNERS" | grep -oE '@HarperFast/[a-zA-Z0-9_-]+' | sort -u | sed 's|@HarperFast/||' || true)
+
+if [ -z "$TEAMS" ]; then
+  echo "::notice::No @HarperFast/<team> handles found in .github/CODEOWNERS (missing, empty, or only external orgs). Defaulting to developers."
+  TEAMS="developers"
+fi
+
+# Fail closed if USERS_TO_CHECK is empty or whitespace-only. The
+# main loop below skips empty entries with `[ -z "$user" ] && continue`
+# and would otherwise fall through to `authorized=true` if there was
+# nothing to check. An authorize job that forgot to set USERS_TO_CHECK
+# (or a malicious change that removed it) must NOT silently admit
+# every event — refuse here.
+if [ -z "${USERS_TO_CHECK//[[:space:]]/}" ]; then
+  echo "::error::USERS_TO_CHECK is empty or whitespace-only — denying by default. The authorize job must explicitly pass at least one login (PR author, commenter, labeler, etc.)."
+  echo "authorized=false" >> "$GITHUB_OUTPUT"
+  exit 0
+fi
+
+echo "Trust set (HarperFast teams from CODEOWNERS):"
+for t in $TEAMS; do echo "  - @HarperFast/$t"; done
+
+# is_authorized <login>
+# Admits claude[bot] iff ADMIT_CLAUDE_BOT=true; otherwise tries each
+# team in the trust set in order. Returns 0 on the first team where
+# the login is an ACTIVE member, 1 if no team admits it.
+# Globals read: TEAMS, ORG_TOKEN, ADMIT_CLAUDE_BOT.
+# Writes a one-line verdict to stdout.
+is_authorized() {
+  local user="$1"
+  local team state
+
+  if [ "${ADMIT_CLAUDE_BOT:-false}" = "true" ] && [ "$user" = "claude[bot]" ]; then
+    echo "  → admitted: claude[bot]"
+    return 0
+  fi
+
+  for team in $TEAMS; do
+    # /orgs/{org}/teams/{team_slug}/memberships/{username} answers
+    # 200 for both active and pending (invited, not yet accepted)
+    # memberships, and 404 otherwise. The HTTP status alone would
+    # therefore admit pending invitees, so require state == active.
+    # A failed call (404, network, auth) just moves on to the next team.
+    state=$(GH_TOKEN="$ORG_TOKEN" gh api \
+      "orgs/HarperFast/teams/${team}/memberships/${user}" \
+      --jq '.state' 2>/dev/null) || continue
+    if [ "$state" = "active" ]; then
+      echo "  → admitted via @HarperFast/${team} membership"
+      return 0
+    fi
+  done
+
+  echo "  → not a member of any HarperFast team in the trust set"
+  return 1
+}
+
+# Gate: every non-blank login in USERS_TO_CHECK has to be authorized.
+# The first rejection records authorized=false and stops with exit 0;
+# only a full pass reaches the authorized=true line at the bottom.
+while IFS= read -r raw_user; do
+  # awk's field re-split trims both ends (and squeezes inner runs of
+  # whitespace); a blank line comes back empty.
+  user=$(awk '{$1=$1;print}' <<< "$raw_user")
+  if [ -z "$user" ]; then
+    continue
+  fi
+
+  echo "Checking: $user"
+  if is_authorized "$user"; then
+    continue
+  fi
+
+  echo "User '$user' not authorized. Skipping the gated job."
+  echo "authorized=false" >> "$GITHUB_OUTPUT"
+  exit 0
+done <<< "${USERS_TO_CHECK:-}"
+
+echo "authorized=true" >> "$GITHUB_OUTPUT"
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Compose the layered review scope from individual layer files into a
+# single markdown blob, and emit it as the `composed` output via
+# $GITHUB_OUTPUT. Driven by claude-review.yml's "Compose review scope
+# from layers" step.
+#
+# Inputs:
+#   LAYERS         — newline-separated layer names (e.g. "universal\nharper/v5")
+#   GITHUB_OUTPUT  — path to the GitHub Actions output file
+#
+# Layer files live at .ai-review-prompts/<layer>.md (the path the
+# `Clone review prompts` step checks out into). Missing layers emit
+# a workflow warning and continue; an empty composed result fails
+# the step (no review scope = no review discipline).
+set -euo pipefail
+
+OUT=/tmp/composed-scope.md
+: > "$OUT"
+while IFS= read -r raw_layer; do
+  # Trim whitespace around each layer name.
+  layer="$(printf '%s' "$raw_layer" | awk '{$1=$1;print}')"
+  [ -z "$layer" ] && continue
+  file=".ai-review-prompts/${layer}.md"
+  if [ ! -f "$file" ]; then
+    echo "::warning::Review layer '$layer' not found at $file; skipping."
+    continue
+  fi
+  {
+    cat "$file"
+    printf '\n\n'
+  } >> "$OUT"
+done <<< "${LAYERS:-}"
+
+BYTES=$(wc -c < "$OUT")
+echo "Composed ${BYTES} bytes from review layers"
+# Every layer file that exists contributes a trailing "\n\n" separator,
+# so the byte count alone cannot detect layers that are present but
+# blank. Require at least one non-whitespace character as well.
+if [ "$BYTES" -eq 0 ] || ! grep -q '[^[:space:]]' "$OUT"; then
+  echo "::error::Composed review scope is empty — all layers missing, unreadable, or blank."
+  exit 1
+fi
+
+# $GITHUB_OUTPUT takes multi-line values in heredoc form:
+#   name<<DELIMITER / value lines / DELIMITER
+# The delimiter is freshly randomized on every run so that no layer
+# content — accidental or crafted — can contain it and cut the value
+# short.
+DELIM="EOF_$(openssl rand -hex 16)"
+printf 'composed<<%s\n' "$DELIM" >> "$GITHUB_OUTPUT"
+cat "$OUT" >> "$GITHUB_OUTPUT"
+printf '%s\n' "$DELIM" >> "$GITHUB_OUTPUT"
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Find the prior `claude-review:v1`-marker'd top-level review comment
+# on a PR (if any) and write its integer database ID to
+# $GITHUB_OUTPUT under key `id`. Empty when no prior exists.
+#
+# Why marker-based lookup: `--edit-last` filters by authenticated
+# identity (`claude[bot]`) only — so after a `@claude` mention, the
+# most recent claude[bot] comment is the mention response, and
+# `--edit-last` clobbers it. Every review comment starts with
+# `<!-- claude-review:v1 -->`; mention responses never carry the
+# marker, so this lookup targets only the review comment.
+#
+# Inputs:
+#   GH_TOKEN             — token with `pull-requests: read`
+#   GITHUB_REPOSITORY    — owner/repo (auto-set by GitHub Actions)
+#   PR_NUMBER            — pull request number
+#   GITHUB_OUTPUT        — output file path
+set -uo pipefail
+
+EXISTING_ID=$(gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
+  --jq '[.[] | select(.user.login == "claude[bot]") | select(.body | startswith("<!-- claude-review:v1 -->"))] | last | .id // empty')
+
+if [ -n "$EXISTING_ID" ]; then
+  echo "Prior review comment: $EXISTING_ID"
+else
+  echo "No prior review comment found — agent will post fresh."
+fi
+echo "id=${EXISTING_ID}" >> "$GITHUB_OUTPUT"
@@ -0,0 +1,178 @@
+#!/usr/bin/env bash
+# Log this run's PR review to the central HarperFast/ai-review-log
+# tracker — finds the per-PR issue by stable title prefix and
+# appends a comment, or creates a new issue if none exists. Driven
+# by claude-review.yml's "Log review to ai-review-log" step.
+#
+# Best-effort: never fails the job. A missing `AI_REVIEW_LOG_TOKEN`
+# secret, an absent claude review comment, or a stale comment all
+# exit cleanly with a notice/warning rather than failing.
+#
+# Inputs:
+#   GH_TOKEN              — token with `pull-requests: read`
+#   AI_REVIEW_LOG_TOKEN   — fine-grained PAT scoped to ai-review-log
+#                           with `issues: write` (optional — missing
+#                           skips logging with a warning)
+#   PR_NUMBER             — pull request number
+#   PR_URL                — html URL of the PR
+#   REVIEW_STATUS         — outcome of the Claude review step
+#                           (success / failure / cancelled / etc.)
+#   REPO_SHORT            — short repo name (e.g. "harper")
+#   GITHUB_REPOSITORY     — owner/repo of the PR's repo
+#   GITHUB_RUN_ID         — current Actions run ID (for staleness
+#                           guard)
+#   RUNNER_TEMP           — runner temp dir (where the agent's
+#                           optional run-notes file lives)
+set -uo pipefail
+
+if [ -z "${AI_REVIEW_LOG_TOKEN:-}" ]; then
+  echo "::warning::AI_REVIEW_LOG_TOKEN secret not set; skipping log entry."
+  exit 0
+fi
+
+# When this workflow job started. Used to filter out stale Claude
+# review comments from previous runs so a cancelled in-flight run
+# (e.g. from a force-push) doesn't re-log a prior run's content as
+# a fresh finding.
+JOB_STARTED=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" --jq '.run_started_at // empty')
+
+# Fetch the marker'd review comment via raw API. We can't use
+# `gh pr view --json comments` because (a) it doesn't expose
+# `updated_at` (which we need below for the staleness guard now
+# that comments are edited in place), and (b) we need the marker
+# filter to ignore `@claude` mention responses that share the
+# `claude[bot]` identity.
+#
+# The endpoint is paginated (30 per page by default), so every page
+# is walked — on a busy PR the review comment may not be on page one.
+# With --paginate the --jq filter runs once per page; the matching
+# comments are streamed out and a second jq slurps the stream and
+# keeps the last (newest) one.
+CLAUDE_JSON=$(gh api --paginate \
+  "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments?per_page=100" \
+  --jq '.[] | select(.user.login == "claude[bot]") | select(.body | startswith("<!-- claude-review:v1 -->"))' \
+  | jq -c -s 'last // empty')
+
+if [ -z "$CLAUDE_JSON" ] || [ "$CLAUDE_JSON" = "null" ]; then
+  echo "No marker'd Claude review comment found on PR #$PR_NUMBER (review_status=$REVIEW_STATUS); skipping log."
+  exit 0
+fi
+
+CLAUDE_BODY=$(printf '%s' "$CLAUDE_JSON" | jq -r '.body // empty')
+# Prefer updated_at (reflects the most recent edit) over created_at
+# (frozen at original post time) — comments are now edited in place
+# across runs.
+CLAUDE_AT=$(printf '%s' "$CLAUDE_JSON" | jq -r '.updated_at // .created_at // empty')
+
+if [ -z "$CLAUDE_BODY" ]; then
+  echo "Claude review comment had empty body; skipping log."
+  exit 0
+fi
+
+# ISO-8601 lexicographic compare — both are UTC timestamps in the
+# same shape, so string comparison is sound. The guard is skipped
+# when either timestamp could not be obtained.
+if [ -n "$JOB_STARTED" ] && [ -n "$CLAUDE_AT" ] && [ "$CLAUDE_AT" \< "$JOB_STARTED" ]; then
+  echo "::notice::Latest Claude review comment update ($CLAUDE_AT) predates this job's start ($JOB_STARTED); skipping to avoid re-logging stale content."
+  exit 0
+fi
+
+# Title: count findings (lines starting with `### <digit>`). The
+# "no blockers" branch matches the sentinel phrase anywhere in the
+# body — the concise prompt's `Reviewed; no blockers found.` doesn't
+# start with "no blockers", so an anchored regex would miss it.
+# Anywhere-match is safe because the phrase is a deliberate output
+# from the prompt.
+#
+# The body is fed through a here-string rather than `printf | grep -q`:
+# under `pipefail`, grep -q exiting on an early match can kill the
+# still-writing printf with SIGPIPE, which fails the pipeline and
+# would send a matching body down the wrong branch.
+if grep -qi 'no blockers found' <<< "$CLAUDE_BODY"; then
+  COUNT_PART="no blockers"
+else
+  # grep -c prints 0 but exits 1 when nothing matches; `|| true`
+  # keeps that from reading as a failure.
+  FINDING_COUNT=$(grep -c '^### [0-9]' <<< "$CLAUDE_BODY" || true)
+  COUNT_PART="${FINDING_COUNT} finding(s) — triage pending"
+fi
+
+# Anything other than a successful review step is flagged in the
+# title so readers know the logged content may be partial.
+if [ "$REVIEW_STATUS" = "success" ]; then
+  TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART"
+else
+  TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART (review $REVIEW_STATUS — may be incomplete)"
+fi
+
+# Metadata header, a horizontal rule, then the review comment verbatim.
+BODY=$(printf '**Source:** %s\n**Repo:** %s\n**PR:** #%s\n**Model:** claude-sonnet-4-6\n**Phase:** baseline\n**Review job status:** %s\n**Date:** %s\n\n---\n\n%s\n' \
+  "$PR_URL" "$REPO_SHORT" "$PR_NUMBER" "$REVIEW_STATUS" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$CLAUDE_BODY")
+
+# Structured run notes from the agent (optional). This is the
+# channel that keeps verbose context off the PR — the agent writes
+# to a fixed path under $RUNNER_TEMP, and we append here so the log
+# issue gets the full picture while the PR comment stays concise.
+# Absent file is fine; means the run had nothing structured to
+# capture.
+NOTES_FILE="${RUNNER_TEMP:-/tmp}/claude-review-notes.md"
+if [ -f "$NOTES_FILE" ]; then
+  NOTES_CONTENT=$(cat "$NOTES_FILE")
+  BODY=$(printf '%s\n\n---\n\n%s\n' "$BODY" "$NOTES_CONTENT")
+  echo "Appended $(wc -c < "$NOTES_FILE") bytes of run notes from $NOTES_FILE"
+else
+  echo "No run notes file at $NOTES_FILE — skipping notes append"
+fi
+
+# One ai-review-log issue per PR. Stable prefix `[<repo>] PR #<N>:`
+# lets us look up an existing issue for this PR across runs even
+# though the count/status portion past the colon changes per run.
+# List API (not search) is used because search is eventually-
+# consistent — a same-day second review run might fire before the
+# first issue is indexed. Only one page is requested, so the lookup
+# covers the 100 most recently created issues carrying the repo label.
+TITLE_PREFIX="[$REPO_SHORT] PR #$PR_NUMBER:"
+LOG_API="https://api.github.com/repos/HarperFast/ai-review-log"
+
+# log_api <method> <url> <response-file> [json-payload]
+# Calls the GitHub REST API with the log token, stores the response
+# body in <response-file>, and prints the HTTP status code on stdout
+# (curl reports 000 when no HTTP response was received).
+log_api() {
+  local method="$1" url="$2" out="$3" payload="${4:-}"
+  local -a args=(
+    -sS -o "$out" -w '%{http_code}' -X "$method"
+    -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN"
+    -H "Accept: application/vnd.github+json"
+    -H "X-GitHub-Api-Version: 2022-11-28"
+  )
+  if [ -n "$payload" ]; then
+    args+=(-d "$payload")
+  fi
+  # Drop any earlier response so a failed call cannot be mistaken
+  # for one that produced this file.
+  rm -f -- "$out"
+  curl "${args[@]}" "$url"
+}
+
+# is_2xx <status> — true when <status> is a number in 200..299.
+is_2xx() {
+  [ "$1" -ge 200 ] 2>/dev/null && [ "$1" -lt 300 ] 2>/dev/null
+}
+
+# show_response <file> — print a response body if one was written.
+# Guarded so a missing file (curl never connected) cannot turn into a
+# non-zero exit status for this best-effort script.
+show_response() {
+  if [ -s "$1" ]; then
+    cat -- "$1"
+  fi
+}
+
+HTTP_L=$(log_api GET \
+  "${LOG_API}/issues?labels=repo:$REPO_SHORT&state=all&per_page=100&sort=created&direction=desc" \
+  /tmp/ai-log-list-resp.json)
+
+# A failed lookup must not be read as "no issue exists": that would
+# create a duplicate issue for this PR. Skip logging instead.
+if ! is_2xx "$HTTP_L"; then
+  echo "::warning::ai-review-log issue lookup failed (HTTP $HTTP_L); skipping log entry to avoid creating a duplicate issue."
+  show_response /tmp/ai-log-list-resp.json
+  exit 0
+fi
+
+EXISTING_NUMBER=$(jq -r --arg prefix "$TITLE_PREFIX" \
+  '[.[] | select(.title | startswith($prefix))] | first | .number // empty' \
+  /tmp/ai-log-list-resp.json)
+
+if [ -n "$EXISTING_NUMBER" ] && [ "$EXISTING_NUMBER" != "null" ]; then
+  # Existing issue: append a comment, refresh the title to reflect
+  # this run's status. Title refresh is best-effort — we still
+  # report success on the comment alone.
+  COMMENT_PAYLOAD=$(jq -nc --arg body "$BODY" '{body: $body}')
+  HTTP_C=$(log_api POST "${LOG_API}/issues/$EXISTING_NUMBER/comments" \
+    /tmp/ai-log-comment-resp.json "$COMMENT_PAYLOAD")
+
+  PATCH_PAYLOAD=$(jq -nc --arg title "$TITLE" '{title: $title}')
+  HTTP_T=$(log_api PATCH "${LOG_API}/issues/$EXISTING_NUMBER" \
+    /tmp/ai-log-patch-resp.json "$PATCH_PAYLOAD")
+
+  if is_2xx "$HTTP_C"; then
+    COMMENT_URL=$(jq -r '.html_url' /tmp/ai-log-comment-resp.json)
+    echo "Logged review as comment on existing issue: $COMMENT_URL"
+  else
+    echo "::warning::ai-review-log comment POST failed (HTTP $HTTP_C):"
+    show_response /tmp/ai-log-comment-resp.json
+  fi
+
+  if ! is_2xx "$HTTP_T"; then
+    echo "::warning::ai-review-log title PATCH failed (HTTP $HTTP_T):"
+    show_response /tmp/ai-log-patch-resp.json
+  fi
+else
+  # No existing issue for this PR — create one.
+  CREATE_PAYLOAD=$(jq -nc \
+    --arg title "$TITLE" \
+    --arg repo_label "repo:$REPO_SHORT" \
+    --arg body "$BODY" \
+    '{title: $title, body: $body, labels: [$repo_label, "verdict:pending", "phase:baseline"]}')
+
+  HTTP=$(log_api POST "${LOG_API}/issues" /tmp/ai-log-resp.json "$CREATE_PAYLOAD")
+
+  if is_2xx "$HTTP"; then
+    ISSUE_URL=$(jq -r '.html_url' /tmp/ai-log-resp.json)
+    echo "Logged review to new issue: $ISSUE_URL"
+  else
+    echo "::warning::ai-review-log POST failed (HTTP $HTTP):"
+    show_response /tmp/ai-log-resp.json
+  fi
+fi
+
+# Best-effort contract: logging problems are reported as warnings
+# above and never fail the job.
+exit 0
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Decide whether to proceed with an `@claude` mention and which model
+# to use, based on the comment body. Driven by claude-mention.yml's
+# "Parse mention" step.
+#
+# Rules (the precision gate; the job-level `if:` is a cheap
+# pre-filter that only checks substring containment):
+#   1. `@claude` must be the FIRST non-whitespace token and must not
+#      be followed by another login character (letter, digit, `_`,
+#      or `-`) — rules out `@claudette`, hyphenated logins such as
+#      `@claude-bot`, inline prose mentions ("saw @claude's fix"),
+#      and quoted replies (`> @claude ...`) where the reply is
+#      addressing a human.
+#   2. Case-insensitive word-boundary `deep` anywhere in the body
+#      escalates to Opus. Sonnet is the default.
+#
+# Inputs:
+#   BODY           — comment body (verbatim); unset is treated as
+#                    empty and therefore does not proceed
+#   GITHUB_OUTPUT  — output file path
+#
+# Outputs (to $GITHUB_OUTPUT):
+#   proceed=true|false
+#   model=claude-opus-4-7|claude-sonnet-4-6  (only when proceed=true)
+set -uo pipefail
+
+# Default to empty so an unset BODY takes the explicit proceed=false
+# path instead of aborting under `set -u`.
+COMMENT_BODY="${BODY:-}"
+
+# Both checks read the body from a here-string rather than
+# `printf | grep -q`: under `pipefail`, grep -q exiting on an early
+# match can kill the still-writing printf with SIGPIPE and make a
+# matching body look like a non-match.
+#
+# -z makes grep treat the whole body as one record so \A anchors at
+# the very start of the comment. A plain \b after `@claude` would
+# accept `@claude-bot` (hyphen is a non-word character), hence the
+# explicit negative lookahead.
+if ! grep -Pqz '\A\s*@claude(?![\w-])' <<< "$COMMENT_BODY"; then
+  echo "Comment does not start with @claude; skipping."
+  echo "proceed=false" >> "$GITHUB_OUTPUT"
+  exit 0
+fi
+
+if grep -Piq '\bdeep\b' <<< "$COMMENT_BODY"; then
+  echo "model=claude-opus-4-7" >> "$GITHUB_OUTPUT"
+  echo "Selected claude-opus-4-7 (deep requested)"
+else
+  echo "model=claude-sonnet-4-6" >> "$GITHUB_OUTPUT"
+  echo "Selected claude-sonnet-4-6 (default)"
+fi
+echo "proceed=true" >> "$GITHUB_OUTPUT"