Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions .github/scripts/authorize-claude-workflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Decide whether the trigger (PR author, comment author, labeler) is
# authorized to spawn a Claude workflow on this repo. Driven by the
# `authorize` job in claude-review.yml / claude-mention.yml /
# claude-issue-to-pr.yml.
#
# Trust set: every `@HarperFast/<team>` handle in this repo's
# `.github/CODEOWNERS`. Same set as the people we trust to review code,
# aligned by construction. Falls back to `@HarperFast/developers` if
# CODEOWNERS is missing, empty, unparseable, or contains no HarperFast
# handles. External-org handles in CODEOWNERS are deliberately ignored
# — only HarperFast members are admitted.
#
# Inputs:
# USERS_TO_CHECK — newline-separated logins; ALL must pass.
# Empty / whitespace-only entries are skipped.
# ADMIT_CLAUDE_BOT — "true" admits `claude[bot]` without a team
# check (used by claude-review for AI-authored
# PRs from the issue-to-PR pipeline). Anything
# else requires team membership for every user.
# DEFAULT_TOKEN — token for the CODEOWNERS read (typically
# $GITHUB_TOKEN; needs `contents: read`).
# ORG_TOKEN — token for `orgs/.../teams/.../memberships/...`
# (App-installation token with `Members: Read`,
# scoped to this `authorize` job only).
# GITHUB_REPOSITORY — owner/repo (auto-set by GitHub Actions).
# GITHUB_OUTPUT — output file path.
#
# Outputs (to $GITHUB_OUTPUT):
# authorized=true|false
set -uo pipefail

# extract_harperfast_teams
#   Reads CODEOWNERS text on stdin and prints the unique team slugs of
#   every `@HarperFast/<team>` handle, one per line.
#   `#` comments (whole-line or trailing) are stripped first so that a
#   handle which only appears in commentary — e.g. a retired team left
#   in a note — is NOT admitted to the trust set. An escaped `\#` is
#   part of a path pattern, not a comment, and is left alone.
extract_harperfast_teams() {
  sed -E 's/(^|[^\\])#.*$/\1/' \
    | grep -oE '@HarperFast/[a-zA-Z0-9_-]+' \
    | sort -u \
    | sed 's|@HarperFast/||'
}

# Resolve the trust set from CODEOWNERS. The default token reads the
# workflow repo's own .github/CODEOWNERS via the contents API.
# Anything missing / empty / unparseable / containing no HarperFast
# handles falls back to the default team. Errors are deliberately
# swallowed here: every failure mode maps to the same fallback.
CODEOWNERS=$(GH_TOKEN="$DEFAULT_TOKEN" gh api \
  "repos/${GITHUB_REPOSITORY}/contents/.github/CODEOWNERS" \
  --jq '.content' 2>/dev/null | base64 -d 2>/dev/null || true)
TEAMS=$(printf '%s\n' "$CODEOWNERS" | extract_harperfast_teams || true)

if [ -z "$TEAMS" ]; then
  echo "::notice::No @HarperFast/<team> handles found in .github/CODEOWNERS (missing, empty, or only external orgs). Defaulting to developers."
  TEAMS="developers"
fi

# Fail closed if USERS_TO_CHECK is unset, empty or whitespace-only. The
# main loop below skips empty entries and would otherwise fall through
# to `authorized=true` if there was nothing to check. An authorize job
# that forgot to set USERS_TO_CHECK (or a malicious change that removed
# it) must NOT silently admit every event — refuse here.
#
# The `:-` default matters: under `set -u` a bare expansion of an unset
# USERS_TO_CHECK would abort the script before the explicit deny below
# is written. The main loop already reads it as `${USERS_TO_CHECK:-}`.
users_to_check="${USERS_TO_CHECK:-}"
if [ -z "${users_to_check//[[:space:]]/}" ]; then
  echo "::error::USERS_TO_CHECK is empty or whitespace-only — denying by default. The authorize job must explicitly pass at least one login (PR author, commenter, labeler, etc.)."
  echo "authorized=false" >> "$GITHUB_OUTPUT"
  exit 0
fi

# Log the resolved trust set so a denied run can be diagnosed from the
# job output. TEAMS is a whitespace-separated list of team slugs, so the
# unquoted expansion (word splitting) is intentional.
echo "Trust set (HarperFast teams from CODEOWNERS):"
for t in $TEAMS; do echo " - @HarperFast/$t"; done

# is_authorized <login>
#   Admits claude[bot] iff ADMIT_CLAUDE_BOT=true; otherwise tries each
#   team in the trust set in order. Returns 0 on the first hit, 1 if
#   the login is not an ACTIVE member of any team.
#
#   Globals read: ADMIT_CLAUDE_BOT, TEAMS (whitespace-separated team
#   slugs), ORG_TOKEN.
is_authorized() {
  local user="$1"
  local team state

  if [ "${ADMIT_CLAUDE_BOT:-false}" = "true" ] && [ "$user" = "claude[bot]" ]; then
    echo " → admitted: claude[bot]"
    return 0
  fi

  # Word splitting on $TEAMS is intentional (list of slugs).
  for team in $TEAMS; do
    # /orgs/{org}/teams/{team_slug}/memberships/{username}
    # returns 404 for non-members, but 200 for BOTH active members and
    # users whose invitation is still pending. A bare status check
    # would admit someone who was invited but never accepted, so
    # require `state == "active"` explicitly.
    state=$(GH_TOKEN="$ORG_TOKEN" gh api \
      "orgs/HarperFast/teams/${team}/memberships/${user}" \
      --jq '.state' 2>/dev/null) || state=""
    if [ "$state" = "active" ]; then
      echo " → admitted via @HarperFast/${team} membership"
      return 0
    fi
  done

  echo " → not a member of any HarperFast team in the trust set"
  return 1
}

# Check every login in USERS_TO_CHECK (one per line). The first login
# that is not authorized denies the whole event; only when every
# non-blank entry passes is `authorized=true` emitted.
while IFS= read -r entry; do
  # Normalise surrounding whitespace; blank entries are ignored.
  user="$(awk '{$1=$1;print}' <<< "$entry")"
  if [ -z "$user" ]; then
    continue
  fi

  echo "Checking: $user"
  if is_authorized "$user"; then
    continue
  fi

  echo "User '$user' not authorized. Skipping the gated job."
  echo "authorized=false" >> "$GITHUB_OUTPUT"
  exit 0
done <<< "${USERS_TO_CHECK:-}"

echo "authorized=true" >> "$GITHUB_OUTPUT"
50 changes: 50 additions & 0 deletions .github/scripts/compose-review-scope.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Compose the layered review scope from individual layer files into a
# single markdown blob, and emit it as the `composed` output via
# $GITHUB_OUTPUT. Driven by claude-review.yml's "Compose review scope
# from layers" step.
#
# Inputs:
# LAYERS — newline-separated layer names (e.g. "universal\nharper/v5")
# GITHUB_OUTPUT — path to the GitHub Actions output file
#
# Layer files live at .ai-review-prompts/<layer>.md (the path the
# `Clone review prompts` step checks out into). Missing layers emit
# a workflow warning and continue; an empty composed result fails
# the step (no review scope = no review discipline).
set -euo pipefail

# Start from an empty scratch file, then append each requested layer
# (followed by a blank-line separator) in the order given by LAYERS.
OUT=/tmp/composed-scope.md
printf '' > "$OUT"
while IFS= read -r entry; do
  # Normalise whitespace around the layer name; skip blank lines.
  layer="$(awk '{$1=$1;print}' <<< "$entry")"
  if [ -z "$layer" ]; then
    continue
  fi

  file=".ai-review-prompts/${layer}.md"
  if [ -f "$file" ]; then
    cat "$file" >> "$OUT"
    printf '\n\n' >> "$OUT"
  else
    # A missing layer is a warning, not a failure; keep composing.
    echo "::warning::Review layer '$layer' not found at $file; skipping."
  fi
done <<< "${LAYERS:-}"

# Report the composed size, then refuse to continue with an empty
# scope. The check looks for any non-whitespace character rather than
# a zero byte count: every layer that exists contributes a "\n\n"
# separator, so a present-but-empty layer file would otherwise yield a
# non-zero size while carrying no review instructions at all.
BYTES=$(wc -c < "$OUT")
echo "Composed ${BYTES} bytes from review layers"
if ! grep -q '[^[:space:]]' "$OUT"; then
  echo "::error::Composed review scope is empty — all layers missing or unreadable."
  exit 1
fi

# $GITHUB_OUTPUT multi-line values use heredoc syntax:
#   name<<DELIM / value lines / DELIM
# The delimiter is randomised per run so that no layer content —
# accidental or crafted — can contain it and terminate the value early.
DELIM="EOF_$(openssl rand -hex 16)"
printf 'composed<<%s\n' "${DELIM}" >> "$GITHUB_OUTPUT"
cat "$OUT" >> "$GITHUB_OUTPUT"
printf '%s\n' "${DELIM}" >> "$GITHUB_OUTPUT"
28 changes: 28 additions & 0 deletions .github/scripts/find-prior-review-comment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Find the prior `claude-review:v1`-marker'd top-level review comment
# on a PR (if any) and write its integer database ID to
# $GITHUB_OUTPUT under key `id`. Empty when no prior exists.
#
# Why marker-based lookup: `--edit-last` filters by authenticated
# identity (`claude[bot]`) only — so after a `@claude` mention, the
# most recent claude[bot] comment is the mention response, and
# `--edit-last` clobbers it. Every review comment starts with
# `<!-- claude-review:v1 -->`; mention responses never carry the
# marker, so this lookup targets only the review comment.
#
# Inputs:
# GH_TOKEN — token with `pull-requests: read`
# GITHUB_REPOSITORY — owner/repo (auto-set by GitHub Actions)
# PR_NUMBER — pull request number
# GITHUB_OUTPUT — output file path
set -uo pipefail

# List ALL issue comments on the PR and keep the id of the last one
# authored by claude[bot] whose body starts with the review marker.
#
# `--paginate` is required: the comments endpoint returns only the
# first page (30 items by default), so on a busy PR the review comment
# would otherwise be missed and the agent would post a duplicate
# instead of editing in place. With `--paginate`, gh applies the --jq
# filter to each page separately, so the filter emits one id per
# matching comment and `tail -n 1` picks the last match across pages.
EXISTING_ID=$(gh api --paginate \
  "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments?per_page=100" \
  --jq '.[] | select(.user.login == "claude[bot]") | select(.body | startswith("<!-- claude-review:v1 -->")) | .id' \
  | tail -n 1)

if [ -n "$EXISTING_ID" ]; then
  echo "Prior review comment: $EXISTING_ID"
else
  echo "No prior review comment found — agent will post fresh."
fi
# Always write the key; an empty value means "no prior comment".
echo "id=${EXISTING_ID}" >> "$GITHUB_OUTPUT"
178 changes: 178 additions & 0 deletions .github/scripts/log-review-to-ai-review-log.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#!/usr/bin/env bash
# Log this run's PR review to the central HarperFast/ai-review-log
# tracker — finds the per-PR issue by stable title prefix and
# appends a comment, or creates a new issue if none exists. Driven
# by claude-review.yml's "Log review to ai-review-log" step.
#
# Best-effort: never fails the job. A missing `AI_REVIEW_LOG_TOKEN`
# secret, an absent claude review comment, or a stale comment all
# exit cleanly with a notice/warning rather than failing.
#
# Inputs:
# GH_TOKEN — token with `pull-requests: read`
# AI_REVIEW_LOG_TOKEN — fine-grained PAT scoped to ai-review-log
# with `issues: write` (optional — missing
# skips logging with a warning)
# PR_NUMBER — pull request number
# PR_URL — html URL of the PR
# REVIEW_STATUS — outcome of the Claude review step
# (success / failure / cancelled / etc.)
# REPO_SHORT — short repo name (e.g. "harper")
# GITHUB_REPOSITORY — owner/repo of the PR's repo
# GITHUB_RUN_ID — current Actions run ID (for staleness
# guard)
# RUNNER_TEMP — runner temp dir (where the agent's
# optional run-notes file lives)
set -uo pipefail

# Logging is optional: without the ai-review-log token there is nothing
# to do, so leave with a warning instead of failing the job.
[ -n "${AI_REVIEW_LOG_TOKEN:-}" ] || {
  echo "::warning::AI_REVIEW_LOG_TOKEN secret not set; skipping log entry."
  exit 0
}

# When this workflow job started. Used to filter out stale Claude
# review comments from previous runs so a cancelled in-flight run
# (e.g. from a force-push) doesn't re-log a prior run's content as
# a fresh finding.
JOB_STARTED=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" --jq '.run_started_at // empty')

# Fetch the marker'd review comment via raw API. We can't use
# `gh pr view --json comments` because (a) it doesn't expose
# `updated_at` (which we need below for the staleness guard now
# that comments are edited in place), and (b) we need the marker
# filter to ignore `@claude` mention responses that share the
# `claude[bot]` identity.
#
# `--paginate` walks every page of comments; without it only the
# first page (30 by default) is inspected and the review comment on
# a busy PR is never found. gh applies --jq per page, so the filter
# emits each matching comment as its own JSON value and a second
# `jq -s` pass slurps them and keeps the last one overall (compact,
# or nothing at all when there was no match).
CLAUDE_JSON=$(gh api --paginate \
  "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments?per_page=100" \
  --jq '.[] | select(.user.login == "claude[bot]") | select(.body | startswith("<!-- claude-review:v1 -->"))' \
  | jq -cs 'last // empty')

if [ -z "$CLAUDE_JSON" ] || [ "$CLAUDE_JSON" = "null" ]; then
  echo "No marker'd Claude review comment found on PR #$PR_NUMBER (review_status=$REVIEW_STATUS); skipping log."
  exit 0
fi

# Pull the two fields we need out of the comment JSON. For the
# timestamp, `updated_at` wins over `created_at`: review comments are
# edited in place across runs, so creation time says nothing about
# when the current content was written.
CLAUDE_BODY=$(jq -r '.body // empty' <<< "$CLAUDE_JSON")
CLAUDE_AT=$(jq -r '.updated_at // .created_at // empty' <<< "$CLAUDE_JSON")

if [ -z "$CLAUDE_BODY" ]; then
  echo "Claude review comment had empty body; skipping log."
  exit 0
fi

# Staleness guard. Both values are UTC ISO-8601 timestamps of the same
# shape, so a plain string comparison orders them correctly. Only
# applied when both timestamps are known.
if [ -n "$JOB_STARTED" ] && [ -n "$CLAUDE_AT" ]; then
  if [ "$CLAUDE_AT" \< "$JOB_STARTED" ]; then
    echo "::notice::Latest Claude review comment update ($CLAUDE_AT) predates this job's start ($JOB_STARTED); skipping to avoid re-logging stale content."
    exit 0
  fi
fi

# review_count_part <review-body>
#   Prints the count portion of the log title for a review body:
#     - "no blockers" when the sentinel phrase `no blockers found`
#       appears anywhere (case-insensitive). Anywhere-match is needed
#       because the concise prompt's `Reviewed; no blockers found.`
#       does not start with the phrase, and is safe because the phrase
#       is a deliberate output from the prompt.
#     - otherwise "<N> finding(s) — triage pending", where N is the
#       number of lines starting with `### <digit>`.
#
#   The body is fed to grep via here-strings rather than
#   `printf … | grep -q`: under `set -o pipefail`, grep -q exiting on
#   an early match can kill the still-writing printf with SIGPIPE,
#   which makes the pipeline "fail" and would misreport a clean review
#   as having findings.
review_count_part() {
  local body="$1"
  local findings

  if grep -qi 'no blockers found' <<< "$body"; then
    printf '%s\n' "no blockers"
    return 0
  fi

  # grep -c prints 0 and exits 1 when nothing matches; that is a valid
  # count here, not an error.
  findings=$(grep -c '^### [0-9]' <<< "$body" || true)
  printf '%s\n' "${findings} finding(s) — triage pending"
}

COUNT_PART=$(review_count_part "$CLAUDE_BODY")

# A non-success review step may have been cut short; say so in the
# title so the entry is not mistaken for a complete review.
if [ "$REVIEW_STATUS" = "success" ]; then
  TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART"
else
  TITLE="[$REPO_SHORT] PR #$PR_NUMBER: $COUNT_PART (review $REVIEW_STATUS — may be incomplete)"
fi

# Log entry: a metadata header followed by the verbatim review body.
BODY=$(printf '**Source:** %s\n**Repo:** %s\n**PR:** #%s\n**Model:** claude-sonnet-4-6\n**Phase:** baseline\n**Review job status:** %s\n**Date:** %s\n\n---\n\n%s\n' \
  "$PR_URL" "$REPO_SHORT" "$PR_NUMBER" "$REVIEW_STATUS" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$CLAUDE_BODY")

# Optional structured run notes written by the agent to a fixed path
# under $RUNNER_TEMP. They are appended to the log entry only — this is
# how verbose context reaches the log issue without bloating the PR
# comment. A missing file simply means the run recorded nothing.
NOTES_FILE="${RUNNER_TEMP:-/tmp}/claude-review-notes.md"
if [ ! -f "$NOTES_FILE" ]; then
  echo "No run notes file at $NOTES_FILE — skipping notes append"
else
  NOTES_CONTENT=$(< "$NOTES_FILE")
  # Separate the notes from the review body with a horizontal rule.
  BODY=$(printf '%s\n\n---\n\n%s\n' "$BODY" "$NOTES_CONTENT")
  echo "Appended $(wc -c < "$NOTES_FILE") bytes of run notes from $NOTES_FILE"
fi

# Each PR gets exactly one ai-review-log issue. Its title always begins
# with `[<repo>] PR #<N>:`; only the text after the colon changes from
# run to run, so the prefix is a stable lookup key.
# The issues LIST endpoint is used instead of search because search is
# eventually consistent — a second review run shortly after the first
# could otherwise miss the issue the first run just created.
TITLE_PREFIX="[$REPO_SHORT] PR #$PR_NUMBER:"

LOG_API_HEADERS=(
  -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN"
  -H "Accept: application/vnd.github+json"
  -H "X-GitHub-Api-Version: 2022-11-28"
)
LOG_ISSUE_LIST_URL="https://api.github.com/repos/HarperFast/ai-review-log/issues?labels=repo:$REPO_SHORT&state=all&per_page=100&sort=created&direction=desc"

# Newest-first list filtered by the repo label; take the first issue
# whose title carries this PR's prefix (empty when there is none).
EXISTING_NUMBER=$(curl -sS "${LOG_API_HEADERS[@]}" "$LOG_ISSUE_LIST_URL" \
  | jq -r --arg prefix "$TITLE_PREFIX" \
    '[.[] | select(.title | startswith($prefix))] | first | .number // empty')

# ai_log_request <method> <url> <response-file> <json-payload>
#   Sends one authenticated request to the GitHub API, stores the
#   response body in <response-file> and prints the HTTP status code.
ai_log_request() {
  curl -sS -o "$3" -w '%{http_code}' -X "$1" \
    -H "Authorization: Bearer $AI_REVIEW_LOG_TOKEN" \
    -H "Accept: application/vnd.github+json" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    "$2" \
    -d "$4"
}

if [ -z "$EXISTING_NUMBER" ] || [ "$EXISTING_NUMBER" = "null" ]; then
  # First review logged for this PR: open a fresh issue carrying the
  # repo label plus the initial verdict/phase labels.
  CREATE_PAYLOAD=$(jq -nc \
    --arg title "$TITLE" \
    --arg repo_label "repo:$REPO_SHORT" \
    --arg body "$BODY" \
    '{title: $title, body: $body, labels: [$repo_label, "verdict:pending", "phase:baseline"]}')

  HTTP=$(ai_log_request POST \
    https://api.github.com/repos/HarperFast/ai-review-log/issues \
    /tmp/ai-log-resp.json \
    "$CREATE_PAYLOAD")

  if [ "$HTTP" -ge 200 ] && [ "$HTTP" -lt 300 ]; then
    ISSUE_URL=$(jq -r '.html_url' /tmp/ai-log-resp.json)
    echo "Logged review to new issue: $ISSUE_URL"
  else
    # Best-effort: surface the failure as a warning, never fail the job.
    echo "::warning::ai-review-log POST failed (HTTP $HTTP):"
    cat /tmp/ai-log-resp.json
  fi
else
  # An issue for this PR already exists: add this run as a new comment,
  # then retitle the issue so it reflects the latest run. The retitle
  # is secondary — a successful comment alone counts as logged.
  COMMENT_PAYLOAD=$(jq -nc --arg body "$BODY" '{body: $body}')
  HTTP_C=$(ai_log_request POST \
    "https://api.github.com/repos/HarperFast/ai-review-log/issues/$EXISTING_NUMBER/comments" \
    /tmp/ai-log-comment-resp.json \
    "$COMMENT_PAYLOAD")

  PATCH_PAYLOAD=$(jq -nc --arg title "$TITLE" '{title: $title}')
  HTTP_T=$(ai_log_request PATCH \
    "https://api.github.com/repos/HarperFast/ai-review-log/issues/$EXISTING_NUMBER" \
    /tmp/ai-log-patch-resp.json \
    "$PATCH_PAYLOAD")

  if [ "$HTTP_C" -ge 200 ] && [ "$HTTP_C" -lt 300 ]; then
    COMMENT_URL=$(jq -r '.html_url' /tmp/ai-log-comment-resp.json)
    echo "Logged review as comment on existing issue: $COMMENT_URL"
  else
    echo "::warning::ai-review-log comment POST failed (HTTP $HTTP_C):"
    cat /tmp/ai-log-comment-resp.json
  fi

  if [ "$HTTP_T" -lt 200 ] || [ "$HTTP_T" -ge 300 ]; then
    echo "::warning::ai-review-log title PATCH failed (HTTP $HTTP_T):"
    cat /tmp/ai-log-patch-resp.json
  fi
fi
37 changes: 37 additions & 0 deletions .github/scripts/parse-claude-mention.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Decide whether to proceed with an `@claude` mention and which model
# to use, based on the comment body. Driven by claude-mention.yml's
# "Parse mention" step.
#
# Rules (the precision gate; the job-level `if:` is a cheap
# pre-filter that only checks substring containment):
# 1. `@claude` must be the FIRST non-whitespace token (word-
# boundary after) — rules out `@claudette`, inline prose
# mentions ("saw @claude's fix"), and quoted replies
# (`> @claude ...`) where the reply is addressing a human.
# 2. Case-insensitive word-boundary `deep` anywhere in the body
# escalates to Opus. Sonnet is the default.
#
# Inputs:
# BODY — comment body (verbatim)
# GITHUB_OUTPUT — output file path
#
# Outputs (to $GITHUB_OUTPUT):
# proceed=true|false
# model=claude-opus-4-7|claude-sonnet-4-6 (only when proceed=true)
set -uo pipefail

# The comment body is handed to grep with here-strings, not
# `printf … | grep -q`. Under `set -o pipefail`, grep -q exiting as
# soon as it sees a match can kill a still-writing printf with SIGPIPE;
# the pipeline then counts as failed and a long comment that DOES match
# would be treated as a non-match (e.g. `deep` silently ignored).

# Gate: `@claude` must be the first non-whitespace token of the whole
# body. -z makes grep treat the entire body as one record so that \A
# anchors at the very start and \s* may span leading blank lines.
if ! grep -Pqz '\A\s*@claude\b' <<< "$BODY"; then
  echo "Comment does not start with @claude; skipping."
  echo "proceed=false" >> "$GITHUB_OUTPUT"
  exit 0
fi

# Model selection: the standalone word `deep` (any case) anywhere in
# the body escalates to Opus; otherwise Sonnet is used.
if grep -Piq '\bdeep\b' <<< "$BODY"; then
  echo "model=claude-opus-4-7" >> "$GITHUB_OUTPUT"
  echo "Selected claude-opus-4-7 (deep requested)"
else
  echo "model=claude-sonnet-4-6" >> "$GITHUB_OUTPUT"
  echo "Selected claude-sonnet-4-6 (default)"
fi
echo "proceed=true" >> "$GITHUB_OUTPUT"
Loading
Loading