Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions .github/scripts/authorize-claude-workflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Decide whether the trigger (PR author, comment author, labeler) is
# authorized to spawn a Claude workflow on this repo. Driven by the
# `authorize` job in claude-review.yml / claude-mention.yml /
# claude-issue-to-pr.yml.
#
# Trust set: every `@HarperFast/<team>` handle in this repo's
# `.github/CODEOWNERS`. Same set as the people we trust to review code,
# aligned by construction. Falls back to `@HarperFast/developers` if
# CODEOWNERS is missing, empty, unparseable, or contains no HarperFast
# handles. External-org handles in CODEOWNERS are deliberately ignored
# — only HarperFast members are admitted.
#
# Inputs:
# USERS_TO_CHECK — newline-separated logins; ALL must pass.
# Empty / whitespace-only entries are skipped.
# ADMIT_CLAUDE_BOT — "true" admits `claude[bot]` without a team
# check (used by claude-review for AI-authored
# PRs from the issue-to-PR pipeline). Anything
# else requires team membership for every user.
# DEFAULT_TOKEN — token for the CODEOWNERS read (typically
# $GITHUB_TOKEN; needs `contents: read`).
# ORG_TOKEN — token for `orgs/.../teams/.../memberships/...`
# (App-installation token with `Members: Read`,
# scoped to this `authorize` job only).
# GITHUB_REPOSITORY — owner/repo (auto-set by GitHub Actions).
# GITHUB_OUTPUT — output file path.
#
# Outputs (to $GITHUB_OUTPUT):
# authorized=true|false
set -uo pipefail

# Resolve the trust set from CODEOWNERS. The default token reads the
# workflow repo's own .github/CODEOWNERS via the contents API.
# Anything missing / empty / unparseable / containing no HarperFast
# handles falls back to the default team.
CODEOWNERS=$(GH_TOKEN="$DEFAULT_TOKEN" gh api \
"repos/${GITHUB_REPOSITORY}/contents/.github/CODEOWNERS" \
--jq '.content' 2>/dev/null | base64 -d 2>/dev/null || true)
TEAMS=$(printf '%s' "$CODEOWNERS" | grep -oE '@HarperFast/[a-zA-Z0-9_-]+' | sort -u | sed 's|@HarperFast/||' || true)

if [ -z "$TEAMS" ]; then
echo "::notice::No @HarperFast/<team> handles found in .github/CODEOWNERS (missing, empty, or only external orgs). Defaulting to developers."
TEAMS="developers"
fi

# Fail closed if USERS_TO_CHECK is empty or whitespace-only. The
# main loop below skips empty entries with `[ -z "$user" ] && continue`
# and would otherwise fall through to `authorized=true` if there was
# nothing to check. An authorize job that forgot to set USERS_TO_CHECK
# (or a malicious change that removed it) must NOT silently admit
# every event — refuse here.
if [ -z "${USERS_TO_CHECK//[[:space:]]/}" ]; then
echo "::error::USERS_TO_CHECK is empty or whitespace-only — denying by default. The authorize job must explicitly pass at least one login (PR author, commenter, labeler, etc.)."
echo "authorized=false" >> "$GITHUB_OUTPUT"
exit 0
fi

echo "Trust set (HarperFast teams from CODEOWNERS):"
for t in $TEAMS; do echo " - @HarperFast/$t"; done

# is_authorized <login>
# Admits claude[bot] iff ADMIT_CLAUDE_BOT=true; otherwise tries each
# team in the trust set in order. Returns 0 on the first hit.
is_authorized() {
local user="$1"

if [ "${ADMIT_CLAUDE_BOT:-false}" = "true" ] && [ "$user" = "claude[bot]" ]; then
echo " → admitted: claude[bot]"
return 0
fi

for team in $TEAMS; do
# /orgs/{org}/teams/{team_slug}/memberships/{username}
# returns 200 for active members, 404 otherwise.
if GH_TOKEN="$ORG_TOKEN" gh api "orgs/HarperFast/teams/${team}/memberships/${user}" --silent >/dev/null 2>&1; then
echo " → admitted via @HarperFast/${team} membership"
return 0
fi
done

echo " → not a member of any HarperFast team in the trust set"
return 1
}

while IFS= read -r raw_user; do
user="$(printf '%s' "$raw_user" | awk '{$1=$1;print}')"
[ -z "$user" ] && continue
echo "Checking: $user"
if ! is_authorized "$user"; then
echo "User '$user' not authorized. Skipping the gated job."
echo "authorized=false" >> "$GITHUB_OUTPUT"
exit 0
fi
done <<< "${USERS_TO_CHECK:-}"

echo "authorized=true" >> "$GITHUB_OUTPUT"
106 changes: 106 additions & 0 deletions .github/scripts/validate-auth-gate-invariants.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env bash
# Validate that the AI workflow auth gate structure is preserved
# across all `claude-*.yml` workflows. STRUCTURAL lint, not a semantic
# test — catches the obvious attacks (delete the authorize job, drop
# the `needs:` dependency, broaden permissions, change the
# if-expression to a tautology). Subtle attacks (e.g., modifying the
# bash logic inside the auth check to admit everyone) are out of
# scope for this validator and are caught by CODEOWNERS review on
# `.github/` changes.
#
# Defense in depth: branch-protection on `main` should make this
# workflow's job a REQUIRED status check.
#
# Inputs (none — runs in the workflow checkout). Validates:
# .github/workflows/claude-*.yml
#
# Exit code:
# 0 all workflows pass
# 1 any check failed (errors emitted as ::error::)
set -uo pipefail

fail() {
echo "::error::$1"
exit 1
}

# yq is pre-installed on ubuntu-latest runners.
command -v yq >/dev/null || fail "yq not available on runner"

shopt -s nullglob
files=(.github/workflows/claude-*.yml)
if [ "${#files[@]}" -eq 0 ]; then
echo "No claude-*.yml workflows found; nothing to validate."
exit 0
fi

for f in "${files[@]}"; do
echo ""
echo "=== Validating $f ==="

# 1. The authorize job exists.
yq -e '.jobs.authorize' "$f" >/dev/null \
|| fail "$f: missing 'authorize' job"

# 2. authorize.outputs.authorized is wired to some step output.
output_expr=$(yq -r '.jobs.authorize.outputs.authorized // ""' "$f")
[ -n "$output_expr" ] \
|| fail "$f: authorize job has no outputs.authorized"
echo "$output_expr" | grep -q 'steps\..*\.outputs\.authorized' \
|| fail "$f: authorize.outputs.authorized must come from a step output (got: $output_expr)"

# 3. authorize uses actions/create-github-app-token (pinned to a SHA).
app_token_step=$(yq -r '.jobs.authorize.steps[] | select(.uses != null) | .uses' "$f" | grep '^actions/create-github-app-token@' || true)
[ -n "$app_token_step" ] \
|| fail "$f: authorize doesn't use actions/create-github-app-token"
echo "$app_token_step" | grep -qE '@[0-9a-f]{40}( |$)' \
|| fail "$f: actions/create-github-app-token must be pinned to a 40-char SHA (got: $app_token_step)"

# 4. authorize.permissions doesn't grant any write-level scope.
write_perms=$(yq -r '.jobs.authorize.permissions | (.[] // "") | select(. == "write")' "$f" 2>/dev/null || true)
[ -z "$write_perms" ] \
|| fail "$f: authorize.permissions grants 'write' on at least one scope — auth job must be read-only"

# 5. Required secrets are referenced (the auth check can't work without them).
grep -q 'HARPERFAST_AI_CLIENT_ID' "$f" \
|| fail "$f: HARPERFAST_AI_CLIENT_ID secret not referenced"
grep -q 'HARPERFAST_AI_APP_PRIVATE_KEY' "$f" \
|| fail "$f: HARPERFAST_AI_APP_PRIVATE_KEY secret not referenced"

# 6. The authorize job sets USERS_TO_CHECK on at least one of its
# steps. The auth script (`authorize-claude-workflow.sh`) fails
# closed if USERS_TO_CHECK is empty, but the workflow still
# shouldn't ship without it — make the omission a structural
# error rather than a silent runtime denial. Defense in depth
# against a PR that drops the env var thinking the script will
# "do the right thing".
users_to_check=$(yq -r '[.jobs.authorize.steps[].env.USERS_TO_CHECK // empty] | .[0] // ""' "$f" 2>/dev/null)
[ -n "$users_to_check" ] \
|| fail "$f: authorize job has no step setting USERS_TO_CHECK env var — the auth script needs at least one login to check (PR author, commenter, labeler, etc.)"

# 7. Every non-authorize job has `needs: authorize` and a strict
# if-expression of exactly: needs.authorize.outputs.authorized == 'true'
# (whitespace normalized). Stricter than substring match —
# rules out tautologies like `... || true`.
other_jobs=$(yq -r '.jobs | keys | .[]' "$f" | grep -v '^authorize$' || true)
[ -n "$other_jobs" ] \
|| fail "$f: no non-authorize job found — workflow has nothing gated"

for j in $other_jobs; do
needs=$(yq -r ".jobs.${j}.needs // \"\"" "$f")
[ "$needs" = "authorize" ] \
|| fail "$f: job '$j' must have 'needs: authorize' (got: $needs)"

if_expr=$(yq -r ".jobs.${j}.if // \"\"" "$f")
# Normalize whitespace and quotes for the comparison.
normalized=$(echo "$if_expr" | tr -s ' ' | tr -d "\n")
expected="needs.authorize.outputs.authorized == 'true'"
[ "$normalized" = "$expected" ] \
|| fail "$f: job '$j' if: must be exactly \"$expected\" — no compound expressions, no tautologies (got: $if_expr)"
done

echo " ✓ $f passed"
done

echo ""
echo "All claude-*.yml workflows pass auth gate invariants."
36 changes: 36 additions & 0 deletions .github/workflows/auth-gate-invariants.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Auth gate invariants

# Validates that the AI workflow auth gate structure is preserved.
# Runs on any PR that touches a `claude-*.yml` workflow file or this
# validator itself.
#
# This is a STRUCTURAL lint, not a semantic test. It catches the
# obvious attacks (delete the authorize job, drop the `needs:`
# dependency, broaden permissions, change the if-expression to a
# tautology). Subtle attacks (e.g., modifying the bash logic inside
# the auth check to admit everyone) are caught by CODEOWNERS review,
# which requires both @HarperFast/developers and @HarperFast/devops
# approval on `.github/` changes.
#
# Defense in depth — make this workflow a REQUIRED status check on
# `main` via branch protection so PRs can't merge without it passing.

on:
pull_request:
paths:
- '.github/workflows/claude-*.yml'
- '.github/workflows/auth-gate-invariants.yml'
- '.github/scripts/validate-auth-gate-invariants.sh'

jobs:
validate:
runs-on: ubuntu-latest
timeout-minutes: 2
permissions:
contents: read
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1

- name: Validate auth gate structure
run: bash .github/scripts/validate-auth-gate-invariants.sh
69 changes: 58 additions & 11 deletions .github/workflows/claude-issue-to-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,65 @@ concurrency:
cancel-in-progress: false

jobs:
authorize:
# Single source of truth for "is this label trigger allowed to
# spawn Claude on this repo?". The `work` job below has ONE `if:`
# that depends on this — no step-level guards, no individual user
# list.
#
# Cheap pre-filter at job level: explicit whitelist of allowed
# labels (NOT `startsWith('claude-fix:')`, which would match typoed
# variants like `claude-fix:typos`).
#
# We check the LABELER (`github.actor`), not the issue author. The
# labeler must already have at least triage permission to apply a
# label; a maintainer labeling an external-author issue is a
# legitimate way to invoke the agent on community reports.
#
# Trust set: the @HarperFast teams listed in this repo's
# `.github/CODEOWNERS`. Defaults to `@HarperFast/developers` if the
# file is missing, empty, or has no HarperFast handles. External-
# org handles are deliberately ignored.
#
# Required (organization-level) secrets:
# - HARPERFAST_AI_CLIENT_ID (the App's Client ID, like Iv23li…)
# - HARPERFAST_AI_APP_PRIVATE_KEY (.pem file contents)
if: contains(fromJSON('["claude-fix:typo","claude-fix:docs","claude-fix:deps","claude-fix:bug"]'), github.event.label.name)
runs-on: ubuntu-latest
timeout-minutes: 1
permissions:
contents: read # for the .github/CODEOWNERS fetch
outputs:
authorized: ${{ steps.check.outputs.authorized }}
steps:
- name: Mint org-read token
id: app-token
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
with:
client-id: ${{ secrets.HARPERFAST_AI_CLIENT_ID }}
private-key: ${{ secrets.HARPERFAST_AI_APP_PRIVATE_KEY }}
owner: HarperFast

- name: Checkout (for CODEOWNERS read)
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
with:
sparse-checkout: |
.github/CODEOWNERS
.github/scripts/authorize-claude-workflow.sh
sparse-checkout-cone-mode: false

- name: Check labeler authorization
id: check
env:
DEFAULT_TOKEN: ${{ github.token }}
ORG_TOKEN: ${{ steps.app-token.outputs.token }}
ADMIT_CLAUDE_BOT: 'false'
USERS_TO_CHECK: ${{ github.actor }}
run: bash .github/scripts/authorize-claude-workflow.sh

work:
# Only trigger for `claude-fix:*` labels AND when the issue was
# opened by a HarperFast org member or collaborator. Labels added
# to issues opened by outside contributors are ignored to keep
# the trigger surface tight during calibration.
# Explicit whitelist of allowed labels — `startsWith('claude-fix:')`
# would match typoed variants (`claude-fix:typos`, `claude-fix:foo`)
# and the agent would waste turns trying to interpret them.
if: >-
contains(fromJSON('["claude-fix:typo","claude-fix:docs","claude-fix:deps","claude-fix:bug"]'), github.event.label.name) &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'),
github.event.issue.author_association)
needs: authorize
if: needs.authorize.outputs.authorized == 'true'
runs-on: ubuntu-latest
timeout-minutes: 25
permissions:
Expand Down
63 changes: 54 additions & 9 deletions .github/workflows/claude-mention.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,61 @@ concurrency:
cancel-in-progress: false

jobs:
authorize:
# Single source of truth for "is this commenter allowed to trigger
# Claude on this repo?". The `work` job below has ONE `if:` that
# depends on this — no step-level guards, no individual user list.
#
# Cheap pre-filter at job level: comment must mention `@claude`.
# The first-non-whitespace-token precision check is in the work
# job's `Parse mention` step.
#
# Trust set: the @HarperFast teams listed in this repo's
# `.github/CODEOWNERS`. Defaults to `@HarperFast/developers` if the
# file is missing, empty, or has no HarperFast handles. External-
# org handles are deliberately ignored — we only admit HarperFast
# members.
#
# Required (organization-level) secrets:
# - HARPERFAST_AI_CLIENT_ID (the App's Client ID, like Iv23li…)
# - HARPERFAST_AI_APP_PRIVATE_KEY (.pem file contents)
if: contains(github.event.comment.body, '@claude')
runs-on: ubuntu-latest
timeout-minutes: 1
permissions:
# contents: read for the .github/CODEOWNERS fetch via default token.
contents: read
outputs:
authorized: ${{ steps.check.outputs.authorized }}
steps:
- name: Mint org-read token
id: app-token
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
with:
client-id: ${{ secrets.HARPERFAST_AI_CLIENT_ID }}
private-key: ${{ secrets.HARPERFAST_AI_APP_PRIVATE_KEY }}
owner: HarperFast

- name: Checkout (for CODEOWNERS read)
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
with:
sparse-checkout: |
.github/CODEOWNERS
.github/scripts/authorize-claude-workflow.sh
sparse-checkout-cone-mode: false

- name: Check commenter authorization
id: check
env:
DEFAULT_TOKEN: ${{ github.token }}
ORG_TOKEN: ${{ steps.app-token.outputs.token }}
ADMIT_CLAUDE_BOT: 'false'
USERS_TO_CHECK: ${{ github.event.comment.user.login }}
run: bash .github/scripts/authorize-claude-workflow.sh

work:
# Belt-and-suspenders gate:
# 1. Comment must contain the trigger phrase.
# 2. Commenter must be HarperFast org OWNER / MEMBER or a repo
# COLLABORATOR (the action also performs its own write-access
# check on the actor as a fallback).
if: >-
contains(github.event.comment.body, '@claude') &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'),
github.event.comment.author_association)
needs: authorize
if: needs.authorize.outputs.authorized == 'true'
runs-on: ubuntu-latest
timeout-minutes: 20
permissions:
Expand Down
Loading
Loading