From 8c42325921e3cb18b0f04509ef9da790b01e1c31 Mon Sep 17 00:00:00 2001 From: Val Alexander <68980965+BunsDev@users.noreply.github.com> Date: Wed, 20 May 2026 11:21:31 -0500 Subject: [PATCH 1/2] chore(script): extend check_ai_attribution to scan commit messages The file-content scan only catches attribution checked in to tracked files. Agents that follow the default `Co-Authored-By: ...` git trailer slip past it. Scan commit-message bodies of commits ahead of the base ref (default origin/main, override via AI_ATTRIBUTION_BASE_REF) against the same patterns. Output prefixes commit-message findings with `[commit-msg <sha>]` so they're distinguishable from file-content hits at a glance. The scan no-ops when no usable base ref exists (fresh clone, detached HEAD on the base itself), preserving the existing behavior for that case. Use --pretty=tformat:%H so each SHA is newline-terminated for the while-read loop (--pretty=format:%H omits the trailing newline and the last record would not be read). --- script/check_ai_attribution | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/script/check_ai_attribution b/script/check_ai_attribution index 34b5ee63..3bf37b06 100755 --- a/script/check_ai_attribution +++ b/script/check_ai_attribution @@ -33,6 +33,42 @@ for pattern in "${patterns[@]}"; do "$pattern" . >> "$tmp" || true done +# Commit-message scan. The file-content scan above only catches attribution that +# lives in tracked files; an agent that follows the default `Co-Authored-By: ...` +# trailer for git commits would slip past it. Scan commit-message bodies of +# commits ahead of the base ref (default: origin/main) against the same patterns. +# +# Override the base ref with AI_ATTRIBUTION_BASE_REF for CI or non-standard +# setups. Skips silently when no usable base ref exists (fresh clone, detached +# HEAD on the base itself, etc.) so the existing file-scan behavior is preserved. 
+if git rev-parse --git-dir >/dev/null 2>&1; then + base_ref="${AI_ATTRIBUTION_BASE_REF:-}" + if [[ -z "$base_ref" ]]; then + for candidate in origin/main main origin/master master; do + if git rev-parse --verify --quiet "$candidate" >/dev/null; then + base_ref="$candidate" + break + fi + done + fi + + if [[ -n "$base_ref" ]] && git rev-parse --verify --quiet "$base_ref" >/dev/null; then + while IFS= read -r sha; do + [[ -z "$sha" ]] && continue + body="$(git log -1 --pretty=format:%B "$sha")" + for pattern in "${patterns[@]}"; do + # Emit one prefixed line per matching message line. + if matched="$(printf '%s\n' "$body" | rg --pcre2 --no-line-number --no-filename "$pattern" 2>/dev/null)"; then + while IFS= read -r line; do + [[ -z "$line" ]] && continue + printf '[commit-msg %s] %s\n' "$sha" "$line" >> "$tmp" + done <<< "$matched" + fi + done + done < <(git log "$base_ref..HEAD" --pretty=tformat:%H 2>/dev/null || true) + fi +fi + if [[ ! -s "$tmp" ]]; then echo "AI attribution guard passed." exit 0 From 0163b36ca382aa6e1e4bdc41af0bb5f6274e6652 Mon Sep 17 00:00:00 2001 From: Val Alexander <68980965+BunsDev@users.noreply.github.com> Date: Wed, 20 May 2026 20:21:01 -0500 Subject: [PATCH 2/2] fix: harden AI_ATTRIBUTION_BASE_REF against option injection --- script/check_ai_attribution | 39 ++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/script/check_ai_attribution b/script/check_ai_attribution index 3bf37b06..5a45ad29 100755 --- a/script/check_ai_attribution +++ b/script/check_ai_attribution @@ -52,20 +52,31 @@ if git rev-parse --git-dir >/dev/null 2>&1; then done fi - if [[ -n "$base_ref" ]] && git rev-parse --verify --quiet "$base_ref" >/dev/null; then - while IFS= read -r sha; do - [[ -z "$sha" ]] && continue - body="$(git log -1 --pretty=format:%B "$sha")" - for pattern in "${patterns[@]}"; do - # Emit one prefixed line per matching message line. 
- if matched="$(printf '%s\n' "$body" | rg --pcre2 --no-line-number --no-filename "$pattern" 2>/dev/null)"; then - while IFS= read -r line; do - [[ -z "$line" ]] && continue - printf '[commit-msg %s] %s\n' "$sha" "$line" >> "$tmp" - done <<< "$matched" - fi - done - done < <(git log "$base_ref..HEAD" --pretty=tformat:%H 2>/dev/null || true) + if [[ -n "$base_ref" ]]; then + # Reject refs that start with '-' to prevent option injection + if [[ "$base_ref" == -* ]]; then + echo "AI_ATTRIBUTION_BASE_REF must not start with '-': $base_ref" >&2 + exit 1 + fi + + # Resolve to a commit SHA; no `--` here (rev-parse would treat the ref as a path) + base_sha=$(git rev-parse --verify --quiet "${base_ref}^{commit}" 2>/dev/null) || base_sha="" + + if [[ -n "$base_sha" ]]; then + while IFS= read -r sha; do + [[ -z "$sha" ]] && continue + body="$(git log -1 --pretty=format:%B "$sha")" + for pattern in "${patterns[@]}"; do + # Emit one prefixed line per matching message line. + if matched="$(printf '%s\n' "$body" | rg --pcre2 --no-line-number --no-filename "$pattern" 2>/dev/null)"; then + while IFS= read -r line; do + [[ -z "$line" ]] && continue + printf '[commit-msg %s] %s\n' "$sha" "$line" >> "$tmp" + done <<< "$matched" + fi + done + done < <(git log "$base_sha..HEAD" --pretty=tformat:%H 2>/dev/null || true) + fi fi fi