From 2bc1ff471324f7197ab499de363d4cf6999e7dcb Mon Sep 17 00:00:00 2001
From: agi-bootstrap
Date: Thu, 26 Mar 2026 23:34:27 -0700
Subject: [PATCH] feat: add Gemini CLI as second opinion backend for /codex

The /codex skill now supports both OpenAI Codex CLI and Google Gemini CLI
as the second opinion provider. Backend is auto-detected at runtime
(codex first, then gemini) or explicitly configured via:

  gstack-config set second_opinion_backend gemini|codex|auto

New bin/gstack-second-opinion dispatcher centralizes all CLI-specific logic
so resolvers and templates stay backend-agnostic. Supports exec, review,
resume, detect, and name subcommands.

All resolvers (review.ts, design.ts, constants.ts) and the codex/SKILL.md.tmpl
updated to use the dispatcher. Generated SKILL.md files regenerated.

Follow-up: autoplan/SKILL.md.tmpl still has inline codex exec calls for
dual-voice sections that need separate migration.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 bin/gstack-second-opinion      | 299 +++++++++++++++++++++++++++
 codex/SKILL.md                 | 293 ++++++++++++---------------------
 codex/SKILL.md.tmpl            | 283 ++++++++++++-------------------
 design-consultation/SKILL.md   |  31 ++--
 design-review/SKILL.md         |  29 ++--
 office-hours/SKILL.md          |  50 +++---
 plan-ceo-review/SKILL.md       |  25 +--
 plan-design-review/SKILL.md    |  27 +--
 plan-eng-review/SKILL.md       |  25 +--
 review/SKILL.md                |  69 ++++----
 scripts/resolvers/constants.ts |  25 ++-
 scripts/resolvers/design.ts    |  65 ++++----
 scripts/resolvers/review.ts    | 112 ++++++-------
 ship/SKILL.md                  |  69 ++++----
 14 files changed, 789 insertions(+), 613 deletions(-)
 create mode 100755 bin/gstack-second-opinion

diff --git a/bin/gstack-second-opinion b/bin/gstack-second-opinion
new file mode 100755
index 000000000..a763d03eb
--- /dev/null
+++ b/bin/gstack-second-opinion
@@ -0,0 +1,299 @@
+#!/usr/bin/env bash
+# gstack-second-opinion — backend-agnostic dispatcher for /codex second opinion
+# Supports: OpenAI Codex CLI, Google Gemini CLI
+# Usage: gstack-second-opinion <detect|name|exec|review|resume> [args]
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+CONFIG_CMD="$SCRIPT_DIR/gstack-config"
+
+# ─── Helpers ─────────────────────────────────────────────────────────
+
+# Report that no backend CLI is installed (with install hints) on stderr.
+# Always returns 1 so callers can propagate the failure.
+no_backend_error() {
+  echo "ERROR: No second opinion CLI found." >&2
+  echo "Install one of:" >&2
+  echo " npm install -g @openai/codex" >&2
+  echo " npm install -g @google/gemini-cli" >&2
+  return 1
+}
+
+# ─── Backend Detection ───────────────────────────────────────────────
+
+# Print the backend to use on stdout: "codex", "gemini" or "none".
+# The preference is read with `gstack-config get second_opinion_backend`; a
+# failed lookup, an empty value or an unrecognized value all mean "auto"
+# (codex if installed, otherwise gemini). If an explicitly configured backend
+# is not installed, an error goes to stderr and "none" is printed.
+detect_backend() {
+  local pref
+  pref=$("$CONFIG_CMD" get second_opinion_backend 2>/dev/null || echo "auto")
+  pref=${pref:-auto}
+
+  case "$pref" in
+    codex)
+      if command -v codex &>/dev/null; then
+        echo "codex"
+      else
+        echo "ERROR: second_opinion_backend is set to 'codex' but codex is not installed." >&2
+        echo "Install: npm install -g @openai/codex" >&2
+        echo "none"
+      fi
+      ;;
+    gemini)
+      if command -v gemini &>/dev/null; then
+        echo "gemini"
+      else
+        echo "ERROR: second_opinion_backend is set to 'gemini' but gemini is not installed." >&2
+        echo "Install: npm install -g @google/gemini-cli" >&2
+        echo "none"
+      fi
+      ;;
+    auto|*)
+      if command -v codex &>/dev/null; then
+        echo "codex"
+      elif command -v gemini &>/dev/null; then
+        echo "gemini"
+      else
+        echo "none"
+      fi
+      ;;
+  esac
+}
+
+# Print the upper-case display name of the detected backend.
+backend_name() {
+  local backend
+  backend=$(detect_backend)
+  case "$backend" in
+    codex) echo "CODEX" ;;
+    gemini) echo "GEMINI" ;;
+    *) echo "NONE" ;;
+  esac
+}
+
+# ─── Exec Subcommand ─────────────────────────────────────────────────
+
+# Run a one-shot prompt against the detected backend.
+# Usage: exec <prompt> [--effort LEVEL] [--web-search] [-m MODEL] [--json]
+# --effort and --web-search are only passed to codex. Any argument that is not
+# a known flag is taken as the prompt; if several are given, the last one wins.
+do_exec() {
+  local prompt=""
+  local effort="high"
+  local web_search=false
+  local model=""
+  local json_output=false
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      # ${2:?...} aborts with a readable message when a flag's value is
+      # missing, instead of the bare "unbound variable" error from set -u.
+      --effort) effort="${2:?--effort requires a value}"; shift 2 ;;
+      --web-search) web_search=true; shift ;;
+      -m) model="${2:?-m requires a value}"; shift 2 ;;
+      --json) json_output=true; shift ;;
+      *) prompt="$1"; shift ;;
+    esac
+  done
+
+  if [ -z "$prompt" ]; then
+    echo "ERROR: exec requires a prompt." >&2
+    return 1
+  fi
+
+  local backend
+  backend=$(detect_backend)
+  local repo_root
+  repo_root=$(git rev-parse --show-toplevel 2>/dev/null || pwd)
+
+  case "$backend" in
+    codex)
+      local cmd=(codex exec "$prompt" -C "$repo_root" -s read-only)
+      cmd+=(-c "model_reasoning_effort=\"$effort\"")
+      [ "$web_search" = true ] && cmd+=(--enable web_search_cached)
+      [ "$json_output" = true ] && cmd+=(--json)
+      [ -n "$model" ] && cmd+=(-m "$model")
+      "${cmd[@]}"
+      ;;
+    gemini)
+      local cmd=(gemini -p "$prompt" --approval-mode plan)
+      if [ "$json_output" = true ]; then
+        cmd+=(-o stream-json)
+      else
+        cmd+=(-o text)
+      fi
+      [ -n "$model" ] && cmd+=(-m "$model")
+      "${cmd[@]}"
+      ;;
+    none)
+      no_backend_error || return 1
+      ;;
+  esac
+}
+
+# ─── Review Subcommand ───────────────────────────────────────────────
+
+# Review the changes against a base branch.
+# Usage: review --base BRANCH [--effort LEVEL] [--web-search] [-m MODEL] [instructions]
+# codex runs its built-in `codex review`; gemini is sent the diff (first 500
+# lines) inside a review prompt. --effort and --web-search are codex-only.
+do_review() {
+  local base=""
+  local effort="high"
+  local web_search=false
+  local model=""
+  local instructions=""
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --base) base="${2:?--base requires a value}"; shift 2 ;;
+      --effort) effort="${2:?--effort requires a value}"; shift 2 ;;
+      --web-search) web_search=true; shift ;;
+      -m) model="${2:?-m requires a value}"; shift 2 ;;
+      *) instructions="$1"; shift ;;
+    esac
+  done
+
+  # Without a base, codex would be given --base "" and gemini would diff
+  # against "origin/", so fail early with a clear message.
+  if [ -z "$base" ]; then
+    echo "ERROR: review requires --base <branch>." >&2
+    return 1
+  fi
+
+  local backend
+  backend=$(detect_backend)
+  local repo_root
+  repo_root=$(git rev-parse --show-toplevel 2>/dev/null || pwd)
+
+  case "$backend" in
+    codex)
+      # Resulting order: codex review [instructions] --base <base> <options>
+      local cmd=(codex review)
+      [ -n "$instructions" ] && cmd+=("$instructions")
+      cmd+=(--base "$base")
+      cmd+=(-c "model_reasoning_effort=\"$effort\"")
+      [ "$web_search" = true ] && cmd+=(--enable web_search_cached)
+      [ -n "$model" ] && cmd+=(-m "$model")
+      cd "$repo_root" && "${cmd[@]}"
+      ;;
+    gemini)
+      # Gemini has no built-in review command. Construct the prompt with the diff.
+      local diff_content
+      diff_content=$(git diff "origin/$base" 2>/dev/null || git diff "$base" 2>/dev/null || echo "(no diff available)")
+
+      # Truncate large diffs
+      local diff_lines
+      diff_lines=$(wc -l <<<"$diff_content" | tr -d ' ')
+      if [ "$diff_lines" -gt 500 ]; then
+        # sed consumes all of its input, so the writer never gets SIGPIPE.
+        # (`echo ... | head -500` can, and under `set -o pipefail` that aborts
+        # the script on exactly the large diffs this branch is meant for.)
+        diff_content=$(sed -n '1,500p' <<<"$diff_content")
+        diff_content="$diff_content
+
+[TRUNCATED: showing first 500 of $diff_lines lines]"
+      fi
+
+      local review_prompt="You are a senior code reviewer. Review this git diff. For each issue found, classify it:
+- [P1] Critical: bugs, security holes, data loss risks, race conditions
+- [P2] Important: performance issues, missing error handling, code smells, maintainability
+
+Be direct. Be terse. No compliments. Just the problems.
+If the code is clean, say so in one line."
+
+      [ -n "$instructions" ] && review_prompt="$review_prompt
+
+Additional focus: $instructions"
+
+      review_prompt="$review_prompt
+
+GIT DIFF:
+$diff_content"
+
+      local cmd=(gemini -p "$review_prompt" --approval-mode plan -o text)
+      [ -n "$model" ] && cmd+=(-m "$model")
+      "${cmd[@]}"
+      ;;
+    none)
+      no_backend_error || return 1
+      ;;
+  esac
+}
+
+# ─── Resume Subcommand ───────────────────────────────────────────────
+
+# Continue an earlier conversation.
+# Usage: resume <prompt> [--session ID] [--effort LEVEL] [-m MODEL]
+# codex resumes the given session, or its most recent one when no session id
+# is supplied. gemini always resumes its latest session; --session and
+# --effort are not passed to it.
+do_resume() {
+  local prompt=""
+  local effort="medium"
+  local model=""
+  local session_id=""
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      # ${2?...} without the colon: an explicitly empty id is accepted and handled below.
+      --session) session_id="${2?--session requires a value}"; shift 2 ;;
+      --effort) effort="${2:?--effort requires a value}"; shift 2 ;;
+      -m) model="${2:?-m requires a value}"; shift 2 ;;
+      *) prompt="$1"; shift ;;
+    esac
+  done
+
+  if [ -z "$prompt" ]; then
+    echo "ERROR: resume requires a prompt." >&2
+    return 1
+  fi
+
+  local backend
+  backend=$(detect_backend)
+  local repo_root
+  repo_root=$(git rev-parse --show-toplevel 2>/dev/null || pwd)
+
+  case "$backend" in
+    codex)
+      local cmd=(codex exec resume)
+      if [ -n "$session_id" ]; then
+        cmd+=("$session_id")
+      else
+        # An empty string is not a usable session id; fall back to the most
+        # recent session instead of passing "" to codex.
+        cmd+=(--last)
+      fi
+      cmd+=("$prompt" -C "$repo_root" -s read-only)
+      cmd+=(-c "model_reasoning_effort=\"$effort\"")
+      cmd+=(--enable web_search_cached --json)
+      [ -n "$model" ] && cmd+=(-m "$model")
+      "${cmd[@]}"
+      ;;
+    gemini)
+      local cmd=(gemini --resume latest -p "$prompt" --approval-mode plan -o text)
+      [ -n "$model" ] && cmd+=(-m "$model")
+      "${cmd[@]}"
+      ;;
+    none)
+      no_backend_error || return 1
+      ;;
+  esac
+}
+
+# ─── Main Dispatch ────────────────────────────────────────────────────
+
+case "${1:-}" in
+  detect) echo "BACKEND: $(detect_backend)" ;;
+  name) backend_name ;;
+  exec) shift; do_exec "$@" ;;
+  review) shift; do_review "$@" ;;
+  resume) shift; do_resume "$@" ;;
+  *)
+    echo "Usage: gstack-second-opinion <detect|name|exec|review|resume> [args]" >&2
+    exit 1
+    ;;
+esac
diff --git a/codex/SKILL.md b/codex/SKILL.md
index 471280374..91986d0a2 100644
--- a/codex/SKILL.md
+++ b/codex/SKILL.md
@@ -1,13 +1,14 @@
 ---
 name: codex
 preamble-tier: 3
-version: 1.0.0
+version: 2.0.0
 description: |
-  OpenAI Codex CLI wrapper — three modes. Code review: independent diff review via
-  codex review with pass/fail gate. Challenge: adversarial mode that tries to break
-  your code. Consult: ask codex anything with session continuity for follow-ups.
-  The "200 IQ autistic developer" second opinion. Use when asked to "codex review",
-  "codex challenge", "ask codex", "second opinion", or "consult codex".
+  Multi-AI second opinion — three modes. Code review: independent diff review via
+  a second AI (Codex or Gemini CLI) with pass/fail gate. Challenge: adversarial mode
+  that tries to break your code. Consult: ask a different AI anything with session
+  continuity for follow-ups. The "200 IQ autistic developer" second opinion.
+  Use when asked to "codex review", "codex challenge", "ask codex", "second opinion",
+  or "consult codex".
 allowed-tools:
   - Bash
   - Read
@@ -363,23 +364,30 @@ branch name wherever the instructions say "the base branch" or `<base>`.
 
 # /codex — Multi-AI Second Opinion
 
-You are running the `/codex` skill. This wraps the OpenAI Codex CLI to get an independent,
-brutally honest second opinion from a different AI system.
+You are running the `/codex` skill. This wraps an external AI CLI (OpenAI Codex or
+Google Gemini, auto-detected) to get an independent, brutally honest second opinion
+from a different AI system.
-Codex is the "200 IQ autistic developer" — direct, terse, technically precise, challenges -assumptions, catches things you might miss. Present its output faithfully, not summarized. +The second opinion is the "200 IQ autistic developer" — direct, terse, technically +precise, challenges assumptions, catches things you might miss. Present its output +faithfully, not summarized. --- -## Step 0: Check codex binary +## Step 0: Detect second opinion backend ```bash -CODEX_BIN=$(which codex 2>/dev/null || echo "") -[ -z "$CODEX_BIN" ] && echo "NOT_FOUND" || echo "FOUND: $CODEX_BIN" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +echo "BACKEND: $_SO_BACKEND NAME: $_SO_NAME" ``` -If `NOT_FOUND`: stop and tell the user: -"Codex CLI not found. Install it: `npm install -g @openai/codex` or see https://github.com/openai/codex" +If `_SO_BACKEND` is `none`: stop and tell the user: +"No second opinion CLI found. Install one: +- OpenAI Codex: `npm install -g @openai/codex` (requires OpenAI subscription) +- Google Gemini: `npm install -g @google/gemini-cli` (free with Google account) + +Then set your preference: `gstack-config set second_opinion_backend gemini` (or `codex` or `auto`)" --- @@ -394,7 +402,7 @@ Parse the user's input to determine which mode to run: `git diff origin/ --stat 2>/dev/null | tail -1 || git diff --stat 2>/dev/null | tail -1` - If a diff exists, use AskUserQuestion: ``` - Codex detected changes against the base branch. What should it do? + $_SO_NAME detected changes against the base branch. What should it do? 
A) Review the diff (code review with pass/fail gate) B) Challenge the diff (adversarial — try to break it) C) Something else — I'll provide a prompt @@ -404,48 +412,43 @@ Parse the user's input to determine which mode to run: If no project-scoped match, fall back to: `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1` but warn the user: "Note: this plan may be from a different project." - If a plan file exists, offer to review it - - Otherwise, ask: "What would you like to ask Codex?" + - Otherwise, ask: "What would you like to ask $_SO_NAME?" 4. `/codex ` — **Consult mode** (Step 2C), where the remaining text is the prompt -**Reasoning effort override:** If the user's input contains `--xhigh` anywhere, -note it and remove it from the prompt text before passing to Codex. When `--xhigh` -is present, use `model_reasoning_effort="xhigh"` for all modes regardless of the -per-mode default below. Otherwise, use the per-mode defaults: -- Review (2A): `high` — bounded diff input, needs thoroughness -- Challenge (2B): `high` — adversarial but bounded by diff -- Consult (2C): `medium` — large context, interactive, needs speed +**Reasoning effort override (Codex only):** If the user's input contains `--xhigh` anywhere, +note it and remove it from the prompt text. When `--xhigh` is present, use +`--effort xhigh` for all modes. Otherwise, use the per-mode defaults: +- Review (2A): `high` +- Challenge (2B): `high` +- Consult (2C): `medium` + +Note: reasoning effort flags only apply to Codex backend. They are silently ignored for Gemini. --- ## Step 2A: Review Mode -Run Codex code review against the current branch diff. +Run second opinion code review against the current branch diff. 1. Create temp files for output capture: ```bash -TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt) +TMPERR=$(mktemp /tmp/so-err-XXXXXX.txt) ``` 2. 
Run the review (5-minute timeout): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -cd "$_REPO_ROOT" -codex review --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +~/.claude/skills/gstack/bin/gstack-second-opinion review --base <base> --effort high --web-search 2>"$TMPERR" ``` -If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. - Use `timeout: 300000` on the Bash call. If the user provided custom instructions -(e.g., `/codex review focus on security`), pass them as the prompt argument: +(e.g., `/codex review focus on security`), pass them as the last argument: ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -cd "$_REPO_ROOT" -codex review "focus on security" --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +~/.claude/skills/gstack/bin/gstack-second-opinion review --base <base> --effort high --web-search "focus on security" 2>"$TMPERR" ``` -3. Capture the output. Then parse cost from stderr: +3. Capture the output. Then read stderr: ```bash -grep "tokens used" "$TMPERR" 2>/dev/null || echo "tokens: unknown" +cat "$TMPERR" 2>/dev/null ``` 4. Determine gate verdict by checking the review output for critical findings. @@ -455,11 +458,11 @@ grep "tokens used" "$TMPERR" 2>/dev/null || echo "tokens: unknown" 5. Present the output: ``` -CODEX SAYS (code review): +$_SO_NAME SAYS (code review): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ -GATE: PASS Tokens: 14,331 | Est. 
cost: ~$0.12 +GATE: PASS ``` or @@ -473,8 +476,8 @@ GATE: FAIL (N critical findings) ``` CROSS-MODEL ANALYSIS: - Both found: [findings that overlap between Claude and Codex] - Only Codex found: [findings unique to Codex] + Both found: [findings that overlap between Claude and $_SO_NAME] + Only $_SO_NAME found: [findings unique to the second opinion] Only Claude found: [findings unique to Claude's /review] Agreement rate: X% (N/M total unique findings overlap) ``` @@ -564,8 +567,8 @@ plan's living status. ## Step 2B: Challenge (Adversarial) Mode -Codex tries to break your code — finding edge cases, race conditions, security holes, -and failure modes that a normal review would miss. +The second opinion tries to break your code — finding edge cases, race conditions, +security holes, and failure modes that a normal review would miss. 1. Construct the adversarial prompt. If the user provided a focus area (e.g., `/codex challenge security`), include it: @@ -576,75 +579,51 @@ Default prompt (no focus): With focus (e.g., "security"): "Review the changes on this branch against the base branch. Run `git diff origin/` to see the diff. Focus specifically on SECURITY. Your job is to find every way an attacker could exploit this code. Think about injection vectors, auth bypasses, privilege escalation, data exposure, and timing attacks. Be adversarial." -2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout): - -If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. +2. 
Run the second opinion (5-minute timeout): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json 2>/dev/null | PYTHONUNBUFFERED=1 python3 -u -c " -import sys, json -for line in sys.stdin: - line = line.strip() - if not line: continue - try: - obj = json.loads(line) - t = obj.get('type','') - if t == 'item.completed' and 'item' in obj: - item = obj['item'] - itype = item.get('type','') - text = item.get('text','') - if itype == 'reasoning' and text: - print(f'[codex thinking] {text}', flush=True) - print(flush=True) - elif itype == 'agent_message' and text: - print(text, flush=True) - elif itype == 'command_execution': - cmd = item.get('command','') - if cmd: print(f'[codex ran] {cmd}', flush=True) - elif t == 'turn.completed': - usage = obj.get('usage',{}) - tokens = usage.get('input_tokens',0) + usage.get('output_tokens',0) - if tokens: print(f'\ntokens used: {tokens}', flush=True) - except: pass -" -``` - -This parses codex's JSONL events to extract reasoning traces, tool calls, and the final -response. The `[codex thinking]` lines show what codex reasoned through before its answer. - -3. Present the full streamed output: - -``` -CODEX SAYS (adversarial challenge): +~/.claude/skills/gstack/bin/gstack-second-opinion exec "" --effort high --web-search 2>/dev/null +``` + +Use `timeout: 300000` on the Bash call. + +3. Present the full output: + +``` +$_SO_NAME SAYS (adversarial challenge): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ -Tokens: N | Est. cost: ~$X.XX ``` --- ## Step 2C: Consult Mode -Ask Codex anything about the codebase. Supports session continuity for follow-ups. +Ask the second opinion anything about the codebase. Supports session continuity for follow-ups. 1. 
**Check for existing session:** + +For Codex backend: ```bash -cat .context/codex-session-id 2>/dev/null || echo "NO_SESSION" +cat .context/second-opinion-session-id 2>/dev/null || echo "NO_SESSION" ``` -If a session file exists (not `NO_SESSION`), use AskUserQuestion: +For Gemini backend: +```bash +gemini --list-sessions 2>/dev/null | head -5 || echo "NO_SESSIONS" ``` -You have an active Codex conversation from earlier. Continue it or start fresh? -A) Continue the conversation (Codex remembers the prior context) + +If a session exists, use AskUserQuestion: +``` +You have an active $_SO_NAME conversation from earlier. Continue it or start fresh? +A) Continue the conversation ($_SO_NAME remembers the prior context) B) Start a new conversation ``` 2. Create temp files: ```bash -TMPRESP=$(mktemp /tmp/codex-resp-XXXXXX.txt) -TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt) +TMPERR=$(mktemp /tmp/so-err-XXXXXX.txt) ``` 3. **Plan review auto-detection:** If the user's prompt is about reviewing a plan, @@ -653,17 +632,16 @@ or if plan files exist and the user said `/codex` with no arguments: ls -t ~/.claude/plans/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1 ``` If no project-scoped match, fall back to `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1` -but warn: "Note: this plan may be from a different project — verify before sending to Codex." +but warn: "Note: this plan may be from a different project — verify before sending." -**IMPORTANT — embed content, don't reference path:** Codex runs sandboxed to the repo -root (`-C`) and cannot access `~/.claude/plans/` or any files outside the repo. You MUST -read the plan file yourself and embed its FULL CONTENT in the prompt below. Do NOT tell -Codex the file path or ask it to read the plan file — it will waste 10+ tool calls -searching and fail. +**IMPORTANT — embed content, don't reference path:** The second opinion CLI runs +sandboxed and cannot access `~/.claude/plans/` or files outside the repo. 
You MUST +read the plan file yourself and embed its FULL CONTENT in the prompt below. Do NOT +tell it the file path or ask it to read the plan file. Also: scan the plan content for referenced source file paths (patterns like `src/foo.ts`, `lib/bar.py`, paths containing `/` that exist in the repo). If found, list them in the -prompt so Codex reads them directly instead of discovering them via rg/find. +prompt so the second opinion reads them directly. Prepend the persona to the user's prompt: "You are a brutally honest technical reviewer. Review this plan for: logical gaps and @@ -675,129 +653,72 @@ Also review these source files referenced in the plan: " -4. Run codex exec with **JSONL output** to capture reasoning traces (5-minute timeout): - -If the user passed `--xhigh`, use `"xhigh"` instead of `"medium"`. +4. Run the second opinion (5-minute timeout): For a **new session:** ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " -import sys, json -for line in sys.stdin: - line = line.strip() - if not line: continue - try: - obj = json.loads(line) - t = obj.get('type','') - if t == 'thread.started': - tid = obj.get('thread_id','') - if tid: print(f'SESSION_ID:{tid}', flush=True) - elif t == 'item.completed' and 'item' in obj: - item = obj['item'] - itype = item.get('type','') - text = item.get('text','') - if itype == 'reasoning' and text: - print(f'[codex thinking] {text}', flush=True) - print(flush=True) - elif itype == 'agent_message' and text: - print(text, flush=True) - elif itype == 'command_execution': - cmd = item.get('command','') - if cmd: print(f'[codex ran] {cmd}', flush=True) - elif t == 'turn.completed': - usage = obj.get('usage',{}) - tokens = usage.get('input_tokens',0) + usage.get('output_tokens',0) - if tokens: print(f'\ntokens used: 
{tokens}', flush=True) - except: pass -" +~/.claude/skills/gstack/bin/gstack-second-opinion exec "" --effort medium --web-search 2>"$TMPERR" ``` For a **resumed session** (user chose "Continue"): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec resume "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " - -" +~/.claude/skills/gstack/bin/gstack-second-opinion resume "" --effort medium 2>"$TMPERR" ``` -5. Capture session ID from the streamed output. The parser prints `SESSION_ID:` - from the `thread.started` event. Save it for follow-ups: +5. If using Codex backend with `--json` output, capture session ID from the streamed output. +Save it for follow-ups: ```bash mkdir -p .context ``` -Save the session ID printed by the parser (the line starting with `SESSION_ID:`) -to `.context/codex-session-id`. +Save the session ID to `.context/second-opinion-session-id`. -6. Present the full streamed output: +6. Present the full output: ``` -CODEX SAYS (consult): +$_SO_NAME SAYS (consult): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ -Tokens: N | Est. cost: ~$X.XX Session saved — run /codex again to continue this conversation. ``` -7. After presenting, note any points where Codex's analysis differs from your own - understanding. If there is a disagreement, flag it: +7. After presenting, note any points where the second opinion's analysis differs from + your own understanding. If there is a disagreement, flag it: "Note: Claude Code disagrees on X because Y." --- -## Model & Reasoning - -**Model:** No model is hardcoded — codex uses whatever its current default is (the frontier -agentic coding model). This means as OpenAI ships newer models, /codex automatically -uses them. If the user wants a specific model, pass `-m` through to codex. 
- -**Reasoning effort (per-mode defaults):** -- **Review (2A):** `high` — bounded diff input, needs thoroughness but not max tokens -- **Challenge (2B):** `high` — adversarial but bounded by diff size -- **Consult (2C):** `medium` — large context (plans, codebase), interactive, needs speed - -`xhigh` uses ~23x more tokens than `high` and causes 50+ minute hangs on large context -tasks (OpenAI issues #8545, #8402, #6931). Users can override with `--xhigh` flag -(e.g., `/codex review --xhigh`) when they want maximum reasoning and are willing to wait. - -**Web search:** All codex commands use `--enable web_search_cached` so Codex can look up -docs and APIs during review. This is OpenAI's cached index — fast, no extra cost. - -If the user specifies a model (e.g., `/codex review -m gpt-5.1-codex-max` -or `/codex challenge -m gpt-5.2`), pass the `-m` flag through to codex. - ---- - -## Cost Estimation - -Parse token count from stderr. Codex prints `tokens used\nN` to stderr. +## Model -Display as: `Tokens: N` +**Model:** No model is hardcoded — the second opinion CLI uses whatever its current +default is (the frontier model for that provider). This means as providers ship newer +models, /codex automatically uses them. If the user wants a specific model, pass `-m` +through to the CLI. -If token count is not available, display: `Tokens: unknown` +If the user specifies a model (e.g., `/codex review -m gemini-2.5-pro` +or `/codex challenge -m gpt-5.2`), pass the `-m` flag through. --- ## Error Handling -- **Binary not found:** Detected in Step 0. Stop with install instructions. -- **Auth error:** Codex prints an auth error to stderr. Surface the error: - "Codex authentication failed. Run `codex login` in your terminal to authenticate via ChatGPT." +- **Binary not found:** Detected in Step 0. Stop with install instructions for both CLIs. 
+- **Auth error:** Surface the error with backend-specific guidance: + - Codex: "Run `codex login` in your terminal to authenticate via ChatGPT." + - Gemini: "Run `gemini` interactively in your terminal to authenticate via Google." - **Timeout:** If the Bash call times out (5 min), tell the user: - "Codex timed out after 5 minutes. The diff may be too large or the API may be slow. Try again or use a smaller scope." -- **Empty response:** If `$TMPRESP` is empty or doesn't exist, tell the user: - "Codex returned no response. Check stderr for errors." + "$_SO_NAME timed out after 5 minutes. The diff may be too large or the API may be slow. Try again or use a smaller scope." +- **Empty response:** "$_SO_NAME returned no response. Check stderr for errors." - **Session resume failure:** If resume fails, delete the session file and start fresh. --- ## Important Rules -- **Never modify files.** This skill is read-only. Codex runs in read-only sandbox mode. -- **Present output verbatim.** Do not truncate, summarize, or editorialize Codex's output - before showing it. Show it in full inside the CODEX SAYS block. +- **Never modify files.** This skill is read-only. The second opinion runs in read-only mode. +- **Present output verbatim.** Do not truncate, summarize, or editorialize the output + before showing it. Show it in full inside the $_SO_NAME SAYS block. - **Add synthesis after, not instead of.** Any Claude commentary comes after the full output. -- **5-minute timeout** on all Bash calls to codex (`timeout: 300000`). -- **No double-reviewing.** If the user already ran `/review`, Codex provides a second - independent opinion. Do not re-run Claude Code's own review. +- **5-minute timeout** on all Bash calls (`timeout: 300000`). +- **No double-reviewing.** If the user already ran `/review`, the second opinion provides + an independent opinion. Do not re-run Claude Code's own review. 
diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index 60247abd9..a94c01b4a 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -1,13 +1,14 @@ --- name: codex preamble-tier: 3 -version: 1.0.0 +version: 2.0.0 description: | - OpenAI Codex CLI wrapper — three modes. Code review: independent diff review via - codex review with pass/fail gate. Challenge: adversarial mode that tries to break - your code. Consult: ask codex anything with session continuity for follow-ups. - The "200 IQ autistic developer" second opinion. Use when asked to "codex review", - "codex challenge", "ask codex", "second opinion", or "consult codex". + Multi-AI second opinion — three modes. Code review: independent diff review via + a second AI (Codex or Gemini CLI) with pass/fail gate. Challenge: adversarial mode + that tries to break your code. Consult: ask a different AI anything with session + continuity for follow-ups. The "200 IQ autistic developer" second opinion. + Use when asked to "codex review", "codex challenge", "ask codex", "second opinion", + or "consult codex". allowed-tools: - Bash - Read @@ -23,23 +24,30 @@ allowed-tools: # /codex — Multi-AI Second Opinion -You are running the `/codex` skill. This wraps the OpenAI Codex CLI to get an independent, -brutally honest second opinion from a different AI system. +You are running the `/codex` skill. This wraps an external AI CLI (OpenAI Codex or +Google Gemini, auto-detected) to get an independent, brutally honest second opinion +from a different AI system. -Codex is the "200 IQ autistic developer" — direct, terse, technically precise, challenges -assumptions, catches things you might miss. Present its output faithfully, not summarized. +The second opinion is the "200 IQ autistic developer" — direct, terse, technically +precise, challenges assumptions, catches things you might miss. Present its output +faithfully, not summarized. 
--- -## Step 0: Check codex binary +## Step 0: Detect second opinion backend ```bash -CODEX_BIN=$(which codex 2>/dev/null || echo "") -[ -z "$CODEX_BIN" ] && echo "NOT_FOUND" || echo "FOUND: $CODEX_BIN" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +echo "BACKEND: $_SO_BACKEND NAME: $_SO_NAME" ``` -If `NOT_FOUND`: stop and tell the user: -"Codex CLI not found. Install it: `npm install -g @openai/codex` or see https://github.com/openai/codex" +If `_SO_BACKEND` is `none`: stop and tell the user: +"No second opinion CLI found. Install one: +- OpenAI Codex: `npm install -g @openai/codex` (requires OpenAI subscription) +- Google Gemini: `npm install -g @google/gemini-cli` (free with Google account) + +Then set your preference: `gstack-config set second_opinion_backend gemini` (or `codex` or `auto`)" --- @@ -54,7 +62,7 @@ Parse the user's input to determine which mode to run: `git diff origin/ --stat 2>/dev/null | tail -1 || git diff --stat 2>/dev/null | tail -1` - If a diff exists, use AskUserQuestion: ``` - Codex detected changes against the base branch. What should it do? + $_SO_NAME detected changes against the base branch. What should it do? A) Review the diff (code review with pass/fail gate) B) Challenge the diff (adversarial — try to break it) C) Something else — I'll provide a prompt @@ -64,48 +72,43 @@ Parse the user's input to determine which mode to run: If no project-scoped match, fall back to: `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1` but warn the user: "Note: this plan may be from a different project." - If a plan file exists, offer to review it - - Otherwise, ask: "What would you like to ask Codex?" + - Otherwise, ask: "What would you like to ask $_SO_NAME?" 4. 
`/codex ` — **Consult mode** (Step 2C), where the remaining text is the prompt -**Reasoning effort override:** If the user's input contains `--xhigh` anywhere, -note it and remove it from the prompt text before passing to Codex. When `--xhigh` -is present, use `model_reasoning_effort="xhigh"` for all modes regardless of the -per-mode default below. Otherwise, use the per-mode defaults: -- Review (2A): `high` — bounded diff input, needs thoroughness -- Challenge (2B): `high` — adversarial but bounded by diff -- Consult (2C): `medium` — large context, interactive, needs speed +**Reasoning effort override (Codex only):** If the user's input contains `--xhigh` anywhere, +note it and remove it from the prompt text. When `--xhigh` is present, use +`--effort xhigh` for all modes. Otherwise, use the per-mode defaults: +- Review (2A): `high` +- Challenge (2B): `high` +- Consult (2C): `medium` + +Note: reasoning effort flags only apply to Codex backend. They are silently ignored for Gemini. --- ## Step 2A: Review Mode -Run Codex code review against the current branch diff. +Run second opinion code review against the current branch diff. 1. Create temp files for output capture: ```bash -TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt) +TMPERR=$(mktemp /tmp/so-err-XXXXXX.txt) ``` 2. Run the review (5-minute timeout): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -cd "$_REPO_ROOT" -codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +~/.claude/skills/gstack/bin/gstack-second-opinion review --base --effort high --web-search 2>"$TMPERR" ``` -If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. - Use `timeout: 300000` on the Bash call. 
If the user provided custom instructions -(e.g., `/codex review focus on security`), pass them as the prompt argument: +(e.g., `/codex review focus on security`), pass them as the last argument: ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -cd "$_REPO_ROOT" -codex review "focus on security" --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +~/.claude/skills/gstack/bin/gstack-second-opinion review --base <base> --effort high --web-search "focus on security" 2>"$TMPERR" ``` -3. Capture the output. Then parse cost from stderr: +3. Capture the output. Then read stderr: ```bash -grep "tokens used" "$TMPERR" 2>/dev/null || echo "tokens: unknown" +cat "$TMPERR" 2>/dev/null ``` 4. Determine gate verdict by checking the review output for critical findings. @@ -115,11 +118,11 @@ grep "tokens used" "$TMPERR" 2>/dev/null || echo "tokens: unknown" 5. Present the output: ``` -CODEX SAYS (code review): +$_SO_NAME SAYS (code review): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ -GATE: PASS Tokens: 14,331 | Est. cost: ~$0.12 +GATE: PASS ``` or @@ -133,8 +136,8 @@ GATE: FAIL (N critical findings) ``` CROSS-MODEL ANALYSIS: - Both found: [findings that overlap between Claude and Codex] - Only Codex found: [findings unique to Codex] + Both found: [findings that overlap between Claude and $_SO_NAME] + Only $_SO_NAME found: [findings unique to $_SO_NAME] Only Claude found: [findings unique to Claude's /review] Agreement rate: X% (N/M total unique findings overlap) ``` @@ -159,8 +162,8 @@ rm -f "$TMPERR" ## Step 2B: Challenge (Adversarial) Mode -Codex tries to break your code — finding edge cases, race conditions, security holes, -and failure modes that a normal review would miss. 
+The second opinion tries to break your code — finding edge cases, race conditions, +security holes, and failure modes that a normal review would miss. 1. Construct the adversarial prompt. If the user provided a focus area (e.g., `/codex challenge security`), include it: @@ -171,75 +174,51 @@ Default prompt (no focus): With focus (e.g., "security"): "Review the changes on this branch against the base branch. Run `git diff origin/` to see the diff. Focus specifically on SECURITY. Your job is to find every way an attacker could exploit this code. Think about injection vectors, auth bypasses, privilege escalation, data exposure, and timing attacks. Be adversarial." -2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout): - -If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. +2. Run the second opinion (5-minute timeout): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json 2>/dev/null | PYTHONUNBUFFERED=1 python3 -u -c " -import sys, json -for line in sys.stdin: - line = line.strip() - if not line: continue - try: - obj = json.loads(line) - t = obj.get('type','') - if t == 'item.completed' and 'item' in obj: - item = obj['item'] - itype = item.get('type','') - text = item.get('text','') - if itype == 'reasoning' and text: - print(f'[codex thinking] {text}', flush=True) - print(flush=True) - elif itype == 'agent_message' and text: - print(text, flush=True) - elif itype == 'command_execution': - cmd = item.get('command','') - if cmd: print(f'[codex ran] {cmd}', flush=True) - elif t == 'turn.completed': - usage = obj.get('usage',{}) - tokens = usage.get('input_tokens',0) + usage.get('output_tokens',0) - if tokens: print(f'\ntokens used: {tokens}', flush=True) - except: pass -" +~/.claude/skills/gstack/bin/gstack-second-opinion exec "" --effort high 
--web-search 2>/dev/null ``` -This parses codex's JSONL events to extract reasoning traces, tool calls, and the final -response. The `[codex thinking]` lines show what codex reasoned through before its answer. +Use `timeout: 300000` on the Bash call. -3. Present the full streamed output: +3. Present the full output: ``` -CODEX SAYS (adversarial challenge): +$_SO_NAME SAYS (adversarial challenge): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ -Tokens: N | Est. cost: ~$X.XX ``` --- ## Step 2C: Consult Mode -Ask Codex anything about the codebase. Supports session continuity for follow-ups. +Ask the second opinion anything about the codebase. Supports session continuity for follow-ups. 1. **Check for existing session:** + +For Codex backend: ```bash -cat .context/codex-session-id 2>/dev/null || echo "NO_SESSION" +cat .context/second-opinion-session-id 2>/dev/null || echo "NO_SESSION" ``` -If a session file exists (not `NO_SESSION`), use AskUserQuestion: +For Gemini backend: +```bash +gemini --list-sessions 2>/dev/null | head -5 || echo "NO_SESSIONS" +``` + +If a session exists, use AskUserQuestion: ``` -You have an active Codex conversation from earlier. Continue it or start fresh? -A) Continue the conversation (Codex remembers the prior context) +You have an active $_SO_NAME conversation from earlier. Continue it or start fresh? +A) Continue the conversation ($_SO_NAME remembers the prior context) B) Start a new conversation ``` 2. Create temp files: ```bash -TMPRESP=$(mktemp /tmp/codex-resp-XXXXXX.txt) -TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt) +TMPERR=$(mktemp /tmp/so-err-XXXXXX.txt) ``` 3. 
**Plan review auto-detection:** If the user's prompt is about reviewing a plan, @@ -248,17 +227,16 @@ or if plan files exist and the user said `/codex` with no arguments: ls -t ~/.claude/plans/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1 ``` If no project-scoped match, fall back to `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1` -but warn: "Note: this plan may be from a different project — verify before sending to Codex." +but warn: "Note: this plan may be from a different project — verify before sending." -**IMPORTANT — embed content, don't reference path:** Codex runs sandboxed to the repo -root (`-C`) and cannot access `~/.claude/plans/` or any files outside the repo. You MUST -read the plan file yourself and embed its FULL CONTENT in the prompt below. Do NOT tell -Codex the file path or ask it to read the plan file — it will waste 10+ tool calls -searching and fail. +**IMPORTANT — embed content, don't reference path:** The second opinion CLI runs +sandboxed and cannot access `~/.claude/plans/` or files outside the repo. You MUST +read the plan file yourself and embed its FULL CONTENT in the prompt below. Do NOT +tell it the file path or ask it to read the plan file. Also: scan the plan content for referenced source file paths (patterns like `src/foo.ts`, `lib/bar.py`, paths containing `/` that exist in the repo). If found, list them in the -prompt so Codex reads them directly instead of discovering them via rg/find. +prompt so the second opinion reads them directly. Prepend the persona to the user's prompt: "You are a brutally honest technical reviewer. Review this plan for: logical gaps and @@ -270,129 +248,72 @@ Also review these source files referenced in the plan: " -4. Run codex exec with **JSONL output** to capture reasoning traces (5-minute timeout): - -If the user passed `--xhigh`, use `"xhigh"` instead of `"medium"`. +4. 
Run the second opinion (5-minute timeout): For a **new session:** ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " -import sys, json -for line in sys.stdin: - line = line.strip() - if not line: continue - try: - obj = json.loads(line) - t = obj.get('type','') - if t == 'thread.started': - tid = obj.get('thread_id','') - if tid: print(f'SESSION_ID:{tid}', flush=True) - elif t == 'item.completed' and 'item' in obj: - item = obj['item'] - itype = item.get('type','') - text = item.get('text','') - if itype == 'reasoning' and text: - print(f'[codex thinking] {text}', flush=True) - print(flush=True) - elif itype == 'agent_message' and text: - print(text, flush=True) - elif itype == 'command_execution': - cmd = item.get('command','') - if cmd: print(f'[codex ran] {cmd}', flush=True) - elif t == 'turn.completed': - usage = obj.get('usage',{}) - tokens = usage.get('input_tokens',0) + usage.get('output_tokens',0) - if tokens: print(f'\ntokens used: {tokens}', flush=True) - except: pass -" +~/.claude/skills/gstack/bin/gstack-second-opinion exec "" --effort medium --web-search 2>"$TMPERR" ``` For a **resumed session** (user chose "Continue"): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec resume "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " - -" +~/.claude/skills/gstack/bin/gstack-second-opinion resume "" --effort medium 2>"$TMPERR" ``` -5. Capture session ID from the streamed output. The parser prints `SESSION_ID:` - from the `thread.started` event. Save it for follow-ups: +5. If using Codex backend with `--json` output, capture session ID from the streamed output. 
+Save it for follow-ups: ```bash mkdir -p .context ``` -Save the session ID printed by the parser (the line starting with `SESSION_ID:`) -to `.context/codex-session-id`. +Save the session ID to `.context/second-opinion-session-id`. -6. Present the full streamed output: +6. Present the full output: ``` -CODEX SAYS (consult): +$_SO_NAME SAYS (consult): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ -Tokens: N | Est. cost: ~$X.XX Session saved — run /codex again to continue this conversation. ``` -7. After presenting, note any points where Codex's analysis differs from your own - understanding. If there is a disagreement, flag it: +7. After presenting, note any points where the second opinion's analysis differs from + your own understanding. If there is a disagreement, flag it: "Note: Claude Code disagrees on X because Y." --- -## Model & Reasoning - -**Model:** No model is hardcoded — codex uses whatever its current default is (the frontier -agentic coding model). This means as OpenAI ships newer models, /codex automatically -uses them. If the user wants a specific model, pass `-m` through to codex. - -**Reasoning effort (per-mode defaults):** -- **Review (2A):** `high` — bounded diff input, needs thoroughness but not max tokens -- **Challenge (2B):** `high` — adversarial but bounded by diff size -- **Consult (2C):** `medium` — large context (plans, codebase), interactive, needs speed - -`xhigh` uses ~23x more tokens than `high` and causes 50+ minute hangs on large context -tasks (OpenAI issues #8545, #8402, #6931). Users can override with `--xhigh` flag -(e.g., `/codex review --xhigh`) when they want maximum reasoning and are willing to wait. - -**Web search:** All codex commands use `--enable web_search_cached` so Codex can look up -docs and APIs during review. This is OpenAI's cached index — fast, no extra cost. 
- -If the user specifies a model (e.g., `/codex review -m gpt-5.1-codex-max` -or `/codex challenge -m gpt-5.2`), pass the `-m` flag through to codex. - ---- - -## Cost Estimation - -Parse token count from stderr. Codex prints `tokens used\nN` to stderr. +## Model -Display as: `Tokens: N` +**Model:** No model is hardcoded — the second opinion CLI uses whatever its current +default is (the frontier model for that provider). This means as providers ship newer +models, /codex automatically uses them. If the user wants a specific model, pass `-m` +through to the CLI. -If token count is not available, display: `Tokens: unknown` +If the user specifies a model (e.g., `/codex review -m gemini-2.5-pro` +or `/codex challenge -m gpt-5.2`), pass the `-m` flag through. --- ## Error Handling -- **Binary not found:** Detected in Step 0. Stop with install instructions. -- **Auth error:** Codex prints an auth error to stderr. Surface the error: - "Codex authentication failed. Run `codex login` in your terminal to authenticate via ChatGPT." +- **Binary not found:** Detected in Step 0. Stop with install instructions for both CLIs. +- **Auth error:** Surface the error with backend-specific guidance: + - Codex: "Run `codex login` in your terminal to authenticate via ChatGPT." + - Gemini: "Run `gemini` interactively in your terminal to authenticate via Google." - **Timeout:** If the Bash call times out (5 min), tell the user: - "Codex timed out after 5 minutes. The diff may be too large or the API may be slow. Try again or use a smaller scope." -- **Empty response:** If `$TMPRESP` is empty or doesn't exist, tell the user: - "Codex returned no response. Check stderr for errors." + "$_SO_NAME timed out after 5 minutes. The diff may be too large or the API may be slow. Try again or use a smaller scope." +- **Empty response:** If the command produces no output on stdout, tell the user: "$_SO_NAME returned no response. Check stderr for errors." - **Session resume failure:** If resume fails, delete the session file and start fresh. 
--- ## Important Rules -- **Never modify files.** This skill is read-only. Codex runs in read-only sandbox mode. -- **Present output verbatim.** Do not truncate, summarize, or editorialize Codex's output - before showing it. Show it in full inside the CODEX SAYS block. +- **Never modify files.** This skill is read-only. The second opinion runs in read-only mode. +- **Present output verbatim.** Do not truncate, summarize, or editorialize the output + before showing it. Show it in full inside the $_SO_NAME SAYS block. - **Add synthesis after, not instead of.** Any Claude commentary comes after the full output. -- **5-minute timeout** on all Bash calls to codex (`timeout: 300000`). -- **No double-reviewing.** If the user already ran `/review`, Codex provides a second - independent opinion. Do not re-run Claude Code's own review. +- **5-minute timeout** on all Bash calls (`timeout: 300000`). +- **No double-reviewing.** If the user already ran `/review`, the second opinion provides + an independent opinion. Do not re-run Claude Code's own review. diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index 52cef88ac..d16136bc3 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -462,18 +462,19 @@ Use AskUserQuestion: If user chooses B, skip this step and continue. -**Check Codex availability:** +**Check second opinion availability:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` -**If Codex is available**, launch both voices simultaneously: +**If a second opinion CLI is available**, launch both voices simultaneously: -1. **Codex design voice** (via Bash): +1. 
**Second opinion design voice** (via Bash): ```bash -TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Given this product context, propose a complete design direction: +TMPERR_DESIGN=$(mktemp /tmp/so-design-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Given this product context, propose a complete design direction: - Visual thesis: one sentence describing mood, material, and energy - Typography: specific font names (not defaults — no Inter/Roboto/Arial/system) + hex colors - Color system: CSS variables for background, surface, primary text, muted text, accent @@ -481,7 +482,7 @@ codex exec "Given this product context, propose a complete design direction: - Differentiation: 2 deliberate departures from category norms - Anti-slop: no purple gradients, no 3-column icon grids, no centered everything, no decorative blobs -Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_DESIGN" +Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it." --effort medium --web-search 2>"$TMPERR_DESIGN" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash @@ -498,19 +499,19 @@ Dispatch a subagent with this prompt: Be bold. Be specific. No hedging." **Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run `codex login` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged `[single-model]`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed." 
+- **Timeout:** "$_SO_NAME timed out after 5 minutes." +- **Empty response:** "$_SO_NAME returned no response." +- On any second opinion error: proceed with Claude subagent output only, tagged `[single-model]`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a `CODEX SAYS (design direction):` header. +Present second opinion output under a `$_SO_NAME SAYS (design direction):` header. Present subagent output under a `CLAUDE SUBAGENT (design direction):` header. **Synthesis:** Claude main references both Codex and subagent proposals in the Phase 3 proposal. Present: -- Areas of agreement between all three voices (Claude main + Codex + subagent) +- Areas of agreement between all three voices (Claude main + second opinion + subagent) - Genuine divergences as creative alternatives for the user to choose from -- "Codex and I agree on X. Codex suggested Y where I'm proposing Z — here's why..." +- "The second opinion and I agree on X. It suggested Y where I'm proposing Z — here's why..." **Log the result:** ```bash diff --git a/design-review/SKILL.md b/design-review/SKILL.md index 2f64917ce..7acff6b22 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -987,18 +987,19 @@ Record baseline design score and AI slop score at end of Phase 6. **Automatic:** Outside voices run automatically when Codex is available. No opt-in needed. 
-**Check Codex availability:** +**Check second opinion availability:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` -**If Codex is available**, launch both voices simultaneously: +**If a second opinion CLI is available**, launch both voices simultaneously: -1. **Codex design voice** (via Bash): +1. **Second opinion design voice** (via Bash): ```bash -TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Review the frontend source code in this repo. Evaluate against these design hard rules: +TMPERR_DESIGN=$(mktemp /tmp/so-design-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Review the frontend source code in this repo. Evaluate against these design hard rules: - Spacing: systematic (design tokens / CSS variables) or magic numbers? - Typography: expressive purposeful fonts or default stacks? - Color: CSS variables with defined system, or hardcoded hex scattered? @@ -1027,7 +1028,7 @@ HARD REJECTION — flag if ANY apply: 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout -Be specific. Reference file:line for every finding." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" +Be specific. Reference file:line for every finding." --effort high --web-search 2>"$TMPERR_DESIGN" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash @@ -1045,19 +1046,19 @@ Dispatch a subagent with this prompt: For each finding: what's wrong, severity (critical/high/medium), and the file:line." 
**Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run `codex login` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged `[single-model]`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed." +- **Timeout:** "$_SO_NAME timed out after 5 minutes." +- **Empty response:** "$_SO_NAME returned no response." +- On any second opinion error: proceed with Claude subagent output only, tagged `[single-model]`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a `CODEX SAYS (design source audit):` header. +Present second opinion output under a `$_SO_NAME SAYS (design source audit):` header. Present subagent output under a `CLAUDE SUBAGENT (design consistency):` header. **Synthesis — Litmus scorecard:** Use the same scorecard format as /plan-design-review (shown above). Fill in from both outputs. -Merge findings into the triage with `[codex]` / `[subagent]` / `[cross-model]` tags. +Merge findings into the triage with `[second-opinion]` / `[subagent]` / `[cross-model]` tags. **Log the result:** ```bash diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index bbee02fea..748e54110 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -675,20 +675,22 @@ Use AskUserQuestion to confirm. 
If the user disagrees with a premise, revise und **Binary check first — no question if unavailable:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +echo "SECOND_OPINION: $_SO_BACKEND ($_SO_NAME)" ``` -If `CODEX_NOT_AVAILABLE`: skip Phase 3.5 entirely — no message, no AskUserQuestion. Proceed directly to Phase 4. +If `_SO_BACKEND` is `none`: skip Phase 3.5 entirely — no message, no AskUserQuestion. Proceed directly to Phase 4. -If `CODEX_AVAILABLE`: use AskUserQuestion: +If a backend is available: use AskUserQuestion: -> Want a second opinion from a different AI model? Codex will independently review your problem statement, key answers, premises, and any landscape findings from this session. It hasn't seen this conversation — it gets a structured summary. Usually takes 2-5 minutes. +> Want a second opinion from a different AI model? $_SO_NAME will independently review your problem statement, key answers, premises, and any landscape findings from this session. It hasn't seen this conversation — it gets a structured summary. Usually takes 2-5 minutes. > A) Yes, get a second opinion > B) No, proceed to alternatives -If B: skip Phase 3.5 entirely. Remember that Codex did NOT run (affects design doc, founder signals, and Phase 4 below). +If B: skip Phase 3.5 entirely. Remember that the second opinion did NOT run (affects design doc, founder signals, and Phase 4 below). -**If A: Run the Codex cold read.** +**If A: Run the cold read.** 1. Assemble a structured context block from Phases 1-3: - Mode (Startup or Builder) @@ -710,12 +712,11 @@ Write the full prompt (context block + instructions) to this file. 
Use the mode- **Builder mode instructions:** "You are an independent technical advisor reading a transcript of a builder brainstorming session. [CONTEXT BLOCK HERE]. Your job: 1) What is the COOLEST version of this they haven't considered? 2) What's the ONE thing from their answers that reveals what excites them most? Quote it. 3) What existing open source project or tool gets them 50% of the way there — and what's the 50% they'd need to build? 4) If you had a weekend to build this, what would you build first? Be specific. Be direct. No preamble." -3. Run Codex: +3. Run the second opinion CLI: ```bash -TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" +TMPERR_OH=$(mktemp /tmp/so-oh-err-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "$(cat "$CODEX_PROMPT_FILE")" --effort high --web-search 2>"$TMPERR_OH" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -724,19 +725,19 @@ cat "$TMPERR_OH" rm -f "$TMPERR_OH" "$CODEX_PROMPT_FILE" ``` -**Error handling:** All errors are non-blocking — Codex second opinion is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate. Skipping second opinion." -- **Timeout:** "Codex timed out after 5 minutes. Skipping second opinion." -- **Empty response:** "Codex returned no response. Stderr: . Skipping second opinion." +**Error handling:** All errors are non-blocking — the second opinion is a quality enhancement, not a prerequisite. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed. Skipping second opinion." 
+- **Timeout:** "$_SO_NAME timed out after 5 minutes. Skipping second opinion." +- **Empty response:** "$_SO_NAME returned no response. Stderr: . Skipping second opinion." On any error, proceed to Phase 4 — do NOT fall back to a Claude subagent (this is brainstorming, not adversarial review). 4. **Presentation:** ``` -SECOND OPINION (Codex): +SECOND OPINION ($_SO_NAME): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ ``` @@ -852,29 +853,30 @@ The screenshot file at `/tmp/gstack-sketch.png` can be referenced by downstream After the wireframe is approved, offer outside design perspectives: ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` -If Codex is available, use AskUserQuestion: -> "Want outside design perspectives on the chosen approach? Codex proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." +If a second opinion CLI is available, use AskUserQuestion: +> "Want outside design perspectives on the chosen approach? $_SO_NAME proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." > > A) Yes — get outside design voices > B) No — proceed without If user chooses A, launch both voices simultaneously: -1. **Codex** (via Bash, `model_reasoning_effort="medium"`): +1. 
**Second opinion** (via Bash): ```bash -TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" +TMPERR_SKETCH=$(mktemp /tmp/so-sketch-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." --effort medium --web-search 2>"$TMPERR_SKETCH" ``` Use a 5-minute timeout (`timeout: 300000`). After completion: `cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"` 2. **Claude subagent** (via Agent tool): "For this product approach, what design direction would you recommend? What aesthetic, typography, and interaction patterns fit? What would make this approach feel inevitable to the user? Be specific — font names, hex colors, spacing values." -Present Codex output under `CODEX SAYS (design sketch):` and subagent output under `CLAUDE SUBAGENT (design direction):`. +Present second opinion output under `$_SO_NAME SAYS (design sketch):` and subagent output under `CLAUDE SUBAGENT (design direction):`. Error handling: all non-blocking. On failure, skip and continue. --- diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index 675487a20..f8b4be64c 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -1048,7 +1048,9 @@ thorough review. 
**Check tool availability:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` Use AskUserQuestion: @@ -1087,12 +1089,11 @@ compliments. Just the problems. THE PLAN: " -**If CODEX_AVAILABLE:** +**If SO_AVAILABLE:** ```bash -TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" +TMPERR_PV=$(mktemp /tmp/so-planreview-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "" --effort high --web-search 2>"$TMPERR_PV" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -1103,20 +1104,20 @@ cat "$TMPERR_PV" Present the full output verbatim: ``` -CODEX SAYS (plan review — outside voice): +$_SO_NAME SAYS (plan review — outside voice): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ ``` **Error handling:** All errors are non-blocking — the outside voice is informational. -- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \`codex login\` to authenticate." -- Timeout: "Codex timed out after 5 minutes." -- Empty response: "Codex returned no response." +- Auth failure (stderr contains "auth", "login", "unauthorized"): "$_SO_NAME auth failed." +- Timeout: "$_SO_NAME timed out after 5 minutes." +- Empty response: "$_SO_NAME returned no response." -On any Codex error, fall back to the Claude adversarial subagent. +On any error, fall back to the Claude adversarial subagent. 
-**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If SO_NOT_AVAILABLE (or second opinion errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index 31389bbc4..2e853868c 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -479,18 +479,19 @@ Use AskUserQuestion: If user chooses B, skip this step and continue. -**Check Codex availability:** +**Check second opinion availability:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` -**If Codex is available**, launch both voices simultaneously: +**If a second opinion CLI is available**, launch both voices simultaneously: -1. **Codex design voice** (via Bash): +1. **Second opinion design voice** (via Bash): ```bash -TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Read the plan file at [plan-file-path]. Evaluate this plan's UI/UX design against these criteria. +TMPERR_DESIGN=$(mktemp /tmp/so-design-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Read the plan file at [plan-file-path]. Evaluate this plan's UI/UX design against these criteria. HARD REJECTION — flag if ANY apply: 1. 
Generic SaaS card grid as first impression @@ -515,7 +516,7 @@ HARD RULES — first classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, the - APP UI: Calm surface hierarchy, dense but readable, utility language, minimal chrome - UNIVERSAL: CSS variables for colors, no default font stacks, one job per section, cards earn existence -For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. Be opinionated. No hedging." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" +For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. Be opinionated. No hedging." --effort high --web-search 2>"$TMPERR_DESIGN" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash @@ -535,13 +536,13 @@ Dispatch a subagent with this prompt: For each finding: what's wrong, severity (critical/high/medium), and the fix." **Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run `codex login` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged `[single-model]`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed." +- **Timeout:** "$_SO_NAME timed out after 5 minutes." +- **Empty response:** "$_SO_NAME returned no response." +- On any second opinion error: proceed with Claude subagent output only, tagged `[single-model]`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a `CODEX SAYS (design critique):` header. +Present second opinion output under a `$_SO_NAME SAYS (design critique):` header. 
Present subagent output under a `CLAUDE SUBAGENT (design completeness):` header. **Synthesis — Litmus scorecard:** diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index 41a29f2b5..8b8b51d4f 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -706,7 +706,9 @@ thorough review. **Check tool availability:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` Use AskUserQuestion: @@ -745,12 +747,11 @@ compliments. Just the problems. THE PLAN: " -**If CODEX_AVAILABLE:** +**If SO_AVAILABLE:** ```bash -TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" +TMPERR_PV=$(mktemp /tmp/so-planreview-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "" --effort high --web-search 2>"$TMPERR_PV" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -761,20 +762,20 @@ cat "$TMPERR_PV" Present the full output verbatim: ``` -CODEX SAYS (plan review — outside voice): +$_SO_NAME SAYS (plan review — outside voice): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ ``` **Error handling:** All errors are non-blocking — the outside voice is informational. -- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \`codex login\` to authenticate." -- Timeout: "Codex timed out after 5 minutes." -- Empty response: "Codex returned no response." 
+- Auth failure (stderr contains "auth", "login", "unauthorized"): "$_SO_NAME auth failed." +- Timeout: "$_SO_NAME timed out after 5 minutes." +- Empty response: "$_SO_NAME returned no response." -On any Codex error, fall back to the Claude adversarial subagent. +On any error, fall back to the Claude adversarial subagent. -**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If SO_NOT_AVAILABLE (or second opinion errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. diff --git a/review/SKILL.md b/review/SKILL.md index 05df971d3..67c751fb0 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -611,18 +611,19 @@ source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null) Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count, COMMIT = output of `git rev-parse --short HEAD`. -7. **Codex design voice** (optional, automatic if available): +7. **Second opinion design voice** (optional, automatic if available): ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` -If Codex is available, run a lightweight design check on the diff: +If a second opinion CLI is available, run a lightweight design check on the diff: ```bash -TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 
6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" +TMPERR_DRL=$(mktemp /tmp/so-drl-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." --effort high --web-search 2>"$TMPERR_DRL" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -632,7 +633,7 @@ cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" **Error handling:** All errors are non-blocking. On auth failure, timeout, or empty response — skip with a brief note and continue. -Present Codex output under a `CODEX (design):` header, merged with the checklist findings above. +Present output under a `$_SO_NAME (design):` header, merged with the checklist findings above. 
Include any design findings alongside the findings from Step 4. They follow the same Fix-First flow in Step 5 — AUTO-FIX for mechanical CSS fixes, ASK for everything else. @@ -952,10 +953,13 @@ Adversarial review thoroughness scales automatically based on diff size. No conf DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" # Respect old opt-out OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) echo "DIFF_SIZE: $DIFF_TOTAL" +echo "BACKEND: $_SO_BACKEND ($_SO_NAME)" echo "OLD_CFG: ${OLD_CFG:-not_set}" ``` @@ -965,8 +969,8 @@ If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. **Auto-select tier based on diff size:** - **Small (< 50 lines changed):** Skip adversarial review entirely. Print: "Small diff ($DIFF_TOTAL lines) — adversarial review skipped." Continue to the next step. -- **Medium (50–199 lines changed):** Run Codex adversarial challenge (or Claude adversarial subagent if Codex unavailable). Jump to the "Medium tier" section. -- **Large (200+ lines changed):** Run all remaining passes — Codex structured review + Claude adversarial subagent + Codex adversarial. Jump to the "Large tier" section. +- **Medium (50–199 lines changed):** Run second opinion adversarial challenge (or Claude adversarial subagent if no second opinion CLI available). Jump to the "Medium tier" section. 
+- **Large (200+ lines changed):** Run all remaining passes — second opinion structured review + Claude adversarial subagent + second opinion adversarial. Jump to the "Large tier" section. --- @@ -974,14 +978,13 @@ If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. Claude's structured review already ran. Now add a **cross-model adversarial challenge**. -**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead. +**If a second opinion CLI is available:** run the adversarial challenge. **If NOT available:** fall back to the Claude adversarial subagent instead. -**Codex adversarial:** +**Second opinion adversarial:** ```bash -TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" +TMPERR_ADV=$(mktemp /tmp/so-adv-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." --effort high --web-search 2>"$TMPERR_ADV" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). 
Do NOT use the `timeout` shell command — it doesn't exist on macOS. After the command completes, read stderr: @@ -992,11 +995,11 @@ cat "$TMPERR_ADV" Present the full output verbatim. This is informational — it never blocks shipping. **Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed." +- **Timeout:** "$_SO_NAME timed out after 5 minutes." +- **Empty response:** "$_SO_NAME returned no response. Stderr: ." -On any Codex error, fall back to the Claude adversarial subagent automatically. +On any error, fall back to the Claude adversarial subagent automatically. **Claude adversarial subagent** (fallback when Codex unavailable or errored): @@ -1013,9 +1016,9 @@ If the subagent fails or times out: "Claude adversarial subagent unavailable. Co ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. +Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "$_SO_BACKEND" if second opinion ran, "claude" if subagent ran. If both failed, do NOT persist. -**Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if Codex was used). +**Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if second opinion CLI was used). 
--- @@ -1023,26 +1026,24 @@ Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOU Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: -**1. Codex structured review (if available):** +**1. Second opinion structured review (if available):** ```bash -TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -cd "$_REPO_ROOT" -codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +TMPERR=$(mktemp /tmp/so-review-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion review --base --effort high --web-search 2>"$TMPERR" ``` -Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. +Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `$_SO_NAME SAYS (code review):` header. Check for `[P1]` markers: found → `GATE: FAIL`, not found → `GATE: PASS`. If GATE is FAIL, use AskUserQuestion: ``` -Codex found N critical issues in the diff. +$_SO_NAME found N critical issues in the diff. A) Investigate and fix now (recommended) B) Continue — review will still complete ``` -If A: address the findings. Re-run `codex review` to verify. +If A: address the findings. Re-run the second opinion review to verify. Read stderr for errors (same error handling as medium tier). @@ -1050,15 +1051,15 @@ After stderr: `rm -f "$TMPERR"` **2. Claude adversarial subagent:** Dispatch a subagent with the adversarial prompt (same prompt as medium tier). This always runs regardless of Codex availability. -**3. Codex adversarial challenge (if available):** Run `codex exec` with the adversarial prompt (same as medium tier). +**3. 
Second opinion adversarial challenge (if available):** Run `gstack-second-opinion exec` with the adversarial prompt (same as medium tier). -If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex for full 4-pass coverage: `npm install -g @openai/codex`" +If no second opinion CLI is available for steps 1 and 3, note to the user: "No second opinion CLI found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install one for full 4-pass coverage: `npm install -g @openai/codex` or `npm install -g @google/gemini-cli`" **Persist the review result AFTER all passes complete** (not after each sub-step): ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. +Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if second opinion CLI ran, "claude" if only Claude subagent ran. GATE = the structured review gate result ("pass"/"fail"), or "informational" if second opinion CLI was unavailable. If all passes failed, do NOT persist. 
--- @@ -1072,8 +1073,8 @@ ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): High confidence (found by multiple sources): [findings agreed on by >1 pass] Unique to Claude structured review: [from earlier step] Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ + Unique to $_SO_NAME: [from second opinion adversarial or code review, if ran] + Models used: Claude structured ✓ Claude adversarial ✓/✗ $_SO_NAME ✓/✗ ════════════════════════════════════════════════════════════ ``` diff --git a/scripts/resolvers/constants.ts b/scripts/resolvers/constants.ts index fa720931a..f7041d164 100644 --- a/scripts/resolvers/constants.ts +++ b/scripts/resolvers/constants.ts @@ -37,14 +37,35 @@ export const OPENAI_LITMUS_CHECKS = [ ]; /** - * Shared Codex error handling block for resolver output. + * Shared second opinion error handling block for resolver output. * Used by ADVERSARIAL_STEP, CODEX_PLAN_REVIEW, CODEX_SECOND_OPINION, * DESIGN_OUTSIDE_VOICES, DESIGN_REVIEW_LITE, DESIGN_SKETCH. */ -export function codexErrorHandling(feature: string): string { +export function secondOpinionErrorHandling(feature: string): string { return `**Error handling:** All errors are non-blocking — the ${feature} is informational. - Auth failure (stderr contains "auth", "login", "unauthorized"): note and skip - Timeout: note timeout duration and skip - Empty response: note and skip On any error: continue — ${feature} is informational, not a gate.`; } + +/** @deprecated Use secondOpinionErrorHandling instead */ +export function codexErrorHandling(feature: string): string { + return secondOpinionErrorHandling(feature); +} + +/** + * Shared second opinion detection snippet for resolver output. + * Detects whether codex or gemini CLI is available, using gstack-second-opinion dispatcher. 
+ */ +export function secondOpinionDetection(binDir: string): string { + return `\`\`\`bash +_SO_BACKEND=$(${binDir}/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(${binDir}/gstack-second-opinion name 2>/dev/null) +echo "SECOND_OPINION_BACKEND: $_SO_BACKEND" +echo "SECOND_OPINION_NAME: $_SO_NAME" +\`\`\` + +If \`SECOND_OPINION_BACKEND\` is \`none\` or empty (the dispatcher itself is missing or failed): skip — no second opinion CLI is available. +Install one of: \\\`npm install -g @openai/codex\\\` or \\\`npm install -g @google/gemini-cli\\\``; +} diff --git a/scripts/resolvers/design.ts b/scripts/resolvers/design.ts index a59f516ff..eeea9e1d5 100644 --- a/scripts/resolvers/design.ts +++ b/scripts/resolvers/design.ts @@ -4,21 +4,22 @@ import { AI_SLOP_BLACKLIST, OPENAI_HARD_REJECTIONS, OPENAI_LITMUS_CHECKS } from export function generateDesignReviewLite(ctx: TemplateContext): string { const litmusList = OPENAI_LITMUS_CHECKS.map((item, i) => `${i + 1}. ${item}`).join(' '); const rejectionList = OPENAI_HARD_REJECTIONS.map((item, i) => `${i + 1}. ${item}`).join(' '); - // Codex block only for Claude host + // Second opinion block only for Claude host const codexBlock = ctx.host === 'codex' ? '' : ` -7. **Codex design voice** (optional, automatic if available): +7. 
**Second opinion design voice** (optional, automatic if available): \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" \`\`\` -If Codex is available, run a lightweight design check on the diff: +If a second opinion CLI is available, run a lightweight design check on the diff: \`\`\`bash -TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" +TMPERR_DRL=$(mktemp /tmp/so-drl-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." --effort high --web-search 2>"$TMPERR_DRL" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: @@ -28,7 +29,7 @@ cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" **Error handling:** All errors are non-blocking. On auth failure, timeout, or empty response — skip with a brief note and continue. 
-Present Codex output under a \`CODEX (design):\` header, merged with the checklist findings above.`; +Present output under a \`$_SO_NAME (design):\` header, merged with the checklist findings above.`; return `## Design Review (conditional, diff-scoped) @@ -454,29 +455,30 @@ The screenshot file at \`/tmp/gstack-sketch.png\` can be referenced by downstrea After the wireframe is approved, offer outside design perspectives: \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" \`\`\` -If Codex is available, use AskUserQuestion: -> "Want outside design perspectives on the chosen approach? Codex proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." +If a second opinion CLI is available, use AskUserQuestion: +> "Want outside design perspectives on the chosen approach? $_SO_NAME proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." > > A) Yes — get outside design voices > B) No — proceed without If user chooses A, launch both voices simultaneously: -1. **Codex** (via Bash, \`model_reasoning_effort="medium"\`): +1. **Second opinion** (via Bash): \`\`\`bash -TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" +TMPERR_SKETCH=$(mktemp /tmp/so-sketch-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." --effort medium --web-search 2>"$TMPERR_SKETCH" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After completion: \`cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"\` 2. **Claude subagent** (via Agent tool): "For this product approach, what design direction would you recommend? What aesthetic, typography, and interaction patterns fit? What would make this approach feel inevitable to the user? Be specific — font names, hex colors, spacing values." -Present Codex output under \`CODEX SAYS (design sketch):\` and subagent output under \`CLAUDE SUBAGENT (design direction):\`. +Present second opinion output under \`$_SO_NAME SAYS (design sketch):\` and subagent output under \`CLAUDE SUBAGENT (design direction):\`. Error handling: all non-blocking. On failure, skip and continue.`; } @@ -615,31 +617,32 @@ Fill in each cell from the Codex and subagent outputs. CONFIRMED = both agree. D - Passes can skip discovery and go straight to fixing for pre-identified issues` : isDesignConsultation ? ` **Synthesis:** Claude main references both Codex and subagent proposals in the Phase 3 proposal. Present: -- Areas of agreement between all three voices (Claude main + Codex + subagent) +- Areas of agreement between all three voices (Claude main + second opinion + subagent) - Genuine divergences as creative alternatives for the user to choose from -- "Codex and I agree on X. Codex suggested Y where I'm proposing Z — here's why..."` : ` +- "The second opinion and I agree on X. 
It suggested Y where I'm proposing Z — here's why..."` : ` **Synthesis — Litmus scorecard:** Use the same scorecard format as /plan-design-review (shown above). Fill in from both outputs. -Merge findings into the triage with \`[codex]\` / \`[subagent]\` / \`[cross-model]\` tags.`; +Merge findings into the triage with \`[second-opinion]\` / \`[subagent]\` / \`[cross-model]\` tags.`; const escapedCodexPrompt = codexPrompt.replace(/`/g, '\\`').replace(/\$/g, '\\$'); return `## Design Outside Voices (parallel) ${optInSection} -**Check Codex availability:** +**Check second opinion availability:** \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" \`\`\` -**If Codex is available**, launch both voices simultaneously: +**If a second opinion CLI is available**, launch both voices simultaneously: -1. **Codex design voice** (via Bash): +1. **Second opinion design voice** (via Bash): \`\`\`bash -TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "${escapedCodexPrompt}" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN" +TMPERR_DESIGN=$(mktemp /tmp/so-design-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "${escapedCodexPrompt}" --effort ${reasoningEffort} --web-search 2>"$TMPERR_DESIGN" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). 
After the command completes, read stderr: \`\`\`bash @@ -651,13 +654,13 @@ Dispatch a subagent with this prompt: "${subagentPrompt}" **Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged \`[single-model]\`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed." +- **Timeout:** "$_SO_NAME timed out after 5 minutes." +- **Empty response:** "$_SO_NAME returned no response." +- On any second opinion error: proceed with Claude subagent output only, tagged \`[single-model]\`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a \`CODEX SAYS (design ${isPlanDesignReview ? 'critique' : isDesignReview ? 'source audit' : 'direction'}):\` header. +Present second opinion output under a \`$_SO_NAME SAYS (design ${isPlanDesignReview ? 'critique' : isDesignReview ? 'source audit' : 'direction'}):\` header. Present subagent output under a \`CLAUDE SUBAGENT (design ${isPlanDesignReview ? 'completeness' : isDesignReview ? 'consistency' : 'direction'}):\` header. 
${synthesisSection} diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index a4963b133..80e356a31 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -253,20 +253,22 @@ export function generateCodexSecondOpinion(ctx: TemplateContext): string { **Binary check first — no question if unavailable:** \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +echo "SECOND_OPINION: $_SO_BACKEND ($_SO_NAME)" \`\`\` -If \`CODEX_NOT_AVAILABLE\`: skip Phase 3.5 entirely — no message, no AskUserQuestion. Proceed directly to Phase 4. +If \`_SO_BACKEND\` is \`none\` or empty (the dispatcher is missing or failed): skip Phase 3.5 entirely — no message, no AskUserQuestion. Proceed directly to Phase 4. -If \`CODEX_AVAILABLE\`: use AskUserQuestion: +If a backend is available: use AskUserQuestion: -> Want a second opinion from a different AI model? Codex will independently review your problem statement, key answers, premises, and any landscape findings from this session. It hasn't seen this conversation — it gets a structured summary. Usually takes 2-5 minutes. +> Want a second opinion from a different AI model? $_SO_NAME will independently review your problem statement, key answers, premises, and any landscape findings from this session. It hasn't seen this conversation — it gets a structured summary. Usually takes 2-5 minutes. > A) Yes, get a second opinion > B) No, proceed to alternatives -If B: skip Phase 3.5 entirely. Remember that Codex did NOT run (affects design doc, founder signals, and Phase 4 below). +If B: skip Phase 3.5 entirely. Remember that the second opinion did NOT run (affects design doc, founder signals, and Phase 4 below). -**If A: Run the Codex cold read.** +**If A: Run the cold read.** 1. 
Assemble a structured context block from Phases 1-3: - Mode (Startup or Builder) @@ -288,12 +290,11 @@ Write the full prompt (context block + instructions) to this file. Use the mode- **Builder mode instructions:** "You are an independent technical advisor reading a transcript of a builder brainstorming session. [CONTEXT BLOCK HERE]. Your job: 1) What is the COOLEST version of this they haven't considered? 2) What's the ONE thing from their answers that reveals what excites them most? Quote it. 3) What existing open source project or tool gets them 50% of the way there — and what's the 50% they'd need to build? 4) If you had a weekend to build this, what would you build first? Be specific. Be direct. No preamble." -3. Run Codex: +3. Run the second opinion CLI: \`\`\`bash -TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" +TMPERR_OH=$(mktemp /tmp/so-oh-err-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "$(cat "$CODEX_PROMPT_FILE")" --effort high --web-search 2>"$TMPERR_OH" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: @@ -302,19 +303,19 @@ cat "$TMPERR_OH" rm -f "$TMPERR_OH" "$CODEX_PROMPT_FILE" \`\`\` -**Error handling:** All errors are non-blocking — Codex second opinion is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \\\`codex login\\\` to authenticate. Skipping second opinion." -- **Timeout:** "Codex timed out after 5 minutes. Skipping second opinion." -- **Empty response:** "Codex returned no response. Stderr: . Skipping second opinion." 
+**Error handling:** All errors are non-blocking — the second opinion is a quality enhancement, not a prerequisite. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed. Skipping second opinion." +- **Timeout:** "$_SO_NAME timed out after 5 minutes. Skipping second opinion." +- **Empty response:** "$_SO_NAME returned no response. Stderr: . Skipping second opinion." On any error, proceed to Phase 4 — do NOT fall back to a Claude subagent (this is brainstorming, not adversarial review). 4. **Presentation:** \`\`\` -SECOND OPINION (Codex): +SECOND OPINION ($_SO_NAME): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ \`\`\` @@ -349,10 +350,13 @@ Adversarial review thoroughness scales automatically based on diff size. No conf DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" # Respect old opt-out OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) echo "DIFF_SIZE: $DIFF_TOTAL" +echo "BACKEND: $_SO_BACKEND ($_SO_NAME)" echo "OLD_CFG: \${OLD_CFG:-not_set}" \`\`\` @@ -362,8 +366,8 @@ If \`OLD_CFG\` is \`disabled\`: skip this step silently. Continue to the next st **Auto-select tier based on diff size:** - **Small (< 50 lines changed):** Skip adversarial review entirely. Print: "Small diff ($DIFF_TOTAL lines) — adversarial review skipped." 
Continue to the next step. -- **Medium (50–199 lines changed):** Run Codex adversarial challenge (or Claude adversarial subagent if Codex unavailable). Jump to the "Medium tier" section. -- **Large (200+ lines changed):** Run all remaining passes — Codex structured review + Claude adversarial subagent + Codex adversarial. Jump to the "Large tier" section. +- **Medium (50–199 lines changed):** Run second opinion adversarial challenge (or Claude adversarial subagent if no second opinion CLI available). Jump to the "Medium tier" section. +- **Large (200+ lines changed):** Run all remaining passes — second opinion structured review + Claude adversarial subagent + second opinion adversarial. Jump to the "Large tier" section. --- @@ -371,14 +375,13 @@ If \`OLD_CFG\` is \`disabled\`: skip this step silently. Continue to the next st Claude's structured review already ran. Now add a **cross-model adversarial challenge**. -**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead. +**If a second opinion CLI is available:** run the adversarial challenge. **If NOT available:** fall back to the Claude adversarial subagent instead. -**Codex adversarial:** +**Second opinion adversarial:** \`\`\`bash -TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" +TMPERR_ADV=$(mktemp /tmp/so-adv-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." --effort high --web-search 2>"$TMPERR_ADV" \`\`\` Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. After the command completes, read stderr: @@ -389,11 +392,11 @@ cat "$TMPERR_ADV" Present the full output verbatim. This is informational — it never blocks shipping. **Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \\\`codex login\\\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed." +- **Timeout:** "$_SO_NAME timed out after 5 minutes." +- **Empty response:** "$_SO_NAME returned no response. Stderr: ." -On any Codex error, fall back to the Claude adversarial subagent automatically. +On any error, fall back to the Claude adversarial subagent automatically. **Claude adversarial subagent** (fallback when Codex unavailable or errored): @@ -410,9 +413,9 @@ If the subagent fails or times out: "Claude adversarial subagent unavailable. 
Co \`\`\`bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' \`\`\` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. +Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "$_SO_BACKEND" if second opinion ran, "claude" if subagent ran. If both failed, do NOT persist. -**Cleanup:** Run \`rm -f "$TMPERR_ADV"\` after processing (if Codex was used). +**Cleanup:** Run \`rm -f "$TMPERR_ADV"\` after processing (if second opinion CLI was used). --- @@ -420,26 +423,24 @@ Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOU Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: -**1. Codex structured review (if available):** +**1. Second opinion structured review (if available):** \`\`\`bash -TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -cd "$_REPO_ROOT" -codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +TMPERR=$(mktemp /tmp/so-review-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion review --base --effort high --web-search 2>"$TMPERR" \`\`\` -Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. Present output under \`CODEX SAYS (code review):\` header. +Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. Present output under \`$_SO_NAME SAYS (code review):\` header. Check for \`[P1]\` markers: found → \`GATE: FAIL\`, not found → \`GATE: PASS\`. 
If GATE is FAIL, use AskUserQuestion: \`\`\` -Codex found N critical issues in the diff. +$_SO_NAME found N critical issues in the diff. A) Investigate and fix now (recommended) B) Continue — review will still complete \`\`\` -If A: address the findings${isShip ? '. After fixing, re-run tests (Step 3) since code has changed' : ''}. Re-run \`codex review\` to verify. +If A: address the findings${isShip ? '. After fixing, re-run tests (Step 3) since code has changed' : ''}. Re-run the second opinion review to verify. Read stderr for errors (same error handling as medium tier). @@ -447,15 +448,15 @@ After stderr: \`rm -f "$TMPERR"\` **2. Claude adversarial subagent:** Dispatch a subagent with the adversarial prompt (same prompt as medium tier). This always runs regardless of Codex availability. -**3. Codex adversarial challenge (if available):** Run \`codex exec\` with the adversarial prompt (same as medium tier). +**3. Second opinion adversarial challenge (if available):** Run \`gstack-second-opinion exec\` with the adversarial prompt (same as medium tier). -If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex for full 4-pass coverage: \`npm install -g @openai/codex\`" +If no second opinion CLI is available for steps 1 and 3, note to the user: "No second opinion CLI found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). 
Install one for full 4-pass coverage: \`npm install -g @openai/codex\` or \`npm install -g @google/gemini-cli\`" **Persist the review result AFTER all passes complete** (not after each sub-step): \`\`\`bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' \`\`\` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. +Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if second opinion CLI ran, "claude" if only Claude subagent ran. GATE = the structured review gate result ("pass"/"fail"), or "informational" if second opinion CLI was unavailable. If all passes failed, do NOT persist. --- @@ -469,8 +470,8 @@ ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): High confidence (found by multiple sources): [findings agreed on by >1 pass] Unique to Claude structured review: [from earlier step] Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ + Unique to $_SO_NAME: [from second opinion adversarial or code review, if ran] + Models used: Claude structured ✓ Claude adversarial ✓/✗ $_SO_NAME ✓/✗ ════════════════════════════════════════════════════════════ \`\`\` @@ -492,7 +493,9 @@ thorough review. 
**Check tool availability:** \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" \`\`\` Use AskUserQuestion: @@ -531,12 +534,11 @@ compliments. Just the problems. THE PLAN: " -**If CODEX_AVAILABLE:** +**If SO_AVAILABLE:** \`\`\`bash -TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" +TMPERR_PV=$(mktemp /tmp/so-planreview-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "" --effort high --web-search 2>"$TMPERR_PV" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: @@ -547,20 +549,20 @@ cat "$TMPERR_PV" Present the full output verbatim: \`\`\` -CODEX SAYS (plan review — outside voice): +$_SO_NAME SAYS (plan review — outside voice): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ \`\`\` **Error handling:** All errors are non-blocking — the outside voice is informational. -- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \\\`codex login\\\` to authenticate." -- Timeout: "Codex timed out after 5 minutes." -- Empty response: "Codex returned no response." +- Auth failure (stderr contains "auth", "login", "unauthorized"): "$_SO_NAME auth failed." +- Timeout: "$_SO_NAME timed out after 5 minutes." +- Empty response: "$_SO_NAME returned no response." -On any Codex error, fall back to the Claude adversarial subagent. 
+On any error, fall back to the Claude adversarial subagent. -**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If SO_NOT_AVAILABLE (or second opinion errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. diff --git a/ship/SKILL.md b/ship/SKILL.md index 6192c50bf..21db01ab2 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -1337,18 +1337,19 @@ source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null) Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count, COMMIT = output of `git rev-parse --short HEAD`. -7. **Codex design voice** (optional, automatic if available): +7. **Second opinion design voice** (optional, automatic if available): ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" ``` -If Codex is available, run a lightweight design check on the diff: +If a second opinion CLI is available, run a lightweight design check on the diff: ```bash -TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. 
Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" +TMPERR_DRL=$(mktemp /tmp/so-drl-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." --effort high --web-search 2>"$TMPERR_DRL" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -1358,7 +1359,7 @@ cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" **Error handling:** All errors are non-blocking. On auth failure, timeout, or empty response — skip with a brief note and continue. -Present Codex output under a `CODEX (design):` header, merged with the checklist findings above. +Present output under a `$_SO_NAME (design):` header, merged with the checklist findings above. Include any design findings alongside the code review findings. They follow the same Fix-First flow below. @@ -1442,10 +1443,13 @@ Adversarial review thoroughness scales automatically based on diff size. 
No conf DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +_SO_BACKEND=$(~/.claude/skills/gstack/bin/gstack-second-opinion detect 2>/dev/null | grep BACKEND | awk '{print $2}') +_SO_NAME=$(~/.claude/skills/gstack/bin/gstack-second-opinion name 2>/dev/null) +[ "$_SO_BACKEND" != "none" ] && echo "SO_AVAILABLE" || echo "SO_NOT_AVAILABLE" # Respect old opt-out OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) echo "DIFF_SIZE: $DIFF_TOTAL" +echo "BACKEND: $_SO_BACKEND ($_SO_NAME)" echo "OLD_CFG: ${OLD_CFG:-not_set}" ``` @@ -1455,8 +1459,8 @@ If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. **Auto-select tier based on diff size:** - **Small (< 50 lines changed):** Skip adversarial review entirely. Print: "Small diff ($DIFF_TOTAL lines) — adversarial review skipped." Continue to the next step. -- **Medium (50–199 lines changed):** Run Codex adversarial challenge (or Claude adversarial subagent if Codex unavailable). Jump to the "Medium tier" section. -- **Large (200+ lines changed):** Run all remaining passes — Codex structured review + Claude adversarial subagent + Codex adversarial. Jump to the "Large tier" section. +- **Medium (50–199 lines changed):** Run second opinion adversarial challenge (or Claude adversarial subagent if no second opinion CLI available). Jump to the "Medium tier" section. +- **Large (200+ lines changed):** Run all remaining passes — second opinion structured review + Claude adversarial subagent + second opinion adversarial. Jump to the "Large tier" section. --- @@ -1464,14 +1468,13 @@ If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. 
Claude's structured review already ran. Now add a **cross-model adversarial challenge**. -**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead. +**If a second opinion CLI is available:** run the adversarial challenge. **If NOT available:** fall back to the Claude adversarial subagent instead. -**Codex adversarial:** +**Second opinion adversarial:** ```bash -TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" +TMPERR_ADV=$(mktemp /tmp/so-adv-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." --effort high --web-search 2>"$TMPERR_ADV" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. After the command completes, read stderr: @@ -1482,11 +1485,11 @@ cat "$TMPERR_ADV" Present the full output verbatim. This is informational — it never blocks shipping. 
**Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "$_SO_NAME authentication failed." +- **Timeout:** "$_SO_NAME timed out after 5 minutes." +- **Empty response:** "$_SO_NAME returned no response. Stderr: ." -On any Codex error, fall back to the Claude adversarial subagent automatically. +On any error, fall back to the Claude adversarial subagent automatically. **Claude adversarial subagent** (fallback when Codex unavailable or errored): @@ -1503,9 +1506,9 @@ If the subagent fails or times out: "Claude adversarial subagent unavailable. Co ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. +Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "$_SO_BACKEND" if second opinion ran, "claude" if subagent ran. If both failed, do NOT persist. -**Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if Codex was used). +**Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if second opinion CLI was used). --- @@ -1513,26 +1516,24 @@ Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOU Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: -**1. Codex structured review (if available):** +**1. 
Second opinion structured review (if available):** ```bash -TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -cd "$_REPO_ROOT" -codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +TMPERR=$(mktemp /tmp/so-review-XXXXXXXX) +~/.claude/skills/gstack/bin/gstack-second-opinion review --base --effort high --web-search 2>"$TMPERR" ``` -Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. +Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `$_SO_NAME SAYS (code review):` header. Check for `[P1]` markers: found → `GATE: FAIL`, not found → `GATE: PASS`. If GATE is FAIL, use AskUserQuestion: ``` -Codex found N critical issues in the diff. +$_SO_NAME found N critical issues in the diff. A) Investigate and fix now (recommended) B) Continue — review will still complete ``` -If A: address the findings. After fixing, re-run tests (Step 3) since code has changed. Re-run `codex review` to verify. +If A: address the findings. After fixing, re-run tests (Step 3) since code has changed. Re-run the second opinion review to verify. Read stderr for errors (same error handling as medium tier). @@ -1540,15 +1541,15 @@ After stderr: `rm -f "$TMPERR"` **2. Claude adversarial subagent:** Dispatch a subagent with the adversarial prompt (same prompt as medium tier). This always runs regardless of Codex availability. -**3. Codex adversarial challenge (if available):** Run `codex exec` with the adversarial prompt (same as medium tier). +**3. Second opinion adversarial challenge (if available):** Run `gstack-second-opinion exec` with the adversarial prompt (same as medium tier). 
-If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex for full 4-pass coverage: `npm install -g @openai/codex`" +If no second opinion CLI is available for steps 1 and 3, note to the user: "No second opinion CLI found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install one for full 4-pass coverage: `npm install -g @openai/codex` or `npm install -g @google/gemini-cli`" **Persist the review result AFTER all passes complete** (not after each sub-step): ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. +Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if second opinion CLI ran, "claude" if only Claude subagent ran. GATE = the structured review gate result ("pass"/"fail"), or "informational" if second opinion CLI was unavailable. If all passes failed, do NOT persist. 
--- @@ -1562,8 +1563,8 @@ ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): High confidence (found by multiple sources): [findings agreed on by >1 pass] Unique to Claude structured review: [from earlier step] Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ + Unique to $_SO_NAME: [from second opinion adversarial or code review, if ran] + Models used: Claude structured ✓ Claude adversarial ✓/✗ $_SO_NAME ✓/✗ ════════════════════════════════════════════════════════════ ```