# Patch summary (text before the first "diff --git" header is ignored by patch tools).
# Touches routing/anthropic.yaml, routing/balanced.yaml and routing/quality.yaml:
#   * bumps `updated` to "2026-05-08";
#   * critique: lead candidate becomes claude-opus-*, reasoning_effort xhigh -> high;
#   * the role listed just before `research` gets reasoning_effort: medium on every candidate;
#   * adds the debugging (high), orchestration (medium) and evaluation (high) roles.
# The github-copilot candidate under `orchestration` in balanced.yaml and quality.yaml
# carries reasoning_effort: medium like its sibling candidates; the last hunk of each of
# those two files is therefore 66 new-side lines long.
# NOTE(review): the post-image blob ids on the `index` lines of balanced.yaml and
# quality.yaml predate that addition; regenerate the patch if exact ids matter.
# NOTE(review): quality.yaml `debugging` lists gemini before openai, unlike the other
# roles in this patch; confirm that the ordering is intentional.
diff --git a/routing/anthropic.yaml b/routing/anthropic.yaml
index bc1bda0..7b636ff 100644
--- a/routing/anthropic.yaml
+++ b/routing/anthropic.yaml
@@ -12,7 +12,7 @@
 
 name: anthropic
 description: "Anthropic-only routing. All roles use Claude models exclusively."
-updated: "2026-04-22"
+updated: "2026-05-08"
 
 roles:
   general:
@@ -59,9 +59,9 @@ roles:
     description: "Analytical evaluation — finding flaws in existing work, not generating solutions"
     candidates:
       - provider: anthropic
-        model: claude-sonnet-*
+        model: claude-opus-*
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
 
   creative:
     description: "Design direction, aesthetic judgment, high-quality creative output"
@@ -74,6 +74,8 @@ roles:
     candidates:
       - provider: anthropic
         model: claude-opus-*
+        config:
+          reasoning_effort: medium
 
   research:
     description: "Deep investigation, information synthesis across multiple sources"
@@ -83,6 +85,30 @@ roles:
         config:
           reasoning_effort: high
 
+  debugging:
+    description: "Hypothesis-driven debugging, incident analysis, session forensics"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: high
+
+  orchestration:
+    description: "Root-session coordination, multi-agent task orchestration"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: medium
+
+  evaluation:
+    description: "Comparing parallel agent outputs, judging quality across candidates"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: high
+
   vision:
     description: "Understanding visual input — screenshots, diagrams, UI mockups"
     candidates:
diff --git a/routing/balanced.yaml b/routing/balanced.yaml
index 93f33b0..b091c1e 100644
--- a/routing/balanced.yaml
+++ b/routing/balanced.yaml
@@ -24,7 +24,7 @@
 
 name: balanced
 description: "Quality/cost balance for mixed workloads. Curated by Amplifier Foundation team."
-updated: "2026-04-22"
+updated: "2026-05-08"
 
 roles:
   # ---------------------------------------------------------------------------
@@ -149,23 +149,25 @@ roles:
   critique:
     description: "Analytical evaluation — finding flaws in existing work, not generating solutions"
     candidates:
-      # Anthropic promoted above OpenAI per ordering rule (2026-04-22).
+      # Capability over thinking budget: Opus+high beats Sonnet+xhigh on critique
+      # quality (joi-90y, 2026-05-08). xhigh produces longer outputs of the
+      # same model class, not higher-quality outputs.
       - provider: anthropic
-        model: claude-sonnet-*
+        model: claude-opus-*
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
       - provider: openai
-        model: gpt-5.5
+        model: gpt-?.?-pro*
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
       - provider: gemini
         model: gemini-*-pro-preview
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
       - provider: github-copilot
-        model: gpt-5.5
+        model: claude-opus-4.6
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
 
   creative:
     description: "Design direction, aesthetic judgment, high-quality creative output"
@@ -184,12 +186,20 @@ roles:
     candidates:
       - provider: anthropic
         model: claude-opus-*
+        config:
+          reasoning_effort: medium
       - provider: openai
         model: gpt-5.5
+        config:
+          reasoning_effort: medium
       - provider: gemini
         model: gemini-*-pro-preview
+        config:
+          reasoning_effort: medium
       - provider: github-copilot
         model: claude-opus-4.6
+        config:
+          reasoning_effort: medium
 
   research:
     description: "Deep investigation, information synthesis across multiple sources"
@@ -211,6 +221,66 @@ roles:
       - provider: anthropic
         model: claude-sonnet-*
 
+  debugging:
+    description: "Hypothesis-driven debugging, incident analysis, session forensics"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: high
+      - provider: openai
+        model: gpt-?.?-pro*
+        config:
+          reasoning_effort: high
+      - provider: gemini
+        model: gemini-*-pro-preview
+        config:
+          reasoning_effort: high
+      - provider: github-copilot
+        model: claude-opus-4.6
+        config:
+          reasoning_effort: high
+
+  orchestration:
+    description: "Root-session coordination, multi-agent task orchestration"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: medium
+      - provider: openai
+        model: gpt-?.?-pro*
+        config:
+          reasoning_effort: medium
+      - provider: gemini
+        model: gemini-*-pro-preview
+        config:
+          reasoning_effort: medium
+      - provider: github-copilot
+        model: claude-opus-4.6
+        config:
+          reasoning_effort: medium
+
+  evaluation:
+    description: "Comparing parallel agent outputs, judging quality across candidates"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: high
+      - provider: openai
+        model: gpt-?.?-pro*
+        config:
+          reasoning_effort: high
+      - provider: gemini
+        model: gemini-*-pro-preview
+        config:
+          reasoning_effort: high
+      - provider: github-copilot
+        model: claude-opus-4.6
+        config:
+          reasoning_effort: high
+
   # ---------------------------------------------------------------------------
   # Capability Roles
   # ---------------------------------------------------------------------------
diff --git a/routing/quality.yaml b/routing/quality.yaml
index e9a8da6..50465d1 100644
--- a/routing/quality.yaml
+++ b/routing/quality.yaml
@@ -23,7 +23,7 @@
 
 name: quality
 description: "Best available models. Prioritizes capability over cost."
-updated: "2026-04-22"
+updated: "2026-05-08"
 
 roles:
   general:
@@ -124,23 +124,25 @@ roles:
   critique:
     description: "Analytical evaluation — finding flaws in existing work, not generating solutions"
     candidates:
-      # Anthropic promoted above OpenAI per ordering rule (2026-04-22).
+      # Capability over thinking budget: Opus+high beats Sonnet+xhigh on critique
+      # quality (joi-90y, 2026-05-08). xhigh produces longer outputs of the
+      # same model class, not higher-quality outputs.
       - provider: anthropic
-        model: claude-sonnet-*
+        model: claude-opus-*
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
       - provider: openai
         model: gpt-?.?-pro*
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
       - provider: gemini
         model: gemini-*-pro-preview
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
       - provider: github-copilot
         model: claude-opus-4.6
         config:
-          reasoning_effort: xhigh
+          reasoning_effort: high
 
   creative:
     description: "Design direction, aesthetic judgment, high-quality creative output"
@@ -161,14 +163,20 @@ roles:
     candidates:
       - provider: anthropic
         model: claude-opus-*
+        config:
+          reasoning_effort: medium
       - provider: openai
         model: gpt-?.?-pro*
         config:
-          reasoning_effort: low
+          reasoning_effort: medium
       - provider: gemini
         model: gemini-*-pro-preview
+        config:
+          reasoning_effort: medium
       - provider: github-copilot
         model: claude-opus-4.6
+        config:
+          reasoning_effort: medium
 
   research:
     description: "Deep investigation, information synthesis across multiple sources"
@@ -188,6 +196,66 @@ roles:
       - provider: github-copilot
         model: claude-opus-4.6
 
+  debugging:
+    description: "Hypothesis-driven debugging, incident analysis, session forensics"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: high
+      - provider: gemini
+        model: gemini-*-pro-preview
+        config:
+          reasoning_effort: high
+      - provider: openai
+        model: gpt-?.?-pro*
+        config:
+          reasoning_effort: high
+      - provider: github-copilot
+        model: claude-opus-4.6
+        config:
+          reasoning_effort: high
+
+  orchestration:
+    description: "Root-session coordination, multi-agent task orchestration"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: medium
+      - provider: openai
+        model: gpt-?.?-pro*
+        config:
+          reasoning_effort: medium
+      - provider: gemini
+        model: gemini-*-pro-preview
+        config:
+          reasoning_effort: medium
+      - provider: github-copilot
+        model: claude-opus-4.6
+        config:
+          reasoning_effort: medium
+
+  evaluation:
+    description: "Comparing parallel agent outputs, judging quality across candidates"
+    candidates:
+      - provider: anthropic
+        model: claude-opus-*
+        config:
+          reasoning_effort: high
+      - provider: openai
+        model: gpt-?.?-pro*
+        config:
+          reasoning_effort: high
+      - provider: gemini
+        model: gemini-*-pro-preview
+        config:
+          reasoning_effort: high
+      - provider: github-copilot
+        model: claude-opus-4.6
+        config:
+          reasoning_effort: high
+
   vision:
     description: "Understanding visual input — screenshots, diagrams, UI mockups"
     candidates: