From a09966f955a78e198d23f4e0d2b1c6f8cd502fe5 Mon Sep 17 00:00:00 2001 From: Joichi Ito Date: Fri, 8 May 2026 09:19:31 +0600 Subject: [PATCH] feat(routing): add debugging/orchestration/evaluation roles + fix critique tradeoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements joi-90y. Extends the routing matrix with three new cognitive roles for high-stakes work, and corrects the critique role's capability-vs-thinking-budget tradeoff. NEW ROLES (added to anthropic.yaml, balanced.yaml, quality.yaml): - debugging Opus + high — bug-hunter, session-analyst, incident analysis - orchestration Opus + medium — root session, coordinator work - evaluation Opus + high — comparing parallel agent outputs CHANGED: - critique: Sonnet+xhigh → Opus+high. xhigh produces longer outputs of the same model class, not higher-quality outputs. For critique tasks, capability (model class) > thinking budget. Inline comment captures the rationale. Fallback candidates in balanced.yaml and quality.yaml also drop from xhigh to high; in balanced.yaml the OpenAI fallback moves from gpt-5.5 to gpt-?.?-pro* and the github-copilot fallback from gpt-5.5 to claude-opus-4.6. - writing: added reasoning_effort: medium for coherence across long outputs (in quality.yaml the OpenAI candidate moves from low to medium). No-op for creative (already has no reasoning_effort). All 3 files have their `updated:` field bumped to 2026-05-08. Tests pass. Generated with Amplifier (https://github.com/microsoft/amplifier) Co-Authored-By: Amplifier <240397093+microsoft-amplifier@users.noreply.github.com> --- routing/anthropic.yaml | 32 ++++++++++++++-- routing/balanced.yaml | 86 +++++++++++++++++++++++++++++++++++++----- routing/quality.yaml | 82 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 180 insertions(+), 20 deletions(-) diff --git a/routing/anthropic.yaml b/routing/anthropic.yaml index bc1bda0..7b636ff 100644 --- a/routing/anthropic.yaml +++ b/routing/anthropic.yaml @@ -12,7 +12,7 @@ name: anthropic description: "Anthropic-only routing. All roles use Claude models exclusively." 
-updated: "2026-04-22" +updated: "2026-05-08" roles: general: @@ -59,9 +59,9 @@ roles: description: "Analytical evaluation — finding flaws in existing work, not generating solutions" candidates: - provider: anthropic - model: claude-sonnet-* + model: claude-opus-* config: - reasoning_effort: xhigh + reasoning_effort: high creative: description: "Design direction, aesthetic judgment, high-quality creative output" @@ -74,6 +74,8 @@ roles: candidates: - provider: anthropic model: claude-opus-* + config: + reasoning_effort: medium research: description: "Deep investigation, information synthesis across multiple sources" @@ -83,6 +85,30 @@ roles: config: reasoning_effort: high + debugging: + description: "Hypothesis-driven debugging, incident analysis, session forensics" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: high + + orchestration: + description: "Root-session coordination, multi-agent task orchestration" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: medium + + evaluation: + description: "Comparing parallel agent outputs, judging quality across candidates" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: high + vision: description: "Understanding visual input — screenshots, diagrams, UI mockups" candidates: diff --git a/routing/balanced.yaml b/routing/balanced.yaml index 93f33b0..b091c1e 100644 --- a/routing/balanced.yaml +++ b/routing/balanced.yaml @@ -24,7 +24,7 @@ name: balanced description: "Quality/cost balance for mixed workloads. Curated by Amplifier Foundation team." -updated: "2026-04-22" +updated: "2026-05-08" roles: # --------------------------------------------------------------------------- @@ -149,23 +149,25 @@ roles: critique: description: "Analytical evaluation — finding flaws in existing work, not generating solutions" candidates: - # Anthropic promoted above OpenAI per ordering rule (2026-04-22). 
+ # Capability over thinking budget: Opus+high beats Sonnet+xhigh on critique + # quality (joi-90y, 2026-05-08). xhigh produces longer outputs of the + # same model class, not higher-quality outputs. - provider: anthropic - model: claude-sonnet-* + model: claude-opus-* config: - reasoning_effort: xhigh + reasoning_effort: high - provider: openai - model: gpt-5.5 + model: gpt-?.?-pro* config: - reasoning_effort: xhigh + reasoning_effort: high - provider: gemini model: gemini-*-pro-preview config: - reasoning_effort: xhigh + reasoning_effort: high - provider: github-copilot - model: gpt-5.5 + model: claude-opus-4.6 config: - reasoning_effort: xhigh + reasoning_effort: high creative: description: "Design direction, aesthetic judgment, high-quality creative output" @@ -184,12 +186,20 @@ roles: candidates: - provider: anthropic model: claude-opus-* + config: + reasoning_effort: medium - provider: openai model: gpt-5.5 + config: + reasoning_effort: medium - provider: gemini model: gemini-*-pro-preview + config: + reasoning_effort: medium - provider: github-copilot model: claude-opus-4.6 + config: + reasoning_effort: medium research: description: "Deep investigation, information synthesis across multiple sources" @@ -211,6 +221,64 @@ roles: - provider: anthropic model: claude-sonnet-* + debugging: + description: "Hypothesis-driven debugging, incident analysis, session forensics" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: high + - provider: openai + model: gpt-?.?-pro* + config: + reasoning_effort: high + - provider: gemini + model: gemini-*-pro-preview + config: + reasoning_effort: high + - provider: github-copilot + model: claude-opus-4.6 + config: + reasoning_effort: high + + orchestration: + description: "Root-session coordination, multi-agent task orchestration" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: medium + - provider: openai + model: gpt-?.?-pro* + config: + 
reasoning_effort: medium + - provider: gemini + model: gemini-*-pro-preview + config: + reasoning_effort: medium + - provider: github-copilot + model: claude-opus-4.6 + + evaluation: + description: "Comparing parallel agent outputs, judging quality across candidates" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: high + - provider: openai + model: gpt-?.?-pro* + config: + reasoning_effort: high + - provider: gemini + model: gemini-*-pro-preview + config: + reasoning_effort: high + - provider: github-copilot + model: claude-opus-4.6 + config: + reasoning_effort: high + # --------------------------------------------------------------------------- # Capability Roles # --------------------------------------------------------------------------- diff --git a/routing/quality.yaml b/routing/quality.yaml index e9a8da6..50465d1 100644 --- a/routing/quality.yaml +++ b/routing/quality.yaml @@ -23,7 +23,7 @@ name: quality description: "Best available models. Prioritizes capability over cost." -updated: "2026-04-22" +updated: "2026-05-08" roles: general: @@ -124,23 +124,25 @@ roles: critique: description: "Analytical evaluation — finding flaws in existing work, not generating solutions" candidates: - # Anthropic promoted above OpenAI per ordering rule (2026-04-22). + # Capability over thinking budget: Opus+high beats Sonnet+xhigh on critique + # quality (joi-90y, 2026-05-08). xhigh produces longer outputs of the + # same model class, not higher-quality outputs. 
- provider: anthropic - model: claude-sonnet-* + model: claude-opus-* config: - reasoning_effort: xhigh + reasoning_effort: high - provider: openai model: gpt-?.?-pro* config: - reasoning_effort: xhigh + reasoning_effort: high - provider: gemini model: gemini-*-pro-preview config: - reasoning_effort: xhigh + reasoning_effort: high - provider: github-copilot model: claude-opus-4.6 config: - reasoning_effort: xhigh + reasoning_effort: high creative: description: "Design direction, aesthetic judgment, high-quality creative output" @@ -161,14 +163,20 @@ roles: candidates: - provider: anthropic model: claude-opus-* + config: + reasoning_effort: medium - provider: openai model: gpt-?.?-pro* config: - reasoning_effort: low + reasoning_effort: medium - provider: gemini model: gemini-*-pro-preview + config: + reasoning_effort: medium - provider: github-copilot model: claude-opus-4.6 + config: + reasoning_effort: medium research: description: "Deep investigation, information synthesis across multiple sources" @@ -188,6 +196,64 @@ roles: - provider: github-copilot model: claude-opus-4.6 + debugging: + description: "Hypothesis-driven debugging, incident analysis, session forensics" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: high + - provider: gemini + model: gemini-*-pro-preview + config: + reasoning_effort: high + - provider: openai + model: gpt-?.?-pro* + config: + reasoning_effort: high + - provider: github-copilot + model: claude-opus-4.6 + config: + reasoning_effort: high + + orchestration: + description: "Root-session coordination, multi-agent task orchestration" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: medium + - provider: openai + model: gpt-?.?-pro* + config: + reasoning_effort: medium + - provider: gemini + model: gemini-*-pro-preview + config: + reasoning_effort: medium + - provider: github-copilot + model: claude-opus-4.6 + + evaluation: + description: "Comparing 
parallel agent outputs, judging quality across candidates" + candidates: + - provider: anthropic + model: claude-opus-* + config: + reasoning_effort: high + - provider: openai + model: gpt-?.?-pro* + config: + reasoning_effort: high + - provider: gemini + model: gemini-*-pro-preview + config: + reasoning_effort: high + - provider: github-copilot + model: claude-opus-4.6 + config: + reasoning_effort: high + vision: description: "Understanding visual input — screenshots, diagrams, UI mockups" candidates: