From 23c896a0f1ba28cd78828e93b8628120e990f787 Mon Sep 17 00:00:00 2001 From: Brent Rusinow Date: Tue, 19 May 2026 15:15:56 -0700 Subject: [PATCH 1/5] =?UTF-8?q?feat:=20Conductor=20Expert=20=E2=80=94=20op?= =?UTF-8?q?t-in=20knowledge=20base=20for=20Conductor-aware=20agents=20(#18?= =?UTF-8?q?0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a bundled knowledge base that gives agents deep understanding of Conductor's YAML schema, execution model, authoring patterns, and CLI commands. This is Phase 1: reuse existing plugin reference docs with a runtime injection mechanism. Schema changes: - AgentDef.conductor_expert: bool | None (tri-state: None=inherit, True=enable, False=disable). Forbidden on script/workflow/human_gate. - RuntimeConfig.conductor_expert: bool (workflow-wide default, False) Implementation: - src/conductor/expert/ — new package with loader.py and bundled knowledge/ docs (yaml-schema.md, authoring.md, execution.md) - loader.py uses importlib.resources + lru_cache for zero-cost repeated calls, wraps content in tags - AgentExecutor._build_prompt_prefix() composes workspace instructions + expert knowledge, shared by execute() and render_prompt() - WorkflowEngine passes conductor_expert_default to both single- provider and multi-provider executor paths Example YAML: agents: - name: workflow_reviewer conductor_expert: true prompt: Review this workflow... 
# Or workflow-wide: workflow: runtime: conductor_expert: true Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- AGENTS.md | 6 + examples/conductor-expert.yaml | 82 ++ src/conductor/config/schema.py | 47 + src/conductor/engine/workflow.py | 5 + src/conductor/executor/agent.py | 48 +- src/conductor/expert/__init__.py | 14 + src/conductor/expert/knowledge/authoring.md | 875 ++++++++++++++++++ src/conductor/expert/knowledge/execution.md | 629 +++++++++++++ src/conductor/expert/knowledge/yaml-schema.md | 586 ++++++++++++ src/conductor/expert/loader.py | 69 ++ tests/test_expert/test_conductor_expert.py | 302 ++++++ uv.lock | 2 +- 12 files changed, 2658 insertions(+), 7 deletions(-) create mode 100644 examples/conductor-expert.yaml create mode 100644 src/conductor/expert/__init__.py create mode 100644 src/conductor/expert/knowledge/authoring.md create mode 100644 src/conductor/expert/knowledge/execution.md create mode 100644 src/conductor/expert/knowledge/yaml-schema.md create mode 100644 src/conductor/expert/loader.py create mode 100644 tests/test_expert/test_conductor_expert.py diff --git a/AGENTS.md b/AGENTS.md index 406e14f..3e55d69 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -76,6 +76,10 @@ make validate-examples # validate all examples - `loader.py` - YAML parsing with environment variable resolution (${VAR:-default}) and `!file` tag support - `validator.py` - Cross-reference validation (agent names, routes, parallel groups) +- **expert/**: Conductor Expert knowledge base (opt-in, bundled docs) + - `loader.py` - Loads and caches bundled reference docs, wraps them in delimiter tags + - `knowledge/` - Bundled markdown reference docs (yaml-schema.md, authoring.md, execution.md) + - **engine/**: Workflow execution orchestration - `workflow.py` - Main `WorkflowEngine` class that orchestrates agent execution, parallel groups, for-each groups, and routing - `context.py` - `WorkflowContext` manages accumulated agent outputs with three modes: accumulate, last_only, 
explicit @@ -127,6 +131,7 @@ make validate-examples # validate all examples - **Route evaluation**: First matching `when` condition wins; no `when` = always matches - **Tool resolution**: `null` = all workflow tools, `[]` = none, `[list]` = subset - **Reasoning effort**: `runtime.default_reasoning_effort` sets a workflow-wide default; per-agent `reasoning.effort` overrides it. Allowed values: `low`, `medium`, `high`, `xhigh`. Each provider translates the unified value to its native API (Copilot: `reasoning_effort` on the session, validated against the model's `supported_reasoning_efforts`; Claude: extended thinking with budget mapping low=2048, medium=8192, high=16384, xhigh=32768 tokens, with `temperature` coerced to 1.0 and `max_tokens` bumped to fit the budget). See `examples/reasoning-effort.yaml`. +- **Conductor Expert**: `runtime.conductor_expert: true` sets a workflow-wide default; per-agent `conductor_expert: true/false` overrides it (tri-state: `null` = inherit, `true` = enable, `false` = disable). When enabled, the bundled Conductor knowledge base (~70KB of YAML schema, execution model, and authoring patterns from `src/conductor/expert/knowledge/`) is prepended to the agent's prompt inside delimiter tags. Only applies to provider-backed agents (`type: agent` or default). See `examples/conductor-expert.yaml`. ## Tests Structure @@ -138,6 +143,7 @@ Tests mirror source structure in `tests/`: - `test_providers/` - Provider implementation tests - `test_integration/` - Full workflow execution tests - `test_gates/` - Human gate tests +- `test_expert/` - Conductor Expert knowledge base tests Use `pytest.mark.performance` for performance tests (exclude with `-m "not performance"`). 
diff --git a/examples/conductor-expert.yaml b/examples/conductor-expert.yaml new file mode 100644 index 0000000..e1b7bcc --- /dev/null +++ b/examples/conductor-expert.yaml @@ -0,0 +1,82 @@ +# Conductor Expert Knowledge Base +# +# This example demonstrates the `conductor_expert` opt-in flag, which +# augments agent prompts with Conductor's bundled knowledge base. This +# gives agents deep understanding of Conductor's YAML schema, execution +# model, authoring patterns, and CLI commands — enabling them to +# evaluate, improve, debug, or generate Conductor workflows. +# +# The knowledge base is injected as a prompt preamble wrapped in +# tags, positioned after workspace instructions +# but before the agent's own prompt. +# +# Two levels of opt-in: +# 1. Per-agent: `conductor_expert: true` on an individual agent +# 2. Workflow-wide: `runtime.conductor_expert: true` (all agents) +# +# Per-agent `conductor_expert: false` can disable it even when the +# workflow-wide default is true. +# +# Usage: +# conductor run examples/conductor-expert.yaml \ +# --input workflow_yaml="$(cat examples/simple-qa.yaml)" + +workflow: + name: conductor-expert + description: Demonstrates Conductor Expert knowledge base injection + version: "1.0.0" + entry_point: reviewer + + runtime: + provider: copilot + + input: + workflow_yaml: + type: string + required: true + description: The YAML content of a Conductor workflow to review + +agents: + - name: reviewer + description: Reviews a Conductor workflow for correctness and best practices + model: gpt-5.2 + # This agent gets the full Conductor knowledge base injected + conductor_expert: true + prompt: | + Review the following Conductor workflow YAML for correctness, + best practices, and potential improvements. 
+ + Check for: + - Valid schema usage (field names, types, constraints) + - Appropriate context mode selection + - Route condition correctness + - Sensible failure modes for parallel/for-each groups + - Output schema design + - Missing route fallbacks or unbounded loops + + Workflow YAML: + ```yaml + {{ workflow.input.workflow_yaml }} + ``` + + Provide a structured review with: + 1. Issues found (with severity: error, warning, suggestion) + 2. Recommended improvements + 3. Overall assessment + output: + issues: + type: string + description: List of issues found with severity levels + improvements: + type: string + description: Recommended improvements + assessment: + type: string + description: Overall assessment of the workflow quality + routes: + - to: $end + +output: + issues: "{{ reviewer.output.issues }}" + improvements: "{{ reviewer.output.improvements }}" + assessment: "{{ reviewer.output.assessment }}" diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index 200da59..2de6dec 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -671,6 +671,29 @@ class AgentDef(BaseModel): effort: high """ + conductor_expert: bool | None = None + """Opt-in to the Conductor Expert knowledge base for this agent. + + When ``True``, the agent's prompt is augmented with Conductor's bundled + knowledge base covering the YAML schema, execution model, authoring + patterns, and CLI commands. This enables agents to evaluate, improve, + debug, or generate Conductor workflows with accurate, version-matched + knowledge. + + - ``None`` (default): inherit from ``workflow.runtime.conductor_expert`` + - ``True``: enable regardless of the workflow default + - ``False``: disable regardless of the workflow default + + Only applies to provider-backed agents (type='agent' or None). + + Example YAML:: + + agents: + - name: workflow_reviewer + conductor_expert: true + prompt: "Review this workflow for correctness..." 
+ """ + @field_validator("timeout") @classmethod def validate_timeout(cls, v: int | None) -> int | None: @@ -695,6 +718,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("human_gate agents cannot have 'max_depth'") if self.reasoning is not None: raise ValueError("human_gate agents cannot have 'reasoning'") + if self.conductor_expert is not None: + raise ValueError("human_gate agents cannot have 'conductor_expert'") if self.timeout_seconds is not None: raise ValueError("human_gate agents cannot have 'timeout_seconds'") elif self.type == "script": @@ -731,6 +756,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("script agents cannot have 'max_depth'") if self.reasoning is not None: raise ValueError("script agents cannot have 'reasoning'") + if self.conductor_expert is not None: + raise ValueError("script agents cannot have 'conductor_expert'") if self.timeout_seconds is not None: raise ValueError( "script agents cannot have 'timeout_seconds' " @@ -761,6 +788,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("workflow agents cannot have 'retry'") if self.dialog is not None: raise ValueError("workflow agents cannot have 'dialog'") + if self.conductor_expert is not None: + raise ValueError("workflow agents cannot have 'conductor_expert'") if self.timeout_seconds is not None: raise ValueError("workflow agents cannot have 'timeout_seconds'") else: @@ -918,6 +947,24 @@ class RuntimeConfig(BaseModel): the request through to the SDK. """ + conductor_expert: bool = False + """Workflow-wide default for the Conductor Expert knowledge base. + + When ``True``, all provider-backed agents in the workflow receive the + bundled Conductor knowledge base (YAML schema, execution model, + authoring patterns, CLI commands) prepended to their prompts. + + Individual agents can override this with their own + ``conductor_expert`` field (``True`` to force-enable, ``False`` to + force-disable). 
+ + Example YAML:: + + workflow: + runtime: + conductor_expert: true + """ + class WorkflowDef(BaseModel): """Top-level workflow configuration.""" diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py index c7de451..3e3e888 100644 --- a/src/conductor/engine/workflow.py +++ b/src/conductor/engine/workflow.py @@ -367,6 +367,9 @@ def __init__( # Workspace instructions preamble (inherited by sub-workflows) self._instructions_preamble = instructions_preamble + # Conductor Expert default (workflow-level opt-in) + self._conductor_expert_default = config.workflow.runtime.conductor_expert + # For backward compatibility, create a default executor with single provider # This is used when registry is None if provider is not None: @@ -374,6 +377,7 @@ def __init__( provider, workflow_tools=config.tools, instructions_preamble=self._instructions_preamble, + conductor_expert_default=self._conductor_expert_default, ) self.provider = provider # Keep for backward compatibility else: @@ -575,6 +579,7 @@ async def _get_executor_for_agent(self, agent: AgentDef) -> AgentExecutor: provider, workflow_tools=self.config.tools, instructions_preamble=self._instructions_preamble, + conductor_expert_default=self._conductor_expert_default, ) elif self.executor is not None: # Single provider mode (backward compatibility) diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py index a0cc8cc..0b976ab 100644 --- a/src/conductor/executor/agent.py +++ b/src/conductor/executor/agent.py @@ -98,6 +98,7 @@ def __init__( provider: AgentProvider, workflow_tools: list[str] | None = None, instructions_preamble: str | None = None, + conductor_expert_default: bool = False, ) -> None: """Initialize the AgentExecutor. @@ -106,10 +107,15 @@ def __init__( workflow_tools: Tools defined at workflow level. Defaults to empty list. instructions_preamble: Optional workspace instructions text to prepend to every agent's rendered prompt. 
+ conductor_expert_default: Workflow-level default for the Conductor + Expert knowledge base. When True, all agents executed by this + executor receive the knowledge base unless the agent explicitly + sets ``conductor_expert: false``. """ self.provider = provider self.workflow_tools = workflow_tools or [] self.instructions_preamble = instructions_preamble + self._conductor_expert_default = conductor_expert_default self.renderer = TemplateRenderer() async def execute( @@ -157,9 +163,10 @@ async def execute( # Render prompt with context rendered_prompt = self.renderer.render(agent.prompt, context) - # Prepend workspace instructions preamble if available - if self.instructions_preamble: - rendered_prompt = self.instructions_preamble + rendered_prompt + # Prepend prompt prefix (workspace instructions + optional expert knowledge) + prefix = self._build_prompt_prefix(agent) + if prefix: + rendered_prompt = prefix + rendered_prompt # Append user guidance section if provided if guidance_section: @@ -245,12 +252,41 @@ def render_prompt(self, agent: AgentDef, context: dict[str, Any]) -> str: context: Context for prompt rendering. Returns: - Rendered prompt string with workspace instructions prepended if configured. + Rendered prompt string with workspace instructions and optional + expert knowledge prepended if configured. Raises: TemplateError: If prompt rendering fails. """ rendered = self.renderer.render(agent.prompt, context) - if self.instructions_preamble: - rendered = self.instructions_preamble + rendered + prefix = self._build_prompt_prefix(agent) + if prefix: + rendered = prefix + rendered return rendered + + def _should_inject_expert(self, agent: AgentDef) -> bool: + """Determine whether to inject Conductor Expert knowledge for an agent. + + Resolution order: + - If the agent explicitly sets ``conductor_expert``, use that value. + - Otherwise, fall back to the workflow-level default. 
+ """ + if agent.conductor_expert is not None: + return agent.conductor_expert + return self._conductor_expert_default + + def _build_prompt_prefix(self, agent: AgentDef) -> str: + """Build the prefix to prepend before an agent's rendered prompt. + + Combines workspace instructions and optional Conductor Expert + knowledge into a single prefix string. This helper is shared by + :meth:`execute` and :meth:`render_prompt` to keep them in sync. + """ + parts: list[str] = [] + if self.instructions_preamble: + parts.append(self.instructions_preamble) + if self._should_inject_expert(agent): + from conductor.expert.loader import load_expert_knowledge + + parts.append(load_expert_knowledge()) + return "".join(parts) diff --git a/src/conductor/expert/__init__.py b/src/conductor/expert/__init__.py new file mode 100644 index 0000000..9c8a4b4 --- /dev/null +++ b/src/conductor/expert/__init__.py @@ -0,0 +1,14 @@ +"""Conductor Expert — reusable knowledge base for Conductor-aware agents. + +This module provides opt-in access to Conductor's bundled knowledge base, +giving agents deep understanding of the YAML schema, execution model, +authoring patterns, and CLI commands. Agents that need to evaluate, improve, +debug, or generate Conductor workflows can enable it via the +``conductor_expert`` flag at the agent or workflow level. + +See :mod:`conductor.expert.loader` for loading and caching details. +""" + +from conductor.expert.loader import load_expert_knowledge + +__all__ = ["load_expert_knowledge"] diff --git a/src/conductor/expert/knowledge/authoring.md b/src/conductor/expert/knowledge/authoring.md new file mode 100644 index 0000000..7ab76aa --- /dev/null +++ b/src/conductor/expert/knowledge/authoring.md @@ -0,0 +1,875 @@ +# Workflow Authoring Guide + +Complete reference for creating and modifying Conductor workflow YAML files. 
+ +## Workflow Configuration + +```yaml +workflow: + name: my-workflow # Required: unique identifier + description: What it does # Optional + version: "1.0.0" # Optional + entry_point: first_agent # Required: starting agent, parallel group, or for-each group + + runtime: + provider: copilot # copilot (default), claude, or openai-agents + default_model: gpt-5.2 # Default model for agents + temperature: 0.7 # 0.0-1.0 (optional) + max_tokens: 4096 # Max output tokens per response (optional) + timeout: 600 # Per-request timeout in seconds (optional) + max_agent_iterations: 50 # Max tool-use roundtrips per agent (1-500, optional) + max_session_seconds: 120 # Wall-clock timeout per agent session (optional) + default_reasoning_effort: medium # Workflow-wide reasoning effort: low, medium, high, xhigh (optional) + + input: # Define workflow inputs + param_name: + type: string # string, number, boolean, array, object + required: true + default: "value" + description: What it is + + context: + mode: accumulate # accumulate, last_only, explicit + + limits: + max_iterations: 10 # Max agent executions (default: 10, max: 500) + timeout_seconds: 600 # Total workflow timeout (optional, no default) + + cost: + show_per_agent: true # Show cost per agent (default: true) + show_summary: true # Show cost summary (default: true) + pricing: # Custom pricing overrides + custom-model: + input_per_mtok: 3.0 + output_per_mtok: 15.0 + + hooks: # Optional lifecycle expressions + on_start: "..." # Evaluated when workflow starts + on_complete: "..." # Evaluated on success + on_error: "..." # Evaluated on failure + + metadata: # Optional arbitrary key-values surfaced in workflow_started events + tracker: ado + work_item_id: 42 + # Merged with --metadata / -m CLI flags (CLI wins on key collision) + + instructions: # Optional workspace context prepended to every agent prompt + - !file ../AGENTS.md # !file include + - "Always respond in English." 
# Inline string + # For workflows distributed via registry, prefer the --workspace-instructions + # CLI flag (auto-discovers AGENTS.md / CLAUDE.md / .github/copilot-instructions.md + # / .github/instructions/**/*.instructions.md with applyTo: "**") so target-repo + # context is loaded at run time instead of being baked into the YAML. +``` + +## Agent Definition + +```yaml +agents: + - name: my_agent # Required: unique identifier + type: agent # agent (default), human_gate, script, or workflow + description: What it does + model: gpt-5.2 # Override workflow default + provider: claude # Optional: per-agent provider override + + system_prompt: | # Optional: system message (always included) + You are a specialized assistant. + + prompt: | + You are a helpful assistant. + + Input: {{ workflow.input.param }} + + {% if other_agent is defined and other_agent.output %} + Previous output: {{ other_agent.output.field }} + {% endif %} + + output: # Structured output schema + field_name: + type: string + description: What this field contains + + tools: # null = all, [] = none, [list] = subset + - web_search + + max_agent_iterations: 100 # Override workflow default for this agent (optional) + max_session_seconds: 60 # Wall-clock timeout for this agent (optional, soft, between iterations) + timeout_seconds: 120 # Hard wall-clock cancellation for this agent (provider-backed only). + # Engine wraps execution in asyncio.wait_for(); raises AgentTimeoutError. + # Effective limit = min(timeout_seconds, remaining_workflow_timeout). + # Non-retryable. Forbidden on script/human_gate/workflow types. 
+ + retry: # Per-agent retry policy (optional, not allowed on script/human_gate/workflow) + max_attempts: 3 # 1-10, default 1 (no retry) + backoff: exponential # exponential (default) or fixed + delay_seconds: 2.0 # Base delay (0-300, default 2.0) + retry_on: # Default: ["provider_error", "timeout"] + - provider_error # API 500s, rate limits + - timeout # Agent-level timeout exceeded + # Validation errors are never retried. + + dialog: # Optional: conditionally pause for free-form conversation (optional) + trigger_prompt: | + Enter dialog if the agent expresses uncertainty about the user's + intent or needs clarification on ambiguous requirements. + + reasoning: # Override runtime.default_reasoning_effort (optional) + effort: high # low, medium, high, or xhigh + + routes: # Where to go next + - to: next_agent +``` + +### Reasoning Effort + +`reasoning.effort` (per-agent) and `runtime.default_reasoning_effort` (workflow-wide) accept `low`, `medium`, `high`, or `xhigh`. Per-agent overrides the runtime default. The provider translates the unified value to its native API: + +- **Copilot**: forwarded as `reasoning_effort` on the session. Validated against the model's advertised `supported_reasoning_efforts`; raises `ValidationError` for unsupported combinations (skipped in mock-handler mode or when capability metadata is absent). +- **Claude**: enables extended thinking via `thinking={"type": "enabled", "budget_tokens": N}` with mapping `low=2048`, `medium=8192`, `high=16384`, `xhigh=32768`. Auto-coerces `temperature` to `1.0` (logged at INFO) and bumps `max_tokens` to fit `budget + 4096` (capped at 64000, logged at INFO when clamped). Only valid on thinking-capable models (`claude-3-7-*`, `claude-opus-4*`, `claude-sonnet-4*`, `claude-haiku-4*`); raises `ValidationError` otherwise. + +Both providers surface reasoning content via `agent_reasoning` events visible in the dashboard, JSONL logs, and the console at `-vv`. 
Not allowed on `script`, `human_gate`, or `workflow` agent types. + +```yaml +runtime: + provider: claude + default_model: claude-opus-4-20250514 + default_reasoning_effort: medium # workflow-wide default + +agents: + - name: explainer + prompt: "Explain this algorithm." + # inherits 'medium' + + - name: architect + reasoning: + effort: high # override + prompt: "Design the system architecture." +``` + +See `examples/reasoning-effort.yaml` for a complete example. + +## Routing Patterns + +### Linear + +```yaml +routes: + - to: next_agent +``` + +### Conditional (first match wins) + +```yaml +routes: + - to: success_agent + when: "{{ output.status == 'approved' }}" + - to: failure_agent + when: "{{ output.status == 'rejected' }}" + - to: default_agent # Fallback (no when clause) +``` + +### Loop-back + +```yaml +routes: + - to: $end + when: "{{ output.score >= 90 }}" + - to: self # Loop back to same agent +``` + +### Terminal + +```yaml +routes: + - to: $end # End workflow +``` + +### Route to parallel/for-each group + +```yaml +routes: + - to: parallel_researchers # Route to a parallel group + - to: item_processors # Route to a for-each group +``` + +## Script Steps + +Script steps run shell commands and capture stdout, stderr, and exit_code: + +```yaml +agents: + - name: check_python + type: script + description: Check the installed Python version + command: python3 + args: ["--version"] + timeout: 30 # Per-script timeout in seconds (optional) + working_dir: /tmp # Working directory (optional, Jinja2 templated) + env: # Extra environment variables (optional) + MY_VAR: "value" + routes: + - to: analyzer + when: "exit_code == 0" + - to: error_handler +``` + +### Script Output + +Script steps always produce three fields: + +```jinja2 +{{ script_name.output.stdout }} # Captured standard output (string) +{{ script_name.output.stderr }} # Captured standard error +{{ script_name.output.exit_code }} # Process exit code (0 = success) +``` + +If `stdout` is **valid JSON**, 
its top-level keys are auto-merged into the agent's output dict alongside `stdout`/`stderr`/`exit_code`. This enables structured `when:` route conditions instead of opaque exit-code matching: + +```yaml +agents: + - name: classify + type: script + command: python3 + args: ["classify.py"] # prints e.g. {"category": "bug", "score": 87} + routes: + - to: bug_handler + when: "category == 'bug'" # field-based, not exit-code-based + - to: triage +``` + +### Script Routing + +Route conditions use `exit_code` directly (simpleeval syntax): + +```yaml +routes: + - to: next_step + when: "exit_code == 0" + - to: error_handler # Fallback for non-zero exit +``` + +### Script Restrictions + +Script agents **cannot** have: `prompt`, `provider`, `model`, `tools`, `output`, `system_prompt`, `options`, `retry`, `reasoning`, `dialog`, `max_session_seconds`, `max_agent_iterations`, `timeout_seconds` (use `timeout:` instead), `input_mapping`, `max_depth`, or `conductor_expert`. +Command and args support Jinja2 templating for dynamic values. + +## Sub-Workflow Agents (`type: workflow`) + +Reference an external workflow YAML file as a black-box step. The sub-workflow runs with its own engine and inherits the parent's provider configuration. + +```yaml +agents: + - name: deep_research + type: workflow + workflow: ./research-pipeline.yaml # Required: path resolved relative to parent YAML + input: # Optional: explicit input declarations (for explicit context mode) + - workflow.input.topic + input_mapping: # Optional: per-call inputs to the sub-workflow + topic: "{{ workflow.input.topic }}" + depth: "{{ research_planner.output.depth }}" + max_depth: 3 # Optional per-agent recursion cap + # (additionally bounded by global MAX_SUBWORKFLOW_DEPTH = 10) + output: # Optional output schema for validation + findings: + type: string + routes: + - to: synthesizer +``` + +**Semantics:** + +- `workflow` path is resolved relative to the parent workflow file. +- Sub-workflow inherits the parent's provider configuration. 
+- When `input_mapping` is omitted, the parent's `workflow.input.*` is forwarded as-is. +- `input_mapping` keys are sub-workflow input names; values are Jinja2 expressions evaluated against the parent's context. +- Recursive composition is supported with a global `MAX_SUBWORKFLOW_DEPTH = 10`. Self-referential workflows are allowed; bound recursion further with `max_depth`. +- Each invocation emits `subworkflow_started` / `subworkflow_completed` events. The dashboard supports breadcrumb navigation and double-click dive-in. +- Sub-workflow output is accessible via `{{ agent_name.output.field }}`. + +**Sub-workflows in `for_each` groups** — `type: workflow` agents work inside `for_each` groups for dynamic fan-out, with per-iteration `input_mapping` evaluated against the loop variable: + +```yaml +for_each: + - name: plan_issues + type: for_each + source: epic_planner.output.issues + as: issue + max_concurrent: 1 + agent: + type: workflow + workflow: ./plan-and-review.yaml + input_mapping: + work_item_id: "{{ issue.id }}" + title: "{{ issue.title }}" +``` + +**Restrictions** — workflow steps cannot have `prompt`, `model`, `provider`, `tools`, `system_prompt`, `command`, `options`, `retry`, `reasoning`, `dialog`, `max_session_seconds`, `max_agent_iterations`, `timeout_seconds`, or `conductor_expert`. + +## Dialog Mode + +Dialog mode lets an agent conditionally pause after execution and enter a free-form conversation with the user. A lightweight evaluator LLM call inspects the agent's output against `trigger_prompt` and decides whether to engage. Both Copilot and Claude providers are supported, and the dashboard provides dedicated UI (`DialogDetail`, `DialogEngagementPrompt`, `DialogOverlay`). + +```yaml +agents: + - name: researcher + prompt: "Research the given topic thoroughly" + dialog: + trigger_prompt: | + Enter dialog if the agent expresses uncertainty about + the user's intent, encounters ambiguous requirements, + or needs clarification before proceeding. 
+ Do NOT trigger for minor uncertainties the agent can resolve on its own. + routes: + - to: writer +``` + +Only valid on provider-backed agents (not `script`, `human_gate`, or `workflow`). See `examples/dialog-mode.yaml` for a complete example. + +## Workflow Metadata and Workspace Instructions + +### Metadata + +Attach arbitrary key-value metadata to a workflow for downstream tooling (dashboards, work-item trackers, audit logs). Surfaced in the `workflow_started` event payload. + +```yaml +workflow: + name: implement + metadata: + tracker: ado + template_version: 3 +``` + +CLI metadata is merged on top of YAML metadata (CLI wins on key collision; values stay as strings, no type coercion): + +```bash +conductor run workflow.yaml -m work_item_id=1814 -m sprint=Q3 +``` + +### Workspace Instructions + +Prepend workspace context to every agent prompt. Three options: + +1. **YAML `instructions:`** — first-class field, persisted in checkpoints, inherited into sub-workflows. Best for self-contained workflows where the YAML lives alongside the code. + + ```yaml + workflow: + instructions: + - !file ../AGENTS.md + - "Always respond in English." + ``` + +2. **`--workspace-instructions` CLI flag** — auto-discovers files by walking from CWD to the git root: `AGENTS.md`, `CLAUDE.md`, `.github/copilot-instructions.md`, and `.github/instructions/**/*.instructions.md` (only files with `applyTo: "**"` in YAML frontmatter; scoped or absent-`applyTo` files are skipped per the GitHub Copilot convention). Best for workflows distributed via registry/skills where the YAML lives far from the target repo. + +3. **`--instructions PATH` CLI flag** — explicit path to a file (repeatable). + +All three sources are concatenated and prepended to every agent's prompt as a workspace preamble. 
+ +## File Includes (`!file` Tag) + +Include external file content in YAML using the `!file` tag: + +```yaml +agents: + - name: analyzer + system_prompt: !file prompts/system.md + prompt: !file prompts/analyze.md +``` + +- Paths are **relative to the YAML file's directory** +- If the included file is valid YAML, it's parsed as a data structure +- If it's plain text (e.g., Markdown), it's included as a string +- Supports **recursive includes** — included YAML files can use `!file` too +- Circular references are detected and raise an error + +## Parallel Groups + +Static parallel groups run a fixed set of agents concurrently: + +```yaml +parallel: + - name: parallel_researchers + description: Research from multiple sources + agents: + - web_researcher # At least 2 agents required + - academic_researcher + - news_researcher + failure_mode: continue_on_error # fail_fast, continue_on_error, all_or_nothing + routes: + - to: synthesizer +``` + +### Context Isolation + +Each parallel agent gets an **immutable snapshot** of context at group start. Agents cannot see each other's outputs during execution. 
+ +### Accessing Parallel Outputs + +```jinja2 +{{ parallel_researchers.outputs.web_researcher.summary }} +{{ parallel_researchers.outputs.academic_researcher.findings }} + +# Error access (continue_on_error mode) +{% if parallel_researchers.errors %} +{{ parallel_researchers.errors.news_researcher.message }} +{% endif %} +``` + +### Failure Modes + +| Mode | Behavior | +|------|----------| +| `fail_fast` | Stop immediately on first failure (default) | +| `continue_on_error` | Continue all; proceed if at least one succeeds | +| `all_or_nothing` | Continue all; fail if any agent fails | + +## For-Each Groups + +Dynamic parallel groups process variable-length arrays at runtime: + +```yaml +for_each: + - name: kpi_analyzers + type: for_each # Required discriminator + description: Analyze each KPI + source: finder.output.kpis # Array reference (dotted path, 3+ parts) + as: kpi # Loop variable name + max_concurrent: 5 # Batch size (default: 10, max: 100) + failure_mode: continue_on_error + + agent: # Inline agent template + name: kpi_analyzer + model: claude-sonnet-4.5 + prompt: | + Analyze KPI {{ _index + 1 }}: {{ kpi.name }} + Value: {{ kpi.value }} + output: + analysis: + type: string + score: + type: number + + key_by: kpi.kpi_id # Optional: dict-based outputs + + routes: + - to: aggregator +``` + +### Loop Variables + +| Variable | Description | +|----------|-------------| +| `{{ kpi }}` | Current item (name from `as`) | +| `{{ _index }}` | Zero-based index (0, 1, 2...) 
| +| `{{ _key }}` | Extracted key (only with `key_by`) | + +### Reserved Variable Names + +Cannot use for `as`: `workflow`, `context`, `output`, `_index`, `_key` + +### Accessing For-Each Outputs + +```jinja2 +# Array access (no key_by) +{{ kpi_analyzers.outputs[0].analysis }} +{% for result in kpi_analyzers.outputs %} +- Score: {{ result.score }} +{% endfor %} + +# Dict access (with key_by) +{{ kpi_analyzers.outputs["KPI-123"].analysis }} + +# Metadata +Total: {{ kpi_analyzers.outputs | length }} +Errors: {{ kpi_analyzers.errors | length }} +``` + +## Human Gates + +Pause workflow for user decisions. Uses **list-based** options: + +```yaml +agents: + - name: approval_gate + type: human_gate + prompt: | + Review the design: + {{ designer.output.design }} + options: + - label: "Approve" + value: approved + route: $end + - label: "Request Changes" + value: changes + route: designer + prompt_for: feedback # Collects text input from user + - label: "Reject" + value: rejected + route: $end +``` + +### Gate Output + +Human gates automatically capture: +- `output.selected` - the `value` of the chosen option +- `output.feedback` - text input from `prompt_for` (if specified) + +## Context Modes + +### Accumulate (default) + +All prior agent outputs available to all agents: + +```yaml +context: + mode: accumulate +``` + +### Last Only + +Only the previous agent's output available: + +```yaml +context: + mode: last_only +``` + +### Explicit + +Only specified inputs available — maximum control, minimal tokens: + +```yaml +context: + mode: explicit + +agents: + - name: agent + input: + - workflow.input.question + - other_agent.output.result # Required + - optional_agent.output? # Optional (? 
suffix) +``` + +## Multi-Provider Workflows + +Override the provider on individual agents: + +```yaml +workflow: + runtime: + provider: copilot # Default provider + default_model: gpt-5.2 + +agents: + - name: fast_classifier + provider: claude # Uses Claude for this agent + model: claude-haiku-4.5 + prompt: "Classify: {{ workflow.input.text }}" + + - name: deep_analyzer + # Uses default copilot provider + model: gpt-5.2 + prompt: "Analyze: {{ fast_classifier.output.category }}" +``` + +## MCP Server Configuration + +### Stdio server + +```yaml +runtime: + mcp_servers: + web-search: + command: npx + args: ["-y", "open-websearch@latest"] + tools: ["*"] +``` + +### HTTP/SSE server + +```yaml +runtime: + mcp_servers: + remote: + type: http # or "sse" + url: https://mcp.server.example.com/ + headers: + Authorization: "Bearer ${API_TOKEN}" + tools: ["*"] +``` + +### With environment variables + +```yaml +runtime: + mcp_servers: + custom: + command: node + args: ["./server.js"] + env: + API_KEY: "${API_KEY}" # Resolved from environment at runtime + tools: ["*"] +``` + +### Selective tool access + +```yaml +tools: ["search", "fetch"] # Only these tools available +``` + +## Template Variables (Jinja2) + +| Variable | Description | +|----------|-------------| +| `{{ workflow.input.param }}` | Workflow input | +| `{{ workflow.name }}` | Workflow name | +| `{{ workflow.dir }}` | Directory of the workflow YAML file (always available, all context modes) | +| `{{ workflow.file }}` | Absolute path to the workflow YAML file | +| `{{ agent_name.output.field }}` | Agent output | +| `{{ output.field }}` | Current agent output (in routes) | +| `{{ group.outputs.agent.field }}` | Parallel group output | +| `{{ group.outputs[i].field }}` | For-each output (index) | +| `{{ group.outputs["key"].field }}` | For-each output (key_by) | + +### Conditionals + +```jinja2 +{% if previous_agent is defined and previous_agent.output %} +Previous: {{ previous_agent.output.result }} +{% endif %} +``` 
+ +### Loops + +```jinja2 +{% for item in agent.output.items %} +- {{ item }} +{% endfor %} +``` + +### Filters + +```jinja2 +{{ value | upper }} # Uppercase +{{ value | default("fallback") }} # Default value +{{ items | join(", ") }} # Join array +{{ data | json }} # JSON serialize +``` + +## Output Schema + +Map agent outputs to workflow output: + +```yaml +output: + answer: "{{ answerer.output.answer }}" + summary: "{{ reviewer.output.summary }}" + results: "{{ processors.outputs | json }}" +``` + +## Output Types + +### String + +```yaml +output: + answer: + type: string + description: The answer +``` + +### Number + +```yaml +output: + score: + type: number + description: Quality score 0-100 +``` + +### Boolean + +```yaml +output: + approved: + type: boolean +``` + +### Array + +```yaml +output: + items: + type: array + description: List of items + items: + type: string +``` + +### Object + +```yaml +output: + result: + type: object + properties: + name: + type: string + count: + type: number +``` + +## Route Conditions + +### Comparison operators + +```yaml +when: "{{ output.score >= 90 }}" +when: "{{ output.score < 50 }}" +when: "{{ output.status == 'done' }}" +when: "{{ output.status != 'error' }}" +``` + +### Logical operators + +```yaml +when: "{{ output.score >= 90 and output.approved }}" +when: "{{ output.retry or output.force }}" +when: "{{ not output.failed }}" +``` + +### String operations + +```yaml +when: "{{ 'error' in output.message }}" +when: "{{ output.status.startswith('success') }}" +``` + +## Common Patterns + +### Single Agent Q&A + +```yaml +workflow: + name: qa + entry_point: answerer + input: + question: + type: string + required: true + +agents: + - name: answerer + prompt: | + Answer: {{ workflow.input.question }} + output: + answer: + type: string + routes: + - to: $end + +output: + answer: "{{ answerer.output.answer }}" +``` + +### Iterative Refinement + +```yaml +workflow: + name: refine + entry_point: creator + limits: + 
max_iterations: 5 + +agents: + - name: creator + prompt: | + Create content... + {% if reviewer.output %} + Feedback: {{ reviewer.output.feedback }} + {% endif %} + routes: + - to: reviewer + + - name: reviewer + prompt: | + Review and score 0-100: + {{ creator.output.content }} + output: + score: + type: number + feedback: + type: string + routes: + - to: $end + when: "{{ output.score >= 90 }}" + - to: creator +``` + +### Parallel Research Pipeline + +```yaml +workflow: + name: research + entry_point: planner + context: + mode: explicit + +parallel: + - name: researchers + agents: [web_researcher, academic_researcher] + failure_mode: continue_on_error + routes: + - to: synthesizer + +agents: + - name: planner + routes: + - to: researchers + + - name: web_researcher + input: [planner.output] + prompt: "Web research on {{ planner.output.topic }}" + + - name: academic_researcher + input: [planner.output] + prompt: "Academic research on {{ planner.output.topic }}" + + - name: synthesizer + input: [researchers.outputs] + prompt: "Synthesize: {{ researchers.outputs | json }}" + routes: + - to: $end +``` + +### Human Approval Loop + +```yaml +agents: + - name: designer + routes: + - to: approval + + - name: approval + type: human_gate + prompt: "Review: {{ designer.output.summary }}" + options: + - label: Approve + value: approved + route: $end + - label: Revise + value: changes + route: designer + prompt_for: feedback +``` + +## Validation Rules + +- `entry_point` must reference a valid agent, parallel group, or for-each group +- All agents must be reachable from entry_point +- All paths must eventually reach `$end` +- Route `when` conditions must be valid Jinja2 +- Agent names must be unique +- Non-gate agents require at least one route +- Parallel groups need at least 2 agents +- For-each `source` must be dotted path with 3+ parts +- For-each `as` cannot use reserved names diff --git a/src/conductor/expert/knowledge/execution.md 
b/src/conductor/expert/knowledge/execution.md new file mode 100644 index 0000000..9a9e35c --- /dev/null +++ b/src/conductor/expert/knowledge/execution.md @@ -0,0 +1,629 @@ +# Workflow Execution Guide + +Complete reference for running, validating, and debugging Conductor workflows. + +## CLI Commands + +### conductor run + +Execute a workflow: + +```bash +conductor run <workflow> [OPTIONS] +``` + +| Option | Description | +|--------|-------------| +| `--input`, `-i NAME=VALUE` | Workflow input (repeatable) | +| `--input.NAME=VALUE` | Alternative input syntax | +| `--metadata`, `-m KEY=VALUE` | Workflow metadata, merged on top of YAML `metadata:` (repeatable; values stay strings) | +| `--provider`, `-p PROVIDER` | Override provider (`copilot`, `claude`, `openai-agents`) | +| `--dry-run` | Show execution plan only | +| `--skip-gates` | Auto-select first option at human gates | +| `--web` | Start real-time web dashboard | +| `--web-bg` | Run in background, print dashboard URL, exit | +| `--web-port PORT` | Port for web dashboard (0 = auto) | +| `--no-interactive` | Disable Esc-to-interrupt capability | +| `--log-file`, `-l PATH` | Write full debug output to file (`auto` for auto-generated) | +| `--workspace-instructions` | Auto-discover `AGENTS.md`, `CLAUDE.md`, `.github/copilot-instructions.md`, and `.github/instructions/**/*.instructions.md` (only files marked `applyTo: "**"`) and prepend them to every agent prompt | +| `--instructions PATH` | Path to a specific instruction file to prepend (repeatable) | + +**Global options** (before the subcommand): + +| Option | Description | +|--------|-------------| +| `--quiet`, `-q` | Minimal output: agent lifecycle and routing only | +| `--silent`, `-s` | No progress output. Only JSON result on stdout | +| `--version`, `-v` | Show version and exit | + +> **Note:** Full output is shown by default (prompts, tool calls, reasoning). Use `-q` for minimal output or `-s` for JSON-only. `--quiet` and `--silent` are mutually exclusive.
+ +**Examples:** + +```bash +# Standard run (full output by default) +conductor run workflow.yaml --input question="Hello" + +# Quiet mode (lifecycle + routing only) +conductor -q run workflow.yaml --input question="Hello" + +# Silent mode (JSON result only, no progress) +conductor -s run workflow.yaml --input question="Hello" + +# Log full debug output to auto-generated file +conductor run workflow.yaml --log-file auto + +# Silent terminal + full file logging +conductor -s run workflow.yaml --log-file auto + +# Multiple inputs +conductor run workflow.yaml -i topic="AI" -i depth="detailed" + +# Skip human gates for automation +conductor run workflow.yaml --skip-gates + +# Dry run to preview execution plan +conductor run workflow.yaml --dry-run + +# Override provider +conductor run workflow.yaml -p claude + +# Attach metadata (merged on top of YAML metadata; values are strings) +conductor run workflow.yaml -m tracker=ado -m work_item_id=42 + +# Auto-discover workspace instructions and prepend to all prompts +conductor run workflow.yaml --workspace-instructions + +# Prepend explicit instruction file(s) +conductor run workflow.yaml --instructions AGENTS.md --instructions notes.md + +# Start real-time web dashboard +conductor run workflow.yaml --web --input question="Hello" + +# Background mode: prints URL and exits immediately +conductor run workflow.yaml --web-bg --input question="Hello" +``` + +The `--web` flag opens a browser dashboard with a DAG visualization showing live agent status, streaming reasoning/tool calls, and an agent detail panel. The `--web-bg` flag forks a background process and exits immediately. `--web` and `--web-bg` are mutually exclusive. + +Background workflows can be stopped with `conductor stop` (see below) or via the stop button in the web dashboard. 
+ +### conductor stop + +Stop background workflow processes launched with `--web-bg`: + +```bash +conductor stop [OPTIONS] +``` + +| Option | Description | +|--------|-------------| +| `--port PORT` | Stop the workflow running on this specific port | +| `--all` | Stop all background conductor workflows | + +With no options, lists running workflows and auto-stops if exactly one is found. + +**Examples:** + +```bash +# Stop the only running background workflow +conductor stop + +# Stop a specific workflow by port +conductor stop --port 8080 + +# Stop all running background workflows +conductor stop --all +``` + +### conductor update + +Check for the latest version of Conductor and (optionally) launch the installer: + +```bash +conductor update # Check + print install command +conductor update --apply # Check + launch installer (then exit) +``` + +The command: + +1. Fetches the latest release from the GitHub Releases API. +2. Compares the remote version with the locally installed version. +3. **Default:** prints the OS-appropriate `install.sh` / `install.ps1` one-liner you can paste into a fresh shell. +4. **`--apply`:** spawns the install script as a fully detached process and exits the current `conductor` so file locks release. On Windows the installer opens in a new console window; on POSIX `os.execvpe` replaces the process. This is the only way to upgrade-while-running cleanly on Windows (in-process self-upgrade was removed because the running interpreter sits inside the venv that `uv tool install --force` is trying to recreate). +5. Clears the update-check cache on success. + +If already up to date, prints a confirmation and exits. + +The legacy `--force` flag is accepted as a no-op for backward compatibility. + +**Passive update hints:** On every CLI invocation, Conductor checks for updates (cached 24h, 2-second timeout) and prints a one-line hint if a newer version is available. 
Suppressed when stderr is not a TTY, when `--silent` is set, when the subcommand is `update`, or when `CONDUCTOR_NO_UPDATE_CHECK=1`. + +### conductor resume + +Resume a workflow from a checkpoint after failure. Run-flag parity: most `run` flags work identically. + +```bash +conductor resume <workflow.yaml> [OPTIONS] +conductor resume --from <checkpoint.json> [OPTIONS] +``` + +| Option | Description | +|--------|-------------| +| `--from PATH` | Resume from a specific checkpoint file | +| `--provider`, `-p PROVIDER` | Override provider for the resumed run | +| `--metadata`, `-m KEY=VALUE` | Workflow metadata, merged on top of YAML metadata (repeatable) | +| `--skip-gates` | Auto-select first option at human gates | +| `--log-file`, `-l PATH` | Write debug output to file | +| `--no-interactive` | Disable Esc-to-interrupt | +| `--web` | Start real-time web dashboard for the resumed run | +| `--web-port PORT` | Port for the dashboard (0 = auto) | +| `--web-bg` | Fork a detached resume + dashboard process and exit | + +`--web` and `--web-bg` are mutually exclusive. The dashboard only shows events from the resumed agent forward — events emitted in the original process before the checkpoint are not replayed. + +Intentionally **not** mirrored on `resume`: `--input` / `--workspace-instructions` / `--instructions` (restored from the checkpoint), and `--dry-run` (incompatible with mid-run resumption). + +When a workflow fails, Conductor automatically saves a checkpoint to `$TMPDIR/conductor/checkpoints/`. The checkpoint contains all prior agent outputs, the workflow state, and the resolved `instructions_preamble`, enabling seamless resumption from the failed agent.
+ +**Examples:** + +```bash +conductor resume workflow.yaml # latest checkpoint +conductor resume --from /tmp/conductor/checkpoints/my-workflow-20260303-153000.json +conductor resume workflow.yaml --provider claude +conductor resume workflow.yaml --metadata tracker=ado -m work_item_id=42 +conductor resume workflow.yaml --web +conductor resume workflow.yaml --web-bg +conductor resume workflow.yaml --log-file auto +``` + +**Behavior:** +- If the workflow file has changed since the checkpoint was saved, a warning is displayed but resumption proceeds +- Execution resumes from the exact agent that failed +- All prior agent outputs and the workspace `instructions_preamble` are restored from the checkpoint + +### conductor checkpoints + +List available workflow checkpoints: + +```bash +conductor checkpoints [workflow.yaml] +``` + +Shows all checkpoint files with metadata: workflow name, timestamp, failed agent, and error type. Optionally filter by workflow file. + +**Examples:** + +```bash +# List all checkpoints +conductor checkpoints + +# List checkpoints for a specific workflow +conductor checkpoints workflow.yaml +``` + +### conductor validate + +Validate without executing: + +```bash +conductor validate <workflow.yaml> +``` + +Performs both schema and **semantic** checks: + +- YAML syntax +- Required fields and schema structure (unknown fields on `AgentDef`, `ParallelGroup`, `ForEachDef`, and `WorkflowConfig` are rejected, not silently dropped) +- Agent references and route targets +- Route reachability and `$end` reachability +- Template syntax +- Parallel group agent references +- For-each `source` format and reserved names +- Stale agent references and undeclared explicit-mode dependencies in `prompt`, `system_prompt`, `command`, `args`, `working_dir`, `input_mapping`, parallel-group inputs, and workflow `output:` templates +- Warning when an agent defines `system_prompt` but no `prompt:` (portability hazard since the Claude provider drops `system_prompt`) + +The success summary
table includes Parallel Groups and For-each Groups counts. + +### conductor show + +Show inputs, agents, parallel/for-each groups, and outputs for a workflow without running it. Accepts a local file path or a registry reference. + +```bash +conductor show <workflow> +``` + +**Examples:** + +```bash +conductor show ./my-workflow.yaml +conductor show qa-bot +conductor show qa-bot@my-registry@1.0.0 +``` + +Prints a sample `conductor run …` command pre-populated with the discovered inputs. + +### conductor replay + +Replay a recorded workflow run in the dashboard with a timeline scrubber: + +```bash +conductor replay <log-file> [--web-port PORT] +``` + +The log file can be: +- A JSON array downloaded from the dashboard (`GET /api/logs`) +- A JSONL file written by the `EventLogSubscriber` (e.g. `$TMPDIR/conductor/conductor-<workflow>-<timestamp>.events.jsonl`) + +```bash +conductor replay conductor-logs.json +conductor replay /tmp/conductor/conductor-my-workflow-20260101-120000.events.jsonl +``` + +### conductor registry + +Manage workflow registries — named sources of shared workflows (GitHub repos or local directories). + +```bash +conductor registry <subcommand> [OPTIONS] +``` + +| Subcommand | Description | +|------------|-------------| +| `list [name]` | List registries, or workflows in a specific registry (also shows latest tags) | +| `add <name> <source>` | Add a registry.
Options: `--type github\|path` (default github), `--default` | +| `remove <name>` | Remove a registry | +| `set-default <name>` | Set the default registry | +| `update [name]` | Refresh index and re-resolve latest versions | +| `show <ref>` | Show metadata for a workflow reference | + +**Examples:** + +```bash +conductor registry add official myorg/conductor-workflows --default +conductor registry add local /path/to/workflows --type path +conductor registry list # show configured registries +conductor registry list official # show workflows in a registry +conductor registry set-default official +conductor registry update # refresh all indexes +conductor registry show qa-bot@official@1.0.0 # show workflow metadata +``` + +**Running from a registry:** + +```bash +conductor run qa-bot # latest from default registry +conductor run qa-bot@official # latest from named registry +conductor run qa-bot@official@1.2.3 # explicit version +conductor run qa-bot@@1.2.3 # explicit version from default registry +conductor run sdd/plan#main # branch/tag/SHA via "#ref" suffix +conductor run sdd/plan#abc1234 # explicit commit +``` + +`latest` (and bare `name@registry` refs without a `#ref`) resolve to the **default branch HEAD**, not the newest tag — pin explicitly with `workflow#v1.2.3` for releases. Registry workflows are cached locally at `~/.conductor/cache/registries/`. Explicit refs are immutable; bare names re-resolve on `conductor registry update`. + +## Execution Flow + +1. **Load** — Parse YAML and validate structure +2. **Initialize** — Set up provider(s) and MCP servers +3. **Execute** — Run agents following routes: + - Sequential agents execute one at a time + - Parallel groups execute agents concurrently with context snapshots + - For-each groups spawn N agent instances from runtime array +4. **Collect** — Gather outputs per schema +5.
**Return** — Output final result as JSON + +### Iteration Counting + +- Each agent execution counts as 1 iteration +- Parallel agents count individually (3 parallel agents = 3 iterations) +- For-each instances each count as 1 iteration +- Loop-back patterns increment the counter on each cycle + +## Cost Tracking + +Conductor tracks token usage and costs automatically: + +```yaml +cost: + show_per_agent: true # Per-agent cost breakdown + show_summary: true # Total cost summary at end + pricing: # Override default pricing + custom-model: + input_per_mtok: 3.0 + output_per_mtok: 15.0 + cache_read_per_mtok: 0.3 + cache_write_per_mtok: 3.75 +``` + +Output includes input/output token counts and estimated costs per agent and in total. + +## Debugging + +### Default Output + +Full output is shown by default: + +```bash +conductor run workflow.yaml --input question="test" +``` + +Shows: +- Agent execution order +- Full prompt content (untruncated) +- Output received +- Route decisions +- Tool call arguments and reasoning +- Token usage and costs per agent + +Use `--quiet` for minimal output (lifecycle + routing only) or `--silent` for JSON-only. + +### Log File + +```bash +conductor run workflow.yaml --log-file auto +conductor -s run workflow.yaml --log-file debug.log +``` + +Capture full debug output to a file. Combine with `--silent` for quiet terminal with full logging. Auto mode generates files in `$TMPDIR/conductor/`. + +### Dry Run + +```bash +conductor run workflow.yaml --dry-run +``` + +Preview execution plan without running agents. Shows the workflow graph, agent order, and configuration. 
+ +### Web Dashboard + +```bash +conductor run workflow.yaml --web --input question="test" +``` + +Real-time browser dashboard for visualizing and interacting with workflows as they run: + +- **Interactive DAG graph** — Zoomable, draggable workflow graph with animated edges showing execution flow and conditional routing +- **Live agent streaming** — Watch agent reasoning, tool calls, and outputs stream in real-time as each step executes +- **Three-pane layout** — Resizable panels for the graph, agent detail, and a tabbed output pane (Log, Activity, Output) +- **In-browser human gates** — Respond to human-in-the-loop decisions directly in the dashboard +- **Per-node detail** — Click any node to see its prompt, metadata (model, tokens, cost), activity stream, and output +- **Background mode** — Run with `--web-bg` to start in background, print URL, and exit + +```bash +# Background mode: prints dashboard URL and exits +conductor run workflow.yaml --web-bg --input question="test" + +# Stop background workflow +conductor stop +``` + +### Validate First + +```bash +conductor validate workflow.yaml +``` + +Catch configuration errors before execution of new workflows. Reports agent count, parallel groups, for-each groups, human gates, and more. + +### Check Exit Codes + +| Code | Meaning | +|------|---------| +| 0 | Success | +| 1 | Workflow error | +| 2 | Validation error | +| 3 | Timeout | +| 4 | Max iterations exceeded | + +## Common Errors + +### "Missing required input" + +``` +Error: Missing required input: question +``` + +**Fix:** Provide all required inputs: +```bash +conductor run workflow.yaml --input question="value" +``` + +### "Unknown agent: X" + +``` +Error: Route references unknown agent: reviewer +``` + +**Fix:** Check agent/group name spelling matches in routes. + +### "Unreachable agent" + +``` +Error: Agent 'helper' is not reachable from entry_point +``` + +**Fix:** Add route to the agent or remove if unused. 
+ +### "Max iterations exceeded" + +``` +Error: Workflow exceeded max_iterations (10) +``` + +**Fix:** Increase limit or fix loop condition: +```yaml +limits: + max_iterations: 50 # Max: 500 +``` + +### "Timeout" + +``` +Error: Workflow timed out after 600 seconds +``` + +**Fix:** Increase timeout: +```yaml +limits: + timeout_seconds: 1200 +``` + +### Template Errors + +``` +Error: Undefined variable 'agent_name' in template +``` + +**Fix:** Check variable exists or use conditional: +```jinja2 +{% if agent_name is defined %} +{{ agent_name.output.field }} +{% endif %} +``` + +### "Parallel groups must contain at least 2 agents" + +**Fix:** Add at least 2 agents to the parallel group. + +### "Invalid source format" (for-each) + +**Fix:** Use dotted path with 3+ parts: `agent_name.output.field` + +### "Loop variable conflicts with reserved name" + +**Fix:** Choose a different `as` name. Reserved: `workflow`, `context`, `output`, `_index`, `_key` + +## Human Gates + +When workflow reaches a human gate: + +1. **Display** — Shows prompt and options in terminal +2. **Wait** — Pauses for user selection +3. **Capture** — Records selected value and optional text input (prompt_for) +4. **Route** — Continues to the route specified on the selected option + +### Skip Gates for Automation + +```bash +conductor run workflow.yaml --skip-gates +``` + +Auto-selects the first option at each gate. + +## Interactive Interrupt + +During execution, press **Esc** or **Ctrl+G** to pause the workflow. An interactive menu appears with these actions: + +| Action | Description | +|--------|-------------| +| **Continue with guidance** | Provide text guidance that is appended to subsequent agent prompts | +| **Skip to agent** | Jump to a specific agent in the workflow | +| **Stop** | Stop the workflow entirely | +| **Cancel** | Resume execution as-is | + +Guidance text accumulates across multiple interrupts and is injected into agent context. + +Disable with `--no-interactive`. 
In `--skip-gates` mode, interrupts auto-cancel. + +## Checkpoint & Resume + +When a workflow fails, Conductor automatically saves a checkpoint containing: +- All completed agent outputs +- Current workflow state and iteration count +- Workflow file hash (to detect changes) +- Failure details (agent, error type, message) + +Checkpoints are stored in `$TMPDIR/conductor/checkpoints/`. + +```bash +# List available checkpoints +conductor checkpoints + +# Resume from latest checkpoint for a workflow +conductor resume workflow.yaml + +# Resume from a specific checkpoint file +conductor resume --from /tmp/conductor/checkpoints/my-workflow-20260303-153000.json +``` + +If the workflow file has changed since the checkpoint was saved, a warning is displayed but resumption proceeds. + +## Provider Configuration + +### Override Provider + +```bash +conductor run workflow.yaml -p claude # Use Claude for all agents +conductor run workflow.yaml -p copilot # Use Copilot (default) +conductor run workflow.yaml -p openai-agents # Use OpenAI Agents SDK +``` + +### Per-Agent Provider Override + +Set `provider` on individual agents in YAML for multi-provider workflows: + +```yaml +agents: + - name: fast_task + provider: claude + model: claude-haiku-4.5 + - name: complex_task + # Uses workflow default provider + model: gpt-5.2 +``` + +## Output Handling + +Workflow output is JSON: + +```json +{ + "answer": "Python is a programming language...", + "confidence": 0.95 +} +``` + +### Capture Output + +```bash +# Save to file +conductor run workflow.yaml --input q="test" > output.json + +# Parse with jq +conductor run workflow.yaml --input q="test" | jq '.answer' +``` + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `GITHUB_TOKEN` | GitHub Copilot authentication | +| `ANTHROPIC_API_KEY` | Claude provider API key | +| `OPENAI_API_KEY` | OpenAI Agents provider API key (when `provider: openai-agents`) | +| `CONDUCTOR_LOG_LEVEL` | Logging level (DEBUG, INFO, 
WARNING, ERROR) | +| `CONDUCTOR_NO_UPDATE_CHECK` | Set to `1` to suppress the passive update-check hint | + +Environment variables in YAML configs support `${VAR}` and `${VAR:-default}` interpolation syntax. + +## Performance Tips + +1. **Use appropriate models** — Smaller models (Haiku) for simple tasks, larger (Sonnet/Opus) for complex reasoning +2. **Use `explicit` context mode** — Reduces token usage by only passing declared inputs +3. **Set timeouts** — Prevent runaway workflows with `limits.timeout_seconds` +4. **Use parallel groups** — Run independent agents concurrently +5. **Use for-each groups** — Process arrays in parallel with `max_concurrent` batching +6. **Set `max_tokens`** — Limit output length to save costs (especially with Claude) +7. **Use per-agent provider** — Pick the best model/provider for each task + +## Debugging Checklist + +1. [ ] Run `conductor validate workflow.yaml` +2. [ ] Check all agent/group names match between definition and routes +3. [ ] Verify entry_point exists as an agent, parallel group, or for-each group +4. [ ] Ensure all paths lead to `$end` +5. [ ] Test with `--dry-run` first +6. [ ] Check template variables are defined before use +7. [ ] Verify for-each `source` resolves to an array +8. [ ] Check parallel groups have 2+ agents +9. [ ] Review cost output for unexpected token usage diff --git a/src/conductor/expert/knowledge/yaml-schema.md b/src/conductor/expert/knowledge/yaml-schema.md new file mode 100644 index 0000000..a2be3bf --- /dev/null +++ b/src/conductor/expert/knowledge/yaml-schema.md @@ -0,0 +1,586 @@ +# Conductor Schema Reference + +Complete reference for all YAML configuration options. Derived from the Pydantic models in `src/conductor/config/schema.py`. 
+ +## Top-Level Structure + +```yaml +workflow: WorkflowDef # Required: workflow configuration +tools: [string] # Optional: workflow-level tool names +agents: [AgentDef] # Required: agent definitions +parallel: [ParallelGroup] # Optional: static parallel groups +for_each: [ForEachDef] # Optional: dynamic parallel groups +output: {field: template} # Optional: final output templates +``` + +## Workflow Schema + +```yaml +workflow: + # Required fields + name: string # Unique workflow identifier + entry_point: string # Name of first agent, parallel group, or for-each group + + # Optional fields + description: string # Human-readable description + version: string # Semantic version (e.g., "1.0.0") + + # Runtime configuration + runtime: + provider: string # "copilot" (default), "claude", or "openai-agents" + default_model: string # Default model for all agents + temperature: float # 0.0-1.0, controls randomness (optional) + max_tokens: integer # Max OUTPUT tokens per response, 1-200000 (optional) + timeout: float # Per-request timeout in seconds (optional, default: 600) + max_agent_iterations: integer # Max tool-use roundtrips per agent (1-500, optional) + max_session_seconds: float # Wall-clock timeout per agent session in seconds (optional) + default_reasoning_effort: string # Workflow-wide reasoning/thinking effort: low, medium, high, xhigh (optional) + mcp_servers: # MCP server configurations + <server_name>: + type: string # "stdio" (default), "http", or "sse" + command: string # Command to run (required for stdio) + args: [string] # Command arguments (stdio only) + url: string # Server URL (required for http/sse) + headers: {string: string} # HTTP headers (http/sse only) + timeout: integer # Timeout in milliseconds (optional) + tools: [string] # Tool whitelist, ["*"] for all (default: ["*"]) + env: {string: string} # Environment variables (stdio only) + + # Input parameters + input: + <param_name>: + type: string # "string", "number", "boolean", "array", "object" + required: boolean #
Default: true + default: any # Default value (must match declared type) + description: string # Parameter description + + # Context management + context: + mode: string # "accumulate" (default), "last_only", "explicit" + max_tokens: integer # Maximum context tokens (optional) + trim_strategy: string # "truncate", "drop_oldest", "summarize" + + # Safety limits + limits: + max_iterations: integer # Max agent executions (default: 10, range: 1-500) + timeout_seconds: integer # Total workflow timeout in seconds (optional, no default) + + # Cost tracking + cost: + show_per_agent: boolean # Show cost per agent in verbose output (default: true) + show_summary: boolean # Show cost summary at end (default: true) + pricing: # Custom pricing overrides + <model_name>: + input_per_mtok: float # Cost per million input tokens (USD) + output_per_mtok: float # Cost per million output tokens (USD) + cache_read_per_mtok: float # Cost per million cache read tokens (default: 0.0) + cache_write_per_mtok: float # Cost per million cache write tokens (default: 0.0) + + # Lifecycle hooks + hooks: + on_start: string # Template executed at workflow start + on_complete: string # Template executed on success + on_error: string # Template executed on failure + + # Arbitrary metadata for downstream tooling (dashboards, work-item trackers) + # Surfaced verbatim in the workflow_started event. + metadata: {string: any} # Optional. Merged with --metadata / -m CLI flags (CLI wins). + + # Workspace context prepended to every agent prompt + # Each entry is either a !file include or an inline string. + # For workflows distributed via registry, prefer the --workspace-instructions + # CLI flag for runtime auto-discovery. + instructions: + - !file ../AGENTS.md + - "Always respond in English."
+``` + +## Agent Schema + +```yaml +agents: + - # Required fields + name: string # Unique agent identifier + + # Optional fields + type: string # "agent" (default), "human_gate", "script", or "workflow" + description: string # What this agent does + model: string # Override default_model + provider: string # Per-agent provider override ("copilot" or "claude") + + # Input specification (for explicit context mode) + input: + - string # Reference paths, e.g., "workflow.input.question" + # Use "?" suffix for optional: "other_agent.output?" + + # Prompt templates + system_prompt: string # System message (always included, optional) + prompt: string # Jinja2 template for agent instructions + + # Output schema + output: + : + type: string # "string", "number", "boolean", "array", "object" + description: string # Field description + items: # For array types: schema of items + type: string + properties: # For object types: schema of properties + : + type: string + description: string + + # Routing rules (evaluated in order, first match wins) + routes: + - to: string # Target: agent name, parallel group, for-each group, "$end", or "self" + when: string # Optional Jinja2 condition + output: {string: string} # Optional output transformation templates + + # Agent-level tools + tools: # null = all workflow tools, [] = none, [list] = subset + - string + + # Agent-level limits (override workflow runtime defaults) + max_agent_iterations: integer # Max tool-use roundtrips for this agent (1-500, optional) + max_session_seconds: float # Soft wall-clock timeout per session (checked between iterations) + timeout_seconds: float # Hard wall-clock timeout (>=1.0); engine wraps in asyncio.wait_for(). + # Effective limit = min(timeout_seconds, remaining_workflow_timeout). + # Raises AgentTimeoutError; non-retryable. + # Forbidden on script (use 'timeout' instead), human_gate, workflow. 
+ + # Per-agent reasoning effort (overrides runtime.default_reasoning_effort) + # Not allowed for script, human_gate, or workflow agent types. + reasoning: + effort: string # low, medium, high, or xhigh + + # Per-agent retry policy (optional, not allowed for script, human_gate, or workflow agents) + retry: + max_attempts: integer # Max attempts including first (1-10, default: 1 = no retry) + backoff: string # "exponential" (default) or "fixed" + delay_seconds: float # Base delay in seconds (0-300, default: 2.0) + retry_on: # Error categories to retry (default: ["provider_error", "timeout"]) + - string # "provider_error" (API 500s, rate limits) or "timeout" + + # Conditional dialog mode (optional, only on provider-backed agents) + dialog: + trigger_prompt: string # Criteria evaluated against agent output by an LLM gate + + # Sub-workflow fields (type: workflow) + workflow: string # Path to sub-workflow YAML (relative to parent), required + input_mapping: # Optional Jinja2 expressions per sub-workflow input parameter + : string # e.g. "{{ task_manager.output.current_issue_id }}" + max_depth: integer # Optional per-agent recursion cap (1-10). + # Bounded additionally by global MAX_SUBWORKFLOW_DEPTH = 10. + + # Script-only fields (type: script) + command: string # Command to execute (Jinja2 templated) + args: [string] # Command arguments (each Jinja2 templated) + env: {string: string} # Extra environment variables + working_dir: string # Working directory (Jinja2 templated) + timeout: integer # Per-script timeout in seconds +``` + +**Script agent restrictions:** Cannot have `prompt`, `provider`, `model`, `tools`, `output`, `system_prompt`, `options`, `retry`, `reasoning`, `dialog`, `max_session_seconds`, `max_agent_iterations`, `timeout_seconds` (use `timeout`), `input_mapping`, or `max_depth`. Output is always `{stdout, stderr, exit_code}`. If `stdout` is valid JSON, its top-level keys are auto-merged into the output dict. 
+ +**Workflow agent restrictions (`type: workflow`):** Cannot have `prompt`, `model`, `provider`, `tools`, `system_prompt`, `command`, `options`, `retry`, `reasoning`, `dialog`, `max_session_seconds`, `max_agent_iterations`, or `timeout_seconds`. Requires `workflow:` path. Supports `input_mapping` and `max_depth`. Allowed inside `for_each` groups for dynamic fan-out. + +**Reasoning effort:** `reasoning.effort` (and `runtime.default_reasoning_effort`) accepts `low`, `medium`, `high`, or `xhigh`. Per-agent value overrides the runtime default. Each provider translates the unified value to its native API: + +- **Copilot**: forwards `reasoning_effort` to the session. Validated against the model's advertised `supported_reasoning_efforts` (when available); raises `ValidationError` for unsupported combinations. +- **Claude**: enables extended thinking via `thinking={"type":"enabled","budget_tokens":N}` with mapping low=2048, medium=8192, high=16384, xhigh=32768. Auto-coerces `temperature=1.0` (Anthropic API requirement) and bumps `max_tokens` to fit `budget+4096` (capped at 64000). Only valid on thinking-capable models (Claude 3.7+, Opus/Sonnet/Haiku 4.x); raises `ValidationError` otherwise. + +Both providers continue to surface reasoning content via `agent_reasoning` events visible in the dashboard, JSONL logs, and console at `-vv`. + +Forbidden on agent types: `script`, `human_gate`, `workflow`. 
+ +## Script Agent Schema + +Script agents run shell commands instead of LLM prompts: + +```yaml +agents: + - name: string + type: script # Required + description: string # Optional + command: string # Required: command to run (Jinja2 templated) + args: [string] # Optional: arguments (each Jinja2 templated) + env: {string: string} # Optional: extra environment variables + working_dir: string # Optional: working directory (Jinja2 templated) + timeout: integer # Optional: timeout in seconds + input: [string] # Optional: context dependencies + routes: # Required: routing rules + - to: string + when: string # Can use exit_code (simpleeval syntax) +``` + +### Script Output + +Script agents always produce: + +```jinja2 +{{ script_name.output.stdout }} # Captured standard output +{{ script_name.output.stderr }} # Captured standard error +{{ script_name.output.exit_code }} # Process exit code (0 = success) +``` + +## File Includes (`!file` Tag) + +Include external file content anywhere in YAML: + +```yaml +agents: + - name: analyzer + system_prompt: !file prompts/system.md # Included as string + prompt: !file prompts/analyze.md # Included as string + output: !file schemas/analyzer-output.yaml # Included as YAML structure +``` + +- Paths resolve **relative to the YAML file's directory** +- Plain text files (Markdown, etc.) 
are included as strings +- YAML files are parsed and included as data structures +- Supports **recursive includes** (included YAML files can use `!file`) +- Circular references are detected and raise `ConfigurationError` + +## Human Gate Schema + +Human gates use a **list-based** options format: + +```yaml +agents: + - name: string + type: human_gate + prompt: string # Jinja2 template shown to user + + options: # List of choices (required for human_gate) + - label: string # Display text for the option + value: string # Value stored when selected + route: string # Agent to route to when selected + prompt_for: string # Optional: field name to collect text input from user + + output: # Captured automatically + selected: # The selected option value + type: string + feedback: # Text from prompt_for (if used) + type: string +``` + +## Parallel Group Schema + +Static parallel groups execute a fixed list of agents concurrently: + +```yaml +parallel: + - name: string # Unique group identifier + description: string # Optional description + agents: # At least 2 agent names required + - string + failure_mode: string # "fail_fast" (default), "continue_on_error", "all_or_nothing" + routes: # Routes after group completes + - to: string + when: string +``` + +### Accessing Parallel Outputs + +```jinja2 +{{ group_name.outputs.agent_name.field }} # Successful agent output +{{ group_name.errors.agent_name.message }} # Error details (continue_on_error) +``` + +## For-Each (Dynamic Parallel) Schema + +For-each groups spawn N agent instances at runtime from an array: + +```yaml +for_each: + - name: string # Unique group identifier + type: for_each # Required discriminator + description: string # Optional description + source: string # Array reference (e.g., "finder.output.items") + # Must be dotted path with at least 3 parts + as: string # Loop variable name (must be valid identifier) + # Reserved: workflow, context, output, _index, _key + max_concurrent: integer # Concurrent limit per 
batch (default: 10, range: 1-100) + failure_mode: string # "fail_fast" (default), "continue_on_error", "all_or_nothing" + key_by: string # Optional: path to extract key for dict-based outputs + + agent: # Inline agent definition (template for each item) + name: string + model: string + prompt: string # Has access to {{ }}, {{ _index }}, {{ _key }} + output: + : {type: string} + + routes: + - to: string + when: string +``` + +### Loop Variables + +| Variable | Description | +|----------|-------------| +| `{{ }}` | Current item from the source array | +| `{{ _index }}` | Zero-based index of current item | +| `{{ _key }}` | Extracted key value (only when `key_by` is set) | + +### Accessing For-Each Outputs + +```jinja2 +# Without key_by (array access) +{{ group_name.outputs[0].field }} +{{ group_name.outputs | length }} +{% for result in group_name.outputs %}...{% endfor %} + +# With key_by (dict access) +{{ group_name.outputs["key_value"].field }} + +# Errors +{{ group_name.errors }} # Dict of failed items +{{ group_name.errors | length }} +``` + +## Output Schema + +```yaml +output: + : string # Jinja2 template referencing agent outputs + # e.g., "{{ agent_name.output.field }}" +``` + +## Type System + +### Supported Types + +| Type | Description | Example Values | +|------|-------------|----------------| +| `string` | Text | `"hello"`, `"multi\nline"` | +| `number` | Integer or float | `42`, `3.14` | +| `boolean` | True/false | `true`, `false` | +| `array` | List of items | `["a", "b", "c"]` | +| `object` | Key-value pairs | `{"key": "value"}` | + +### Array Type Definition + +```yaml +output: + items: + type: array + description: List of items + items: + type: string +``` + +### Object Type Definition + +```yaml +output: + result: + type: object + description: Structured result + properties: + name: + type: string + description: Item name + count: + type: number + description: Item count +``` + +## Template Syntax + +### Variable Access + +```jinja2 +{{ 
workflow.input.param_name }} # Workflow input +{{ workflow.name }} # Workflow name +{{ workflow.description }} # Workflow description +{{ workflow.dir }} # Directory containing the workflow YAML (all context modes) +{{ workflow.file }} # Absolute path to the workflow YAML +{{ agent_name.output.field }} # Agent output field +{{ output.field }} # Current agent output (in routes) +``` + +### Conditionals + +```jinja2 +{% if condition %} + content +{% elif other_condition %} + other content +{% else %} + fallback content +{% endif %} +``` + +### Checking for Defined Variables + +```jinja2 +{% if agent_name is defined and agent_name.output %} + {{ agent_name.output.field }} +{% endif %} +``` + +### Loops + +```jinja2 +{% for item in agent_name.output.items %} + - {{ item }} +{% endfor %} +``` + +### Filters + +```jinja2 +{{ value | upper }} # Uppercase +{{ value | lower }} # Lowercase +{{ value | default("fallback") }} # Default value +{{ value | length }} # Length +{{ value | join(", ") }} # Join array +{{ value | json }} # JSON serialize +``` + +## Route Conditions + +### Comparison Operators + +```yaml +when: "{{ output.score >= 90 }}" # Greater than or equal +when: "{{ output.score < 50 }}" # Less than +when: "{{ output.status == 'done' }}" # Equality +when: "{{ output.status != 'error' }}"# Inequality +``` + +### Logical Operators + +```yaml +when: "{{ output.score >= 90 and output.approved }}" +when: "{{ output.retry or output.force }}" +when: "{{ not output.failed }}" +``` + +### String Operations + +```yaml +when: "{{ 'error' in output.message }}" +when: "{{ output.status.startswith('success') }}" +``` + +### simpleeval Syntax (legacy) + +```yaml +when: "status == 'success'" # Without Jinja2 braces +when: "score > 5 and valid" +``` + +## MCP Server Examples + +### Stdio Server + +```yaml +runtime: + mcp_servers: + web-search: + command: sh + args: ["-c", "MODE=stdio DEFAULT_SEARCH_ENGINE=bing exec npx -y open-websearch@latest"] + tools: ["*"] +``` + +### HTTP 
Server + +```yaml +runtime: + mcp_servers: + remote-api: + type: http + url: https://mcp.server.example.com/ + headers: + Authorization: "Bearer ${API_TOKEN}" + tools: ["*"] +``` + +### SSE Server + +```yaml +runtime: + mcp_servers: + streaming: + type: sse + url: https://sse.server.example.com/ + tools: ["*"] +``` + +### With Environment Variables + +```yaml +runtime: + mcp_servers: + custom: + command: node + args: ["./server.js"] + env: + API_KEY: "${API_KEY}" + DEBUG: "true" + tools: ["*"] +``` + +### Selective Tool Access + +```yaml +runtime: + mcp_servers: + web-search: + command: npx + args: ["-y", "open-websearch@latest"] + tools: ["search", "fetch"] # Only these tools (not ["*"]) +``` + +## Validation Rules + +### Workflow Validation + +- `name` must be present and non-empty +- `entry_point` must reference a valid agent, parallel group, or for-each group +- All referenced agents/groups must be defined +- Input parameter names must be valid identifiers +- Unknown fields on `WorkflowConfig`, `AgentDef`, `ParallelGroup`, and `ForEachDef` are **rejected** (not silently dropped) + +### Agent Validation + +- `name` must be unique within workflow +- `routes` required for type `agent` (not for `human_gate`) +- All route targets must be valid agent names, group names, `$end`, or `self` +- `when` conditions must be valid Jinja2 expressions +- `human_gate` agents require `options` and `prompt` + +### Parallel Group Validation + +- Must contain at least 2 agents +- All referenced agents must exist +- Route targets must be valid + +### For-Each Validation + +- `source` must be dotted path with at least 3 parts (e.g., `agent.output.field`) +- `as` must be a valid Python identifier, not a reserved name +- `max_concurrent` must be 1-100 +- Nested for-each groups are not allowed + +### Routing Validation + +- At least one route must be reachable (not all conditional) +- Circular routes are allowed but require `max_iterations` +- All agents must be reachable from 
`entry_point` +- All paths must eventually reach `$end` + +## Error Messages + +| Error | Cause | Solution | +|-------|-------|----------| +| `Missing entry_point` | No `entry_point` in workflow | Add `entry_point: agent_name` | +| `Unknown agent: X` | Route targets non-existent agent/group | Check names match | +| `Unreachable agent: X` | Agent not reachable from entry | Add route to agent or remove | +| `No terminal route` | No path reaches `$end` | Add `$end` route | +| `Invalid condition` | Malformed `when` clause | Check Jinja2 syntax | +| `Parallel needs 2+ agents` | Parallel group has < 2 agents | Add more agents | +| `Invalid source format` | For-each source path invalid | Use `agent.output.field` format | +| `Reserved loop variable` | `as` uses reserved name | Choose different variable name | diff --git a/src/conductor/expert/loader.py b/src/conductor/expert/loader.py new file mode 100644 index 0000000..38cd132 --- /dev/null +++ b/src/conductor/expert/loader.py @@ -0,0 +1,69 @@ +"""Load and cache the bundled Conductor knowledge base. + +The knowledge base consists of reference docs originally authored for the +Conductor Claude-Code plugin and bundled as package data under +``conductor/expert/knowledge/``. Content is loaded once per process via +:func:`functools.lru_cache` and wrapped in ```` tags +for clean separation from workspace instructions. + +Phase 1 bundles three documents: + +* ``yaml-schema.md`` — complete YAML field reference +* ``authoring.md`` — authoring patterns and best practices +* ``execution.md`` — CLI commands, debugging, checkpoint/resume +""" + +from __future__ import annotations + +import functools +import logging +from importlib import resources + +logger = logging.getLogger(__name__) + +# Documents to include, in presentation order. 
@functools.lru_cache(maxsize=1)
def load_expert_knowledge() -> str:
    """Load the bundled Conductor knowledge base and return it as a tagged string.

    Every document named in ``_KNOWLEDGE_DOCS`` is read (as UTF-8) from the
    ``knowledge`` directory of the ``conductor.expert`` package, in list
    order. Documents that are empty after stripping whitespace are skipped.
    Each remaining document gets a ``# Knowledge: <file name>`` heading, the
    documents are joined with ``---`` dividers, and the result is wrapped in
    ``<conductor_knowledge>`` tags together with ``_HEADER``.

    The function takes no arguments and is decorated with
    ``lru_cache(maxsize=1)``, so the files are read on the first call only;
    later calls return the very same string object until
    ``load_expert_knowledge.cache_clear()`` is invoked.

    Returns:
        The knowledge base as a single string of the form
        ``<conductor_knowledge>\\n{header}\\n\\n{docs}\\n\\n</conductor_knowledge>\\n``.

    Raises:
        Any error raised while locating or reading a bundled document
        propagates to the caller unchanged (nothing is caught here).
    """
    knowledge_pkg = resources.files("conductor.expert") / "knowledge"
    sections: list[str] = []

    for name in _KNOWLEDGE_DOCS:
        text = (knowledge_pkg / name).read_text(encoding="utf-8").strip()
        if text:
            sections.append(f"# Knowledge: {name}\n\n{text}")

    combined = "\n\n---\n\n".join(sections)

    # Size is reported in bytes of the UTF-8 encoding, not characters, since
    # that is closer to what is actually sent to the provider.
    total_size_kb = len(combined.encode("utf-8")) / 1024
    logger.info(
        "Loaded Conductor Expert knowledge base (%.1fKB from %d documents)",
        total_size_kb,
        len(sections),
    )

    # The explicit open/close tags keep the knowledge base clearly separated
    # from workspace instructions and from the agent's own prompt.
    return f"<conductor_knowledge>\n{_HEADER}\n\n{combined}\n\n</conductor_knowledge>\n"
# ---------------------------------------------------------------------------
# Knowledge loader tests
# ---------------------------------------------------------------------------


class TestLoadExpertKnowledge:
    """Tests for the Conductor Expert knowledge loader."""

    def setup_method(self) -> None:
        """Clear the lru_cache before each test so every test triggers a real load."""
        load_expert_knowledge.cache_clear()

    def test_loads_successfully(self) -> None:
        """Knowledge base loads without error."""
        result = load_expert_knowledge()
        assert isinstance(result, str)
        assert len(result) > 0

    def test_wrapped_in_conductor_knowledge_tags(self) -> None:
        """Result is wrapped in <conductor_knowledge> tags."""
        result = load_expert_knowledge()
        assert result.startswith("<conductor_knowledge>\n")
        assert "</conductor_knowledge>" in result

    def test_contains_all_three_documents(self) -> None:
        """Result includes content from yaml-schema, authoring, and execution docs."""
        result = load_expert_knowledge()
        assert "# Knowledge: yaml-schema.md" in result
        assert "# Knowledge: authoring.md" in result
        assert "# Knowledge: execution.md" in result

    def test_contains_header_text(self) -> None:
        """Result includes the descriptive header."""
        result = load_expert_knowledge()
        assert "Conductor knowledge base" in result
        assert "YAML workflow schema" in result

    def test_documents_separated_by_dividers(self) -> None:
        """Documents are separated by --- dividers."""
        result = load_expert_knowledge()
        assert "\n\n---\n\n" in result

    def test_caching_returns_same_object(self) -> None:
        """Subsequent calls return the cached object (same id)."""
        first = load_expert_knowledge()
        second = load_expert_knowledge()
        assert first is second

    def test_substantial_content_size(self) -> None:
        """Knowledge base contains substantial content (>50KB from three docs)."""
        result = load_expert_knowledge()
        size_kb = len(result.encode("utf-8")) / 1024
        assert size_kb > 50, f"Expected >50KB, got {size_kb:.1f}KB"


# ---------------------------------------------------------------------------
# Schema tests — AgentDef.conductor_expert
# ---------------------------------------------------------------------------


class TestAgentDefConductorExpert:
    """Tests for the conductor_expert field on AgentDef."""

    def test_defaults_to_none(self) -> None:
        """conductor_expert defaults to None (inherit from workflow)."""
        agent = AgentDef(name="a", model="gpt-4", prompt="Hello")
        assert agent.conductor_expert is None

    def test_explicit_true(self) -> None:
        """conductor_expert can be set to True."""
        agent = AgentDef(name="a", model="gpt-4", prompt="Hello", conductor_expert=True)
        assert agent.conductor_expert is True

    def test_explicit_false(self) -> None:
        """conductor_expert can be set to False (explicit opt-out)."""
        agent = AgentDef(name="a", model="gpt-4", prompt="Hello", conductor_expert=False)
        assert agent.conductor_expert is False

    def test_forbidden_on_script_agent(self) -> None:
        """script agents cannot have conductor_expert."""
        with pytest.raises(ValidationError, match="script agents cannot have 'conductor_expert'"):
            AgentDef(
                name="s",
                type="script",
                command="echo hi",
                conductor_expert=True,
            )

    def test_forbidden_on_workflow_agent(self) -> None:
        """workflow agents cannot have conductor_expert."""
        with pytest.raises(ValidationError, match="workflow agents cannot have 'conductor_expert'"):
            AgentDef(
                name="w",
                type="workflow",
                workflow="sub.yaml",
                conductor_expert=True,
            )

    def test_forbidden_on_human_gate(self) -> None:
        """human_gate agents cannot have conductor_expert."""
        with pytest.raises(
            ValidationError, match="human_gate agents cannot have 'conductor_expert'"
        ):
            AgentDef(
                name="g",
                type="human_gate",
                prompt="Choose:",
                options=[GateOption(label="Yes", value="y", route="next")],
                conductor_expert=True,
            )

    def test_allowed_on_regular_agent(self) -> None:
        """Regular (provider-backed) agents accept conductor_expert."""
        agent = AgentDef(
            name="reviewer",
            type="agent",
            model="gpt-4",
            prompt="Review this workflow",
            conductor_expert=True,
        )
        assert agent.conductor_expert is True

    def test_allowed_on_default_type_agent(self) -> None:
        """Agents with no explicit type accept conductor_expert."""
        agent = AgentDef(
            name="reviewer",
            model="gpt-4",
            prompt="Review this workflow",
            conductor_expert=True,
        )
        assert agent.conductor_expert is True
        assert agent.type is None


# ---------------------------------------------------------------------------
# Schema tests — RuntimeConfig.conductor_expert
# ---------------------------------------------------------------------------


class TestRuntimeConfigConductorExpert:
    """Tests for the conductor_expert field on RuntimeConfig."""

    def test_defaults_to_false(self) -> None:
        """conductor_expert defaults to False."""
        config = RuntimeConfig()
        assert config.conductor_expert is False

    def test_can_be_enabled(self) -> None:
        """conductor_expert can be set to True."""
        config = RuntimeConfig(conductor_expert=True)
        assert config.conductor_expert is True


# ---------------------------------------------------------------------------
# Executor integration tests
# ---------------------------------------------------------------------------


class TestExecutorConductorExpert:
    """Tests for Conductor Expert injection in AgentExecutor.

    Presence or absence of the knowledge base is detected through its
    ``<conductor_knowledge>`` wrapper tags. Searching for an empty string
    would be meaningless: ``"" in s`` is always true and ``s.index("")`` is
    always 0.
    """

    def setup_method(self) -> None:
        """Clear the knowledge cache before each test."""
        load_expert_knowledge.cache_clear()

    def test_not_injected_by_default(self) -> None:
        """Expert knowledge is NOT injected when neither flag is set."""
        provider = CopilotProvider()
        executor = AgentExecutor(provider)

        agent = AgentDef(name="a", model="gpt-4", prompt="Hello world")
        context: dict = {}
        rendered = executor.render_prompt(agent, context)

        assert "<conductor_knowledge>" not in rendered
        assert "Hello world" in rendered

    def test_injected_when_agent_flag_true(self) -> None:
        """Expert knowledge IS injected when agent.conductor_expert=True."""
        provider = CopilotProvider()
        executor = AgentExecutor(provider)

        agent = AgentDef(name="a", model="gpt-4", prompt="Hello world", conductor_expert=True)
        context: dict = {}
        rendered = executor.render_prompt(agent, context)

        assert "<conductor_knowledge>" in rendered
        assert "</conductor_knowledge>" in rendered
        assert "Hello world" in rendered

    def test_injected_when_workflow_default_true(self) -> None:
        """Expert knowledge IS injected via workflow-level default."""
        provider = CopilotProvider()
        executor = AgentExecutor(provider, conductor_expert_default=True)

        agent = AgentDef(name="a", model="gpt-4", prompt="Hello world")
        context: dict = {}
        rendered = executor.render_prompt(agent, context)

        assert "<conductor_knowledge>" in rendered
        assert "Hello world" in rendered

    def test_agent_false_overrides_workflow_true(self) -> None:
        """Agent conductor_expert=False overrides workflow default=True."""
        provider = CopilotProvider()
        executor = AgentExecutor(provider, conductor_expert_default=True)

        agent = AgentDef(name="a", model="gpt-4", prompt="Hello world", conductor_expert=False)
        context: dict = {}
        rendered = executor.render_prompt(agent, context)

        assert "<conductor_knowledge>" not in rendered
        assert "Hello world" in rendered

    def test_agent_true_overrides_workflow_false(self) -> None:
        """Agent conductor_expert=True works even when workflow default=False."""
        provider = CopilotProvider()
        executor = AgentExecutor(provider, conductor_expert_default=False)

        agent = AgentDef(name="a", model="gpt-4", prompt="Hello world", conductor_expert=True)
        context: dict = {}
        rendered = executor.render_prompt(agent, context)

        assert "<conductor_knowledge>" in rendered
        assert "Hello world" in rendered

    def test_expert_appears_before_prompt(self) -> None:
        """Expert knowledge appears before the agent's rendered prompt."""
        provider = CopilotProvider()
        executor = AgentExecutor(provider, conductor_expert_default=True)

        agent = AgentDef(name="a", model="gpt-4", prompt="MY_PROMPT_HERE")
        context: dict = {}
        rendered = executor.render_prompt(agent, context)

        expert_pos = rendered.index("<conductor_knowledge>")
        prompt_pos = rendered.index("MY_PROMPT_HERE")
        assert expert_pos < prompt_pos

    def test_expert_appears_after_instructions_preamble(self) -> None:
        """Expert knowledge comes after workspace instructions but before prompt."""
        provider = CopilotProvider()
        # The preamble is supplied by this test, so the marker only has to be
        # unique within the rendered prompt.
        # NOTE(review): tag name assumed to mirror the production wrapper — confirm.
        preamble = "<workspace_instructions>\nFollow conventions.\n</workspace_instructions>\n\n"
        executor = AgentExecutor(
            provider,
            instructions_preamble=preamble,
            conductor_expert_default=True,
        )

        agent = AgentDef(name="a", model="gpt-4", prompt="MY_PROMPT_HERE")
        context: dict = {}
        rendered = executor.render_prompt(agent, context)

        instructions_pos = rendered.index("<workspace_instructions>")
        expert_pos = rendered.index("<conductor_knowledge>")
        prompt_pos = rendered.index("MY_PROMPT_HERE")
        assert instructions_pos < expert_pos < prompt_pos

    def test_should_inject_expert_tristate_logic(self) -> None:
        """Test the _should_inject_expert tri-state resolution."""
        provider = CopilotProvider()

        # Workflow default False, agent None → False
        executor = AgentExecutor(provider, conductor_expert_default=False)
        agent_none = AgentDef(name="a", model="gpt-4", prompt="p")
        assert executor._should_inject_expert(agent_none) is False

        # Workflow default True, agent None → True
        executor = AgentExecutor(provider, conductor_expert_default=True)
        assert executor._should_inject_expert(agent_none) is True

        # Agent explicitly True overrides any default
        agent_true = AgentDef(name="a", model="gpt-4", prompt="p", conductor_expert=True)
        executor = AgentExecutor(provider, conductor_expert_default=False)
        assert executor._should_inject_expert(agent_true) is True

        # Agent explicitly False overrides any default
        agent_false = AgentDef(name="a", model="gpt-4", prompt="p", conductor_expert=False)
        executor = AgentExecutor(provider, conductor_expert_default=True)
        assert executor._should_inject_expert(agent_false) is False
+++++++++++++++++++ .../conductor/references/yaml-schema.md | 5 +++ pyproject.toml | 1 + src/conductor/expert/knowledge/authoring.md | 35 +++++++++++++++++++ src/conductor/expert/knowledge/yaml-schema.md | 5 +++ src/conductor/expert/loader.py | 8 ++++- .../test_existing_workflows_integration.py | 2 +- .../test_integration/test_mixed_providers.py | 2 +- 8 files changed, 90 insertions(+), 3 deletions(-) diff --git a/plugins/conductor/skills/conductor/references/authoring.md b/plugins/conductor/skills/conductor/references/authoring.md index 7ab76aa..a253228 100644 --- a/plugins/conductor/skills/conductor/references/authoring.md +++ b/plugins/conductor/skills/conductor/references/authoring.md @@ -20,6 +20,7 @@ workflow: max_agent_iterations: 50 # Max tool-use roundtrips per agent (1-500, optional) max_session_seconds: 120 # Wall-clock timeout per agent session (optional) default_reasoning_effort: medium # Workflow-wide reasoning effort: low, medium, high, xhigh (optional) + conductor_expert: true # Inject Conductor knowledge into all agents (default: false, optional) input: # Define workflow inputs param_name: @@ -116,6 +117,10 @@ agents: reasoning: # Override runtime.default_reasoning_effort (optional) effort: high # low, medium, high, or xhigh + conductor_expert: true # Inject Conductor knowledge into this agent (optional, tri-state) + # null = inherit runtime.conductor_expert, true = force enable, + # false = force disable. Not allowed on script/human_gate/workflow. + routes: # Where to go next - to: next_agent ``` @@ -148,6 +153,36 @@ agents: See `examples/reasoning-effort.yaml` for a complete example. +### Conductor Expert Knowledge Base + +`conductor_expert` enables opt-in injection of Conductor's bundled knowledge base (~70KB) into agent prompts. This gives agents deep understanding of the YAML schema, execution model, authoring patterns, and CLI commands — enabling them to evaluate, improve, debug, or generate Conductor workflows. 
+ +**Tri-state per-agent field:** +- `null` (default) — inherit from `runtime.conductor_expert` +- `true` — force enable, regardless of workflow default +- `false` — force disable, regardless of workflow default + +**Workflow-wide default:** `runtime.conductor_expert: true` enables it for all provider-backed agents. Individual agents can override with `conductor_expert: false`. + +Not allowed on `script`, `human_gate`, or `workflow` agent types. The knowledge is injected between workspace instructions and the agent prompt, wrapped in `<conductor_knowledge>` tags. + +```yaml +workflow: + runtime: + conductor_expert: true # all agents get knowledge + +agents: + - name: workflow_reviewer + conductor_expert: true # per-agent opt-in + prompt: "Review this workflow for correctness..." + + - name: simple_agent + conductor_expert: false # opt out even when runtime default is true + prompt: "Do something simple." +``` + +See `examples/conductor-expert.yaml` for a complete example. + ## Routing Patterns ### Linear diff --git a/plugins/conductor/skills/conductor/references/yaml-schema.md b/plugins/conductor/skills/conductor/references/yaml-schema.md index a2be3bf..15ba1a9 100644 --- a/plugins/conductor/skills/conductor/references/yaml-schema.md +++ b/plugins/conductor/skills/conductor/references/yaml-schema.md @@ -35,6 +35,7 @@ workflow: max_agent_iterations: integer # Max tool-use roundtrips per agent (1-500, optional) max_session_seconds: float # Wall-clock timeout per agent session in seconds (optional) default_reasoning_effort: string # Workflow-wide reasoning/thinking effort: low, medium, high, xhigh (optional) + conductor_expert: boolean # Inject bundled Conductor knowledge into provider-backed agents (default: false) mcp_servers: # MCP server configurations : type: string # "stdio" (default), "http", or "sse" @@ -152,6 +153,10 @@ agents: reasoning: effort: string # low, medium, high, or xhigh + # Conductor Expert knowledge base (optional, only on provider-backed agents) + # Tri-state: null 
= inherit runtime.conductor_expert, true = enable, false = disable + conductor_expert: boolean # Inject Conductor knowledge into this agent's prompt (optional) + # Per-agent retry policy (optional, not allowed for script, human_gate, or workflow agents) retry: max_attempts: integer # Max attempts including first (1-10, default: 1 = no retry) diff --git a/pyproject.toml b/pyproject.toml index ddd1f1f..5ec4d3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["src/conductor"] +artifacts = ["src/conductor/expert/knowledge/*.md"] exclude = [ "src/conductor/web/frontend", ] diff --git a/src/conductor/expert/knowledge/authoring.md b/src/conductor/expert/knowledge/authoring.md index 7ab76aa..a253228 100644 --- a/src/conductor/expert/knowledge/authoring.md +++ b/src/conductor/expert/knowledge/authoring.md @@ -20,6 +20,7 @@ workflow: max_agent_iterations: 50 # Max tool-use roundtrips per agent (1-500, optional) max_session_seconds: 120 # Wall-clock timeout per agent session (optional) default_reasoning_effort: medium # Workflow-wide reasoning effort: low, medium, high, xhigh (optional) + conductor_expert: true # Inject Conductor knowledge into all agents (default: false, optional) input: # Define workflow inputs param_name: @@ -116,6 +117,10 @@ agents: reasoning: # Override runtime.default_reasoning_effort (optional) effort: high # low, medium, high, or xhigh + conductor_expert: true # Inject Conductor knowledge into this agent (optional, tri-state) + # null = inherit runtime.conductor_expert, true = force enable, + # false = force disable. Not allowed on script/human_gate/workflow. + routes: # Where to go next - to: next_agent ``` @@ -148,6 +153,36 @@ agents: See `examples/reasoning-effort.yaml` for a complete example. +### Conductor Expert Knowledge Base + +`conductor_expert` enables opt-in injection of Conductor's bundled knowledge base (~70KB) into agent prompts. 
This gives agents deep understanding of the YAML schema, execution model, authoring patterns, and CLI commands — enabling them to evaluate, improve, debug, or generate Conductor workflows. + +**Tri-state per-agent field:** +- `null` (default) — inherit from `runtime.conductor_expert` +- `true` — force enable, regardless of workflow default +- `false` — force disable, regardless of workflow default + +**Workflow-wide default:** `runtime.conductor_expert: true` enables it for all provider-backed agents. Individual agents can override with `conductor_expert: false`. + +Not allowed on `script`, `human_gate`, or `workflow` agent types. The knowledge is injected between workspace instructions and the agent prompt, wrapped in dedicated delimiter tags. + +```yaml +workflow: + runtime: + conductor_expert: true # all agents get knowledge + +agents: + - name: workflow_reviewer + conductor_expert: true # per-agent opt-in + prompt: "Review this workflow for correctness..." + + - name: simple_agent + conductor_expert: false # opt out even when runtime default is true + prompt: "Do something simple." +``` + +See `examples/conductor-expert.yaml` for a complete example. 
+ ## Routing Patterns ### Linear diff --git a/src/conductor/expert/knowledge/yaml-schema.md b/src/conductor/expert/knowledge/yaml-schema.md index a2be3bf..15ba1a9 100644 --- a/src/conductor/expert/knowledge/yaml-schema.md +++ b/src/conductor/expert/knowledge/yaml-schema.md @@ -35,6 +35,7 @@ workflow: max_agent_iterations: integer # Max tool-use roundtrips per agent (1-500, optional) max_session_seconds: float # Wall-clock timeout per agent session in seconds (optional) default_reasoning_effort: string # Workflow-wide reasoning/thinking effort: low, medium, high, xhigh (optional) + conductor_expert: boolean # Inject bundled Conductor knowledge into provider-backed agents (default: false) mcp_servers: # MCP server configurations : type: string # "stdio" (default), "http", or "sse" @@ -152,6 +153,10 @@ agents: reasoning: effort: string # low, medium, high, or xhigh + # Conductor Expert knowledge base (optional, only on provider-backed agents) + # Tri-state: null = inherit runtime.conductor_expert, true = enable, false = disable + conductor_expert: boolean # Inject Conductor knowledge into this agent's prompt (optional) + # Per-agent retry policy (optional, not allowed for script, human_gate, or workflow agents) retry: max_attempts: integer # Max attempts including first (1-10, default: 1 = no retry) diff --git a/src/conductor/expert/loader.py b/src/conductor/expert/loader.py index 38cd132..d166723 100644 --- a/src/conductor/expert/loader.py +++ b/src/conductor/expert/loader.py @@ -53,7 +53,13 @@ def load_expert_knowledge() -> str: for name in _KNOWLEDGE_DOCS: resource = knowledge_pkg / name - text = resource.read_text(encoding="utf-8").strip() + try: + text = resource.read_text(encoding="utf-8").strip() + except (FileNotFoundError, OSError, UnicodeDecodeError) as e: + raise RuntimeError( + f"Conductor Expert knowledge file '{name}' is missing or unreadable. " + "This usually indicates a broken install; try reinstalling conductor." 
+ ) from e if text: sections.append(f"# Knowledge: {name}\n\n{text}") diff --git a/tests/test_integration/test_existing_workflows_integration.py b/tests/test_integration/test_existing_workflows_integration.py index 261e206..794374c 100644 --- a/tests/test_integration/test_existing_workflows_integration.py +++ b/tests/test_integration/test_existing_workflows_integration.py @@ -181,7 +181,7 @@ async def test_schema_changes_dont_affect_copilot_provider(): # Serialization excludes None values dumped = runtime.model_dump(exclude_none=True) - assert dumped == {"provider": "copilot", "mcp_servers": {}} + assert dumped == {"provider": "copilot", "mcp_servers": {}, "conductor_expert": False} # Verify provider can be instantiated provider = CopilotProvider() diff --git a/tests/test_integration/test_mixed_providers.py b/tests/test_integration/test_mixed_providers.py index 81f22de..0a811b3 100644 --- a/tests/test_integration/test_mixed_providers.py +++ b/tests/test_integration/test_mixed_providers.py @@ -89,7 +89,7 @@ def test_claude_fields_ignored_by_copilot_provider(self, tmp_path): # Serialization excludes None values dumped = runtime.model_dump(exclude_none=True) - assert dumped == {"provider": "copilot", "mcp_servers": {}} + assert dumped == {"provider": "copilot", "mcp_servers": {}, "conductor_expert": False} def test_provider_parameter_isolation(self, tmp_path): """Test that provider-specific parameters don't interfere. From f304a122c925d8e9ac89aa47b499a79f4674d0ce Mon Sep 17 00:00:00 2001 From: Brent Rusinow Date: Tue, 19 May 2026 19:13:29 -0700 Subject: [PATCH 3/5] fix(example): drop hardcoded model from conductor-expert example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let the provider's default model apply — the conductor_expert flag is model-agnostic. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- examples/conductor-expert.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/conductor-expert.yaml b/examples/conductor-expert.yaml index e1b7bcc..b9c4227 100644 --- a/examples/conductor-expert.yaml +++ b/examples/conductor-expert.yaml @@ -39,7 +39,6 @@ workflow: agents: - name: reviewer description: Reviews a Conductor workflow for correctness and best practices - model: gpt-5.2 # This agent gets the full Conductor knowledge base injected conductor_expert: true prompt: | From cdab5696c60db5f47b4c394e2ffdcaf1ccaa3685 Mon Sep 17 00:00:00 2001 From: Brent Rusinow Date: Tue, 19 May 2026 21:35:46 -0700 Subject: [PATCH 4/5] test: cover error-handling branch in expert loader (100% patch coverage) Add test for RuntimeError when a knowledge doc is missing/unreadable, covering the except block at loader.py:58-59 flagged by Codecov. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_expert/test_conductor_expert.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_expert/test_conductor_expert.py b/tests/test_expert/test_conductor_expert.py index 5c5170d..65772a9 100644 --- a/tests/test_expert/test_conductor_expert.py +++ b/tests/test_expert/test_conductor_expert.py @@ -74,6 +74,20 @@ def test_substantial_content_size(self) -> None: size_kb = len(result.encode("utf-8")) / 1024 assert size_kb > 50, f"Expected >50KB, got {size_kb:.1f}KB" + def test_raises_on_missing_knowledge_file(self) -> None: + """Raises RuntimeError with reinstall guidance when a doc is missing.""" + from unittest.mock import patch + + load_expert_knowledge.cache_clear() + + original_fn = load_expert_knowledge.__wrapped__ + + with patch("conductor.expert.loader._KNOWLEDGE_DOCS", ["nonexistent-doc.md"]): + with pytest.raises(RuntimeError, match="missing or unreadable"): + original_fn() + + load_expert_knowledge.cache_clear() + # 
--------------------------------------------------------------------------- # Schema tests — AgentDef.conductor_expert From bb8a6707e2da82924a7a2e0452712d956ee14d7f Mon Sep 17 00:00:00 2001 From: Brent Rusinow Date: Tue, 19 May 2026 21:51:24 -0700 Subject: [PATCH 5/5] style: combine nested with statements (SIM117) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_expert/test_conductor_expert.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_expert/test_conductor_expert.py b/tests/test_expert/test_conductor_expert.py index 65772a9..2f8c155 100644 --- a/tests/test_expert/test_conductor_expert.py +++ b/tests/test_expert/test_conductor_expert.py @@ -82,9 +82,11 @@ def test_raises_on_missing_knowledge_file(self) -> None: original_fn = load_expert_knowledge.__wrapped__ - with patch("conductor.expert.loader._KNOWLEDGE_DOCS", ["nonexistent-doc.md"]): - with pytest.raises(RuntimeError, match="missing or unreadable"): - original_fn() + with ( + patch("conductor.expert.loader._KNOWLEDGE_DOCS", ["nonexistent-doc.md"]), + pytest.raises(RuntimeError, match="missing or unreadable"), + ): + original_fn() load_expert_knowledge.cache_clear()