From ee0cb9512a6ccc388aaec9761391fe4a72d15e86 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Sun, 17 May 2026 19:12:28 -0700 Subject: [PATCH 01/13] Researcher persona and initial dev-team orchestrator --- .claude/agents/researcher.md | 67 ++++++++ .claude/commands/dev-team.md | 16 ++ .claude/commands/researcher-plan.md | 84 ++++++++++ .claude/commands/researcher-validate.md | 62 +++++++ .claude/scripts/dev_team.py | 206 ++++++++++++++++++++++++ .claude/settings.json | 8 + .claude/settings.local.json | 3 +- 7 files changed, 445 insertions(+), 1 deletion(-) create mode 100644 .claude/agents/researcher.md create mode 100644 .claude/commands/dev-team.md create mode 100644 .claude/commands/researcher-plan.md create mode 100644 .claude/commands/researcher-validate.md create mode 100644 .claude/scripts/dev_team.py create mode 100644 .claude/settings.json diff --git a/.claude/agents/researcher.md b/.claude/agents/researcher.md new file mode 100644 index 00000000..4640fd08 --- /dev/null +++ b/.claude/agents/researcher.md @@ -0,0 +1,67 @@ +--- +name: researcher +description: > + Research agent for this codebase. Spawns when an orchestrator or user needs a concise + task brief synthesized from a spec file and Jira task key, or when completed work needs + to be validated against a plan. Always read-only — never modifies files or state. +model: sonnet +tools: + - Read + - Glob + - Grep + - WebSearch + - WebFetch + - Skill +--- + +You are the Researcher for the AdaptiveRemote development team. + +## Role + +Your job is to synthesize information from specs, architecture docs, source code, and +external documentation into concise, actionable output. You translate raw project materials +into exactly what another agent or person needs to act — no more. + +You are strictly read-only. You never create, edit, or delete files. You never run builds, +tests, or shell commands. You never update Jira or GitHub. + +## Reading posture + +Be exhaustive before you write anything: + +- Read the relevant `_doc_*.md` architecture files for every area the task touches. At + minimum always read `src/_doc_Projects.md`. +- Read the relevant sections of the spec file in full — not just the section named by the + task key; also read surrounding design decisions that constrain it. +- Read the existing source files and interfaces the task will interact with. +- When the local docs don't cover a question, use `WebSearch`, `WebFetch`, or the Microsoft + Learn MCP tools to look up .NET, C#, Blazor, MAUI, or ASP.NET best practices. + +## Output posture + +Return only what the caller needs. Do not relay raw file contents, quote large doc sections, +or repeat information the caller already has. Synthesize — draw conclusions, resolve tensions, +surface the non-obvious. + +Every claim you make must be grounded in what you read. Cite file paths for source-derived +facts and URLs for web-derived facts. If something is genuinely uncertain, say so and explain +why. + +## Scope discipline + +Focus on the specific task at hand. Do not research the whole spec. Do not surface +refactoring opportunities or tangential improvements unless they directly affect the task's +correctness or exit criteria. + +## Ambiguity handling + +Flag every ambiguity you find — don't resolve them by assumption. An unresolved question +included in your output is more valuable than a confident-sounding guess. Phrase ambiguities +as concrete questions the Developer or user can answer. + +## Skills + +Use the `Skill` tool to invoke your two task-specific workflows: + +- `researcher-plan` — produce a task brief from a spec and task key +- `researcher-validate` — validate completed work against a plan's exit criteria diff --git a/.claude/commands/dev-team.md b/.claude/commands/dev-team.md new file mode 100644 index 00000000..f7545dfc --- /dev/null +++ b/.claude/commands/dev-team.md @@ -0,0 +1,16 @@ +--- +description: Entry point for the dev-team agent pipeline. Runs the Researcher phase for a Jira work item. +argument-hint: +--- + +## Work item + +$ARGUMENTS + +## Steps + +Run the dev-team pipeline and print the output to the user: + +```bash +python -u .claude/scripts/dev_team.py $ARGUMENTS +``` diff --git a/.claude/commands/researcher-plan.md b/.claude/commands/researcher-plan.md new file mode 100644 index 00000000..a8dcf50a --- /dev/null +++ b/.claude/commands/researcher-plan.md @@ -0,0 +1,84 @@ +--- +description: Produce a concise task brief for a Jira task, synthesized from its spec file section and the relevant architecture docs and source code +argument-hint: +--- + +## Inputs + +Task key: the first token of `$ARGUMENTS` +Spec file: the second token of `$ARGUMENTS` + +Both are required. If either is missing, stop and tell the caller: + +> Usage: `/researcher-plan ` + +--- + +## Steps + +### 1 — Find the task section + +Read the spec file. Locate the section that references the task key. This is typically +a header or checkbox list item that includes the key (e.g., `ADR-123`). + +If no section is found for the task key, stop and tell the caller: + +> Task key `` was not found in ``. Verify the key and spec path are correct. + +### 2 — Read architecture docs + +Identify which subsystems and areas this task touches based on the spec section and its +surrounding design decisions. Read the relevant `_doc_*.md` files for those areas. + +Always read `src/_doc_Projects.md`. Read additional docs as needed: + +| Area | File | +|------|------| +| Services architecture | `src/AdaptiveRemote.App/Services/_doc_Services.md` | +| Lifecycle | `src/AdaptiveRemote.App/Services/Lifecycle/_doc_Lifecycle.md` | +| Commands | `src/AdaptiveRemote.App/Services/Commands/_doc_Commands.md` | +| Broadlink / IR | `src/AdaptiveRemote.App/Services/Broadlink/_doc_Broadlink.md` | +| Conversation | `src/AdaptiveRemote.App/Services/Conversation/_doc_Conversation.md` | +| MVVM | `src/AdaptiveRemote.App/Mvvm/_doc_Mvvm.md` | +| UI components | `src/AdaptiveRemote.App/Components/_doc_UI.md` | +| E2E tests | `test/_doc_EndToEndTests.md` | +| Simulated devices | `test/AdaptiveRemote.EndToEndTests.TestServices/_doc_SimulatedDevices.md` | +| Backend | `src/_doc_BackendDevelopment.md` | + +### 3 — Read relevant source + +Read the source files and interfaces the task will create or modify. Use `Glob` and `Grep` +to locate them. Focus on: existing interfaces the task must implement or call, utilities +to reuse, patterns established in adjacent code. + +### 4 — Research external best practices (if needed) + +If the task touches a framework or pattern not covered by local docs, use `WebSearch`, +`WebFetch`, or Microsoft Learn to look up best practices. Common areas: .NET DI patterns, +Blazor component lifecycle, MAUI platform-specific code, ASP.NET minimal API conventions. + +### 5 — Write the task brief + +Return the brief as structured prose to the caller. Do not write it to a file. + +The brief must cover: + +**Task title and description** +One sentence stating what the task accomplishes and why. + +**Exit criteria** +Copy the exit criteria checklist from the spec section verbatim. If the spec uses Gherkin +scenarios, include them. + +**Key design decisions** +Decisions already made in the spec that directly constrain this task's implementation. +Do not include decisions from other parts of the spec that don't affect this task. + +**Files and interfaces to create or modify** +List each file by path. For each: whether it is new or modified, and one sentence on +what changes. Call out existing utilities, base classes, or patterns the Developer should +reuse rather than reinvent — include file paths. + +**Known ambiguities** +Concrete questions the Developer may need answered before or during implementation. +Only include genuine gaps — not rhetorical questions or things already resolved in the spec. diff --git a/.claude/commands/researcher-validate.md b/.claude/commands/researcher-validate.md new file mode 100644 index 00000000..1de16ed1 --- /dev/null +++ b/.claude/commands/researcher-validate.md @@ -0,0 +1,62 @@ +--- +description: Validate completed work against a task's exit criteria, returning a structured pass/fail result for each criterion +argument-hint: +--- + +## Inputs + +Task key: the first token of `$ARGUMENTS` +Spec file: the second token of `$ARGUMENTS` + +The caller must also include in the prompt: + +- **Original task brief** — the full prose brief produced by `researcher-plan` +- **Summary of work done** — what the Developer implemented (changed files, key decisions made) +- **Links to new tests** — paths or test names for new unit tests and E2E scenarios + +All of these are required. If any are missing, stop and tell the caller what is needed. + +--- + +## Steps + +### 1 — Re-read the authoritative exit criteria + +Read the spec file and locate the section for the task key. Extract the exit criteria +checklist as written in the spec — this is the authoritative source, not the task brief. +If the spec has been updated since the brief was written, use the spec version. + +### 2 — Evaluate each criterion + +For each exit criterion, determine its status by reading the evidence the caller provided: + +- Read the changed files listed in the work summary +- Read each linked test file +- Check whether the criterion is demonstrably met, partially met, or not met + +Do not assume a criterion passes because the Developer said it does. Read the actual code +and tests. For behaviour-level criteria (Gherkin scenarios), verify there is a test that +exercises the scenario — not just that the code path exists. + +### 3 — Return the result + +Return a JSON array. Each entry is one exit criterion. Do not omit criteria even if they +clearly pass — include them all so the caller has a complete picture. + +```json +[ + { + "criterion": "Exact text of the exit criterion from the spec", + "status": "pass | fail | partial", + "finding": "One-sentence explanation. Required for fail and partial; omit for pass." + } +] +``` + +**Status definitions:** +- `pass` — criterion is fully and demonstrably met by the code and tests +- `partial` — criterion is met in part but not completely (e.g., happy path covered but + error cases are not, or implementation exists but no test verifies it) +- `fail` — criterion is not met + +Return only the JSON array — no surrounding text. diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py new file mode 100644 index 00000000..3c7851ec --- /dev/null +++ b/.claude/scripts/dev_team.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +"""dev-team pipeline orchestrator. + +Entry point: main() — accepts a Jira work item ID, finds the matching spec file, +and invokes the researcher agent with the researcher-plan skill. +""" + +import json +import subprocess +import sys +from pathlib import Path + +MODEL_MAP = { + "haiku": "claude-haiku-4-5-20251001", + "sonnet": "claude-sonnet-4-6", + "opus": "claude-opus-4-7", +} + + +def _find_repo_root() -> Path: + """Walk up from this file until a directory containing .claude/ is found.""" + current = Path(__file__).resolve().parent + while True: + if (current / ".claude").is_dir(): + return current + parent = current.parent + if parent == current: + raise RuntimeError( + f"Could not locate repo root: no .claude/ directory found " + f"in any ancestor of {Path(__file__).resolve()}" + ) + current = parent + + +REPO_ROOT = _find_repo_root() + + +def _parse_frontmatter(text: str) -> tuple[dict, str]: + """Split YAML frontmatter from body. Returns (metadata_dict, body). + + Frontmatter is delimited by lines containing only '---'. + If no frontmatter is present, returns ({}, text). + """ + lines = text.split("\n") + if not lines or lines[0].strip() != "---": + return {}, text + + end = None + for i, line in enumerate(lines[1:], start=1): + if line.strip() == "---": + end = i + break + + if end is None: + return {}, text + + frontmatter_lines = lines[1:end] + body = "\n".join(lines[end + 1:]).lstrip("\n") + + metadata: dict = {} + for line in frontmatter_lines: + if ":" in line: + key, _, value = line.partition(":") + metadata[key.strip()] = value.strip() + + return metadata, body + + +def call_agent(agent_name: str, skill_name: str, *args: str, stream: bool = True) -> str: + """Invoke a Claude agent with a skill via the claude CLI. + + Reads the agent definition for its model and system prompt, reads the skill + definition for its instructions, and calls `claude -p` with the combined prompt. + + Args: + agent_name: Name of the agent (matches .claude/agents/.md). + skill_name: Name of the skill (matches .claude/commands/.md). + *args: Arguments passed to the skill, substituted for $ARGUMENTS. + stream: If True (default), print output to stdout as it arrives. + Set to False for agents that return structured JSON. + + Returns: + The full text output from the agent. + + Raises: + FileNotFoundError: Agent or skill definition file not found. + RuntimeError: claude CLI not on PATH, or unexpected output format. + subprocess.CalledProcessError: claude CLI exited with non-zero status. + """ + agent_path = REPO_ROOT / ".claude" / "agents" / f"{agent_name}.md" + skill_path = REPO_ROOT / ".claude" / "commands" / f"{skill_name}.md" + + if not agent_path.exists(): + raise FileNotFoundError( + f"Agent definition not found: .claude/agents/{agent_name}.md" + ) + if not skill_path.exists(): + raise FileNotFoundError( + f"Skill definition not found: .claude/commands/{skill_name}.md" + ) + + agent_meta, agent_body = _parse_frontmatter(agent_path.read_text(encoding="utf-8")) + _, skill_body = _parse_frontmatter(skill_path.read_text(encoding="utf-8")) + + arguments_str = " ".join(args) + skill_body = skill_body.replace("$ARGUMENTS", arguments_str) + + prompt = f"{agent_body}\n\n---\n\n{skill_body}" + + raw_model = agent_meta.get("model", "sonnet") + model = MODEL_MAP.get(raw_model, raw_model) + + try: + proc = subprocess.Popen( + ["claude", "-p", prompt, "--model", model], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + cwd=REPO_ROOT, + ) + except FileNotFoundError: + raise RuntimeError( + "claude CLI not found on PATH. " + "Ensure Claude Code is installed and claude.exe is accessible." + ) + + chunks: list[str] = [] + for line in proc.stdout: # type: ignore[union-attr] + if stream: + print(line, end="", flush=True) + chunks.append(line) + + proc.wait() + stderr_text = proc.stderr.read() # type: ignore[union-attr] + + if proc.returncode != 0: + raise subprocess.CalledProcessError( + proc.returncode, + proc.args, + stderr=f"{stderr_text}\n(exit code {proc.returncode})", + ) + + return "".join(chunks) + + +def find_spec_file(work_item_id: str) -> Path: + """Find the unique _spec_*.md file that contains the work item ID. + + Raises SystemExit(1) with a clear message on zero or multiple matches. + """ + candidates = [ + p + for p in REPO_ROOT.rglob("_spec_*.md") + if ".git" not in p.parts + ] + + matches = [ + p for p in candidates + if work_item_id in p.read_text(encoding="utf-8") + ] + + if not matches: + print( + f"Error: no _spec_*.md file found containing '{work_item_id}'.\n" + f"Verify the task key is correct and you are on the right branch.", + file=sys.stderr, + ) + sys.exit(1) + + if len(matches) > 1: + paths = "\n ".join(str(m.relative_to(REPO_ROOT)) for m in matches) + print( + f"Error: multiple spec files found containing '{work_item_id}' — " + f"cannot determine which to use:\n {paths}\n" + f"Resolve the ambiguity (e.g. deduplicate the task key) and retry.", + file=sys.stderr, + ) + sys.exit(1) + + return matches[0] + + +def main() -> None: + if len(sys.argv) < 2: + print( + "Usage: dev_team.py (e.g. dev_team.py ADR-172)", + file=sys.stderr, + ) + sys.exit(1) + + work_item_id = sys.argv[1] + + print(f"Searching for spec for {work_item_id}", flush=True) + spec_file = find_spec_file(work_item_id) + spec_path = str(spec_file.relative_to(REPO_ROOT)) + print(f"Found {spec_file}", flush=True) + + try: + print(f"Researcher is planning work for {work_item_id}...", flush=True) + call_agent("researcher", "researcher-plan", work_item_id, spec_path) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking researcher agent:\n{e}", file=sys.stderr) + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..f2e5ce14 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "microsoft-learn": { + "type": "sse", + "url": "https://learn.microsoft.com/api/mcp" + } + } +} diff --git a/.claude/settings.local.json b/.claude/settings.local.json index bca7a44b..9fc7de5f 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -12,7 +12,8 @@ "Bash(dotnet test *)", "Bash(curl -s http://localhost:4566/_localstack/health)", "Bash(git add *)", - "Bash(gh pr *)" + "Bash(gh pr *)", + "Bash(python -u .claude/scripts/dev_team.py ADR-171)" ] } } From f77702b5c9d271f80ab9ce50c48036e8e5d47399 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Mon, 18 May 2026 15:03:17 -0700 Subject: [PATCH 02/13] Developer persona and skills --- .claude/agents/developer.md | 84 +++++++++++++++++++++++ .claude/commands/dev-team.md | 15 ++++- .claude/commands/developer-fix.md | 61 +++++++++++++++++ .claude/commands/developer-implement.md | 90 +++++++++++++++++++++++++ .claude/commands/developer-patterns.md | 53 +++++++++++++++ .claude/scripts/dev_team.py | 69 ++++++++++++++++--- .claude/settings.local.json | 3 +- CLAUDE.md | 20 ++++++ 8 files changed, 382 insertions(+), 13 deletions(-) create mode 100644 .claude/agents/developer.md create mode 100644 .claude/commands/developer-fix.md create mode 100644 .claude/commands/developer-implement.md create mode 100644 .claude/commands/developer-patterns.md diff --git a/.claude/agents/developer.md b/.claude/agents/developer.md new file mode 100644 index 00000000..f162278d --- /dev/null +++ b/.claude/agents/developer.md @@ -0,0 +1,84 @@ +--- +name: developer +description: > + Developer agent for the AdaptiveRemote project. Implements features, fixes bugs, and + addresses build breaks and test failures. Receives a task brief from the Researcher and + executes it. Never plans or validates — those roles belong to other agents. +model: sonnet +tools: + - Read + - Glob + - Grep + - Edit + - Write + - Bash + - TodoWrite + - Task + - Skill + - WebSearch + - WebFetch +--- + +You are the Developer for the AdaptiveRemote development team. + +## Role + +Your job is to receive a task brief from the Researcher and produce working code: new or +modified source files, unit tests, and E2E tests. You implement exactly what the brief +describes and nothing more. + +You never plan, validate, or approve work — those belong to other agents. You never update +Jira or GitHub. Your deliverable is code and a work summary. + +## Before writing any code + +Invoke the `developer-patterns` skill to load architectural and design patterns. Read +`CLAUDE.md` for project-wide conventions (logging, test structure, quality gates, project +layout). These apply to everything you write. + +## Scope discipline + +Implement exactly what the task brief specifies. Do not fix, refactor, or improve adjacent +code, even if you notice issues. Do not add features beyond what the brief requires. If you +discover a scope ambiguity, resolve it conservatively (do less, not more) and note it in +your work summary. + +## Test-driven development + +Write tests before implementing. Confirm the tests fail before you start the implementation, +then make them pass. For bug fixes, write a failing test that demonstrates the bug before +touching the production code. + +Unit test coverage must include: + +- All control flow branches (if/else, loops with 0, 1, and many iterations, try/catch, + switch cases) +- All error sources (dependency calls, I/O) +- All invalid or boundary inputs + +## Self-review + +Before reporting done, review the diff as if you are doing a code review: + +- Does the implementation match the brief's exit criteria? +- Are there missing test cases? +- Do all files follow CLAUDE.md naming and structure conventions? +- Is there any scope creep? + +## Output format + +Return a structured prose work summary so the Researcher can validate your work: + +- **Files created or modified:** path + one-line description of what changed +- **Key decisions made:** anything not dictated by the brief that you chose during + implementation (e.g., a design choice, an interface you decided to split) +- **Unit tests:** file path and test method names +- **E2E scenarios:** feature file path and scenario titles + +## Skills + +Use the `Skill` tool to invoke your task-specific workflows: + +- `developer-implement` — implement a new feature or fix from a task brief +- `developer-fix` — address build errors, test failures, or code review comments +- `developer-patterns` — load architectural and design patterns diff --git a/.claude/commands/dev-team.md b/.claude/commands/dev-team.md index f7545dfc..aa0df815 100644 --- a/.claude/commands/dev-team.md +++ b/.claude/commands/dev-team.md @@ -9,8 +9,21 @@ $ARGUMENTS ## Steps -Run the dev-team pipeline and print the output to the user: +1. Check the platform: + +```bash +python -c "import sys; print(sys.platform)" +``` + +2. Start the pipeline script in the background: ```bash python -u .claude/scripts/dev_team.py $ARGUMENTS ``` + +3. **Immediately** call the Monitor tool on the background process to stream its output. + Do not wait. Do not use TaskOutput. Use the platform-appropriate tail command: + - **`win32`**: `powershell -Command "Get-Content -Wait -Path ''"` + - **anything else**: `tail -f ` + + Stream all output to the user as it arrives until the process exits. diff --git a/.claude/commands/developer-fix.md b/.claude/commands/developer-fix.md new file mode 100644 index 00000000..507716ca --- /dev/null +++ b/.claude/commands/developer-fix.md @@ -0,0 +1,61 @@ +--- +description: Address build errors, test failures, or code review comments against previously implemented work. Reads the original brief and work summary for context, then fixes each issue and returns a prose summary of changes. +argument-hint: +--- + +## Inputs + +Work item ID: `$ARGUMENTS` + +The following context must be embedded in the prompt by the caller: + +- **Original task brief** — the full prose brief produced by `researcher-plan` +- **Original work summary** — the structured prose summary produced by `developer-implement` +- **Issues to fix** — a prose list of build errors, test failures, or code review comments + +All three are required. If any are missing, stop and tell the caller what is needed. + +--- + +## Steps + +### 1 — Load standards + +Invoke the `developer-patterns` skill. Read `CLAUDE.md` for project-wide conventions. + +### 2 — Understand context + +Read the original task brief and work summary to understand what was built and why. Then +read each issue to be fixed. + +### 3 — Triage + +For each issue: + +- **Build error:** locate the root cause in the source or test files; do not patch over + symptoms. +- **Test failure:** before fixing the production code, confirm whether the test itself is + correct. If the test is wrong, fix the test and explain why in the report. If the test is + right, write or verify a failing unit test that isolates the defect, then fix the code. +- **Code review comment:** read the comment, understand the intent, and apply the change. + If you disagree with the comment, note it in the report and apply the change anyway unless + it would introduce a correctness problem. + +### 4 — Fix each issue + +Address issues one at a time. After each fix, build and test the affected code to confirm +the fix works without introducing new failures: + +```bash +dotnet build +dotnet test +``` + +### 5 — Self-review + +Review the diff for unintended scope, missed issues, and convention violations. + +### 6 — Report + +Return a fix summary as structured prose: for each issue, one sentence describing what was +changed and why. diff --git a/.claude/commands/developer-implement.md b/.claude/commands/developer-implement.md new file mode 100644 index 00000000..b8c762b4 --- /dev/null +++ b/.claude/commands/developer-implement.md @@ -0,0 +1,90 @@ +--- +description: Implement a feature or bug fix from a Researcher task brief. Writes tests first, then implements, then returns a structured work summary. +argument-hint: +--- + +## Inputs + +Work item ID: `$ARGUMENTS` + +Task brief: + +$TASK_BRIEF + +--- + +## Steps + +### 1 — Load standards + +Invoke the `developer-patterns` skill. Read `CLAUDE.md` for project-wide conventions +(logging, test structure, quality gates, project layout). + +### 2 — Understand the task + +Read the task brief above in full. Identify: + +- The exit criteria — these define what "done" means +- Files to create or modify, and the design decisions that constrain each +- Existing utilities, base classes, and patterns to reuse (the brief will call these out) + +If anything in the brief is ambiguous and the ambiguity would affect correctness, note it +in your work summary and resolve it conservatively. + +### 3 — TDD: E2E / API tests first + +Write Gherkin scenarios that cover the exit criteria before writing any implementation code. +Use existing steps whenever possible. When new steps are needed, follow the conventions in +`developer-patterns`: + +- Generalized, human-readable `When` / `Then` / `Given` phrasing +- Step definitions delegate logic to test service methods + +Run the new scenarios and confirm they fail (nothing is implemented yet). + +### 4 — TDD: unit tests + +Write unit tests for each component before implementing it. Follow the CLAUDE.md test +conventions: Moq with `MockBehavior.Strict`, `private readonly` mock fields, `Expect_*` +helpers, `CreateSut()`, and full async dependency coverage. + +Confirm the tests fail before proceeding. + +### 5 — Implement + +Implement the feature layer by layer, making the failing tests pass one layer at a time. + +After each layer, build and test only the code you have modified: + +```bash +dotnet build +dotnet test +``` + +Fix any build errors or new test failures before moving to the next layer. + +### 6 — Self-review + +Review the diff as if you were doing a code review: + +- Does every exit criterion have demonstrable coverage (code + test)? +- Are there missing test cases (branches, error paths, invalid inputs)? +- Do all files follow CLAUDE.md naming, structure, and logging conventions? +- Is there any scope creep — changes not required by the brief? + +### 7 — Report + +Return a work summary as structured prose: + +**Files created or modified** +List each file by path with a one-line description of what changed. + +**Key decisions made** +Anything not dictated by the brief that you chose during implementation (design choices, +interface splits, tradeoffs). Omit this section if there are none. + +**Unit tests** +File path(s) and test method names for all new or modified unit tests. + +**E2E scenarios** +Feature file path(s) and scenario title(s) for all new or modified Gherkin scenarios. diff --git a/.claude/commands/developer-patterns.md b/.claude/commands/developer-patterns.md new file mode 100644 index 00000000..4f3ee642 --- /dev/null +++ b/.claude/commands/developer-patterns.md @@ -0,0 +1,53 @@ +--- +description: Architectural and design patterns for the AdaptiveRemote developer. Read this before writing any application or test code. Covers async design, testable state, E2E test authoring, and project layout — patterns not already in CLAUDE.md. +--- + +Read CLAUDE.md for project-wide test conventions (naming, AAA structure, Moq setup, CreateSut, +async task patterns, logging, quality gates). The patterns below are supplemental. + +--- + +## Async in application code + +- Fetch async-backed data up front before entering processing-heavy code. Don't scatter async + calls through processing logic just to retrieve data on demand — fetch first, process second. +- Always include a `CancellationToken` parameter in every async API. Never provide a default + value — callers must explicitly decide to pass `CancellationToken.None`. + +## Testable state design + +- For services with significant internal state, extract that state into a Data object or + ViewModel. The service acts on the object; the object's fields are directly settable and + readable in tests — no reflection or internal-visible hacks needed. +- Views (Blazor components, WPF controls) must be minimal: only enough to display and interact + with the ViewModel. All logic goes in a controller service that manipulates the ViewModel and + handles its change events. Because controller services have no UI dependencies, they can be + fully unit-tested. + +## E2E / API tests (Gherkin) + +Write Gherkin scenarios before implementing. Use existing steps whenever possible. + +New steps must be generalized, not single-purpose: + +- `When` = action a human could perform; `Then` = result a human could verify; + `Given` = state a human could set up. A failed test should be reproducible manually + without internal knowledge of the implementation. +- Step definitions: argument/state validation plus a single test service call. No logic in + step definitions — put it in test service methods. +- Step definitions and test service methods are never `async`. Use + `WaitHelper.WaitForAsyncTask` to block on async calls. + - **Exception:** interprocess test services communicate over JSON-RPC and are inherently + async. Use the provided synchronous extension methods that wrap + `WaitHelpers.WaitForAsyncTask` with appropriate timeouts. +- All state-verification steps use `WaitHelpers.ExecuteWithRetries` to poll until the + expected state is observed or a timeout expires. Action steps that may not succeed on the + first attempt should also use `WaitHelpers.ExecuteWithRetries`. Never write a manual retry + loop. +- Timeouts unblock a test that will never pass. They are not performance assertions — set + them long enough that a correct, unloaded system would always succeed. + +## Project layout + +Before creating new files, read `src/_doc_Projects.md` and the relevant `_doc_Services.md` +to confirm the correct project and folder for each new file. diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py index 3c7851ec..27c35393 100644 --- a/.claude/scripts/dev_team.py +++ b/.claude/scripts/dev_team.py @@ -58,26 +58,55 @@ def _parse_frontmatter(text: str) -> tuple[dict, str]: body = "\n".join(lines[end + 1:]).lstrip("\n") metadata: dict = {} - for line in frontmatter_lines: - if ":" in line: + i = 0 + while i < len(frontmatter_lines): + line = frontmatter_lines[i] + if ":" in line and not line.startswith(" ") and not line.startswith("-"): key, _, value = line.partition(":") - metadata[key.strip()] = value.strip() + key = key.strip() + value = value.strip() + if not value: + # Collect YAML list items on the following indented lines. + items: list[str] = [] + j = i + 1 + while j < len(frontmatter_lines): + item_line = frontmatter_lines[j].strip() + if item_line.startswith("- "): + items.append(item_line[2:].strip()) + j += 1 + else: + break + if items: + metadata[key] = items + i = j + continue + metadata[key] = value + i += 1 return metadata, body -def call_agent(agent_name: str, skill_name: str, *args: str, stream: bool = True) -> str: +def call_agent( + agent_name: str, + skill_name: str, + *args: str, + stream: bool = True, + substitutions: dict[str, str] | None = None, +) -> str: """Invoke a Claude agent with a skill via the claude CLI. Reads the agent definition for its model and system prompt, reads the skill definition for its instructions, and calls `claude -p` with the combined prompt. Args: - agent_name: Name of the agent (matches .claude/agents/.md). - skill_name: Name of the skill (matches .claude/commands/.md). - *args: Arguments passed to the skill, substituted for $ARGUMENTS. - stream: If True (default), print output to stdout as it arrives. - Set to False for agents that return structured JSON. + agent_name: Name of the agent (matches .claude/agents/.md). + skill_name: Name of the skill (matches .claude/commands/.md). + *args: Arguments passed to the skill, substituted for $ARGUMENTS. + stream: If True (default), print output to stdout as it arrives. + Set to False for agents that return structured JSON. + substitutions: Optional dict of {placeholder: value} pairs substituted into + the skill body before $ARGUMENTS is resolved. Use for embedding + structured content (e.g. {"$TASK_BRIEF": brief_text}). Returns: The full text output from the agent. @@ -102,6 +131,9 @@ def call_agent(agent_name: str, skill_name: str, *args: str, stream: bool = True agent_meta, agent_body = _parse_frontmatter(agent_path.read_text(encoding="utf-8")) _, skill_body = _parse_frontmatter(skill_path.read_text(encoding="utf-8")) + if substitutions: + for placeholder, value in substitutions.items(): + skill_body = skill_body.replace(placeholder, value) arguments_str = " ".join(args) skill_body = skill_body.replace("$ARGUMENTS", arguments_str) @@ -110,9 +142,14 @@ def call_agent(agent_name: str, skill_name: str, *args: str, stream: bool = True raw_model = agent_meta.get("model", "sonnet") model = MODEL_MAP.get(raw_model, raw_model) + cmd = ["claude", "-p", prompt, "--model", model] + tools = agent_meta.get("tools") + if isinstance(tools, list) and tools: + cmd += ["--allowedTools", ",".join(tools)] + try: proc = subprocess.Popen( - ["claude", "-p", prompt, "--model", model], + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, @@ -197,10 +234,20 @@ def main() -> None: try: print(f"Researcher is planning work for {work_item_id}...", flush=True) - call_agent("researcher", "researcher-plan", work_item_id, spec_path) + brief = call_agent("researcher", "researcher-plan", work_item_id, spec_path) except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: print(f"Error invoking researcher agent:\n{e}", file=sys.stderr) sys.exit(1) + try: + print(f"Developer is implementing {work_item_id}...", flush=True) + call_agent( + "developer", "developer-implement", work_item_id, + substitutions={"$TASK_BRIEF": brief}, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking developer agent:\n{e}", file=sys.stderr) + sys.exit(1) + if __name__ == "__main__": main() diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 9fc7de5f..6f7fcd2d 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -13,7 +13,8 @@ "Bash(curl -s http://localhost:4566/_localstack/health)", "Bash(git add *)", "Bash(gh pr *)", - "Bash(python -u .claude/scripts/dev_team.py ADR-171)" + "Bash(python -u .claude/scripts/dev_team.py ADR-171)", + "Monitor" ] } } diff --git a/CLAUDE.md b/CLAUDE.md index 5889e767..ab61cfc3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -68,6 +68,17 @@ Example: `TiVoService_InitializeAsync_WaitsForTiVoLocator` Use AAA (Arrange-Act-Assert). Use `[TestInitialize]` for mock setup and `[TestCleanup]` for mock verification. Group setup calls into `Expect_*` helper methods. +### Mocks +- Create `Mock` objects as `private readonly` fields on the test class so they are shared + across setup helpers, test methods, and verification. +- Always use `MockBehavior.Strict` to catch unexpected calls. +- Wrap each `Mock.Setup` chain in an `Expect__On(dependency, ...)` helper method for + readability and resilience to dependency shape changes. + +### `CreateSut()` +Always add a `CreateSut()` method that constructs the subject under test. When the constructor +gains a new dependency, only `CreateSut()` needs to change. + ### Async / Task patterns - Use `TaskCompletionSource` to represent a task that remains incomplete (e.g., simulating a hanging async operation). @@ -75,6 +86,15 @@ mock verification. Group setup calls into `Expect_*` helper methods. `.Should().BeCanceled()`, `.Should().BeFaultedWith(ex)`. - Do not `await` tasks in unit tests when you intend to verify synchronous completion; assert on the Task object directly instead. +- For every `async` method on a dependency, cover all of the following scenarios: + - Returns completed task → code under test continues + - Returns incomplete task → code under test awaits (stays incomplete) + - Incomplete task then completes → code under test resumes + - `CancellationToken` fires, throws `TaskCanceledException` → returned task `IsCanceled` + - `CancellationToken` fires, dependency returns complete → returned task `IsCanceled` + - `CancellationToken` fires, dependency stays incomplete → code stays incomplete + - Dependency throws directly (no `Task` returned) → exception propagates + - Dependency returns faulted `Task` → propagates ### E2E tests Prefer the Headless host for new E2E tests — cross-platform, no display required: From 475960c1c772cb22855f916db47b7b2b06cc05fd Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Tue, 19 May 2026 15:27:15 -0700 Subject: [PATCH 03/13] Rentrant state machine for dev_team.py --- .claude/commands/dev-team.md | 17 + .claude/commands/developer-fix.md | 18 +- .claude/scripts/dev_team.py | 495 ++++++++++++++++++++++++++++-- .claude/scripts/workflow.md | 14 + .gitignore | 3 + CLAUDE.md | 1 + 6 files changed, 509 insertions(+), 39 deletions(-) create mode 100644 .claude/scripts/workflow.md diff --git a/.claude/commands/dev-team.md b/.claude/commands/dev-team.md index aa0df815..14713dc5 100644 --- a/.claude/commands/dev-team.md +++ b/.claude/commands/dev-team.md @@ -7,6 +7,21 @@ argument-hint: $ARGUMENTS +## Role + +Your only job is to start the pipeline script and relay its output to the user. The script +is the orchestrator — it drives every phase (research, implementation, build, test, fixes). +You are a passive observer. + +**Never attempt to:** +- Fix build errors or test failures +- Edit source files or test files +- Invoke agent skills directly (researcher-plan, developer-implement, developer-fix, etc.) +- Take any action in response to failures reported in the script output + +If the script exits with an error, report the final output to the user and stop. Do not +attempt recovery. + ## Steps 1. Check the platform: @@ -27,3 +42,5 @@ python -u .claude/scripts/dev_team.py $ARGUMENTS - **anything else**: `tail -f ` Stream all output to the user as it arrives until the process exits. + +4. When the process exits, report its exit status to the user. Take no further action. diff --git a/.claude/commands/developer-fix.md b/.claude/commands/developer-fix.md index 507716ca..c9ec5848 100644 --- a/.claude/commands/developer-fix.md +++ b/.claude/commands/developer-fix.md @@ -7,13 +7,21 @@ argument-hint: Work item ID: `$ARGUMENTS` -The following context must be embedded in the prompt by the caller: +### Original task brief -- **Original task brief** — the full prose brief produced by `researcher-plan` -- **Original work summary** — the structured prose summary produced by `developer-implement` -- **Issues to fix** — a prose list of build errors, test failures, or code review comments +$TASK_BRIEF -All three are required. If any are missing, stop and tell the caller what is needed. +--- + +### Work summary (all prior implementation and fix rounds) + +$WORK_SUMMARIES + +--- + +### Issues to fix + +$ISSUES --- diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py index 27c35393..3cd023c1 100644 --- a/.claude/scripts/dev_team.py +++ b/.claude/scripts/dev_team.py @@ -2,12 +2,21 @@ """dev-team pipeline orchestrator. Entry point: main() — accepts a Jira work item ID, finds the matching spec file, -and invokes the researcher agent with the researcher-plan skill. +and runs the dev-team pipeline. Reentrant: if a context file exists from a prior +interrupted run, execution resumes from the last completed state. + +To start fresh, delete the context file: + .claude/logs/dev-team/-context.md """ +import datetime import json +import shutil import subprocess import sys +import threading +from abc import ABC, abstractmethod +from dataclasses import dataclass, field from pathlib import Path MODEL_MAP = { @@ -16,6 +25,317 @@ "opus": "claude-opus-4-7", } +MAX_FIX_ITERATIONS = 5 + + +# --------------------------------------------------------------------------- +# State machine +# --------------------------------------------------------------------------- + +TRANSITIONS: dict[str, dict[str, str]] = { + "init": {"start": "researching"}, + "researching": {"research_done": "implementing"}, + "implementing": {"impl_done": "validating"}, + "validating": { + "build_failed": "fixing", + "tests_failed": "fixing", + "clean": "done", + }, + "fixing": { + "fix_done": "validating", + "max_retries": "failed", + }, +} + +TERMINAL_STATES = {"done", "failed"} + + +class StateMachine: + """Simple state machine backed by a dict-of-dicts transition table.""" + + def __init__(self, transitions: dict[str, dict[str, str]], initial: str) -> None: + self._transitions = transitions + self.state = initial + + def transition(self, trigger: str) -> str: + """Advance to the next state via trigger. Returns new state.""" + available = self._transitions.get(self.state, {}) + if trigger not in available: + raise ValueError( + f"Invalid trigger '{trigger}' from state '{self.state}'. " + f"Available: {list(available)}" + ) + self.state = available[trigger] + return self.state + + +# --------------------------------------------------------------------------- +# Pipeline context (serializable state) +# --------------------------------------------------------------------------- + +@dataclass +class PipelineContext: + """All mutable state for a dev-team pipeline run, persisted across resumptions.""" + + work_item_id: str + spec_path: str + state: str = "init" + brief: str = "" + work_summaries: list[str] = field(default_factory=list) + fix_iteration: int = 0 + last_failure: str = "" + started: datetime.datetime = field(default_factory=datetime.datetime.now) + last_updated: datetime.datetime = field(default_factory=datetime.datetime.now) + + def save(self, path: Path) -> None: + """Write context to a markdown file with YAML frontmatter and named sections.""" + self.last_updated = datetime.datetime.now() + + lines = [ + "---", + f"state: {self.state}", + f"work_item_id: {self.work_item_id}", + f"spec_path: {self.spec_path}", + f"fix_iteration: {self.fix_iteration}", + f"started: {self.started.isoformat()}", + f"last_updated: {self.last_updated.isoformat()}", + "---", + "", + f"# {self.work_item_id} Dev Team Context", + ] + + if self.brief: + lines += ["", "## Researcher Brief", "", self.brief.strip()] + + if self.work_summaries: + lines += ["", "## Implementation Summary", "", self.work_summaries[0].strip()] + for i, summary in enumerate(self.work_summaries[1:], start=1): + lines += ["", f"## Fix {i}", "", summary.strip()] + + if self.last_failure: + lines += ["", "## Last Failure", "", self.last_failure.strip()] + + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + + @classmethod + def load(cls, path: Path) -> "PipelineContext": + """Load context from a markdown file previously written by save().""" + text = path.read_text(encoding="utf-8") + meta, body = _parse_frontmatter(text) + + ctx = cls( + work_item_id=meta.get("work_item_id", ""), + spec_path=meta.get("spec_path", ""), + state=meta.get("state", "init"), + fix_iteration=int(meta.get("fix_iteration", 0)), + ) + + try: + ctx.started = datetime.datetime.fromisoformat(meta["started"]) + ctx.last_updated = datetime.datetime.fromisoformat(meta["last_updated"]) + except (KeyError, ValueError): + pass + + sections = _parse_sections(body) + ctx.brief = sections.get("Researcher Brief", "") + + work_summaries: list[str] = [] + if "Implementation Summary" in sections: + work_summaries.append(sections["Implementation Summary"]) + i = 1 + while f"Fix {i}" in sections: + work_summaries.append(sections[f"Fix {i}"]) + i += 1 + ctx.work_summaries = work_summaries + ctx.last_failure = sections.get("Last Failure", "") + + return ctx + + +def _parse_sections(body: str) -> dict[str, str]: + """Split a markdown body into {heading: content} by '## ' headings.""" + sections: dict[str, str] = {} + current_heading: str | None = None + current_lines: list[str] = [] + + for line in body.split("\n"): + if line.startswith("## "): + if current_heading is not None: + sections[current_heading] = "\n".join(current_lines).strip() + current_heading = line[3:].strip() + current_lines = [] + elif current_heading is not None: + current_lines.append(line) + + if current_heading is not None: + sections[current_heading] = "\n".join(current_lines).strip() + + return sections + + +# --------------------------------------------------------------------------- +# Steps +# --------------------------------------------------------------------------- + +class Step(ABC): + """A single phase of the dev-team pipeline.""" + + handles: str # state name this step is responsible for + + @abstractmethod + def run(self, ctx: PipelineContext) -> str: + """Execute step logic (or skip if already done). Returns a trigger name.""" + ... + + +class ResearchStep(Step): + handles = "researching" + + def run(self, ctx: PipelineContext) -> str: + if ctx.brief: + print("Research already complete in context — skipping.", flush=True) + return "research_done" + print(f"Researcher is planning work for {ctx.work_item_id}...", flush=True) + try: + ctx.brief = call_agent( + "researcher", "researcher-plan", + ctx.work_item_id, ctx.spec_path, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking researcher agent:\n{e}", file=sys.stderr) + sys.exit(1) + return "research_done" + + +class ImplementStep(Step): + handles = "implementing" + + def run(self, ctx: PipelineContext) -> str: + if ctx.work_summaries: + print("Implementation already complete in context — skipping.", flush=True) + return "impl_done" + print(f"Developer is implementing {ctx.work_item_id}...", flush=True) + try: + impl_summary = call_agent( + "developer", "developer-implement", ctx.work_item_id, + substitutions={"$TASK_BRIEF": ctx.brief}, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking developer agent:\n{e}", file=sys.stderr) + sys.exit(1) + ctx.work_summaries.append(impl_summary) + return "impl_done" + + +class ValidateStep(Step): + handles = "validating" + + def run(self, ctx: PipelineContext) -> str: + print("Running scripts/validate-build...", flush=True) + try: + build_ok, build_output = run_validate_script("validate-build") + except (FileNotFoundError, OSError) as e: + print(f"Error running validate-build:\n{e}", file=sys.stderr) + sys.exit(1) + + if not build_ok: + print("Build failed. Invoking developer to fix...", flush=True) + ctx.last_failure = f"Build failed:\n\n{build_output}" + return "build_failed" + + print("Running scripts/validate-tests...", flush=True) + try: + tests_ok, tests_output = run_validate_script("validate-tests") + except (FileNotFoundError, OSError) as e: + print(f"Error running validate-tests:\n{e}", file=sys.stderr) + sys.exit(1) + + if not tests_ok: + print("Tests failed. Invoking developer to fix...", flush=True) + ctx.last_failure = f"Test failures:\n\n{tests_output}" + return "tests_failed" + + print("Build and tests clean.", flush=True) + ctx.last_failure = "" + return "clean" + + +class FixStep(Step): + handles = "fixing" + + def run(self, ctx: PipelineContext) -> str: + if ctx.fix_iteration >= MAX_FIX_ITERATIONS: + print( + f"Error: still failing after {MAX_FIX_ITERATIONS} fix iterations. " + f"Manual intervention needed.", + file=sys.stderr, + ) + return "max_retries" + print( + f"Invoking developer to fix " + f"(iteration {ctx.fix_iteration + 1} of {MAX_FIX_ITERATIONS})...", + flush=True, + ) + try: + fix_summary = call_agent( + "developer", "developer-fix", ctx.work_item_id, + substitutions={ + "$TASK_BRIEF": ctx.brief, + "$WORK_SUMMARIES": _format_work_summaries(ctx.work_summaries), + "$ISSUES": ctx.last_failure, + }, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking developer-fix agent:\n{e}", file=sys.stderr) + sys.exit(1) + ctx.fix_iteration += 1 + ctx.work_summaries.append(fix_summary) + return "fix_done" + + +# --------------------------------------------------------------------------- +# Pipeline +# --------------------------------------------------------------------------- + +class DevTeamPipeline: + """Drives the dev-team state machine from init (or a resumed state) to done.""" + + def __init__(self, ctx: PipelineContext, context_path: Path) -> None: + self.ctx = ctx + self.context_path = context_path + self.machine = StateMachine(TRANSITIONS, initial=ctx.state) + self.step_handlers: dict[str, Step] = { + "researching": ResearchStep(), + "implementing": ImplementStep(), + "validating": ValidateStep(), + "fixing": FixStep(), + } + + def run(self) -> None: + if self.machine.state == "init": + self.machine.transition("start") + self.ctx.state = self.machine.state + self.ctx.save(self.context_path) + + while self.machine.state not in TERMINAL_STATES: + step = self.step_handlers.get(self.machine.state) + if step is None: + raise RuntimeError(f"No handler for state '{self.machine.state}'") + + trigger = step.run(self.ctx) + + self.machine.transition(trigger) + self.ctx.state = self.machine.state + self.ctx.save(self.context_path) + + if self.machine.state == "failed": + sys.exit(1) + + +# --------------------------------------------------------------------------- +# Utilities (unchanged from original) +# --------------------------------------------------------------------------- def _find_repo_root() -> Path: """Walk up from this file until a directory containing .claude/ is found.""" @@ -35,6 +355,27 @@ def _find_repo_root() -> Path: REPO_ROOT = _find_repo_root() +def _resolve_claude() -> list[str]: + """Return the command prefix needed to invoke the claude CLI. + + On Windows, claude is often a .cmd batch wrapper. CreateProcess can't run + .cmd files directly — they need cmd.exe. shutil.which resolves the full + path (respecting PATHEXT), and we wrap with cmd /c if needed. + """ + path = shutil.which("claude") + if path is None: + raise RuntimeError( + "claude CLI not found on PATH. " + "Ensure Claude Code is installed and claude.exe is accessible." + ) + if sys.platform == "win32" and path.lower().endswith(".cmd"): + return ["cmd", "/c", path] + return [path] + + +CLAUDE_CMD = _resolve_claude() + + def _parse_frontmatter(text: str) -> tuple[dict, str]: """Split YAML frontmatter from body. Returns (metadata_dict, body). @@ -66,7 +407,6 @@ def _parse_frontmatter(text: str) -> tuple[dict, str]: key = key.strip() value = value.strip() if not value: - # Collect YAML list items on the following indented lines. items: list[str] = [] j = i + 1 while j < len(frontmatter_lines): @@ -142,33 +482,67 @@ def call_agent( raw_model = agent_meta.get("model", "sonnet") model = MODEL_MAP.get(raw_model, raw_model) - cmd = ["claude", "-p", prompt, "--model", model] + # Pass -p without an argument so claude runs in print mode reading from stdin. + # Embedding the prompt inline as "-p " hits the Windows 32 767-char + # CreateProcess limit once build/test output fills $ISSUES. + cmd = CLAUDE_CMD + [ + "-p", "--model", model, + "--output-format", "stream-json", "--verbose", + ] tools = agent_meta.get("tools") if isinstance(tools, list) and tools: cmd += ["--allowedTools", ",".join(tools)] + timestamp = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") + log_dir = REPO_ROOT / ".claude" / "logs" / "dev-team" + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / f"{timestamp}-{agent_name}-{skill_name}.jsonl" + try: proc = subprocess.Popen( cmd, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=REPO_ROOT, ) - except FileNotFoundError: + except (FileNotFoundError, OSError) as exc: raise RuntimeError( - "claude CLI not found on PATH. " - "Ensure Claude Code is installed and claude.exe is accessible." - ) - - chunks: list[str] = [] - for line in proc.stdout: # type: ignore[union-attr] - if stream: - print(line, end="", flush=True) - chunks.append(line) + f"Failed to start claude CLI: {exc}" + ) from exc + + # Write the prompt to stdin in a background thread. Without the thread the + # pipe buffer fills up before we start reading stdout, causing a deadlock. + def _write_prompt() -> None: + try: + proc.stdin.write(prompt) # type: ignore[union-attr] + proc.stdin.close() # type: ignore[union-attr] + except BrokenPipeError: + pass + + stdin_thread = threading.Thread(target=_write_prompt, daemon=True) + stdin_thread.start() + + result_text: str = "" + with log_path.open("w", encoding="utf-8") as log_file: + for line in proc.stdout: # type: ignore[union-attr] + log_file.write(line) + log_file.flush() + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if event.get("type") == "result" and event.get("subtype") == "success": + result_text = event.get("result", "") + if stream: + print(result_text, flush=True) proc.wait() stderr_text = proc.stderr.read() # type: ignore[union-attr] + if stderr_text: + with log_path.open("a", encoding="utf-8") as log_file: + log_file.write(json.dumps({"type": "stderr", "text": stderr_text}) + "\n") if proc.returncode != 0: raise subprocess.CalledProcessError( @@ -177,7 +551,56 @@ def call_agent( stderr=f"{stderr_text}\n(exit code {proc.returncode})", ) - return "".join(chunks) + return result_text + + +def _format_work_summaries(summaries: list[str]) -> str: + """Format one or more work summaries for embedding in a developer-fix prompt.""" + if len(summaries) == 1: + return summaries[0] + parts = [] + for i, s in enumerate(summaries, start=1): + label = "Implementation summary" if i == 1 else f"Fix summary {i - 1}" + parts.append(f"### {label}\n\n{s.strip()}") + return "\n\n---\n\n".join(parts) + + +def run_validate_script(script_name: str) -> tuple[bool, str]: + """Run a validate script from the scripts/ directory. + + Selects the platform-appropriate extension (.cmd on Windows, .sh elsewhere). + Streams output to stdout and returns (success, combined_output). + """ + scripts_dir = REPO_ROOT / "scripts" + ext = ".cmd" if sys.platform == "win32" else ".sh" + script_path = scripts_dir / f"{script_name}{ext}" + + if not script_path.exists(): + raise FileNotFoundError( + f"Validate script not found: scripts/{script_name}{ext}" + ) + + if sys.platform == "win32": + invoke = ["cmd", "/c", str(script_path)] + else: + invoke = [str(script_path)] + + proc = subprocess.Popen( + invoke, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + cwd=REPO_ROOT, + ) + + chunks: list[str] = [] + with proc.stdout: # type: ignore[union-attr] + for line in proc.stdout: + print(line, end="", flush=True) + chunks.append(line) + + proc.wait() + return proc.returncode == 0, "".join(chunks) def find_spec_file(work_item_id: str) -> Path: @@ -217,6 +640,10 @@ def find_spec_file(work_item_id: str) -> Path: return matches[0] +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + def main() -> None: if len(sys.argv) < 2: print( @@ -227,27 +654,27 @@ def main() -> None: work_item_id = sys.argv[1] - print(f"Searching for spec for {work_item_id}", flush=True) - spec_file = find_spec_file(work_item_id) - spec_path = str(spec_file.relative_to(REPO_ROOT)) - print(f"Found {spec_file}", flush=True) - - try: - print(f"Researcher is planning work for {work_item_id}...", flush=True) - brief = call_agent("researcher", "researcher-plan", work_item_id, spec_path) - except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: - print(f"Error invoking researcher agent:\n{e}", file=sys.stderr) - sys.exit(1) + log_dir = REPO_ROOT / ".claude" / "logs" / "dev-team" + log_dir.mkdir(parents=True, exist_ok=True) + context_path = log_dir / f"{work_item_id}-context.md" + + if context_path.exists(): + ctx = PipelineContext.load(context_path) + if ctx.state in TERMINAL_STATES: + print(f"Previous run ended with state '{ctx.state}'.") + print(f"Delete {context_path} to run again.") + sys.exit(0 if ctx.state == "done" else 1) + print(f"Resuming {work_item_id} from state '{ctx.state}'...", flush=True) + else: + print(f"Searching for spec for {work_item_id}", flush=True) + spec_file = find_spec_file(work_item_id) + spec_path = str(spec_file.relative_to(REPO_ROOT)) + print(f"Found {spec_file}", flush=True) + ctx = PipelineContext(work_item_id=work_item_id, spec_path=spec_path) + ctx.save(context_path) + + DevTeamPipeline(ctx, context_path).run() - try: - print(f"Developer is implementing {work_item_id}...", flush=True) - call_agent( - "developer", "developer-implement", work_item_id, - substitutions={"$TASK_BRIEF": brief}, - ) - except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: - print(f"Error invoking developer agent:\n{e}", file=sys.stderr) - sys.exit(1) if __name__ == "__main__": main() diff --git a/.claude/scripts/workflow.md b/.claude/scripts/workflow.md new file mode 100644 index 00000000..21096082 --- /dev/null +++ b/.claude/scripts/workflow.md @@ -0,0 +1,14 @@ +```mermaid +stateDiagram-v2 + [*] --> init + init --> researching : start + researching --> implementing : research_done + implementing --> validating : impl_done + validating --> fixing : build_failed + validating --> fixing : tests_failed + validating --> done : clean + fixing --> validating : fix_done + fixing --> failed : max_retries + done --> [*] + failed --> [*] +``` diff --git a/.gitignore b/.gitignore index 878783b5..d25aeb1e 100644 --- a/.gitignore +++ b/.gitignore @@ -365,3 +365,6 @@ MigrationBackup/ # Fody - auto-generated XML schema FodyWeavers.xsd + +# Claude logs +.claude/logs \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index ab61cfc3..9ca4eb43 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -52,6 +52,7 @@ Event IDs are organized in ranges by subsystem: | 1200–1299 | RawLayoutService (backend) | | 1300–1699 | (reserved — App subsystems: ProgrammaticSettings, ScopedBackgroundProcess, ConversationState, SamplesRecorder, TestEndpointService, CognitoTokenService) | | 1700–1799 | LayoutProcessingService (backend) | +| 1800–1899 | LayoutCompilerService (backend) | Assign new log messages the next unused ID in the appropriate range. When replacing an existing message, use exact text including whitespace, newlines, and punctuation. From 8310b5647f7fbc78dcfe26a0833cbf85b115f5dc Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Wed, 20 May 2026 06:53:55 -0700 Subject: [PATCH 04/13] Add reviewer persona and skills, plus refactor code guidelines into one place that can be updated. --- .claude/agents/developer.md | 8 +- .claude/agents/reviewer.md | 64 +++++ .claude/commands/dev-team.md | 2 +- .claude/commands/developer-fix.md | 28 +- .claude/commands/developer-implement.md | 21 +- .claude/commands/developer-patterns.md | 55 +--- .claude/commands/reviewer-pr-review.md | 50 ++++ .claude/commands/reviewer-review.md | 130 +++++++++ .claude/commands/reviewer-sign-off.md | 78 ++++++ .claude/scripts/dev_team.py | 301 ++++++++++++++++++--- .claude/scripts/implementation-pipeline.md | 23 ++ CLAUDE.md | 78 +----- CONTRIBUTING.md | 141 +++++++++- 13 files changed, 806 insertions(+), 173 deletions(-) create mode 100644 .claude/agents/reviewer.md create mode 100644 .claude/commands/reviewer-pr-review.md create mode 100644 .claude/commands/reviewer-review.md create mode 100644 .claude/commands/reviewer-sign-off.md create mode 100644 .claude/scripts/implementation-pipeline.md diff --git a/.claude/agents/developer.md b/.claude/agents/developer.md index f162278d..910eb735 100644 --- a/.claude/agents/developer.md +++ b/.claude/agents/developer.md @@ -32,9 +32,9 @@ Jira or GitHub. Your deliverable is code and a work summary. ## Before writing any code -Invoke the `developer-patterns` skill to load architectural and design patterns. Read -`CLAUDE.md` for project-wide conventions (logging, test structure, quality gates, project -layout). These apply to everything you write. +Read `CONTRIBUTING.md` for all code guidelines and patterns: logging, test structure, async +design, testable state, E2E conventions, and project layout. Read `CLAUDE.md` for quality +gates and operational conventions. These apply to everything you write. ## Scope discipline @@ -62,7 +62,7 @@ Before reporting done, review the diff as if you are doing a code review: - Does the implementation match the brief's exit criteria? - Are there missing test cases? -- Do all files follow CLAUDE.md naming and structure conventions? +- Do all files follow CONTRIBUTING.md naming and structure conventions? - Is there any scope creep? ## Output format diff --git a/.claude/agents/reviewer.md b/.claude/agents/reviewer.md new file mode 100644 index 00000000..6303301d --- /dev/null +++ b/.claude/agents/reviewer.md @@ -0,0 +1,64 @@ +--- +name: reviewer +description: > + Reviewer agent for the AdaptiveRemote development team. Reviews code changes for + correctness, performance, security, and compliance with the task brief. Creates GitHub + PRs and posts structured review comments. Read-only on source code; only writes to + GitHub review threads. +model: sonnet +tools: + - Read + - Glob + - Grep + - Skill +--- + +You are the Reviewer for the AdaptiveRemote development team. + +## Role + +Your job is to review code changes and ensure they meet quality, correctness, security, +and requirements standards. You create GitHub PRs and post review comments. You never +modify source files — your only output is GitHub review comments and a structured JSON +result. + +## Before reviewing any code + +Read `CONTRIBUTING.md` for all code guidelines and patterns. These are the standards +against which you review. Read the relevant `_doc_*.md` files for any subsystem touched +by the change. + +## Review priorities + +Evaluate code changes in this priority order. Post inline comments for substantive issues. +Note style issues but do not block approval on style alone. + +1. **Requirements** — every exit criterion from the task brief is met +2. **Correctness and fault tolerance** — exception paths handled; no silent failures; + `CancellationToken` passed everywhere async; no blocking calls in async code +3. **Security** — no injection risks; no sensitive data logged or exposed; auth is checked + at system boundaries +4. **Performance** — no N+1 patterns; no synchronous I/O on hot paths; no unnecessary + allocations in loops +5. **Documentation** — new code conforms to the relevant `_doc_*.md` architecture files; + if a design changed, the doc is updated to match +6. **Code style** (low priority) — naming conventions, `[LoggerMessage]` usage, + `MockBehavior.Strict`, test structure from CONTRIBUTING.md + +## Output format + +After posting the GitHub PR review, output a human-readable summary of the issues found +(for use by the developer if changes are requested), followed by a JSON result on its own +line as the final output: + +```json +{"status": "approved|changes_requested", "pr_url": "https://github.com/..."} +``` + +## Skills + +Use the `Skill` tool to invoke your task-specific workflows: + +- `reviewer-review` — first-pass review: create PR if needed, review all changes, post comments +- `reviewer-sign-off` — sign-off pass: check resolved comments, scan modified files for regressions +- `reviewer-pr-review` — respond to human reviewer comments on a PR diff --git a/.claude/commands/dev-team.md b/.claude/commands/dev-team.md index 14713dc5..193ef586 100644 --- a/.claude/commands/dev-team.md +++ b/.claude/commands/dev-team.md @@ -33,7 +33,7 @@ python -c "import sys; print(sys.platform)" 2. Start the pipeline script in the background: ```bash -python -u .claude/scripts/dev_team.py $ARGUMENTS +python -u .claude/scripts/dev_team.py $ARGUMENTS --workflow .claude/scripts/implementation-pipeline.md ``` 3. **Immediately** call the Monitor tool on the background process to stream its output. diff --git a/.claude/commands/developer-fix.md b/.claude/commands/developer-fix.md index c9ec5848..d51e89af 100644 --- a/.claude/commands/developer-fix.md +++ b/.claude/commands/developer-fix.md @@ -29,7 +29,8 @@ $ISSUES ### 1 — Load standards -Invoke the `developer-patterns` skill. Read `CLAUDE.md` for project-wide conventions. +Invoke the `developer-patterns` skill (loads all guidelines from `CONTRIBUTING.md`). +Read `CLAUDE.md` for quality gates and operational conventions. ### 2 — Understand context @@ -51,13 +52,26 @@ For each issue: ### 4 — Fix each issue -Address issues one at a time. After each fix, build and test the affected code to confirm -the fix works without introducing new failures: +Address issues one at a time. After each fix: -```bash -dotnet build -dotnet test -``` +1. Build and test the affected code to confirm the fix works without introducing new failures: + + ```bash + dotnet build + dotnet test + ``` + +2. Commit the fix immediately with a message describing the specific issue resolved: + + ```bash + git add -A + git commit -m "$ARGUMENTS: " + ``` + + One commit per issue keeps the git history readable and makes individual fixes easy to + review. Do not batch multiple fixes into a single commit. + +Do not push — the pipeline pushes after all fixes pass full validation. ### 5 — Self-review diff --git a/.claude/commands/developer-implement.md b/.claude/commands/developer-implement.md index b8c762b4..973587f1 100644 --- a/.claude/commands/developer-implement.md +++ b/.claude/commands/developer-implement.md @@ -17,8 +17,8 @@ $TASK_BRIEF ### 1 — Load standards -Invoke the `developer-patterns` skill. Read `CLAUDE.md` for project-wide conventions -(logging, test structure, quality gates, project layout). +Invoke the `developer-patterns` skill (loads all guidelines from `CONTRIBUTING.md`). +Read `CLAUDE.md` for quality gates and operational conventions. ### 2 — Understand the task @@ -63,7 +63,20 @@ dotnet test Fix any build errors or new test failures before moving to the next layer. -### 6 — Self-review +### 6 — Commit + +Commit all changes with a clear message that describes what was implemented and why: + +```bash +git add -A +git commit -m "$ARGUMENTS: " +``` + +The message body (optional) can list the key decisions if they are non-obvious. + +Do not push — the pipeline pushes after validation passes. + +### 7 — Self-review Review the diff as if you were doing a code review: @@ -72,7 +85,7 @@ Review the diff as if you were doing a code review: - Do all files follow CLAUDE.md naming, structure, and logging conventions? - Is there any scope creep — changes not required by the brief? -### 7 — Report +### 8 — Report Return a work summary as structured prose: diff --git a/.claude/commands/developer-patterns.md b/.claude/commands/developer-patterns.md index 4f3ee642..108d03fc 100644 --- a/.claude/commands/developer-patterns.md +++ b/.claude/commands/developer-patterns.md @@ -1,53 +1,8 @@ --- -description: Architectural and design patterns for the AdaptiveRemote developer. Read this before writing any application or test code. Covers async design, testable state, E2E test authoring, and project layout — patterns not already in CLAUDE.md. +description: Load code guidelines and architectural patterns before writing any code. Redirects to CONTRIBUTING.md, which is the single source of truth for all coding conventions, async design, testable state, E2E test authoring, and project layout. --- -Read CLAUDE.md for project-wide test conventions (naming, AAA structure, Moq setup, CreateSut, -async task patterns, logging, quality gates). The patterns below are supplemental. - ---- - -## Async in application code - -- Fetch async-backed data up front before entering processing-heavy code. Don't scatter async - calls through processing logic just to retrieve data on demand — fetch first, process second. -- Always include a `CancellationToken` parameter in every async API. Never provide a default - value — callers must explicitly decide to pass `CancellationToken.None`. - -## Testable state design - -- For services with significant internal state, extract that state into a Data object or - ViewModel. The service acts on the object; the object's fields are directly settable and - readable in tests — no reflection or internal-visible hacks needed. -- Views (Blazor components, WPF controls) must be minimal: only enough to display and interact - with the ViewModel. All logic goes in a controller service that manipulates the ViewModel and - handles its change events. Because controller services have no UI dependencies, they can be - fully unit-tested. - -## E2E / API tests (Gherkin) - -Write Gherkin scenarios before implementing. Use existing steps whenever possible. - -New steps must be generalized, not single-purpose: - -- `When` = action a human could perform; `Then` = result a human could verify; - `Given` = state a human could set up. A failed test should be reproducible manually - without internal knowledge of the implementation. -- Step definitions: argument/state validation plus a single test service call. No logic in - step definitions — put it in test service methods. -- Step definitions and test service methods are never `async`. Use - `WaitHelper.WaitForAsyncTask` to block on async calls. - - **Exception:** interprocess test services communicate over JSON-RPC and are inherently - async. Use the provided synchronous extension methods that wrap - `WaitHelpers.WaitForAsyncTask` with appropriate timeouts. -- All state-verification steps use `WaitHelpers.ExecuteWithRetries` to poll until the - expected state is observed or a timeout expires. Action steps that may not succeed on the - first attempt should also use `WaitHelpers.ExecuteWithRetries`. Never write a manual retry - loop. -- Timeouts unblock a test that will never pass. They are not performance assertions — set - them long enough that a correct, unloaded system would always succeed. - -## Project layout - -Before creating new files, read `src/_doc_Projects.md` and the relevant `_doc_Services.md` -to confirm the correct project and folder for each new file. +Read `CONTRIBUTING.md` for all code guidelines and architectural patterns: testing +conventions, logging rules, async design (fetch-first, `CancellationToken`), testable +state design (Data objects / ViewModels, controller services), E2E / API test patterns +(Gherkin, `WaitHelpers`, step-definition rules), and project layout guidance. diff --git a/.claude/commands/reviewer-pr-review.md b/.claude/commands/reviewer-pr-review.md new file mode 100644 index 00000000..6919d52e --- /dev/null +++ b/.claude/commands/reviewer-pr-review.md @@ -0,0 +1,50 @@ +--- +description: Respond to human reviewer comments on a GitHub PR. Reads unresolved human-authored review comments, identifies new guidelines, asks clarifying questions via the PR thread, and outputs suggested additions to CONTRIBUTING.md. Used in the human-in-the-loop review pipeline (not dev-team). +--- + +You are responding to human review comments on PR $PR_URL for work item $WORK_ITEM_ID. + +--- + +## Step 1 — Read the PR review comments + +Using the GitHub MCP, fetch all review comments on the PR. Identify which comments were +left by a human reviewer (not the automated reviewer agent). Focus on unresolved threads. + +## Step 2 — Categorise each comment + +For each human comment, determine: + +1. **Is it a project guideline?** Does the comment point out something that should + always be done (or never done) in this codebase? If so, it is a candidate for + `CONTRIBUTING.md`. + +2. **Is it task-specific?** Does the comment only apply to this particular change and + would not generalise to other code? If so, it is not a guideline candidate. + +3. **Is it ambiguous?** Does the comment require clarification before you can determine + whether it is a guideline? If so, post a question. + +## Step 3 — Ask clarifying questions + +For any comment that is ambiguous, post a reply in the PR thread via the GitHub MCP asking +a specific, narrow question. For example: + +- "Is this a project-wide requirement, or specific to this service?" +- "Should this pattern apply to all async methods, or only public APIs?" + +Do not ask questions about comments you can already categorise clearly. + +## Step 4 — Output suggested guidelines + +For each comment that is clearly a project guideline, output a suggested addition to +`CONTRIBUTING.md` in this format: + +--- +**Section:** [which Code Guidelines section this belongs in] +**Suggested addition:** +> [exact text to add, written in the same style as existing CONTRIBUTING.md guidelines] +**Rationale:** [why this should be a permanent guideline] +--- + +Do not modify `CONTRIBUTING.md` directly. Present the suggestions for human approval first. diff --git a/.claude/commands/reviewer-review.md b/.claude/commands/reviewer-review.md new file mode 100644 index 00000000..a897eacb --- /dev/null +++ b/.claude/commands/reviewer-review.md @@ -0,0 +1,130 @@ +--- +description: First-pass code review for the AdaptiveRemote dev-team pipeline. Creates a GitHub PR if one does not exist, retrieves the PR diff, reviews all changes against requirements and quality criteria, and posts a GitHub PR review. Outputs a JSON result indicating approved or changes_requested. +--- + +You are performing the first-pass code review for work item $WORK_ITEM_ID. + +**Task brief:** +$TASK_BRIEF + +**PR URL (empty = not yet created):** $PR_URL + +**Spec file path:** $SPEC_PATH + +--- + +## Step 1 — Load guidelines + +Read `CONTRIBUTING.md` in full. This is the authoritative reference for all code +conventions you will evaluate. + +## Step 2 — Understand the requirements + +Re-read the task brief above and extract the exit criteria — the explicit list of things +the implementation must do or must not do. You will check each one during review. + +Read the relevant `_doc_*.md` architecture files for any subsystem touched by this change. +Use the area→file table in `CLAUDE.md` to find them. + +## Step 3 — Create the PR if needed + +If `$PR_URL` is empty: + +1. Determine the current branch name (`git branch --show-current` or read `.git/HEAD`) +2. Use the GitHub MCP to create a pull request with: + - A clear, descriptive title (summarises what the work item implements) + - A body that includes: the work item ID, a summary of what changed, and any notable + design decisions from the implementation +3. Record the PR URL — you will include it in your output JSON + +If `$PR_URL` is already set, use that PR for the review. + +## Step 4 — Retrieve and read the diff + +Use the GitHub MCP to fetch the PR diff. Read all changed files in full to understand the +complete context of each change. + +## Step 5 — Review the changes + +Evaluate the diff against each dimension below, in priority order. For each issue you +find, note the file, line number, and a clear description of the problem. + +### Priority 1 — Requirements + +Check each exit criterion from the task brief: +- Is it implemented? +- Is it implemented correctly (not just partially)? +- Are there edge cases the brief implied but the implementation misses? + +### Priority 2 — Correctness and fault tolerance + +- Are all exception paths handled? No swallowed exceptions, no empty `catch` blocks. +- Are `CancellationToken` parameters present in every async method signature? No default + values — callers must pass explicitly. +- Are there blocking calls (`.Result`, `.Wait()`, `Thread.Sleep`) on async code paths? +- Does error handling propagate faithfully, or does it silently discard failures? + +### Priority 3 — Security + +- Is user input validated at system boundaries? +- Are there SQL injection, command injection, or path traversal risks? +- Is sensitive data (tokens, passwords, PII) logged or returned in error messages? +- Are authentication/authorization checks present where the architecture requires them? + +### Priority 4 — Performance + +- Are there N+1 query patterns (fetching inside a loop that could be batched)? +- Is there synchronous I/O on async code paths? +- Are there unnecessary allocations in hot loops (string concatenation, LINQ on every + call, etc.)? +- Are async-backed data fetches happening up front (fetch-first pattern) rather than + scattered through processing logic? + +### Priority 5 — Documentation + +- Does new code conform to the design described in the relevant `_doc_*.md` files? +- If the implementation changed the design (new interface, changed responsibility, + new dependency), has the relevant `_doc_*.md` been updated? + +### Priority 6 — Code style (note, do not block) + +- Do naming conventions follow CONTRIBUTING.md (`ClassName_Method_Scenario_ExpectedResult` + for tests, etc.)? +- Do log messages use `[LoggerMessage]` source-generated methods? +- Do tests use `MockBehavior.Strict` and `Expect_*` helpers? +- Is there a `CreateSut()` method? + +## Step 6 — Post the GitHub PR review + +Use the GitHub MCP to create a **pull request review** (not a regular PR comment). A +review consists of: + +1. A collection of **review comments** — each attached to a specific file and line in + the diff. Create one review comment per issue found. Each comment must: + - Reference the exact file path and diff line + - Explain what the problem is and why it matters (not just what to change) + - For Priority 6 style issues, prefix with "nit:" to signal non-blocking + +2. A **review submission** with event type: + - `APPROVE` — if no Priority 1–5 blocking issues were found + - `REQUEST_CHANGES` — if any Priority 1–5 issue was found + +The GitHub API for this is `POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews`. +The MCP tool that wraps this creates a review with inline comments in a single call. +Do NOT use the regular PR comment endpoint (`POST /repos/{owner}/{repo}/issues/{number}/comments`) +— that creates a plain conversation comment, not a structured review thread. + +## Step 7 — Output + +Write a concise plain-text summary of all issues found (one bullet per issue, Priority +1–5 first, style issues last). This summary will be passed to the developer so they can +address each point without re-reading the full PR thread. + +Then output the JSON result as the final line: + +```json +{"status": "approved|changes_requested", "pr_url": "https://github.com/..."} +``` + +Use `"approved"` if no Priority 1–5 issues were found; `"changes_requested"` otherwise. +Always include the PR URL even when approving. diff --git a/.claude/commands/reviewer-sign-off.md b/.claude/commands/reviewer-sign-off.md new file mode 100644 index 00000000..ddf83649 --- /dev/null +++ b/.claude/commands/reviewer-sign-off.md @@ -0,0 +1,78 @@ +--- +description: Sign-off review for the AdaptiveRemote dev-team pipeline. After the developer has addressed review comments, checks whether each previously requested change has been resolved and scans modified files for new issues. Outputs a JSON result indicating approved or changes_requested. +--- + +You are performing a sign-off review for work item $WORK_ITEM_ID. + +**Task brief:** +$TASK_BRIEF + +**PR URL:** $PR_URL + +--- + +## Step 1 — Load guidelines + +Read `CONTRIBUTING.md` in full. These are the standards against which you are reviewing. + +## Step 2 — Push latest changes to remote + +The developer has made fixes since the last review. Ensure the latest commits are visible +on the remote before reviewing. Check `git status` and `git log --oneline -5` to understand +what has changed since the last push. + +## Step 3 — Retrieve prior review threads + +Using the GitHub MCP, fetch all existing review comments on the PR at `$PR_URL`. For each +unresolved thread, note: +- What was the original issue? +- What file and line was it on? +- Has that file/line been modified since the comment was posted? + +## Step 4 — Check each prior thread for resolution + +For each unresolved review comment: + +1. Read the relevant section of the latest code +2. Determine whether the issue has been adequately addressed: + - **Resolved:** the problem no longer exists in the code. Resolve the thread via the + GitHub MCP and note it as resolved in your output summary. + - **Partially addressed:** the developer made a change but the underlying problem + remains or a different instance was missed. Add a follow-up comment explaining what + still needs to be done. + - **Not addressed:** the code is unchanged. Add a follow-up comment restating what is + needed and why, more clearly if the original comment was ambiguous. + +## Step 5 — Scan modified files for new issues + +Identify all files that were modified since the last review push (use the PR diff or git +log). Scan **only those files** for new issues introduced by the developer's fix — do not +re-review unmodified code. + +Apply the same priority order as the first-pass review: +1. Requirements, 2. Correctness/fault tolerance, 3. Security, 4. Performance, +5. Documentation, 6. Code style (note only) + +Post new inline review comments for any new Priority 1–5 issues found in the modified +files. + +## Step 6 — Submit the sign-off review + +Use the GitHub MCP to submit a **pull request review** (not a plain PR comment) via +`POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews`. Any new issues should be +inline review comments attached to the specific file and line. Submit with event type: +- **APPROVE** if all prior threads are resolved (or resolved via this pass) and no new + Priority 1–5 issues were found in the modified files +- **REQUEST_CHANGES** otherwise + +## Step 7 — Output + +Write a concise summary: +- List each prior comment and whether it was resolved or still needs work +- List any new issues found in the modified files + +Then output the JSON result as the final line: + +```json +{"status": "approved|changes_requested"} +``` diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py index 3cd023c1..e76e0dc0 100644 --- a/.claude/scripts/dev_team.py +++ b/.claude/scripts/dev_team.py @@ -9,8 +9,10 @@ .claude/logs/dev-team/-context.md """ +import argparse import datetime import json +import re import shutil import subprocess import sys @@ -26,28 +28,76 @@ } MAX_FIX_ITERATIONS = 5 +MAX_REVIEW_FIX_ITERATIONS = 3 # --------------------------------------------------------------------------- -# State machine +# Workflow definition (parsed from a Mermaid stateDiagram-v2 file) # --------------------------------------------------------------------------- -TRANSITIONS: dict[str, dict[str, str]] = { - "init": {"start": "researching"}, - "researching": {"research_done": "implementing"}, - "implementing": {"impl_done": "validating"}, - "validating": { - "build_failed": "fixing", - "tests_failed": "fixing", - "clean": "done", - }, - "fixing": { - "fix_done": "validating", - "max_retries": "failed", - }, -} +@dataclass +class WorkflowDefinition: + transitions: dict[str, dict[str, str]] + terminal_states: set[str] + initial_state: str + + +def parse_workflow(path: Path) -> WorkflowDefinition: + """Parse a Mermaid stateDiagram-v2 block from a markdown file. + + Recognises three line forms inside the diagram: + [*] --> StateA → initial state + StateA --> [*] → terminal state + StateA --> StateB : t → transition with trigger t + """ + text = path.read_text(encoding="utf-8") + + # Extract the first ```mermaid ... ``` fenced block. + match = re.search(r"```mermaid\s*\n(.*?)```", text, re.DOTALL) + if not match: + raise ValueError(f"No mermaid fenced block found in {path}") + + transitions: dict[str, dict[str, str]] = {} + terminal_states: set[str] = set() + initial_state: str | None = None + + for raw_line in match.group(1).splitlines(): + line = raw_line.strip() + if "-->" not in line: + continue + + # StateA --> [*] (terminal) + m = re.match(r"^([\w-]+)\s+-->\s+\[\*\]$", line) + if m: + terminal_states.add(m.group(1)) + continue + + # [*] --> StateA (initial) + m = re.match(r"^\[\*\]\s+-->\s+([\w-]+)$", line) + if m: + initial_state = m.group(1) + continue + + # StateA --> StateB : trigger + m = re.match(r"^([\w-]+)\s+-->\s+([\w-]+)\s*:\s*([\w-]+)$", line) + if m: + src, dst, trigger = m.group(1), m.group(2), m.group(3) + transitions.setdefault(src, {})[trigger] = dst + continue + + if initial_state is None: + raise ValueError(f"No initial state ([*] --> ...) found in {path}") + + return WorkflowDefinition( + transitions=transitions, + terminal_states=terminal_states, + initial_state=initial_state, + ) + -TERMINAL_STATES = {"done", "failed"} +# --------------------------------------------------------------------------- +# State machine +# --------------------------------------------------------------------------- class StateMachine: @@ -83,6 +133,9 @@ class PipelineContext: brief: str = "" work_summaries: list[str] = field(default_factory=list) fix_iteration: int = 0 + review_fix_iteration: int = 0 + pr_url: str = "" + review_notes: str = "" last_failure: str = "" started: datetime.datetime = field(default_factory=datetime.datetime.now) last_updated: datetime.datetime = field(default_factory=datetime.datetime.now) @@ -97,6 +150,8 @@ def save(self, path: Path) -> None: f"work_item_id: {self.work_item_id}", f"spec_path: {self.spec_path}", f"fix_iteration: {self.fix_iteration}", + f"review_fix_iteration: {self.review_fix_iteration}", + f"pr_url: {self.pr_url}", f"started: {self.started.isoformat()}", f"last_updated: {self.last_updated.isoformat()}", "---", @@ -112,6 +167,9 @@ def save(self, path: Path) -> None: for i, summary in enumerate(self.work_summaries[1:], start=1): lines += ["", f"## Fix {i}", "", summary.strip()] + if self.review_notes: + lines += ["", "## Review Notes", "", self.review_notes.strip()] + if self.last_failure: lines += ["", "## Last Failure", "", self.last_failure.strip()] @@ -129,6 +187,8 @@ def load(cls, path: Path) -> "PipelineContext": spec_path=meta.get("spec_path", ""), state=meta.get("state", "init"), fix_iteration=int(meta.get("fix_iteration", 0)), + review_fix_iteration=int(meta.get("review_fix_iteration", 0)), + pr_url=meta.get("pr_url", ""), ) try: @@ -148,6 +208,7 @@ def load(cls, path: Path) -> "PipelineContext": work_summaries.append(sections[f"Fix {i}"]) i += 1 ctx.work_summaries = work_summaries + ctx.review_notes = sections.get("Review Notes", "") ctx.last_failure = sections.get("Last Failure", "") return ctx @@ -174,6 +235,60 @@ def _parse_sections(body: str) -> dict[str, str]: return sections +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _commit_and_push(work_item_id: str) -> None: + """Push the current branch. The developer is expected to have already committed. + + As a safety net, stages and commits any uncommitted changes left by the developer + before pushing. The pipeline never pushes until validation is clean, so partial + or broken work is never sent to the remote. + """ + try: + subprocess.run(["git", "add", "-A"], check=True, cwd=REPO_ROOT, capture_output=True) + diff = subprocess.run( + ["git", "diff", "--cached", "--quiet"], cwd=REPO_ROOT, capture_output=True, + ) + if diff.returncode != 0: + subprocess.run( + ["git", "commit", "-m", f"{work_item_id}: uncommitted changes at validation"], + check=True, cwd=REPO_ROOT, capture_output=True, + ) + subprocess.run( + ["git", "push", "origin", "HEAD"], + check=True, cwd=REPO_ROOT, capture_output=True, + ) + except subprocess.CalledProcessError as e: + print(f"Warning: git commit/push failed (continuing): {e.stderr}", flush=True) + + +def parse_json_output(text: str) -> dict: + """Extract the last parseable JSON object from agent output text. + + Tries single-line JSON objects from the end of the text first, then + fenced code blocks. Returns an empty dict if nothing parses. + """ + for line in reversed(text.strip().splitlines()): + line = line.strip() + if line.startswith("{") and line.endswith("}"): + try: + return json.loads(line) + except json.JSONDecodeError: + continue + + for block in reversed(re.findall(r"```(?:json)?\s*\n([\s\S]*?)\n```", text)): + try: + result = json.loads(block.strip()) + if isinstance(result, dict): + return result + except json.JSONDecodeError: + continue + + return {} + + # --------------------------------------------------------------------------- # Steps # --------------------------------------------------------------------------- @@ -256,11 +371,72 @@ def run(self, ctx: PipelineContext) -> str: ctx.last_failure = f"Test failures:\n\n{tests_output}" return "tests_failed" - print("Build and tests clean.", flush=True) + print("Build and tests clean. Committing and pushing...", flush=True) ctx.last_failure = "" + _commit_and_push(ctx.work_item_id) return "clean" +class ReviewStep(Step): + handles = "reviewing" + + def run(self, ctx: PipelineContext) -> str: + print(f"Reviewer is reviewing {ctx.work_item_id}...", flush=True) + try: + output = call_agent( + "reviewer", "reviewer-review", + substitutions={ + "$WORK_ITEM_ID": ctx.work_item_id, + "$SPEC_PATH": ctx.spec_path, + "$TASK_BRIEF": ctx.brief, + "$PR_URL": ctx.pr_url, + }, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking reviewer agent:\n{e}", file=sys.stderr) + sys.exit(1) + + result = parse_json_output(output) + if result.get("pr_url"): + ctx.pr_url = result["pr_url"] + ctx.review_notes = output + + status = result.get("status", "changes_requested") + if status == "approved": + print("Review approved.", flush=True) + return "approved" + print("Reviewer requested changes.", flush=True) + return "changes_requested" + + +class SignOffStep(Step): + handles = "reviewing-signoff" + + def run(self, ctx: PipelineContext) -> str: + print(f"Reviewer is signing off on {ctx.work_item_id}...", flush=True) + try: + output = call_agent( + "reviewer", "reviewer-sign-off", + substitutions={ + "$WORK_ITEM_ID": ctx.work_item_id, + "$TASK_BRIEF": ctx.brief, + "$PR_URL": ctx.pr_url, + }, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking reviewer agent:\n{e}", file=sys.stderr) + sys.exit(1) + + ctx.review_notes = output + result = parse_json_output(output) + status = result.get("status", "changes_requested") + if status == "approved": + print("Sign-off approved.", flush=True) + return "approved" + print("Sign-off requested further changes.", flush=True) + return "changes_requested" + + class FixStep(Step): handles = "fixing" @@ -294,6 +470,44 @@ def run(self, ctx: PipelineContext) -> str: return "fix_done" +class FixPrStep(Step): + handles = "fixing-pr" + + def run(self, ctx: PipelineContext) -> str: + if ctx.review_fix_iteration >= MAX_REVIEW_FIX_ITERATIONS: + print( + f"Error: still failing review after {MAX_REVIEW_FIX_ITERATIONS} " + f"review fix iterations. Manual intervention needed.", + file=sys.stderr, + ) + return "max_retries" + print( + f"Invoking developer to address review comments " + f"(iteration {ctx.review_fix_iteration + 1} of {MAX_REVIEW_FIX_ITERATIONS})...", + flush=True, + ) + issues = ( + f"Code review changes requested on PR {ctx.pr_url}.\n\n" + f"Read the open review threads on the PR and address each one.\n\n" + f"Reviewer summary:\n{ctx.review_notes}" + ) + try: + fix_summary = call_agent( + "developer", "developer-fix", ctx.work_item_id, + substitutions={ + "$TASK_BRIEF": ctx.brief, + "$WORK_SUMMARIES": _format_work_summaries(ctx.work_summaries), + "$ISSUES": issues, + }, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking developer-fix agent:\n{e}", file=sys.stderr) + sys.exit(1) + ctx.review_fix_iteration += 1 + ctx.work_summaries.append(fix_summary) + return "fix_done" + + # --------------------------------------------------------------------------- # Pipeline # --------------------------------------------------------------------------- @@ -301,24 +515,31 @@ def run(self, ctx: PipelineContext) -> str: class DevTeamPipeline: """Drives the dev-team state machine from init (or a resumed state) to done.""" - def __init__(self, ctx: PipelineContext, context_path: Path) -> None: + def __init__(self, ctx: PipelineContext, context_path: Path, workflow: WorkflowDefinition) -> None: self.ctx = ctx self.context_path = context_path - self.machine = StateMachine(TRANSITIONS, initial=ctx.state) + self.workflow = workflow + self.machine = StateMachine(workflow.transitions, initial=ctx.state) + validate_step = ValidateStep() self.step_handlers: dict[str, Step] = { "researching": ResearchStep(), "implementing": ImplementStep(), - "validating": ValidateStep(), + "validating": validate_step, + "validating-pr": validate_step, "fixing": FixStep(), + "reviewing": ReviewStep(), + "reviewing-signoff": SignOffStep(), + "fixing-pr": FixPrStep(), } def run(self) -> None: - if self.machine.state == "init": - self.machine.transition("start") + if self.machine.state == self.workflow.initial_state: + boot_trigger = next(iter(self.workflow.transitions[self.workflow.initial_state])) + self.machine.transition(boot_trigger) self.ctx.state = self.machine.state self.ctx.save(self.context_path) - while self.machine.state not in TERMINAL_STATES: + while self.machine.state not in self.workflow.terminal_states: step = self.step_handlers.get(self.machine.state) if step is None: raise RuntimeError(f"No handler for state '{self.machine.state}'") @@ -505,6 +726,7 @@ def call_agent( stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, + encoding="utf-8", cwd=REPO_ROOT, ) except (FileNotFoundError, OSError) as exc: @@ -645,14 +867,26 @@ def find_spec_file(work_item_id: str) -> Path: # --------------------------------------------------------------------------- def main() -> None: - if len(sys.argv) < 2: - print( - "Usage: dev_team.py (e.g. dev_team.py ADR-172)", - file=sys.stderr, - ) - sys.exit(1) + parser = argparse.ArgumentParser( + prog="dev_team.py", + description="dev-team pipeline orchestrator", + ) + parser.add_argument("work_item_id", metavar="work-item-id", + help="Jira work item ID (e.g. ADR-172)") + parser.add_argument("--workflow", required=True, metavar="path", + help="Path to a Mermaid stateDiagram-v2 workflow file") + args = parser.parse_args() + + work_item_id = args.work_item_id + workflow_path = Path(args.workflow) + if not workflow_path.is_absolute(): + workflow_path = REPO_ROOT / workflow_path - work_item_id = sys.argv[1] + try: + workflow = parse_workflow(workflow_path) + except (FileNotFoundError, ValueError) as e: + print(f"Error loading workflow: {e}", file=sys.stderr) + sys.exit(1) log_dir = REPO_ROOT / ".claude" / "logs" / "dev-team" log_dir.mkdir(parents=True, exist_ok=True) @@ -660,7 +894,7 @@ def main() -> None: if context_path.exists(): ctx = PipelineContext.load(context_path) - if ctx.state in TERMINAL_STATES: + if ctx.state in workflow.terminal_states: print(f"Previous run ended with state '{ctx.state}'.") print(f"Delete {context_path} to run again.") sys.exit(0 if ctx.state == "done" else 1) @@ -670,10 +904,11 @@ def main() -> None: spec_file = find_spec_file(work_item_id) spec_path = str(spec_file.relative_to(REPO_ROOT)) print(f"Found {spec_file}", flush=True) - ctx = PipelineContext(work_item_id=work_item_id, spec_path=spec_path) + ctx = PipelineContext(work_item_id=work_item_id, spec_path=spec_path, + state=workflow.initial_state) ctx.save(context_path) - DevTeamPipeline(ctx, context_path).run() + DevTeamPipeline(ctx, context_path, workflow).run() if __name__ == "__main__": diff --git a/.claude/scripts/implementation-pipeline.md b/.claude/scripts/implementation-pipeline.md new file mode 100644 index 00000000..03c1be19 --- /dev/null +++ b/.claude/scripts/implementation-pipeline.md @@ -0,0 +1,23 @@ +```mermaid +stateDiagram-v2 + [*] --> init + init --> researching : start + researching --> implementing : research_done + implementing --> validating : impl_done + validating --> fixing : build_failed + validating --> fixing : tests_failed + validating --> reviewing : clean + reviewing --> done : approved + reviewing --> fixing-pr : changes_requested + fixing-pr --> validating-pr : fix_done + validating-pr --> fixing-pr : build_failed + validating-pr --> fixing-pr : tests_failed + validating-pr --> reviewing-signoff : clean + reviewing-signoff --> done : approved + reviewing-signoff --> fixing-pr : changes_requested + fixing --> validating : fix_done + fixing --> failed : max_retries + fixing-pr --> failed : max_retries + done --> [*] + failed --> [*] +``` diff --git a/CLAUDE.md b/CLAUDE.md index 9ca4eb43..9e60cf80 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,6 +2,8 @@ AdaptiveRemote is an accessible remote control for TV/AV equipment built for users with limited or total loss of mobility. Accessibility is the primary design constraint: vision accessibility first, speech recognition second, eye-gaze input third. +> If you are planning, writing, or reviewing code, read the code guidelines in `CONTRIBUTING.md`. + ## Read Before Making Changes Read the `_doc_*.md` file for any area you plan to modify: @@ -30,81 +32,13 @@ Read the `_doc_*.md` file for any area you plan to modify: ## Logging -All log messages are defined as `[LoggerMessage]` source-generated methods in -`src/AdaptiveRemote.App/Logging/MessageLogger.cs`. Never call `logger.LogXxx()` directly — -add new methods to `MessageLogger` instead. - -Event IDs are organized in ranges by subsystem: - -| Range | Subsystem | -|-------|-----------| -| 100–199 | SpeechRecognitionEngine | -| 200–299 | ConversationController | -| 300–399 | SpeechRecognition | -| 400–499 | SpeechSynthesis | -| 500–599 | ListeningController | -| 600–699 | CommandService | -| 700–799 | ApplicationLifecycle | -| 800–899 | TiVoConnection | -| 900–999 | UdpService | -| 1000–1099 | BroadlinkCommandService | -| 1100–1199 | CompiledLayoutService (backend) | -| 1200–1299 | RawLayoutService (backend) | -| 1300–1699 | (reserved — App subsystems: ProgrammaticSettings, ScopedBackgroundProcess, ConversationState, SamplesRecorder, TestEndpointService, CognitoTokenService) | -| 1700–1799 | LayoutProcessingService (backend) | -| 1800–1899 | LayoutCompilerService (backend) | - -Assign new log messages the next unused ID in the appropriate range. When replacing an existing -message, use exact text including whitespace, newlines, and punctuation. - -In tests, verify log output using `MockLogger.VerifyMessages(log => { log.MethodName(...); })`. +See `CONTRIBUTING.md` for logging conventions (`[LoggerMessage]` source-gen, event ID +ranges, test verification with `MockLogger.VerifyMessages`). ## Testing -### Naming -`ClassName_Method_Scenario_ExpectedResult` -Example: `TiVoService_InitializeAsync_WaitsForTiVoLocator` - -### Structure -Use AAA (Arrange-Act-Assert). Use `[TestInitialize]` for mock setup and `[TestCleanup]` for -mock verification. Group setup calls into `Expect_*` helper methods. - -### Mocks -- Create `Mock` objects as `private readonly` fields on the test class so they are shared - across setup helpers, test methods, and verification. -- Always use `MockBehavior.Strict` to catch unexpected calls. -- Wrap each `Mock.Setup` chain in an `Expect__On(dependency, ...)` helper method for - readability and resilience to dependency shape changes. - -### `CreateSut()` -Always add a `CreateSut()` method that constructs the subject under test. When the constructor -gains a new dependency, only `CreateSut()` needs to change. - -### Async / Task patterns -- Use `TaskCompletionSource` to represent a task that remains incomplete (e.g., simulating a - hanging async operation). -- Assert task state without `await`: `.Should().BeComplete()`, `.Should().NotBeComplete()`, - `.Should().BeCanceled()`, `.Should().BeFaultedWith(ex)`. -- Do not `await` tasks in unit tests when you intend to verify synchronous completion; assert - on the Task object directly instead. -- For every `async` method on a dependency, cover all of the following scenarios: - - Returns completed task → code under test continues - - Returns incomplete task → code under test awaits (stays incomplete) - - Incomplete task then completes → code under test resumes - - `CancellationToken` fires, throws `TaskCanceledException` → returned task `IsCanceled` - - `CancellationToken` fires, dependency returns complete → returned task `IsCanceled` - - `CancellationToken` fires, dependency stays incomplete → code stays incomplete - - Dependency throws directly (no `Task` returned) → exception propagates - - Dependency returns faulted `Task` → propagates - -### E2E tests -Prefer the Headless host for new E2E tests — cross-platform, no display required: - -```bash -dotnet build src/AdaptiveRemote.Headless/AdaptiveRemote.Headless.csproj -pwsh src/AdaptiveRemote.Headless/bin/Debug/net10.0/playwright.ps1 install chromium # if tests crash at startup with a JSON-RPC disconnect -dotnet test --filter "FullyQualifiedName~Host.Headless" -``` +See `CONTRIBUTING.md` for test naming, structure, mock setup, `CreateSut()`, async +scenario matrix, and E2E (Gherkin/Headless) conventions. ## Documentation diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 02bfa19a..116fafa4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,9 @@ # Contributing to AdaptiveRemote +> If you are planning, writing, or reviewing code, read the [Code Guidelines](#code-guidelines) section below. + Thank you for your interest in contributing to AdaptiveRemote! This project aims to provide -accessible remote control solutions for users with limited or no mobility, with a focus on +accessible remote control solutions for users with limited or no mobility, with a focus on vision accessibility, speech recognition, and eye-gaze input. ## How to Contribute @@ -36,11 +38,146 @@ templates for [bug reports](../.github/ISSUE_TEMPLATE/bug_report.md) and - Use clear, descriptive commit messages that explain what and why you changed. - No formal convention is required, but clarity is appreciated. - **Supported Platforms:** - - The application targets Windows OS and .NET8. + - The application targets Windows OS and .NET 10. - Required NuGet packages are restored during build. - **Contact:** - For questions or support, open an issue and @jodavis. +--- + +## Code Guidelines + +### Testing + +#### Naming + +`ClassName_Method_Scenario_ExpectedResult` + +Example: `TiVoService_InitializeAsync_WaitsForTiVoLocator` + +#### Structure + +Use AAA (Arrange-Act-Assert). Use `[TestInitialize]` for mock setup and `[TestCleanup]` for +mock verification. Group setup calls into `Expect_*` helper methods. + +#### Mocks + +- Create `Mock` objects as `private readonly` fields on the test class so they are shared + across setup helpers, test methods, and verification. +- Always use `MockBehavior.Strict` to catch unexpected calls. +- Wrap each `Mock.Setup` chain in an `Expect__On(dependency, ...)` helper method for + readability and resilience to dependency shape changes. + +#### `CreateSut()` + +Always add a `CreateSut()` method that constructs the subject under test. When the constructor +gains a new dependency, only `CreateSut()` needs to change. + +#### Async / Task patterns + +- Use `TaskCompletionSource` to represent a task that remains incomplete (e.g., simulating a + hanging async operation). +- Assert task state without `await`: `.Should().BeComplete()`, `.Should().NotBeComplete()`, + `.Should().BeCanceled()`, `.Should().BeFaultedWith(ex)`. +- Do not `await` tasks in unit tests when you intend to verify synchronous completion; assert + on the Task object directly instead. +- For every `async` method on a dependency, cover all of the following scenarios: + - Returns completed task → code under test continues + - Returns incomplete task → code under test awaits (stays incomplete) + - Incomplete task then completes → code under test resumes + - `CancellationToken` fires, throws `TaskCanceledException` → returned task `IsCanceled` + - `CancellationToken` fires, dependency returns complete → returned task `IsCanceled` + - `CancellationToken` fires, dependency stays incomplete → code stays incomplete + - Dependency throws directly (no `Task` returned) → exception propagates + - Dependency returns faulted `Task` → propagates + +#### E2E tests (Gherkin) + +Prefer the Headless host for new E2E tests — cross-platform, no display required: + +```bash +dotnet build src/AdaptiveRemote.Headless/AdaptiveRemote.Headless.csproj +pwsh src/AdaptiveRemote.Headless/bin/Debug/net10.0/playwright.ps1 install chromium +dotnet test --filter "FullyQualifiedName~Host.Headless" +``` + +Write Gherkin scenarios before implementing. Use existing steps whenever possible. + +New steps must be generalized, not single-purpose: + +- `When` = action a human could perform; `Then` = result a human could verify; + `Given` = state a human could set up. A failed test should be reproducible manually + without internal knowledge of the implementation. +- Step definitions: argument/state validation plus a single test service call. No logic in + step definitions — put it in test service methods. +- Step definitions and test service methods are never `async`. Use + `WaitHelper.WaitForAsyncTask` to block on async calls. + - **Exception:** interprocess test services communicate over JSON-RPC and are inherently + async. Use the provided synchronous extension methods that wrap + `WaitHelpers.WaitForAsyncTask` with appropriate timeouts. +- All state-verification steps use `WaitHelpers.ExecuteWithRetries` to poll until the + expected state is observed or a timeout expires. Action steps that may not succeed on the + first attempt should also use `WaitHelpers.ExecuteWithRetries`. Never write a manual retry + loop. +- Timeouts unblock a test that will never pass. They are not performance assertions — set + them long enough that a correct, unloaded system would always succeed. + +### Logging + +All log messages are defined as `[LoggerMessage]` source-generated methods in +`src/AdaptiveRemote.App/Logging/MessageLogger.cs`. Never call `logger.LogXxx()` directly — +add new methods to `MessageLogger` instead. + +Event IDs are organized in ranges by subsystem: + +| Range | Subsystem | +|-------|-----------| +| 100–199 | SpeechRecognitionEngine | +| 200–299 | ConversationController | +| 300–399 | SpeechRecognition | +| 400–499 | SpeechSynthesis | +| 500–599 | ListeningController | +| 600–699 | CommandService | +| 700–799 | ApplicationLifecycle | +| 800–899 | TiVoConnection | +| 900–999 | UdpService | +| 1000–1099 | BroadlinkCommandService | +| 1100–1199 | CompiledLayoutService (backend) | +| 1200–1299 | RawLayoutService (backend) | +| 1300–1699 | (reserved — App subsystems: ProgrammaticSettings, ScopedBackgroundProcess, ConversationState, SamplesRecorder, TestEndpointService, CognitoTokenService) | +| 1700–1799 | LayoutProcessingService (backend) | +| 1800–1899 | LayoutCompilerService (backend) | + +Assign new log messages the next unused ID in the appropriate range. When replacing an +existing message, use exact text including whitespace, newlines, and punctuation. + +In tests, verify log output using `MockLogger.VerifyMessages(log => { log.MethodName(...); })`. + +### Async in Application Code + +- Fetch async-backed data up front before entering processing-heavy code. Don't scatter + async calls through processing logic just to retrieve data on demand — fetch first, + process second. +- Always include a `CancellationToken` parameter in every async API. Never provide a + default value — callers must explicitly decide to pass `CancellationToken.None`. + +### Testable State Design + +- For services with significant internal state, extract that state into a Data object or + ViewModel. The service acts on the object; the object's fields are directly settable and + readable in tests — no reflection or internal-visible hacks needed. +- Views (Blazor components, WPF controls) must be minimal: only enough to display and + interact with the ViewModel. All logic goes in a controller service that manipulates the + ViewModel and handles its change events. Because controller services have no UI + dependencies, they can be fully unit-tested. + +### Project Layout + +Before creating new files, read `src/_doc_Projects.md` and the relevant `_doc_Services.md` +to confirm the correct project and folder for each new file. + +--- + ## Code of Conduct This project follows the From 605d402e65cc5ca57318de250e1906997c01ffae Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Thu, 21 May 2026 06:15:46 -0700 Subject: [PATCH 05/13] Write build and test output to logs instead of forwarding directly to stdout, to minimize log agent noise --- .claude/scripts/dev_team.py | 74 +++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py index e76e0dc0..70efcda2 100644 --- a/.claude/scripts/dev_team.py +++ b/.claude/scripts/dev_team.py @@ -21,6 +21,13 @@ from dataclasses import dataclass, field from pathlib import Path +# Reconfigure stdout/stderr to UTF-8 early so that Unicode characters in agent +# output (e.g. arrows, bullets) don't crash on Windows cp1252 terminals. +if hasattr(sys.stdout, "reconfigure"): + sys.stdout.reconfigure(encoding="utf-8", errors="replace") +if hasattr(sys.stderr, "reconfigure"): + sys.stderr.reconfigure(encoding="utf-8", errors="replace") + MODEL_MAP = { "haiku": "claude-haiku-4-5-20251001", "sonnet": "claude-sonnet-4-6", @@ -137,6 +144,8 @@ class PipelineContext: pr_url: str = "" review_notes: str = "" last_failure: str = "" + build_log: str = "" + test_log: str = "" started: datetime.datetime = field(default_factory=datetime.datetime.now) last_updated: datetime.datetime = field(default_factory=datetime.datetime.now) @@ -152,6 +161,8 @@ def save(self, path: Path) -> None: f"fix_iteration: {self.fix_iteration}", f"review_fix_iteration: {self.review_fix_iteration}", f"pr_url: {self.pr_url}", + f"build_log: {self.build_log}", + f"test_log: {self.test_log}", f"started: {self.started.isoformat()}", f"last_updated: {self.last_updated.isoformat()}", "---", @@ -173,6 +184,14 @@ def save(self, path: Path) -> None: if self.last_failure: lines += ["", "## Last Failure", "", self.last_failure.strip()] + log_links: list[str] = [] + if self.build_log: + log_links.append(f"- Build: {self.build_log}") + if self.test_log: + log_links.append(f"- Tests: {self.test_log}") + if log_links: + lines += ["", "## Logs", ""] + log_links + path.parent.mkdir(parents=True, exist_ok=True) path.write_text("\n".join(lines) + "\n", encoding="utf-8") @@ -189,6 +208,8 @@ def load(cls, path: Path) -> "PipelineContext": fix_iteration=int(meta.get("fix_iteration", 0)), review_fix_iteration=int(meta.get("review_fix_iteration", 0)), pr_url=meta.get("pr_url", ""), + build_log=meta.get("build_log", ""), + test_log=meta.get("test_log", ""), ) try: @@ -349,29 +370,41 @@ class ValidateStep(Step): def run(self, ctx: PipelineContext) -> str: print("Running scripts/validate-build...", flush=True) try: - build_ok, build_output = run_validate_script("validate-build") + build_ok, build_log, build_tail = run_validate_script("validate-build") except (FileNotFoundError, OSError) as e: print(f"Error running validate-build:\n{e}", file=sys.stderr) sys.exit(1) + ctx.build_log = str(build_log) if not build_ok: - print("Build failed. Invoking developer to fix...", flush=True) - ctx.last_failure = f"Build failed:\n\n{build_output}" + print(f"Build FAILED. Log: {build_log}", flush=True) + ctx.last_failure = ( + f"Build failed.\n\n" + f"Full log (read this for details): {build_log}\n\n" + f"Last 30 lines:\n```\n{build_tail}\n```" + ) return "build_failed" + print(f"Build clean. Log: {build_log}", flush=True) + print("Running scripts/validate-tests...", flush=True) try: - tests_ok, tests_output = run_validate_script("validate-tests") + tests_ok, tests_log, tests_tail = run_validate_script("validate-tests") except (FileNotFoundError, OSError) as e: print(f"Error running validate-tests:\n{e}", file=sys.stderr) sys.exit(1) + ctx.test_log = str(tests_log) if not tests_ok: - print("Tests failed. Invoking developer to fix...", flush=True) - ctx.last_failure = f"Test failures:\n\n{tests_output}" + print(f"Tests FAILED. Log: {tests_log}", flush=True) + ctx.last_failure = ( + f"Test failures.\n\n" + f"Full log (read this for details): {tests_log}\n\n" + f"Last 30 lines:\n```\n{tests_tail}\n```" + ) return "tests_failed" - print("Build and tests clean. Committing and pushing...", flush=True) + print(f"Tests clean. Log: {tests_log}", flush=True) ctx.last_failure = "" _commit_and_push(ctx.work_item_id) return "clean" @@ -787,11 +820,12 @@ def _format_work_summaries(summaries: list[str]) -> str: return "\n\n---\n\n".join(parts) -def run_validate_script(script_name: str) -> tuple[bool, str]: +def run_validate_script(script_name: str) -> tuple[bool, Path, str]: """Run a validate script from the scripts/ directory. - Selects the platform-appropriate extension (.cmd on Windows, .sh elsewhere). - Streams output to stdout and returns (success, combined_output). + Logs full output to a timestamped file under .claude/logs/dev-team/. + Nothing is streamed to the console — callers print their own status line. + Returns (success, log_path, tail) where tail is the last 30 lines. """ scripts_dir = REPO_ROOT / "scripts" ext = ".cmd" if sys.platform == "win32" else ".sh" @@ -802,6 +836,11 @@ def run_validate_script(script_name: str) -> tuple[bool, str]: f"Validate script not found: scripts/{script_name}{ext}" ) + timestamp = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") + log_dir = REPO_ROOT / ".claude" / "logs" / "dev-team" + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / f"{timestamp}-{script_name}.txt" + if sys.platform == "win32": invoke = ["cmd", "/c", str(script_path)] else: @@ -812,17 +851,20 @@ def run_validate_script(script_name: str) -> tuple[bool, str]: stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, + encoding="utf-8", + errors="replace", cwd=REPO_ROOT, ) - chunks: list[str] = [] - with proc.stdout: # type: ignore[union-attr] - for line in proc.stdout: - print(line, end="", flush=True) - chunks.append(line) + lines: list[str] = [] + with log_path.open("w", encoding="utf-8") as log_file: + for line in proc.stdout: # type: ignore[union-attr] + log_file.write(line) + lines.append(line) proc.wait() - return proc.returncode == 0, "".join(chunks) + tail = "".join(lines[-30:]) + return proc.returncode == 0, log_path, tail def find_spec_file(work_item_id: str) -> Path: From 9ba7b9a9041f76cb332a992c868ff809b6473347 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 07:39:21 -0700 Subject: [PATCH 06/13] Developer pipeline bug fixes --- .claude/agents/developer.md | 8 +++ .claude/agents/reviewer.md | 1 + .claude/commands/developer-create-pr.md | 76 +++++++++++++++++++++++++ .claude/commands/developer-fix.md | 7 ++- .claude/commands/reviewer-review.md | 21 +++---- .claude/commands/reviewer-sign-off.md | 20 +++++-- .claude/scripts/dev_team.py | 46 +++++++++++---- 7 files changed, 150 insertions(+), 29 deletions(-) create mode 100644 .claude/commands/developer-create-pr.md diff --git a/.claude/agents/developer.md b/.claude/agents/developer.md index 910eb735..acf8cb39 100644 --- a/.claude/agents/developer.md +++ b/.claude/agents/developer.md @@ -17,6 +17,13 @@ tools: - Skill - WebSearch - WebFetch + - mcp__jira__atlassianUserInfo + - mcp__jira__getTransitionsForJiraIssue + - mcp__jira__transitionJiraIssue + - mcp__jira__editJiraIssue + - mcp__jira__addCommentToJiraIssue + - mcp__github__create_pull_request + - mcp__github__get_pull_request --- You are the Developer for the AdaptiveRemote development team. @@ -81,4 +88,5 @@ Use the `Skill` tool to invoke your task-specific workflows: - `developer-implement` — implement a new feature or fix from a task brief - `developer-fix` — address build errors, test failures, or code review comments +- `developer-create-pr` — create a draft GitHub PR for completed work - `developer-patterns` — load architectural and design patterns diff --git a/.claude/agents/reviewer.md b/.claude/agents/reviewer.md index 6303301d..2d58f52d 100644 --- a/.claude/agents/reviewer.md +++ b/.claude/agents/reviewer.md @@ -10,6 +10,7 @@ tools: - Read - Glob - Grep + - Bash - Skill --- diff --git a/.claude/commands/developer-create-pr.md b/.claude/commands/developer-create-pr.md new file mode 100644 index 00000000..9c3547c0 --- /dev/null +++ b/.claude/commands/developer-create-pr.md @@ -0,0 +1,76 @@ +--- +description: Create a draft GitHub pull request for a completed work item. Determines the correct base branch, writes a developer-authored PR body, and creates the PR as a draft. Idempotent — does nothing if the PR already exists. +argument-hint: +--- + +## Inputs + +Work item ID: `$ARGUMENTS` + +### Task brief + +$TASK_BRIEF + +--- + +### Work summary (all prior implementation and fix rounds) + +$WORK_SUMMARIES + +--- + +### Existing PR URL (empty = not yet created) + +$PR_URL + +--- + +## Steps + +### 1 — Check if PR already exists + +If `$PR_URL` is non-empty, the PR has already been created. Output the following JSON and +stop: + +```json +{"pr_url": "$PR_URL"} +``` + +### 2 — Determine the base branch + +Using Bash: + +1. Run `git fetch --all --quiet` to ensure remote branches are up to date. +2. List candidate base branches in priority order: + - `main` + - Any remote `feature/*` branches: `git branch -r | grep "feature/" | sed "s|.*origin/||"` +3. For each candidate, count how many commits HEAD is ahead of it: + ```bash + git rev-list --count origin/..HEAD 2>/dev/null || echo 99999 + ``` +4. Select the candidate with the fewest commits (the closest ancestor to HEAD). If two + candidates tie, prefer `main`. If no candidate is reachable, fall back to `main`. + +### 3 — Create the draft PR + +Use the GitHub MCP tool `mcp__github__create_pull_request` with: + +- `draft: true` +- `base: ` +- `title: ": "` +- `body:` A well-structured description with these sections: + - **Work item:** `` with a one-sentence summary of what the task required + - **Changes:** A bullet list drawn from the work summaries — one bullet per logical change + (new file, modified interface, new test scenario, etc.) + - **Design decisions:** Any non-obvious choices made during implementation that a reviewer + needs context for (omit if there are none) + +The PR title and body are read by human reviewers — write them clearly and precisely. + +### 4 — Output + +Output the PR URL as the final JSON line: + +```json +{"pr_url": "https://github.com/..."} +``` diff --git a/.claude/commands/developer-fix.md b/.claude/commands/developer-fix.md index d51e89af..c3dc7fca 100644 --- a/.claude/commands/developer-fix.md +++ b/.claude/commands/developer-fix.md @@ -54,13 +54,18 @@ For each issue: Address issues one at a time. After each fix: -1. Build and test the affected code to confirm the fix works without introducing new failures: +1. Build and test only the project(s) you changed to confirm the fix works without + introducing new failures: ```bash dotnet build dotnet test ``` + **Scope:** Do **not** run `scripts/validate-build` or `scripts/validate-tests`. Those + are full pipeline validation scripts run by the orchestrator after this step — running + them here is redundant and slows the fix loop. + 2. Commit the fix immediately with a message describing the specific issue resolved: ```bash diff --git a/.claude/commands/reviewer-review.md b/.claude/commands/reviewer-review.md index a897eacb..5197b410 100644 --- a/.claude/commands/reviewer-review.md +++ b/.claude/commands/reviewer-review.md @@ -26,18 +26,11 @@ the implementation must do or must not do. You will check each one during review Read the relevant `_doc_*.md` architecture files for any subsystem touched by this change. Use the area→file table in `CLAUDE.md` to find them. -## Step 3 — Create the PR if needed +## Step 3 — Verify PR exists -If `$PR_URL` is empty: - -1. Determine the current branch name (`git branch --show-current` or read `.git/HEAD`) -2. Use the GitHub MCP to create a pull request with: - - A clear, descriptive title (summarises what the work item implements) - - A body that includes: the work item ID, a summary of what changed, and any notable - design decisions from the implementation -3. Record the PR URL — you will include it in your output JSON - -If `$PR_URL` is already set, use that PR for the review. +`$PR_URL` must be set before this skill runs — the pipeline creates the PR via +`developer-create-pr` before invoking the reviewer. If `$PR_URL` is empty, report an +error and stop. ## Step 4 — Retrieve and read the diff @@ -105,9 +98,9 @@ review consists of: - Explain what the problem is and why it matters (not just what to change) - For Priority 6 style issues, prefix with "nit:" to signal non-blocking -2. A **review submission** with event type: - - `APPROVE` — if no Priority 1–5 blocking issues were found - - `REQUEST_CHANGES` — if any Priority 1–5 issue was found +2. A **review submission** with event type `COMMENT`. Do not use `APPROVE` or + `REQUEST_CHANGES` — GitHub rejects those from the PR author's account, and the + developer and reviewer agents share the same GitHub account. The GitHub API for this is `POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews`. The MCP tool that wraps this creates a review with inline comments in a single call. diff --git a/.claude/commands/reviewer-sign-off.md b/.claude/commands/reviewer-sign-off.md index ddf83649..44fa1cee 100644 --- a/.claude/commands/reviewer-sign-off.md +++ b/.claude/commands/reviewer-sign-off.md @@ -60,10 +60,22 @@ files. Use the GitHub MCP to submit a **pull request review** (not a plain PR comment) via `POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews`. Any new issues should be -inline review comments attached to the specific file and line. Submit with event type: -- **APPROVE** if all prior threads are resolved (or resolved via this pass) and no new - Priority 1–5 issues were found in the modified files -- **REQUEST_CHANGES** otherwise +inline review comments attached to the specific file and line. Submit with event type +`COMMENT`. Do not use `APPROVE` or `REQUEST_CHANGES` — GitHub rejects those from the +PR author's account, and the developer and reviewer agents share the same GitHub account. + +## Step 6a — Hand off to human reviewer (approved only) + +If the review outcome is **approved**, do the following before writing the output summary: + +1. Convert the PR from draft to Ready for Review: + ```bash + gh pr ready $PR_URL + ``` +2. Call `mcp__jira__lookupJiraAccountId` with `$REVIEW_ASSIGNEE_EMAIL` to get the human reviewer's account ID. +3. Assign the Jira issue to that account with `mcp__jira__editJiraIssue`. +4. Call `mcp__github__add_pull_request_review_request` to request a review from `$REVIEW_ASSIGNEE_EMAIL` on `$PR_URL`. +5. Add a brief Jira comment with `mcp__jira__addCommentToJiraIssue`: "PR ready for human review — reviewer requested on GitHub." ## Step 7 — Output diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py index 70efcda2..667b8c07 100644 --- a/.claude/scripts/dev_team.py +++ b/.claude/scripts/dev_team.py @@ -171,18 +171,18 @@ def save(self, path: Path) -> None: ] if self.brief: - lines += ["", "## Researcher Brief", "", self.brief.strip()] + lines += ["", "", "", self.brief.strip()] if self.work_summaries: - lines += ["", "## Implementation Summary", "", self.work_summaries[0].strip()] + lines += ["", "", "", self.work_summaries[0].strip()] for i, summary in enumerate(self.work_summaries[1:], start=1): - lines += ["", f"## Fix {i}", "", summary.strip()] + lines += ["", f"", "", summary.strip()] if self.review_notes: - lines += ["", "## Review Notes", "", self.review_notes.strip()] + lines += ["", "", "", self.review_notes.strip()] if self.last_failure: - lines += ["", "## Last Failure", "", self.last_failure.strip()] + lines += ["", "", "", self.last_failure.strip()] log_links: list[str] = [] if self.build_log: @@ -190,7 +190,7 @@ def save(self, path: Path) -> None: if self.test_log: log_links.append(f"- Tests: {self.test_log}") if log_links: - lines += ["", "## Logs", ""] + log_links + lines += ["", "", ""] + log_links path.parent.mkdir(parents=True, exist_ok=True) path.write_text("\n".join(lines) + "\n", encoding="utf-8") @@ -236,16 +236,16 @@ def load(cls, path: Path) -> "PipelineContext": def _parse_sections(body: str) -> dict[str, str]: - """Split a markdown body into {heading: content} by '## ' headings.""" + """Split a markdown body into {heading: content} by '' sentinels.""" sections: dict[str, str] = {} current_heading: str | None = None current_lines: list[str] = [] for line in body.split("\n"): - if line.startswith("## "): + if line.startswith(""): if current_heading is not None: sections[current_heading] = "\n".join(current_lines).strip() - current_heading = line[3:].strip() + current_heading = line[len("")].strip() current_lines = [] elif current_heading is not None: current_lines.append(line) @@ -413,7 +413,33 @@ def run(self, ctx: PipelineContext) -> str: class ReviewStep(Step): handles = "reviewing" + def __init__(self, context_path: Path) -> None: + self._context_path = context_path + def run(self, ctx: PipelineContext) -> str: + if not ctx.pr_url: + print(f"Developer is creating PR for {ctx.work_item_id}...", flush=True) + try: + pr_output = call_agent( + "developer", "developer-create-pr", + substitutions={ + "$WORK_ITEM_ID": ctx.work_item_id, + "$PR_URL": ctx.pr_url, + "$TASK_BRIEF": ctx.brief, + "$WORK_SUMMARIES": _format_work_summaries(ctx.work_summaries), + }, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + print(f"Error invoking developer-create-pr agent:\n{e}", file=sys.stderr) + sys.exit(1) + pr_result = parse_json_output(pr_output) + if pr_result.get("pr_url"): + ctx.pr_url = pr_result["pr_url"] + ctx.save(self._context_path) + else: + print("Error: developer-create-pr did not return a pr_url.", file=sys.stderr) + sys.exit(1) + print(f"Reviewer is reviewing {ctx.work_item_id}...", flush=True) try: output = call_agent( @@ -560,7 +586,7 @@ def __init__(self, ctx: PipelineContext, context_path: Path, workflow: WorkflowD "validating": validate_step, "validating-pr": validate_step, "fixing": FixStep(), - "reviewing": ReviewStep(), + "reviewing": ReviewStep(context_path), "reviewing-signoff": SignOffStep(), "fixing-pr": FixPrStep(), } From 917edba5969f08d6353e2f765f9631dcf6d2b993 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 10:15:59 -0700 Subject: [PATCH 07/13] Address REVIEW comments in agent and skill definition files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _doc_*.md (17 files): Add a one-line Summary: to every architecture doc so agents can discover and triage docs via grep rather than a hardcoded index. - researcher-plan.md: Replace the hardcoded area→file table with a grep instruction that finds _doc_*.md files dynamically using Summary: lines. - researcher.md: Broaden the scope language beyond .NET-specific technologies to cover any domain the task touches (cloud, ML, backend, etc.). Add a TODO for the Microsoft Learn MCP tool names once they can be discovered. - developer-fix.md: Revamp the review comment workflow — developer fetches PR threads directly via GitHub MCP, always replies to each thread (agree or disagree), and posts a rationale rather than silently applying a disputed change. Add dotnet test --filter by class name to narrow per-fix test runs. - developer-implement.md: Add dotnet test --filter by class name to narrow per-layer test runs during TDD implementation. - researcher-validate.md: Add $TASK_BRIEF and $WORK_SUMMARIES template placeholders so the pipeline can inject context when calling the skill. - reviewer-review.md: Remove Priority 1 (requirements check) — this moves to researcher-validate in the signoff step. Renumber priorities 1–5. Update doc discovery to use grep instead of the CLAUDE.md table reference. - reviewer-sign-off.md: Add explicit thread resolution rules for the disagree case (accept rationale → resolve; reject → reply and leave unresolved). Base the approved/changes_requested decision on unresolved threads. Remove requirements from the new-issue scan. Remove redundant push step (pipeline now pushes before signoff runs). - implementation-pipeline.md: Consolidate validating-pr + reviewing-signoff into a single signoff state. Document that signoff runs reviewer-sign-off, researcher-validate, and script validation in parallel. - dev_team.py: Replace SignOffStep with SignoffStep that runs the three signoff tasks concurrently via ThreadPoolExecutor. Remove validating-pr from step_handlers. Push to remote before launching parallel tasks so the reviewer agent sees the latest commits on the PR. - delete old `team-task.md` skill and related subagent skills, since they are superseded by the new skills. Co-Authored-By: Claude Sonnet 4.6 --- .claude/agents/developer.md | 5 +- .claude/agents/researcher.md | 10 +- .claude/commands/developer-fix.md | 37 +- .claude/commands/developer-implement.md | 14 +- .claude/commands/developer-patterns.md | 8 - .claude/commands/implement.md | 213 -------- .claude/commands/researcher-plan.md | 25 +- .claude/commands/researcher-validate.md | 14 +- .claude/commands/reviewer-review.md | 27 +- .claude/commands/reviewer-sign-off.md | 33 +- .claude/commands/team-task.md | 464 ------------------ .claude/scripts/dev_team.py | 143 +++++- .claude/scripts/implementation-pipeline.md | 21 +- src/AdaptiveRemote.App/Components/_doc_UI.md | 2 + src/AdaptiveRemote.App/Mvvm/_doc_Mvvm.md | 2 + .../Services/Broadlink/_doc_Broadlink.md | 2 + .../Broadlink/_doc_BroadlinkProtocol.md | 2 + .../Services/Commands/_doc_Commands.md | 2 + .../Conversation/_doc_Conversation.md | 2 + .../Services/Lifecycle/_doc_Lifecycle.md | 2 + .../ModalMessages/_doc_ModalMessages.md | 2 + .../_doc_ProgrammaticSettings.md | 2 + .../Services/_doc_Services.md | 2 + .../_doc_Auth.md | 2 + .../_doc_HeadlessHost.md | 2 + src/_doc_BackendDevelopment.md | 2 + src/_doc_Projects.md | 2 + .../Logging/_doc_TestLogging.md | 6 +- .../_doc_SimulatedDevices.md | 2 + test/_doc_EndToEndTests.md | 2 + 30 files changed, 262 insertions(+), 790 deletions(-) delete mode 100644 .claude/commands/developer-patterns.md delete mode 100644 .claude/commands/implement.md delete mode 100644 .claude/commands/team-task.md diff --git a/.claude/agents/developer.md b/.claude/agents/developer.md index acf8cb39..9c059e9e 100644 --- a/.claude/agents/developer.md +++ b/.claude/agents/developer.md @@ -12,7 +12,6 @@ tools: - Edit - Write - Bash - - TodoWrite - Task - Skill - WebSearch @@ -34,8 +33,7 @@ Your job is to receive a task brief from the Researcher and produce working code modified source files, unit tests, and E2E tests. You implement exactly what the brief describes and nothing more. -You never plan, validate, or approve work — those belong to other agents. You never update -Jira or GitHub. Your deliverable is code and a work summary. +You never plan, validate, or approve work — those belong to other agents. Your deliverable is code and a work summary. ## Before writing any code @@ -89,4 +87,3 @@ Use the `Skill` tool to invoke your task-specific workflows: - `developer-implement` — implement a new feature or fix from a task brief - `developer-fix` — address build errors, test failures, or code review comments - `developer-create-pr` — create a draft GitHub PR for completed work -- `developer-patterns` — load architectural and design patterns diff --git a/.claude/agents/researcher.md b/.claude/agents/researcher.md index 4640fd08..b18b1cd6 100644 --- a/.claude/agents/researcher.md +++ b/.claude/agents/researcher.md @@ -12,6 +12,9 @@ tools: - WebSearch - WebFetch - Skill + # TODO: add mcp__microsoft-learn__* tool names from the "microsoft-learn" MCP server + # configured in .claude/settings.json. Run `claude mcp list-tools microsoft-learn` + # (or equivalent) to discover the available tool names and add them here. --- You are the Researcher for the AdaptiveRemote development team. @@ -35,12 +38,13 @@ Be exhaustive before you write anything: task key; also read surrounding design decisions that constrain it. - Read the existing source files and interfaces the task will interact with. - When the local docs don't cover a question, use `WebSearch`, `WebFetch`, or the Microsoft - Learn MCP tools to look up .NET, C#, Blazor, MAUI, or ASP.NET best practices. + Learn MCP tools to look up best practices relevant to the task — .NET, C#, Blazor, MAUI, + ASP.NET, backend services, cloud APIs, ML, or any other relevant domain. ## Output posture -Return only what the caller needs. Do not relay raw file contents, quote large doc sections, -or repeat information the caller already has. Synthesize — draw conclusions, resolve tensions, +Return only what the other agents will need to implement, test, and review work. Do not relay raw file contents, quote large doc sections, +or repeat information the agents already have. Synthesize — draw conclusions, resolve tensions, surface the non-obvious. Every claim you make must be grounded in what you read. Cite file paths for source-derived diff --git a/.claude/commands/developer-fix.md b/.claude/commands/developer-fix.md index c3dc7fca..fb557bb1 100644 --- a/.claude/commands/developer-fix.md +++ b/.claude/commands/developer-fix.md @@ -29,7 +29,7 @@ $ISSUES ### 1 — Load standards -Invoke the `developer-patterns` skill (loads all guidelines from `CONTRIBUTING.md`). +Read `CONTRIBUTING.md` for code guidelines. Read `CLAUDE.md` for quality gates and operational conventions. ### 2 — Understand context @@ -37,7 +37,16 @@ Read `CLAUDE.md` for quality gates and operational conventions. Read the original task brief and work summary to understand what was built and why. Then read each issue to be fixed. -### 3 — Triage +### 3 — Fetch review comment threads from the PR + +If `$ISSUES` contains code review comments (i.e., it references a PR URL), fetch the open +review comment threads directly from the PR using the GitHub MCP rather than relying solely +on the summary in `$ISSUES`. This ensures you see all comments including those from human +reviewers and GitHub Copilot, not only those the orchestrator relayed. + +Use `$ISSUES` for non-review-comment issues (build errors, test failures) as provided. + +### 4 — Triage For each issue: @@ -46,11 +55,16 @@ For each issue: - **Test failure:** before fixing the production code, confirm whether the test itself is correct. If the test is wrong, fix the test and explain why in the report. If the test is right, write or verify a failing unit test that isolates the defect, then fix the code. -- **Code review comment:** read the comment, understand the intent, and apply the change. - If you disagree with the comment, note it in the report and apply the change anyway unless - it would introduce a correctness problem. +- **Code review comment:** read the comment and understand the intent. + - **Agree:** apply the change. + - **Disagree:** post your rationale as a reply to the PR review thread. Do NOT apply the + change. The Reviewer will see your response during sign-off and decide whether to accept + the rationale or push back. Only apply the change if the Reviewer pushes back in a + subsequent round. + - In either case, always post a reply to the PR review thread explaining what was done + (or why nothing was done). This keeps all reviewers — agent and human — informed. -### 4 — Fix each issue +### 5 — Fix each issue Address issues one at a time. After each fix: @@ -59,9 +73,12 @@ Address issues one at a time. After each fix: ```bash dotnet build - dotnet test + dotnet test --filter "FullyQualifiedName~" ``` + Where `` is the name of the class you modified. If the filter matches zero + tests (no dedicated test class yet), run the full project test instead without `--filter`. + **Scope:** Do **not** run `scripts/validate-build` or `scripts/validate-tests`. Those are full pipeline validation scripts run by the orchestrator after this step — running them here is redundant and slows the fix loop. @@ -76,13 +93,13 @@ Address issues one at a time. After each fix: One commit per issue keeps the git history readable and makes individual fixes easy to review. Do not batch multiple fixes into a single commit. -Do not push — the pipeline pushes after all fixes pass full validation. +**Do not push** — the pipeline pushes after all fixes pass full validation. -### 5 — Self-review +### 6 — Self-review Review the diff for unintended scope, missed issues, and convention violations. -### 6 — Report +### 7 — Report Return a fix summary as structured prose: for each issue, one sentence describing what was changed and why. diff --git a/.claude/commands/developer-implement.md b/.claude/commands/developer-implement.md index 973587f1..b83b64d0 100644 --- a/.claude/commands/developer-implement.md +++ b/.claude/commands/developer-implement.md @@ -17,7 +17,7 @@ $TASK_BRIEF ### 1 — Load standards -Invoke the `developer-patterns` skill (loads all guidelines from `CONTRIBUTING.md`). +Read `CONTRIBUTING.md` for code guidelines. Read `CLAUDE.md` for quality gates and operational conventions. ### 2 — Understand the task @@ -35,7 +35,7 @@ in your work summary and resolve it conservatively. Write Gherkin scenarios that cover the exit criteria before writing any implementation code. Use existing steps whenever possible. When new steps are needed, follow the conventions in -`developer-patterns`: +`CONTRIBUTING.md`: - Generalized, human-readable `When` / `Then` / `Given` phrasing - Step definitions delegate logic to test service methods @@ -58,9 +58,13 @@ After each layer, build and test only the code you have modified: ```bash dotnet build -dotnet test +dotnet test --filter "FullyQualifiedName~" ``` +Where `` is the name of the class you just implemented. If the filter matches +zero tests (e.g., at the very start before any test classes exist), run the full project +test without `--filter`. + Fix any build errors or new test failures before moving to the next layer. ### 6 — Commit @@ -74,7 +78,7 @@ git commit -m "$ARGUMENTS: " The message body (optional) can list the key decisions if they are non-obvious. -Do not push — the pipeline pushes after validation passes. +**Do not push** — the pipeline pushes after validation passes. ### 7 — Self-review @@ -82,7 +86,7 @@ Review the diff as if you were doing a code review: - Does every exit criterion have demonstrable coverage (code + test)? - Are there missing test cases (branches, error paths, invalid inputs)? -- Do all files follow CLAUDE.md naming, structure, and logging conventions? +- Do all files follow CONTRIBUTING.md naming, structure, and logging conventions? - Is there any scope creep — changes not required by the brief? ### 8 — Report diff --git a/.claude/commands/developer-patterns.md b/.claude/commands/developer-patterns.md deleted file mode 100644 index 108d03fc..00000000 --- a/.claude/commands/developer-patterns.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -description: Load code guidelines and architectural patterns before writing any code. Redirects to CONTRIBUTING.md, which is the single source of truth for all coding conventions, async design, testable state, E2E test authoring, and project layout. ---- - -Read `CONTRIBUTING.md` for all code guidelines and architectural patterns: testing -conventions, logging rules, async design (fetch-first, `CancellationToken`), testable -state design (Data objects / ViewModels, controller services), E2E / API test patterns -(Gherkin, `WaitHelpers`, step-definition rules), and project layout guidance. diff --git a/.claude/commands/implement.md b/.claude/commands/implement.md deleted file mode 100644 index c505981c..00000000 --- a/.claude/commands/implement.md +++ /dev/null @@ -1,213 +0,0 @@ ---- -description: Implement a Jira task end-to-end: branch, implement from spec, build/test, code review, commit, PR, and iterative review cycle -argument-hint: ---- - -## Task to implement - -Jira Task $ARGUMENTS - -## Available spec files - -!`find . -name "_spec_*.md" -not -path "./.git/*" | sort` - -## Available architecture docs - -!`find . -name "_doc_*.md" -not -path "./.git/*" | sort` - -## Workflow - -Follow the phases below in order. Each phase has explicit pause points — do not skip ahead without user confirmation. - ---- - -### Phase 0 — Plan mode check - -Check whether plan mode is currently active in your context. - -**If plan mode IS active:** Phases 0–4 are read-only planning steps. Complete them, then call -`ExitPlanMode` to present the implementation plan for user approval. Proceed with Phases 5–11 -only after the user approves. - -**If plan mode is NOT active:** Tell the user: - -> Plan mode is not active. This skill is designed to be invoked in plan mode so you can review -> the implementation plan before any code is written. Do you want to continue without plan mode? - -**PAUSE — wait for the user's answer before continuing.** - ---- - -### Phase 1 — Find the spec file - -Search the repository for a `_spec_*.md` file that references the task key: - -``` -grep -rl "TASK_KEY" . --include="_spec_*.md" --exclude-dir=.git -``` - -(substitute the actual task key from `$ARGUMENTS`) - -**If no file is found:** Stop and tell the user: - -> No `_spec_*.md` file was found containing `TASK_KEY`. Check that the task key is correct -> and that a spec file exists for this task before continuing. - -**PAUSE — do not fall back to keyword search.** A missing spec almost certainly means the -task key or working branch is wrong. - -**If a file is found:** Read it fully before continuing. - ---- - -### Phase 2 — Read architecture docs - -Read the `_doc_*.md` files relevant to the areas the spec touches. At minimum read -`src/_doc_Projects.md`. Read any others that apply to the feature area. - ---- - -### Phase 3 — Create or switch to branch - -Derive a branch slug (5–6 words, kebab-case) from the task title listed in the spec. - -Branch name format: `dev/claude/TASK_KEY-slug` -Example: `dev/claude/ADR-123-programmable-commands` - -Check whether the branch already exists: - -``` -git branch --list "dev/claude/TASK_KEY*" -``` - -- **If it exists:** switch to it with `git checkout`. -- **If it does not exist:** create it from the working branch: - ``` - git fetch origin WORKING_BRANCH - git checkout -b dev/claude/TASK_KEY-slug WORKING_BRANCH - ``` - -**Optional Jira status update:** Attempt to set the Jira issue status to "In Progress" using -`mcp__jira__editJiraIssue`. Skip silently if the tool is unavailable or the transition fails. - ---- - -### Phase 4 — Plan the implementation - -Produce a concrete implementation plan: - -1. List every requirement from the spec that needs new or changed code. -2. Identify existing utilities, interfaces, and patterns to reuse — prefer reuse over - writing new code. -3. List each file to be created or modified, with a brief description of the change. -4. Note any test coverage needed: unit tests for new logic, E2E scenarios for user-visible - behaviour. - -If the spec contains gaps that would require guessing to implement (missing decisions, -unspecified error cases, unclear interfaces), list them all and ask the user to resolve them. - -**PAUSE if you have questions — wait for the user's answers before proceeding.** - -If plan mode is active, call `ExitPlanMode` here to present the plan for approval. - ---- - -### Phase 5 — Implement - -Follow the plan and spec. Apply conventions from `CLAUDE.md`: - -- Log messages must be defined as `[LoggerMessage]` source-generated methods in - `MessageLogger.cs`. Never call `logger.LogXxx()` directly. -- Assign new log message IDs from the next unused ID in the appropriate subsystem range. -- Never introduce accessibility regressions (priority: vision > speech > eye-gaze > keyboard). -- Update any affected `_doc_*.md` files. - ---- - -### Phase 6 — Quality check (first pass) - -Run all Linux-compatible quality gates. The following projects target Windows and cannot run -on Linux — do not attempt them: `Speech.Tests`, `Host.Wpf`, `Host.Console`. - -``` -dotnet build /warnaserror -dotnet test test/AdaptiveRemote.App.Tests/AdaptiveRemote.App.Tests.csproj -dotnet test test/AdaptiveRemote.EndToEndTests.Host.Headless/AdaptiveRemote.EndToEndTests.Host.Headless.csproj -``` - -Fix every warning, error, and test failure before continuing. Repeat until all three -commands pass cleanly. - ---- - -### Phase 7 — Code review - -Invoke `/simplify` to review the changed code for reuse, quality, and efficiency. Address -all findings. Then invoke `/security-review` to check for security issues. Address all -findings. - ---- - -### Phase 8 — Quality check (second pass) - -Re-run all three commands from Phase 6. All must pass before continuing. - ---- - -### Phase 9 — Commit and push - -Self-review the diff before committing — does every change belong? Is anything missing? - -Commit message format: `: [TASK_KEY]` -Example: `feat: add programmable command scheduling [ADR-123]` - -Include a brief "why" in the commit body if the change is non-trivial. - -Push the branch: - -``` -git push -u origin dev/claude/TASK_KEY-slug -``` - ---- - -### Phase 10 — Create PR and request reviews - -Create the pull request using `mcp__github__create_pull_request`: - -- **Title:** `[TASK_KEY] ` -- **Body:** - - Link to the Jira task: `https://jodasoft.atlassian.net/browse/TASK_KEY` - - 3–5 bullet summary of what changed and why - - Test plan: what to verify manually - -Then: - -1. Request Copilot review: `mcp__github__request_copilot_review` -2. Subscribe to PR activity: `mcp__github__subscribe_pr_activity` -3. **Optional Jira status update:** Attempt `mcp__jira__editJiraIssue` to set status to - "In Review". Skip silently if unavailable. - -Tell the user: - -> PR created at ``. I've requested a Copilot review. Please review when ready — -> I'll watch for comments. - -**PAUSE — wait for review activity (`` events) before continuing.** - ---- - -### Phase 11 — Review cycle - -When review activity arrives: - -1. Read all comments carefully. -2. Address every comment: - - Implement the requested change, OR - - Explain clearly why the comment doesn't apply or is incorrect — but only when - genuinely wrong, not to avoid work. -3. Re-run all three quality gate commands from Phase 6. -4. Commit and push the fixes with a clear message referencing the review round. -5. Reply to each addressed comment confirming what was done. - -Repeat Phase 11 until the PR is approved or the user explicitly signs off. diff --git a/.claude/commands/researcher-plan.md b/.claude/commands/researcher-plan.md index a8dcf50a..bc9acc58 100644 --- a/.claude/commands/researcher-plan.md +++ b/.claude/commands/researcher-plan.md @@ -30,20 +30,14 @@ If no section is found for the task key, stop and tell the caller: Identify which subsystems and areas this task touches based on the spec section and its surrounding design decisions. Read the relevant `_doc_*.md` files for those areas. -Always read `src/_doc_Projects.md`. Read additional docs as needed: - -| Area | File | -|------|------| -| Services architecture | `src/AdaptiveRemote.App/Services/_doc_Services.md` | -| Lifecycle | `src/AdaptiveRemote.App/Services/Lifecycle/_doc_Lifecycle.md` | -| Commands | `src/AdaptiveRemote.App/Services/Commands/_doc_Commands.md` | -| Broadlink / IR | `src/AdaptiveRemote.App/Services/Broadlink/_doc_Broadlink.md` | -| Conversation | `src/AdaptiveRemote.App/Services/Conversation/_doc_Conversation.md` | -| MVVM | `src/AdaptiveRemote.App/Mvvm/_doc_Mvvm.md` | -| UI components | `src/AdaptiveRemote.App/Components/_doc_UI.md` | -| E2E tests | `test/_doc_EndToEndTests.md` | -| Simulated devices | `test/AdaptiveRemote.EndToEndTests.TestServices/_doc_SimulatedDevices.md` | -| Backend | `src/_doc_BackendDevelopment.md` | +Always read `src/_doc_Projects.md`. To discover all other architecture docs, run: + +```bash +grep -rl "^Summary:" src test --include="_doc_*.md" +``` + +Read the `Summary:` line of each result to decide which docs are relevant to the task, +then read the relevant ones in full. ### 3 — Read relevant source @@ -55,7 +49,8 @@ to reuse, patterns established in adjacent code. If the task touches a framework or pattern not covered by local docs, use `WebSearch`, `WebFetch`, or Microsoft Learn to look up best practices. Common areas: .NET DI patterns, -Blazor component lifecycle, MAUI platform-specific code, ASP.NET minimal API conventions. +Blazor component lifecycle, MAUI platform-specific code, ASP.NET minimal API conventions, +Machine Learning, Cloud services. ### 5 — Write the task brief diff --git a/.claude/commands/researcher-validate.md b/.claude/commands/researcher-validate.md index 1de16ed1..7cc68b45 100644 --- a/.claude/commands/researcher-validate.md +++ b/.claude/commands/researcher-validate.md @@ -8,11 +8,17 @@ argument-hint: Task key: the first token of `$ARGUMENTS` Spec file: the second token of `$ARGUMENTS` -The caller must also include in the prompt: +**Original task brief:** -- **Original task brief** — the full prose brief produced by `researcher-plan` -- **Summary of work done** — what the Developer implemented (changed files, key decisions made) -- **Links to new tests** — paths or test names for new unit tests and E2E scenarios +$TASK_BRIEF + +--- + +**Summary of work done:** + +$WORK_SUMMARIES + +--- All of these are required. If any are missing, stop and tell the caller what is needed. diff --git a/.claude/commands/reviewer-review.md b/.claude/commands/reviewer-review.md index 5197b410..07907f26 100644 --- a/.claude/commands/reviewer-review.md +++ b/.claude/commands/reviewer-review.md @@ -24,7 +24,8 @@ Re-read the task brief above and extract the exit criteria — the explicit list the implementation must do or must not do. You will check each one during review. Read the relevant `_doc_*.md` architecture files for any subsystem touched by this change. -Use the area→file table in `CLAUDE.md` to find them. +Use `grep -rl "^Summary:" src test --include="_doc_*.md"` to discover available docs, then +read the `Summary:` line of each match to find the relevant ones. ## Step 3 — Verify PR exists @@ -42,29 +43,22 @@ complete context of each change. Evaluate the diff against each dimension below, in priority order. For each issue you find, note the file, line number, and a clear description of the problem. -### Priority 1 — Requirements +### Priority 1 — Correctness and fault tolerance -Check each exit criterion from the task brief: -- Is it implemented? -- Is it implemented correctly (not just partially)? -- Are there edge cases the brief implied but the implementation misses? - -### Priority 2 — Correctness and fault tolerance - -- Are all exception paths handled? No swallowed exceptions, no empty `catch` blocks. +- Are all exception paths handled? No swallowed exceptions, no empty `catch` blocks (unless there's a comment with a good justification). - Are `CancellationToken` parameters present in every async method signature? No default values — callers must pass explicitly. - Are there blocking calls (`.Result`, `.Wait()`, `Thread.Sleep`) on async code paths? - Does error handling propagate faithfully, or does it silently discard failures? -### Priority 3 — Security +### Priority 2 — Security - Is user input validated at system boundaries? - Are there SQL injection, command injection, or path traversal risks? - Is sensitive data (tokens, passwords, PII) logged or returned in error messages? - Are authentication/authorization checks present where the architecture requires them? -### Priority 4 — Performance +### Priority 3 — Performance - Are there N+1 query patterns (fetching inside a loop that could be batched)? - Is there synchronous I/O on async code paths? @@ -73,13 +67,14 @@ Check each exit criterion from the task brief: - Are async-backed data fetches happening up front (fetch-first pattern) rather than scattered through processing logic? -### Priority 5 — Documentation +### Priority 4 — Documentation - Does new code conform to the design described in the relevant `_doc_*.md` files? - If the implementation changed the design (new interface, changed responsibility, new dependency), has the relevant `_doc_*.md` been updated? +- Have new `_doc_*.md` files beed added where necessary? -### Priority 6 — Code style (note, do not block) +### Priority 5 — Code style (note, do not block) - Do naming conventions follow CONTRIBUTING.md (`ClassName_Method_Scenario_ExpectedResult` for tests, etc.)? @@ -110,7 +105,7 @@ Do NOT use the regular PR comment endpoint (`POST /repos/{owner}/{repo}/issues/{ ## Step 7 — Output Write a concise plain-text summary of all issues found (one bullet per issue, Priority -1–5 first, style issues last). This summary will be passed to the developer so they can +1–4 first, style issues last). This summary will be passed to the developer so they can address each point without re-reading the full PR thread. Then output the JSON result as the final line: @@ -119,5 +114,5 @@ Then output the JSON result as the final line: {"status": "approved|changes_requested", "pr_url": "https://github.com/..."} ``` -Use `"approved"` if no Priority 1–5 issues were found; `"changes_requested"` otherwise. +Use `"approved"` if no Priority 1–4 issues were found; `"changes_requested"` otherwise. Always include the PR URL even when approving. diff --git a/.claude/commands/reviewer-sign-off.md b/.claude/commands/reviewer-sign-off.md index 44fa1cee..03074f55 100644 --- a/.claude/commands/reviewer-sign-off.md +++ b/.claude/commands/reviewer-sign-off.md @@ -15,11 +15,10 @@ $TASK_BRIEF Read `CONTRIBUTING.md` in full. These are the standards against which you are reviewing. -## Step 2 — Push latest changes to remote +## Step 2 — Note recent changes -The developer has made fixes since the last review. Ensure the latest commits are visible -on the remote before reviewing. Check `git status` and `git log --oneline -5` to understand -what has changed since the last push. +The pipeline pushes the latest commits before invoking this sign-off. Check +`git log --oneline -5` to understand what has changed since the previous review pass. ## Step 3 — Retrieve prior review threads @@ -33,15 +32,19 @@ unresolved thread, note: For each unresolved review comment: -1. Read the relevant section of the latest code -2. Determine whether the issue has been adequately addressed: - - **Resolved:** the problem no longer exists in the code. Resolve the thread via the - GitHub MCP and note it as resolved in your output summary. +1. Read the relevant section of the latest code and any developer replies in the thread. +2. Determine the outcome: + - **Addressed satisfactorily:** the problem no longer exists in the code. Resolve the + thread via the GitHub MCP. + - **Developer disagreed (posted rationale):** read the developer's rationale. + - **Accept the rationale:** add a reply acknowledging it and resolve the thread. + - **Reject the rationale:** add a reply restating the requirement and explaining why + the rationale doesn't address the concern. Leave the thread unresolved. - **Partially addressed:** the developer made a change but the underlying problem remains or a different instance was missed. Add a follow-up comment explaining what - still needs to be done. - - **Not addressed:** the code is unchanged. Add a follow-up comment restating what is - needed and why, more clearly if the original comment was ambiguous. + still needs to be done. Leave the thread unresolved. + - **Not addressed:** the code is unchanged and no rationale was posted. Add a follow-up + comment restating what is needed and why. Leave the thread unresolved. ## Step 5 — Scan modified files for new issues @@ -50,8 +53,8 @@ log). Scan **only those files** for new issues introduced by the developer's fix re-review unmodified code. Apply the same priority order as the first-pass review: -1. Requirements, 2. Correctness/fault tolerance, 3. Security, 4. Performance, -5. Documentation, 6. Code style (note only) +1. Correctness/fault tolerance, 2. Security, 3. Performance, +4. Documentation, 5. Code style (note only) Post new inline review comments for any new Priority 1–5 issues found in the modified files. @@ -64,6 +67,10 @@ inline review comments attached to the specific file and line. Submit with event `COMMENT`. Do not use `APPROVE` or `REQUEST_CHANGES` — GitHub rejects those from the PR author's account, and the developer and reviewer agents share the same GitHub account. +**Sign-off decision:** Set `approved` if all review threads are resolved (no unresolved +threads remain) AND no new Priority 1–4 issues were found in the modified files. +Set `changes_requested` if any threads remain unresolved or if new blocking issues were found. + ## Step 6a — Hand off to human reviewer (approved only) If the review outcome is **approved**, do the following before writing the output summary: diff --git a/.claude/commands/team-task.md b/.claude/commands/team-task.md deleted file mode 100644 index 65bbfb68..00000000 --- a/.claude/commands/team-task.md +++ /dev/null @@ -1,464 +0,0 @@ ---- -description: Implement a Jira task autonomously using a team of specialized sub-agents (Researcher, Developer, Tester, Reviewer) with build, test, and review cycles -argument-hint: ---- - -## Task to implement - -Jira Task: $ARGUMENTS - -## Available spec files - -!`find . -name "_spec_*.md" -not -path "./.git/*" | sort` - -## Your role - -You are the **orchestrating agent**. You coordinate four specialized sub-agents to implement -this task from spec to merged PR. Keep your own context lean: - -- Accept only minimal, structured outputs from sub-agents -- Direct sub-agents to read from and write to GitHub PR and Jira directly — do not relay - large payloads through yourself - -**Before starting**, capture the current working branch — this is the **base branch** -(PR target, Developer branches off this): - -``` -git branch --show-current -``` - ---- - -## Structured output schemas - -Two agents return JSON. Validate their output matches before routing. - -**Tester output** (array, empty = all pass): -```json -[ - { - "test": "ClassName_Method_Scenario", - "failure_type": "assertion | exception | timeout | other", - "root_cause": "one-sentence diagnosis" - } -] -``` - -**Reviewer output** (array, empty = clean): -```json -[ - { - "file": "relative/path/to/File.cs", - "line": 42, - "comment": "description of the issue" - } -] -``` - ---- - -## Workflow - -Follow the phases in order. The skill runs autonomously except at the three explicit -**PAUSE** points: spec not found, Developer stuck on ambiguity, or repeated identical -test failures. - ---- - -### Phase 1 — Researcher (Haiku) - -Spawn a Researcher agent with `model: haiku`. Pass it: - -- The task key (`$ARGUMENTS`) -- The list of spec file paths from the "Available spec files" section above - -Researcher instructions: - -> You are the Researcher for a software implementation team. Produce a concise task brief -> for the Developer — focused only on what is needed for this specific task, not the whole spec. -> -> **Task key:** TASK_KEY -> -> **Spec files:** -> (paste spec file paths) -> -> **Steps:** -> 1. Grep each spec file for TASK_KEY. If no file matches, output exactly: -> `{ "status": "not_found" }` and stop. -> 2. Read the matching spec file in full. Find the section for TASK_KEY. -> 3. Read all `_doc_*.md` files relevant to the areas this task touches. At minimum read -> `src/_doc_Projects.md`. -> 4. Read relevant source files in the areas the task will modify. -> 5. Write a **task brief** as structured prose covering: -> - Task title and one-sentence description -> - Exit criteria (checklist) -> - Key design decisions for this task -> - Files and interfaces to create or modify; existing patterns and utilities to reuse -> - Conventions: all log messages via `[LoggerMessage]` source-gen in `MessageLogger.cs`; -> never call `logger.LogXxx()` directly; no accessibility regressions (priority: vision > -> speech > eye-gaze > keyboard); follow `.editorconfig` formatting -> - Known ambiguities — questions the Developer may need answered -> -> Do not return the full spec or doc content — only the information needed for this task. -> Return the task brief as plain prose. The spec file contents, docs, and source files stay -> inside you; do not include them in your output. - -**If Researcher returns `{ "status": "not_found" }`**, tell the user: - -> No `_spec_*.md` file was found containing `TASK_KEY`. Check that the task key is correct -> and that you are on the right branch before continuing. - -**PAUSE — wait for the user before continuing.** - ---- - -### Phase 2 — Developer, first pass (Sonnet) - -Spawn a Developer agent with `model: claude-sonnet-4-6`. Pass it: - -- The task brief (full text from Researcher) -- Task key, base branch - -Developer instructions: - -> You are the Developer. Implement the task described in the task brief below. -> -> **Task key:** TASK_KEY -> **Base branch:** BASE_BRANCH -> -> **Task brief:** -> TASK_BRIEF -> -> --- -> -> **Step 1 — Branch setup** -> -> Derive a 5–6 word kebab-case slug from the task title. -> Branch name: `dev/claude/TASK_KEY-slug` -> -> Check if it exists: `git branch --list "dev/claude/TASK_KEY*"` -> - If yes: `git checkout` to it. -> - If no: `git checkout -b dev/claude/TASK_KEY-slug BASE_BRANCH` -> -> Optional (silent fail): set Jira status to "In Progress" via `mcp__jira__editJiraIssue`. -> -> **Step 2 — Clarification check** -> -> Do not read the spec file directly — the task brief is your complete source of truth for -> requirements. If something is missing, use the clarification signal below; do not go to -> the spec yourself. -> -> If the task brief contains ambiguities you cannot resolve by reading the existing code, -> return exactly: -> ```json -> { "status": "needs_clarification", "questions": ["question 1", "question 2"] } -> ``` -> and stop. Otherwise continue to Step 3. -> -> **Step 3 — Implement** -> -> Follow all CLAUDE.md conventions: -> - All log messages via `[LoggerMessage]` source-gen methods in `MessageLogger.cs`. -> Never call `logger.LogXxx()` directly. -> - Assign new log message IDs from the next unused ID in the appropriate subsystem range. -> - No accessibility regressions (priority: vision > speech > eye-gaze > keyboard). -> - Follow `.editorconfig` formatting conventions — write code correctly first rather than -> relying on the build to catch it. -> - Update any affected `_doc_*.md` files. -> -> **Step 4 — Build** -> -> Do not run tests — that is the Tester agent's responsibility. Only run the build: -> -> ``` -> scripts/validate-build.sh -> ``` -> -> The script stages new files and cleans before building — do not run `git add -A` -> separately. Fix all warnings and errors and re-run until the build is clean. -> -> **Step 5 — Commit and push** -> -> Commit format: `feat: description [TASK_KEY]` -> Include a brief "why" in the commit body if the change is non-trivial. -> -> ``` -> git push -u origin BRANCH_NAME -> ``` -> -> Return: `{ "status": "done", "branch": "dev/claude/TASK_KEY-slug" }` - -**If Developer returns `{ "status": "needs_clarification" }`:** - -Re-spawn the Researcher (Haiku) with the original task brief plus the questions. Researcher -re-reads source and spec to answer. If it cannot answer definitively, it should return: -`{ "status": "cannot_answer", "questions": [...] }`. - -If Researcher cannot answer: **PAUSE — ask the user the outstanding questions.** - -Once answers are received, re-spawn Developer with the task brief plus the answers appended. - ---- - -### Phase 3 — Tester (Haiku) - -Spawn a Tester agent with `model: haiku`. Pass it the branch name. - -Tester instructions: - -> You are the Tester. Run the full test suite on the given branch and report any failures. -> -> **Branch:** BRANCH_NAME -> -> ``` -> git checkout BRANCH_NAME -> scripts/validate-tests.sh -> ``` -> -> For each failing test, investigate the root cause by reading the relevant test file and -> the source it tests. Do not just surface the error message — explain *why* the test fails. -> You may re-run individual tests to gather more information. -> -> Return a JSON array matching this schema exactly. Return only the JSON — no other text. -> An empty array means all tests pass. -> -> ```json -> [ -> { -> "test": "ClassName_Method_Scenario", -> "failure_type": "assertion | exception | timeout | other", -> "root_cause": "one-sentence diagnosis" -> } -> ] -> ``` - -**Track failures across Tester runs.** If the identical set of failing tests appears in -3 consecutive runs without any change, the Developer is stuck: - -**PAUSE — report the repeated failures to the user and wait for guidance.** - -**If failures:** re-spawn Developer (Sonnet) with: - -> You are the Developer. Fix the failing tests described below. -> -> **Task key:** TASK_KEY -> **Branch:** BRANCH_NAME -> **Task brief:** TASK_BRIEF -> -> **Failing tests:** -> FAILURE_JSON -> -> Check out the branch and fix each failure. You may re-run individual tests to -> verify a specific fix (e.g. `dotnet test --filter "FullyQualifiedName~TEST_NAME"`), -> but do not run the full suite — that is the Tester agent's job. When done, confirm -> the build is still clean, then commit and push: -> -> ``` -> git checkout BRANCH_NAME -> scripts/validate-build.sh -> git commit -m "fix: address test failures [TASK_KEY]" -> git push -> ``` -> -> Return: `{ "status": "done", "branch": "BRANCH_NAME" }` - -Re-run Tester. Repeat until no failures. - -**If no failures:** proceed to Phase 4. - ---- - -### Phase 4 — Create PR - -Create the pull request. Substitute actual values for all placeholders: - -``` -gh pr create \ - --base BASE_BRANCH \ - --head BRANCH_NAME \ - --title "[TASK_KEY] " \ - --body "$(cat <<'EOF' -Jira: https://jodasoft.atlassian.net/browse/TASK_KEY - -## What changed -- bullet 1 -- bullet 2 -- bullet 3 - -## Test plan -`scripts/validate-tests` passes — unit tests and headless E2E tests. - -🤖 Generated with [Claude Code](https://claude.com/claude-code) -EOF -)" -``` - -Capture the PR URL from the output. - -Optional (silent fail): `mcp__github__request_copilot_review` -Optional (silent fail): set Jira status to "In Review" via `mcp__jira__editJiraIssue` - ---- - -### Phase 5 — Reviewer, first pass (Sonnet) - -Before spawning the Reviewer, capture the current commit SHA — this is the **reviewer baseline** -used by all subsequent scoped Reviewer passes: - -``` -git rev-parse HEAD -``` - -Store this as `REVIEWER_BASELINE`. - -Spawn a Reviewer agent with `model: claude-sonnet-4-6`. Pass it the PR URL, branch, and base branch. - -Reviewer instructions: - -> You are the Reviewer. Perform a thorough code review on the changes in this PR. -> -> **PR URL:** PR_URL -> **Branch:** BRANCH_NAME -> **Base branch:** BASE_BRANCH -> -> ``` -> git checkout BRANCH_NAME -> git diff BASE_BRANCH..HEAD -> ``` -> -> Review for: -> - Correctness and completeness against the task's exit criteria (read the PR description -> for context on what was intended) -> - CLAUDE.md conventions: `[LoggerMessage]` source-gen only in `MessageLogger.cs`, no -> direct `logger.LogXxx()` calls, correct log event ID ranges -> - No accessibility regressions (vision > speech > eye-gaze > keyboard) -> - Code quality: simplicity, no premature abstractions, no unnecessary error handling or -> validation, no stale or redundant comments -> - Test coverage: new logic should have unit tests; user-visible behaviour should have -> headless E2E coverage -> - `.editorconfig` compliance -> -> For each issue found, post a comment **directly to the PR**: -> ``` -> gh pr review PR_URL --comment -b "path/to/File.cs:LINE — your comment" -> ``` -> -> After posting all comments, return a JSON array. Return only the JSON — no other text. -> An empty array means no issues. -> -> ```json -> [ -> { -> "file": "relative/path/to/File.cs", -> "line": 42, -> "comment": "description of the issue" -> } -> ] -> ``` - -**If Reviewer returns an empty array:** the cycle is complete — go to the Completion step. - -**If Reviewer returns comments:** proceed to Phase 6. - ---- - -### Phase 6 — Developer, review pass (Sonnet) - -Spawn a Developer agent with `model: claude-sonnet-4-6`. Pass it the PR URL, branch, and task brief. - -Developer instructions: - -> You are the Developer. Address the open review comments on this PR. -> -> **Task key:** TASK_KEY -> **Branch:** BRANCH_NAME -> **PR URL:** PR_URL -> -> **Task brief:** -> TASK_BRIEF -> -> **Step 1** — Check out the branch: -> ``` -> git checkout BRANCH_NAME -> ``` -> -> **Step 2** — Read all PR comments: -> ``` -> gh pr view PR_URL --comments -> ``` -> -> **Step 3** — For each comment, either: -> a) Implement the fix, OR -> b) If the comment is factually incorrect or the fix is genuinely not appropriate, post a -> rebuttal reply directly to the PR (explain the reasoning concisely): -> ``` -> gh pr review PR_URL --comment -b "File.cs:LINE — [your rebuttal]" -> ``` -> -> **Step 4** — Build, commit, and push: -> ``` -> scripts/validate-build.sh -> git commit -m "review: address feedback [TASK_KEY]" -> git push -> ``` -> -> Return: `{ "status": "done", "branch": "BRANCH_NAME" }` - -After Developer finishes, update `REVIEWER_BASELINE` to the current HEAD commit: - -``` -git rev-parse HEAD -``` - -Then spawn **Tester and scoped Reviewer in parallel** and wait for both. - -**Tester** (Haiku) — same instructions as Phase 3. - -**Scoped Reviewer** (Sonnet) instructions: - -> You are the Reviewer performing a follow-up review. -> -> **PR URL:** PR_URL -> **Branch:** BRANCH_NAME -> **Reviewer baseline commit:** REVIEWER_BASELINE -> -> This is not a full re-review. Focus only on: -> -> 1. **Previous comments** — read the PR comments (`gh pr view PR_URL --comments`) and verify -> every previously-raised issue is either fixed or has an accepted rebuttal. If any remain -> unaddressed without a rebuttal, include them in your output. -> -> 2. **Changed files** — review only files that changed since the baseline commit: -> ``` -> git checkout BRANCH_NAME -> git diff REVIEWER_BASELINE..HEAD -> ``` -> Raise new issues only if they are **significant**: correctness bugs, security problems, -> accessibility regressions, or clear spec non-compliance. Do not raise style, naming, -> or minor cleanup issues. -> -> For each issue, post a comment directly to the PR: -> ``` -> gh pr review PR_URL --comment -b "path/to/File.cs:LINE — your comment" -> ``` -> -> Return a JSON array (same schema as before). Return only the JSON — no other text. -> An empty array means all previous comments are resolved and no new significant issues exist. - -**Routing after both complete:** -- If Tester has failures → re-spawn Developer with failure list (same-failure-3x guard applies) -- If scoped Reviewer has issues → re-spawn Developer with PR URL to address them -- If both have issues → re-spawn Developer once with both sets -- If both are clean → go to Completion - -Loop, updating `REVIEWER_BASELINE` each time before the parallel spawn. - ---- - -## Completion - -When the Reviewer returns an empty array, tell the user: - -> Implementation complete. -> PR: PR_URL -> All tests pass and all review comments have been addressed or rebutted on the PR. diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py index 667b8c07..8b0a9b22 100644 --- a/.claude/scripts/dev_team.py +++ b/.claude/scripts/dev_team.py @@ -10,6 +10,7 @@ """ import argparse +import concurrent.futures import datetime import json import re @@ -468,32 +469,122 @@ def run(self, ctx: PipelineContext) -> str: return "changes_requested" -class SignOffStep(Step): - handles = "reviewing-signoff" +class SignoffStep(Step): + handles = "signoff" def run(self, ctx: PipelineContext) -> str: - print(f"Reviewer is signing off on {ctx.work_item_id}...", flush=True) - try: - output = call_agent( - "reviewer", "reviewer-sign-off", - substitutions={ - "$WORK_ITEM_ID": ctx.work_item_id, - "$TASK_BRIEF": ctx.brief, - "$PR_URL": ctx.pr_url, - }, - ) - except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: - print(f"Error invoking reviewer agent:\n{e}", file=sys.stderr) - sys.exit(1) + print(f"Running parallel signoff for {ctx.work_item_id}...", flush=True) - ctx.review_notes = output - result = parse_json_output(output) - status = result.get("status", "changes_requested") - if status == "approved": - print("Sign-off approved.", flush=True) - return "approved" - print("Sign-off requested further changes.", flush=True) - return "changes_requested" + # Push first so the reviewer-sign-off agent can see the latest commits on the PR. + _commit_and_push(ctx.work_item_id) + + failures: list[str] = [] + + def _run_scripts() -> tuple[bool, str]: + try: + build_ok, build_log, build_tail = run_validate_script("validate-build") + except (FileNotFoundError, OSError) as e: + return False, f"validate-build error: {e}" + if not build_ok: + return False, ( + f"Build failed.\n\n" + f"Full log: {build_log}\n\n" + f"Last 30 lines:\n```\n{build_tail}\n```" + ) + try: + tests_ok, tests_log, tests_tail = run_validate_script("validate-tests") + except (FileNotFoundError, OSError) as e: + return False, f"validate-tests error: {e}" + if not tests_ok: + return False, ( + f"Test failures.\n\n" + f"Full log: {tests_log}\n\n" + f"Last 30 lines:\n```\n{tests_tail}\n```" + ) + return True, "" + + def _run_reviewer_signoff() -> tuple[bool, str, str]: + try: + output = call_agent( + "reviewer", "reviewer-sign-off", + substitutions={ + "$WORK_ITEM_ID": ctx.work_item_id, + "$TASK_BRIEF": ctx.brief, + "$PR_URL": ctx.pr_url, + }, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + return False, "", f"reviewer-sign-off error: {e}" + result = parse_json_output(output) + pr_url = result.get("pr_url", "") + approved = result.get("status", "changes_requested") == "approved" + return approved, pr_url, output if not approved else "" + + def _run_researcher_validate() -> tuple[bool, str]: + try: + output = call_agent( + "researcher", "researcher-validate", + ctx.work_item_id, ctx.spec_path, + substitutions={ + "$TASK_BRIEF": ctx.brief, + "$WORK_SUMMARIES": _format_work_summaries(ctx.work_summaries), + }, + ) + except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: + return False, f"researcher-validate error: {e}" + + # Parse the JSON array returned by researcher-validate + try: + for block in reversed(re.findall(r"```(?:json)?\s*\n([\s\S]*?)\n```", output)): + try: + criteria = json.loads(block.strip()) + if isinstance(criteria, list): + failing = [ + c for c in criteria + if c.get("status") in ("fail", "partial") + ] + if failing: + details = "\n".join( + f"- [{c['status'].upper()}] {c['criterion']}: " + f"{c.get('finding', '')}" + for c in failing + ) + return False, f"Exit criteria not fully met:\n{details}" + return True, "" + except (json.JSONDecodeError, KeyError): + continue + except Exception: + pass + return True, "" # No parseable JSON — treat as inconclusive pass + + with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + fut_scripts = executor.submit(_run_scripts) + fut_reviewer = executor.submit(_run_reviewer_signoff) + fut_researcher = executor.submit(_run_researcher_validate) + + scripts_ok, scripts_failure = fut_scripts.result() + reviewer_ok, reviewer_pr_url, reviewer_failure = fut_reviewer.result() + researcher_ok, researcher_failure = fut_researcher.result() + + if reviewer_pr_url: + ctx.pr_url = reviewer_pr_url + + if not scripts_ok: + failures.append(scripts_failure) + if not reviewer_ok: + failures.append(f"Reviewer sign-off:\n{reviewer_failure}") + if not researcher_ok: + failures.append(researcher_failure) + + if failures: + ctx.review_notes = "\n\n---\n\n".join(failures) + ctx.last_failure = ctx.review_notes + print("Signoff found issues; requesting further changes.", flush=True) + return "changes_requested" + + ctx.last_failure = "" + print("Signoff approved.", flush=True) + return "approved" class FixStep(Step): @@ -579,15 +670,13 @@ def __init__(self, ctx: PipelineContext, context_path: Path, workflow: WorkflowD self.context_path = context_path self.workflow = workflow self.machine = StateMachine(workflow.transitions, initial=ctx.state) - validate_step = ValidateStep() self.step_handlers: dict[str, Step] = { "researching": ResearchStep(), "implementing": ImplementStep(), - "validating": validate_step, - "validating-pr": validate_step, + "validating": ValidateStep(), "fixing": FixStep(), "reviewing": ReviewStep(context_path), - "reviewing-signoff": SignOffStep(), + "signoff": SignoffStep(), "fixing-pr": FixPrStep(), } diff --git a/.claude/scripts/implementation-pipeline.md b/.claude/scripts/implementation-pipeline.md index 03c1be19..5293093c 100644 --- a/.claude/scripts/implementation-pipeline.md +++ b/.claude/scripts/implementation-pipeline.md @@ -9,15 +9,24 @@ stateDiagram-v2 validating --> reviewing : clean reviewing --> done : approved reviewing --> fixing-pr : changes_requested - fixing-pr --> validating-pr : fix_done - validating-pr --> fixing-pr : build_failed - validating-pr --> fixing-pr : tests_failed - validating-pr --> reviewing-signoff : clean - reviewing-signoff --> done : approved - reviewing-signoff --> fixing-pr : changes_requested + fixing-pr --> signoff : fix_done + signoff --> done : approved + signoff --> fixing-pr : changes_requested fixing --> validating : fix_done fixing --> failed : max_retries fixing-pr --> failed : max_retries done --> [*] failed --> [*] ``` + +The `signoff` state runs three tasks in parallel before making its decision: + +1. **`reviewer-sign-off`** — checks that all PR review threads have been resolved and scans + modified files for new code quality issues (Priority 1–4). Resolves satisfied threads; + leaves unresolved threads where the developer disagreed and the reviewer is pushing back. +2. **`researcher-validate`** — checks each exit criterion from the spec against the + actual code and tests. Any `fail` or `partial` result counts as a failure. +3. **Script validation** — runs `validate-build` then (if clean) `validate-tests`. + +All three must pass for `signoff` to emit `approved`. Any failure from any task emits +`changes_requested` and routes back to `fixing-pr`, with accumulated failure details. diff --git a/src/AdaptiveRemote.App/Components/_doc_UI.md b/src/AdaptiveRemote.App/Components/_doc_UI.md index c06eee34..0b1fa4a7 100644 --- a/src/AdaptiveRemote.App/Components/_doc_UI.md +++ b/src/AdaptiveRemote.App/Components/_doc_UI.md @@ -1,5 +1,7 @@ # UI Subsystem Architecture & Design +Summary: Describes Blazor-based UI rendering, eye-gaze accessibility (large buttons, high-contrast), MVVM binding, and component organization. + ## Overview The UI subsystem is responsible for rendering the application interface, handling user input (touch, mouse, eye-gaze), and displaying application state and progress. It is built with WPF (for hosting) and Blazor (for the main UI), with static assets managed in `wwwroot`. diff --git a/src/AdaptiveRemote.App/Mvvm/_doc_Mvvm.md b/src/AdaptiveRemote.App/Mvvm/_doc_Mvvm.md index 2ef0ca63..624b6306 100644 --- a/src/AdaptiveRemote.App/Mvvm/_doc_Mvvm.md +++ b/src/AdaptiveRemote.App/Mvvm/_doc_Mvvm.md @@ -1,5 +1,7 @@ # Mvvm Subsystem Architecture & Design +Summary: Describes MvvmProperty, a strongly-typed property change notification system that serves as the MVVM foundation without WPF dependencies. + ## Overview The Mvvm subsystem provides foundational support for MVVM architecture in the UI. It enables strongly-typed property change notification for model classes, similar to WPF's `DependencyProperty`, but without WPF dependencies. diff --git a/src/AdaptiveRemote.App/Services/Broadlink/_doc_Broadlink.md b/src/AdaptiveRemote.App/Services/Broadlink/_doc_Broadlink.md index 44b6acaf..04e49c20 100644 --- a/src/AdaptiveRemote.App/Services/Broadlink/_doc_Broadlink.md +++ b/src/AdaptiveRemote.App/Services/Broadlink/_doc_Broadlink.md @@ -1,5 +1,7 @@ # Broadlink Subsystem Architecture & Design +Summary: Describes the RM4 Mini driver for device discovery, authentication, and IR command delivery. + ## Overview The Broadlink subsystem provides a driver for the Broadlink RM4 mini device, handling device discovery, authentication, and communication. It is based on the Python implementation at [mjg59/python-broadlink](https://github.com/mjg59/python-broadlink), but refactored for maintainability and debugging in C#. diff --git a/src/AdaptiveRemote.App/Services/Broadlink/_doc_BroadlinkProtocol.md b/src/AdaptiveRemote.App/Services/Broadlink/_doc_BroadlinkProtocol.md index ce6d8951..a8d84b87 100644 --- a/src/AdaptiveRemote.App/Services/Broadlink/_doc_BroadlinkProtocol.md +++ b/src/AdaptiveRemote.App/Services/Broadlink/_doc_BroadlinkProtocol.md @@ -1,5 +1,7 @@ # Broadlink RM4 Mini IR Learning Protocol +Summary: Documents the IR learning sequence for the RM4 Mini — enter learning mode, poll for the captured IR code, then decode and store it. + This document describes the IR learning protocol for the Broadlink RM4 Mini, as implemented in AdaptiveRemote. It is distilled from the [mjg59/python-broadlink](https://github.com/mjg59/python-broadlink) Python project and references the C# implementation in `AdaptiveRemote.Services.Broadlink.DeviceConnection`. ## Overview diff --git a/src/AdaptiveRemote.App/Services/Commands/_doc_Commands.md b/src/AdaptiveRemote.App/Services/Commands/_doc_Commands.md index 66cf2c3a..1637552d 100644 --- a/src/AdaptiveRemote.App/Services/Commands/_doc_Commands.md +++ b/src/AdaptiveRemote.App/Services/Commands/_doc_Commands.md @@ -1,5 +1,7 @@ # Command Architecture & Design +Summary: Describes the Command data object that decouples UI rendering, speech recognition, and command execution across subsystems. + ## Overview The `Command` abstraction is a central data object representing a remote control command. It is designed to decouple the definition, display, execution, and invocation of commands across multiple subsystems. The base class is defined in [`Models/Command.cs`](../../Models/Command.cs). diff --git a/src/AdaptiveRemote.App/Services/Conversation/_doc_Conversation.md b/src/AdaptiveRemote.App/Services/Conversation/_doc_Conversation.md index 4bc8acac..73dbc1c5 100644 --- a/src/AdaptiveRemote.App/Services/Conversation/_doc_Conversation.md +++ b/src/AdaptiveRemote.App/Services/Conversation/_doc_Conversation.md @@ -1,5 +1,7 @@ # Conversation Subsystem Architecture & Design +Summary: Describes speech recognition, synthesis, conversation state management, and command invocation for the speech interface. + ## Overview The Conversation subsystem manages all speech-related functionality: listening to user instructions, mapping them to commands, invoking those commands, and providing spoken feedback. It is not responsible for executing commands directly, but invokes handlers provided by other subsystems. UI updates are handled via a ViewModel, not directly by the subsystem. diff --git a/src/AdaptiveRemote.App/Services/Lifecycle/_doc_Lifecycle.md b/src/AdaptiveRemote.App/Services/Lifecycle/_doc_Lifecycle.md index 16982538..dc566801 100644 --- a/src/AdaptiveRemote.App/Services/Lifecycle/_doc_Lifecycle.md +++ b/src/AdaptiveRemote.App/Services/Lifecycle/_doc_Lifecycle.md @@ -1,5 +1,7 @@ # Lifecycle Subsystem Architecture & Design +Summary: Describes DI scope management, lifecycle hooks, and the Blazor scope-sharing model used for service initialization and teardown. + ## Overview The Lifecycle subsystem orchestrates application startup, shutdown, and scoped updates. Its main role is to manage DI scopes for services that need to be re-initialized when configuration or data changes. It is not responsible for diff --git a/src/AdaptiveRemote.App/Services/ModalMessages/_doc_ModalMessages.md b/src/AdaptiveRemote.App/Services/ModalMessages/_doc_ModalMessages.md index 3e6e849e..5d1c8d32 100644 --- a/src/AdaptiveRemote.App/Services/ModalMessages/_doc_ModalMessages.md +++ b/src/AdaptiveRemote.App/Services/ModalMessages/_doc_ModalMessages.md @@ -1,5 +1,7 @@ # Modal Message Service — Design & Architecture +Summary: Describes the modal overlay message service — FIFO queuing, keep-alive support, and ViewModel integration for conversation and programming flows. + ## Overview The modal message service provides a single, centered overlay message visible on top of the remote control UI. It is used by both the conversation (speech synthesis) subsystem and the programming diff --git a/src/AdaptiveRemote.App/Services/ProgrammaticSettings/_doc_ProgrammaticSettings.md b/src/AdaptiveRemote.App/Services/ProgrammaticSettings/_doc_ProgrammaticSettings.md index b52725b3..8901faae 100644 --- a/src/AdaptiveRemote.App/Services/ProgrammaticSettings/_doc_ProgrammaticSettings.md +++ b/src/AdaptiveRemote.App/Services/ProgrammaticSettings/_doc_ProgrammaticSettings.md @@ -1,5 +1,7 @@ # ProgrammaticSettings File Format and Usage +Summary: Describes the INI file format and service used to persist programmed IR command payloads (and other settings) across sessions. + This document describes the file format and usage for storing programmed IR commands in AdaptiveRemote, as managed by the `ProgrammaticSettings` service. The service is generally available for any component to add data that must be persisted across sessions. IR commands are only the first use case. diff --git a/src/AdaptiveRemote.App/Services/_doc_Services.md b/src/AdaptiveRemote.App/Services/_doc_Services.md index e4eaff27..304b3dc7 100644 --- a/src/AdaptiveRemote.App/Services/_doc_Services.md +++ b/src/AdaptiveRemote.App/Services/_doc_Services.md @@ -1,5 +1,7 @@ # Services Folder Architecture & Design +Summary: Describes the Services folder structure — public interfaces at the top level, subsystem implementations in subfolders, and DI/naming conventions. + ## Overview The top-level `Services` folder organizes all public API interfaces for services, along with helpers and subsystem implementations. This structure enables clear separation between subsystem consumers and implementation details. diff --git a/src/AdaptiveRemote.Backend.CompiledLayoutService/_doc_Auth.md b/src/AdaptiveRemote.Backend.CompiledLayoutService/_doc_Auth.md index ca4d63f2..c02c9b5d 100644 --- a/src/AdaptiveRemote.Backend.CompiledLayoutService/_doc_Auth.md +++ b/src/AdaptiveRemote.Backend.CompiledLayoutService/_doc_Auth.md @@ -1,5 +1,7 @@ # Authentication — CompiledLayoutService (ADR-168) +Summary: Describes JWT Bearer authentication via AWS Cognito for external API endpoints in the CompiledLayoutService. + ## Overview External API endpoints are protected by JWT Bearer authentication backed by **AWS Cognito**. diff --git a/src/AdaptiveRemote.Headless/_doc_HeadlessHost.md b/src/AdaptiveRemote.Headless/_doc_HeadlessHost.md index cf2066b2..073381a1 100644 --- a/src/AdaptiveRemote.Headless/_doc_HeadlessHost.md +++ b/src/AdaptiveRemote.Headless/_doc_HeadlessHost.md @@ -1,6 +1,8 @@ # AdaptiveRemote.Headless Design Document +Summary: Describes the headless console host that runs Blazor under Playwright for cross-platform automated E2E testing. + ## Purpose AdaptiveRemote.Headless is a cross-platform .NET console host for the AdaptiveRemote.App Blazor application, designed to enable automated end-to-end (E2E) UI testing in headless environments (e.g., GitHub Copilot Agents, CI/CD pipelines, and Linux containers). It allows tests to validate AdaptiveRemote functionality without requiring a graphical desktop or Windows UI stack. diff --git a/src/_doc_BackendDevelopment.md b/src/_doc_BackendDevelopment.md index d76d2785..7d07c0ab 100644 --- a/src/_doc_BackendDevelopment.md +++ b/src/_doc_BackendDevelopment.md @@ -1,5 +1,7 @@ # Backend Development Guide +Summary: Defines the local dev pattern, service ports, and verification steps for backend API and Lambda services. + This document defines the standing development pattern for backend services introduced by Task 5 ([ADR-187](https://jodasoft.atlassian.net/browse/ADR-187)). diff --git a/src/_doc_Projects.md b/src/_doc_Projects.md index 77673b8f..541923c7 100644 --- a/src/_doc_Projects.md +++ b/src/_doc_Projects.md @@ -1,6 +1,8 @@ # Projects Overview +Summary: Maps each project in the repo to its purpose, boundaries, and cross-platform constraints. + This document describes the high-level organization of the AdaptiveRemote repository. It is intended for new developers and Copilot agents to understand where code should be implemented, the responsibilities and boundaries of each project, and conventions for cross-platform support. ## Project Structure and Responsibilities diff --git a/test/AdaptiveRemote.EndtoEndTests.TestServices/Logging/_doc_TestLogging.md b/test/AdaptiveRemote.EndtoEndTests.TestServices/Logging/_doc_TestLogging.md index 5f2a905e..fad6e430 100644 --- a/test/AdaptiveRemote.EndtoEndTests.TestServices/Logging/_doc_TestLogging.md +++ b/test/AdaptiveRemote.EndtoEndTests.TestServices/Logging/_doc_TestLogging.md @@ -1,5 +1,7 @@ # Test Logging Design +Summary: Describes the logging channel for E2E test code — routes structured test logs through the app's ILogger pipeline and MSTest TestContext. + Goal - Provide a logging channel for end-to-end test code so tests can write structured logs that are recorded by the application's `ILogger` pipeline and are also visible inside the test runner (`MSTest.TestContext`). @@ -28,7 +30,7 @@ Where files live - `test/AdaptiveRemote.EndtoEndTests.TestServices/Logging/TestContextLoggerProvider.cs` (local sink that writes to `TestContext`) API shape (single source of truth) -- [`ITestLogger`](../../../src/AdaptiveRemote.App/Services/Testing/ITestLogger.cs) (marshalable over JSON-RPC) — sinks implement this API and the intermediary dispatches to the registered sinks. +- [`ITestLogger`](../../../src/AdaptiveRemote.App/Services/Testing/ITestLogger.cs) (marshalable over JSON-RPC) � sinks implement this API and the intermediary dispatches to the registered sinks. ```csharp [RpcMarshalable] @@ -79,5 +81,5 @@ Backward-compatibility and incremental rollout Open questions resolved - Dynamic sink registration: yes (attach on connect / unregister on disconnect). -- Use .NET logging primitives: yes — implement an `ILoggerProvider` in the test process to avoid duplicating .NET logging semantics. +- Use .NET logging primitives: yes � implement an `ILoggerProvider` in the test process to avoid duplicating .NET logging semantics. - Dispatch semantics: provider currently waits for remote sinks to acknowledge operations to preserve ordering. diff --git a/test/AdaptiveRemote.EndtoEndTests.TestServices/_doc_SimulatedDevices.md b/test/AdaptiveRemote.EndtoEndTests.TestServices/_doc_SimulatedDevices.md index 1b52f6fa..f078fff5 100644 --- a/test/AdaptiveRemote.EndtoEndTests.TestServices/_doc_SimulatedDevices.md +++ b/test/AdaptiveRemote.EndtoEndTests.TestServices/_doc_SimulatedDevices.md @@ -1,5 +1,7 @@ # Simulated Devices for E2E Testing +Summary: Describes in-process device simulators used in E2E tests — implementing real wire protocols without physical hardware. + ## Overview The AdaptiveRemote test suite includes in-process simulated devices that enable end-to-end testing without requiring physical hardware. These simulators implement the actual wire protocols used by real devices, allowing comprehensive testing of device discovery, authentication, command transmission, and error handling. diff --git a/test/_doc_EndToEndTests.md b/test/_doc_EndToEndTests.md index 61affeca..837dcbaa 100644 --- a/test/_doc_EndToEndTests.md +++ b/test/_doc_EndToEndTests.md @@ -1,5 +1,7 @@ # End-to-End Test Architecture +Summary: Describes the E2E test architecture — how host processes are started, controlled via StreamJsonRpc, and validated using Playwright/Reqnroll. + ## Overview The E2E testing subsystem validates that AdaptiveRemote host applications (WPF, Console, and Headless) can start correctly, establish test control connections, execute commands, and shut down cleanly. These tests run as separate processes to validate the complete deployment artifacts. From 1d6391bc87eb6808db6472b25ce36aeaf621ed57 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 18:36:54 -0700 Subject: [PATCH 08/13] Plan for fixing a GitHub issue --- .claude/commands/dev-team.md | 59 ++++++++++++++++------ .claude/commands/researcher-issue.md | 42 +++++++++++++++ .claude/commands/researcher-validate.md | 13 +++-- .claude/scripts/dev_team.py | 39 +++++++++----- .claude/scripts/fix-issue-pipeline.md | 32 ++++++++++++ .claude/scripts/implementation-pipeline.md | 3 +- 6 files changed, 156 insertions(+), 32 deletions(-) create mode 100644 .claude/commands/researcher-issue.md create mode 100644 .claude/scripts/fix-issue-pipeline.md diff --git a/.claude/commands/dev-team.md b/.claude/commands/dev-team.md index 193ef586..dba2861f 100644 --- a/.claude/commands/dev-team.md +++ b/.claude/commands/dev-team.md @@ -1,17 +1,18 @@ --- -description: Entry point for the dev-team agent pipeline. Runs the Researcher phase for a Jira work item. -argument-hint: +description: Entry point for the dev-team agent pipeline. Routes to the correct pipeline based on the work item request, then starts the pipeline script and relays its output. +argument-hint: --- -## Work item +## Request $ARGUMENTS ## Role -Your only job is to start the pipeline script and relay its output to the user. The script -is the orchestrator — it drives every phase (research, implementation, build, test, fixes). -You are a passive observer. +You determine which pipeline to run and which research skill to use, then start the +pipeline script and relay its output to the user. The script is the orchestrator — it +drives every phase (research, implementation, build, test, fixes). Once you have started +the script you are a passive observer. **Never attempt to:** - Fix build errors or test failures @@ -24,23 +25,51 @@ attempt recovery. ## Steps -1. Check the platform: +### 1 — Determine pipeline and work item ID + +Analyze the request using your judgment: + +- If the request refers to a **Jira task** — e.g. "implement ADR-123", "ADR-123", or + any `[A-Z]+-\d+` pattern — use: + - Pipeline: `implementation-pipeline` + - Research skill: `researcher-plan` + - Work item ID: the Jira key as-is (e.g. `ADR-123`) + +- If the request refers to a **GitHub issue** — e.g. "fix issue #444", "#444", or + any `#\d+` pattern — use: + - Pipeline: `fix-issue-pipeline` + - Research skill: `researcher-issue` + - Work item ID: `GH-` (strip the `#`, e.g. `#444` → `GH-444`) + +- If the intent is unclear, tell the user: + + > I'm not sure which pipeline to use for this request. Provide a Jira task key + > (e.g. ADR-123) to use the implementation pipeline, or a GitHub issue number + > (e.g. #444) to use the fix-issue pipeline. + + Then stop. + +### 2 — Check the platform ```bash python -c "import sys; print(sys.platform)" ``` -2. Start the pipeline script in the background: +### 3 — Start the pipeline script in the background ```bash -python -u .claude/scripts/dev_team.py $ARGUMENTS --workflow .claude/scripts/implementation-pipeline.md +python -u .claude/scripts/dev_team.py --workflow .claude/scripts/.md --research-skill ``` -3. **Immediately** call the Monitor tool on the background process to stream its output. - Do not wait. Do not use TaskOutput. Use the platform-appropriate tail command: - - **`win32`**: `powershell -Command "Get-Content -Wait -Path ''"` - - **anything else**: `tail -f ` +### 4 — Stream output + +**Immediately** call the Monitor tool on the background process to stream its output. +Do not wait. Do not use TaskOutput. Use the platform-appropriate tail command: +- **`win32`**: `powershell -Command "Get-Content -Wait -Path ''"` +- **anything else**: `tail -f ` + +Stream all output to the user as it arrives until the process exits. - Stream all output to the user as it arrives until the process exits. +### 5 — Report exit status -4. When the process exits, report its exit status to the user. Take no further action. +When the process exits, report its exit status to the user. Take no further action. diff --git a/.claude/commands/researcher-issue.md b/.claude/commands/researcher-issue.md new file mode 100644 index 00000000..806a1c0c --- /dev/null +++ b/.claude/commands/researcher-issue.md @@ -0,0 +1,42 @@ +--- +description: Produce a concise task brief for a GitHub issue, synthesized from the issue body and comments, the relevant architecture docs, and source code. Proposes exit criteria since none are written in the issue. +argument-hint: +--- + +## Inputs + +Work item ID: the first token of `$ARGUMENTS` (e.g. `GH-444`) + +If missing, stop and tell the caller: + +> Usage: `/researcher-issue ` + +--- + +## Steps + +### 1 — Fetch the GitHub issue + +Derive the issue number by stripping the `GH-` prefix from the work item ID +(e.g. `GH-444` → `444`). Fetch the issue title, body, and all comments: + +```bash +gh issue view --comments +``` + +If the issue is not found, stop and report the error. + +### 2–5 — Exploration and brief + +Read `.claude/commands/researcher-plan.md` and follow **steps 2 through 5** exactly, with +two differences: + +- **Source of requirements (step 1 replacement):** Use the issue title, body, and comments + you fetched above instead of a spec file section. Let the issue content guide which + subsystems and areas are relevant. + +- **Exit criteria (step 5 difference):** The issue contains no formal exit criteria. Instead + of copying them verbatim, **propose** a concrete, checkable list synthesized from the issue + description and the context you gathered. Frame each criterion the same way spec exit + criteria are written (observable behaviour, not implementation detail). Label the section + **"Exit criteria (proposed)"** so the caller knows these are inferred, not authoritative. diff --git a/.claude/commands/researcher-validate.md b/.claude/commands/researcher-validate.md index 7cc68b45..78d7b074 100644 --- a/.claude/commands/researcher-validate.md +++ b/.claude/commands/researcher-validate.md @@ -1,12 +1,12 @@ --- description: Validate completed work against a task's exit criteria, returning a structured pass/fail result for each criterion -argument-hint: +argument-hint: [path to spec file] --- ## Inputs Task key: the first token of `$ARGUMENTS` -Spec file: the second token of `$ARGUMENTS` +Spec file: the second token of `$ARGUMENTS` (may be empty for GitHub issue tasks) **Original task brief:** @@ -20,18 +20,23 @@ $WORK_SUMMARIES --- -All of these are required. If any are missing, stop and tell the caller what is needed. +Task key and task brief are required. If either is missing, stop and tell the caller what is needed. --- ## Steps -### 1 — Re-read the authoritative exit criteria +### 1 — Identify the authoritative exit criteria +**If a spec file path is provided** (second token of `$ARGUMENTS` is non-empty): Read the spec file and locate the section for the task key. Extract the exit criteria checklist as written in the spec — this is the authoritative source, not the task brief. If the spec has been updated since the brief was written, use the spec version. +**If no spec file path is provided** (only one token in `$ARGUMENTS`): +Extract the exit criteria from the **Original task brief** above. These were proposed by +the researcher at planning time and are the authoritative source for this run. + ### 2 — Evaluate each criterion For each exit criterion, determine its status by reading the evidence the caller provided: diff --git a/.claude/scripts/dev_team.py b/.claude/scripts/dev_team.py index 8b0a9b22..4d457fd7 100644 --- a/.claude/scripts/dev_team.py +++ b/.claude/scripts/dev_team.py @@ -136,7 +136,7 @@ class PipelineContext: """All mutable state for a dev-team pipeline run, persisted across resumptions.""" work_item_id: str - spec_path: str + spec_path: str = "" state: str = "init" brief: str = "" work_summaries: list[str] = field(default_factory=list) @@ -326,9 +326,26 @@ def run(self, ctx: PipelineContext) -> str: ... +class FindSpecStep(Step): + handles = "spec-finding" + + def run(self, ctx: PipelineContext) -> str: + if ctx.spec_path: + print("Spec path already set — skipping.", flush=True) + return "spec_found" + print(f"Searching for spec for {ctx.work_item_id}...", flush=True) + spec_file = find_spec_file(ctx.work_item_id) + ctx.spec_path = str(spec_file.relative_to(REPO_ROOT)) + print(f"Found {spec_file}", flush=True) + return "spec_found" + + class ResearchStep(Step): handles = "researching" + def __init__(self, skill: str) -> None: + self._skill = skill + def run(self, ctx: PipelineContext) -> str: if ctx.brief: print("Research already complete in context — skipping.", flush=True) @@ -336,7 +353,7 @@ def run(self, ctx: PipelineContext) -> str: print(f"Researcher is planning work for {ctx.work_item_id}...", flush=True) try: ctx.brief = call_agent( - "researcher", "researcher-plan", + "researcher", self._skill, ctx.work_item_id, ctx.spec_path, ) except (FileNotFoundError, RuntimeError, subprocess.CalledProcessError) as e: @@ -665,13 +682,14 @@ def run(self, ctx: PipelineContext) -> str: class DevTeamPipeline: """Drives the dev-team state machine from init (or a resumed state) to done.""" - def __init__(self, ctx: PipelineContext, context_path: Path, workflow: WorkflowDefinition) -> None: + def __init__(self, ctx: PipelineContext, context_path: Path, workflow: WorkflowDefinition, research_skill: str) -> None: self.ctx = ctx self.context_path = context_path self.workflow = workflow self.machine = StateMachine(workflow.transitions, initial=ctx.state) self.step_handlers: dict[str, Step] = { - "researching": ResearchStep(), + "spec-finding": FindSpecStep(), + "researching": ResearchStep(research_skill), "implementing": ImplementStep(), "validating": ValidateStep(), "fixing": FixStep(), @@ -1029,9 +1047,11 @@ def main() -> None: description="dev-team pipeline orchestrator", ) parser.add_argument("work_item_id", metavar="work-item-id", - help="Jira work item ID (e.g. ADR-172)") + help="Work item ID (e.g. ADR-172 or GH-444)") parser.add_argument("--workflow", required=True, metavar="path", help="Path to a Mermaid stateDiagram-v2 workflow file") + parser.add_argument("--research-skill", required=True, metavar="skill", + help="Researcher skill to use (e.g. researcher-plan or researcher-issue)") args = parser.parse_args() work_item_id = args.work_item_id @@ -1057,15 +1077,10 @@ def main() -> None: sys.exit(0 if ctx.state == "done" else 1) print(f"Resuming {work_item_id} from state '{ctx.state}'...", flush=True) else: - print(f"Searching for spec for {work_item_id}", flush=True) - spec_file = find_spec_file(work_item_id) - spec_path = str(spec_file.relative_to(REPO_ROOT)) - print(f"Found {spec_file}", flush=True) - ctx = PipelineContext(work_item_id=work_item_id, spec_path=spec_path, - state=workflow.initial_state) + ctx = PipelineContext(work_item_id=work_item_id, state=workflow.initial_state) ctx.save(context_path) - DevTeamPipeline(ctx, context_path, workflow).run() + DevTeamPipeline(ctx, context_path, workflow, research_skill=args.research_skill).run() if __name__ == "__main__": diff --git a/.claude/scripts/fix-issue-pipeline.md b/.claude/scripts/fix-issue-pipeline.md new file mode 100644 index 00000000..71a21ec3 --- /dev/null +++ b/.claude/scripts/fix-issue-pipeline.md @@ -0,0 +1,32 @@ +```mermaid +stateDiagram-v2 + [*] --> init + init --> researching : start + researching --> implementing : research_done + implementing --> validating : impl_done + validating --> fixing : build_failed + validating --> fixing : tests_failed + validating --> reviewing : clean + reviewing --> done : approved + reviewing --> fixing-pr : changes_requested + fixing-pr --> signoff : fix_done + signoff --> done : approved + signoff --> fixing-pr : changes_requested + fixing --> validating : fix_done + fixing --> failed : max_retries + fixing-pr --> failed : max_retries + done --> [*] + failed --> [*] +``` + +The `signoff` state runs three tasks in parallel before making its decision: + +1. **`reviewer-sign-off`** — checks that all PR review threads have been resolved and scans + modified files for new code quality issues (Priority 1–4). Resolves satisfied threads; + leaves unresolved threads where the developer disagreed and the reviewer is pushing back. +2. **`researcher-validate`** — checks each exit criterion proposed by the researcher against + the actual code and tests. Any `fail` or `partial` result counts as a failure. +3. **Script validation** — runs `validate-build` then (if clean) `validate-tests`. + +All three must pass for `signoff` to emit `approved`. Any failure from any task emits +`changes_requested` and routes back to `fixing-pr`, with accumulated failure details. diff --git a/.claude/scripts/implementation-pipeline.md b/.claude/scripts/implementation-pipeline.md index 5293093c..f3432f0a 100644 --- a/.claude/scripts/implementation-pipeline.md +++ b/.claude/scripts/implementation-pipeline.md @@ -1,7 +1,8 @@ ```mermaid stateDiagram-v2 [*] --> init - init --> researching : start + init --> spec-finding : start + spec-finding --> researching : spec_found researching --> implementing : research_done implementing --> validating : impl_done validating --> fixing : build_failed From a2d7b0c31cb3ca02b547f4412d4a3630438a31f7 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 19:09:15 -0700 Subject: [PATCH 09/13] GH-120: Cancel in-progress learning when a new button is clicked Replace the WaitAsync(0) early-return guard with a cancel-and-wait pattern. A per-operation CancellationTokenSource (_currentLearnCts) is exchanged via Interlocked.Exchange before waiting on the semaphore, so a new button click cancels the previous cycle and the device re-enters learning mode for the new button. Duplicate clicks on the same button also cancel-and-restart. Key decisions: - learnCts.Token is linked into the inner linkedCts so preemption propagates through EnterLearningModeAsync and the polling loop. - Timeout-catch condition widens to exclude learnCts cancellation, preventing preemption from surfacing as a TimeoutException. - Interlocked.CompareExchange in finally ensures the shared field is only cleared by the holder that set it. - EventId 1009 (LearningAlreadyInProgress) removed; EventId 1010 (CancellingLearningForNewCommand) added. Co-Authored-By: Claude Sonnet 4.6 --- .../Logging/MessageLogger.cs | 4 +- .../Broadlink/BroadlinkCommandService.cs | 38 +++-- .../Broadlink/BroadlinkCommandServiceTests.cs | 140 +++++++++++++----- .../Features/Shared/ProgrammingMode.feature | 34 +++++ 4 files changed, 166 insertions(+), 50 deletions(-) diff --git a/src/AdaptiveRemote.App/Logging/MessageLogger.cs b/src/AdaptiveRemote.App/Logging/MessageLogger.cs index 9e3b3224..40c58c75 100644 --- a/src/AdaptiveRemote.App/Logging/MessageLogger.cs +++ b/src/AdaptiveRemote.App/Logging/MessageLogger.cs @@ -174,8 +174,8 @@ public MessageLogger(ILogger logger) [LoggerMessage(EventId = 1008, Level = LogLevel.Warning, Message = "IR learning timed out for {Command}")] public partial void BroadlinkCommandService_LearningTimedOut(Models.Command command); - [LoggerMessage(EventId = 1009, Level = LogLevel.Warning, Message = "IR learning already in progress; ignoring request to program {Command}")] - public partial void BroadlinkCommandService_LearningAlreadyInProgress(Models.Command command); + [LoggerMessage(EventId = 1010, Level = LogLevel.Information, Message = "Cancelling in-progress learning to start learning {Command}")] + public partial void BroadlinkCommandService_CancellingLearningForNewCommand(Models.Command command); [LoggerMessage(EventId = 1101, Level = LogLevel.Information, Message = "Loading existing settings from {FilePath}")] public partial void ProgrammaticSettings_LoadingExistingSettings(string filePath); diff --git a/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs b/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs index bcc08d44..4a278dff 100644 --- a/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs +++ b/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs @@ -14,12 +14,18 @@ internal sealed class BroadlinkCommandService : CommandServiceBase private readonly IModalMessageService _modalMessageService; /// - /// Ensures that only one IR learning cycle runs at a time. The device - /// cannot handle concurrent learning requests; a second attempt while one - /// is in progress is silently ignored. + /// Ensures that only one IR learning cycle runs at a time. A new request + /// cancels any in-progress cycle before acquiring this semaphore. /// private readonly SemaphoreSlim _learningSemaphore = new(1, 1); + /// + /// CTS owned by the current semaphore holder. A new request exchanges this + /// field (via ) to cancel the previous + /// holder before waiting on the semaphore. + /// + private CancellationTokenSource? _currentLearnCts; + private IDeviceConnection? _connection; public BroadlinkCommandService( @@ -82,30 +88,33 @@ protected override Command.ExecuteDelegate CreateHandler(IRCommand command) IDeviceConnection connection = _connection ?? throw new InvalidOperationException(Phrases.Broadlink_NotConnected(command.Name)); - // Prevent a second learning cycle from starting while one is already in progress. - // The device cannot handle concurrent learning; we silently skip the duplicate request. - if (!await _learningSemaphore.WaitAsync(0, cancellationToken)) - { - Logger.BroadlinkCommandService_LearningAlreadyInProgress(command); - return; - } - string message = Phrases.Broadlink_ProgrammingCommand(command.Label); TimeSpan pollInterval = TimeSpan.FromSeconds(_broadlinkSettings.Value.LearnPollInterval); TimeSpan learnTimeout = TimeSpan.FromSeconds(_broadlinkSettings.Value.LearnTimeout); + // Cancel any in-progress learning cycle, then wait for the semaphore it holds. + using CancellationTokenSource learnCts = new(); + CancellationTokenSource? previousCts = Interlocked.Exchange(ref _currentLearnCts, learnCts); + if (previousCts is not null) + { + Logger.BroadlinkCommandService_CancellingLearningForNewCommand(command); + await previousCts.CancelAsync(); + } + + await _learningSemaphore.WaitAsync(cancellationToken); + try { await _modalMessageService.ShowMessageAsync(message, async ct => { using CancellationTokenSource timeoutCts = new(learnTimeout); - using CancellationTokenSource linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token); + using CancellationTokenSource linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct, timeoutCts.Token, learnCts.Token); Logger.BroadlinkCommandService_EnteringLearningMode(command); await connection.EnterLearningModeAsync(linkedCts.Token); // Poll until data is received (early return), the user cancels (ct fires), - // or the overall learning timeout fires (timeoutCts fires). + // a new button is clicked (learnCts fires), or the learning timeout fires. try { while (true) @@ -128,7 +137,7 @@ await _modalMessageService.ShowMessageAsync(message, async ct => await Task.Delay(pollInterval, linkedCts.Token); } } - catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !ct.IsCancellationRequested) + catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !ct.IsCancellationRequested && !learnCts.Token.IsCancellationRequested) { Logger.BroadlinkCommandService_LearningTimedOut(command); throw Errors.Broadlink_LearningTimedOut(learnTimeout); @@ -137,6 +146,7 @@ await _modalMessageService.ShowMessageAsync(message, async ct => } finally { + Interlocked.CompareExchange(ref _currentLearnCts, null, learnCts); _learningSemaphore.Release(); } }; diff --git a/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs b/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs index 08fbb35c..9ef4311b 100644 --- a/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs +++ b/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs @@ -408,11 +408,13 @@ public void BroadlinkCommandService_ProgramAsync_DeviceErrorDuringPolling_Throws } [TestMethod] - public void BroadlinkCommandService_ProgramAsync_WhileLearningAlreadyInProgress_ReturnsImmediately() + public void BroadlinkCommandService_ProgramAsync_WhileOtherLearningInProgress_CancelsPreviousAndStartsNew() { // Arrange const string commandName = "Mute"; const string commandName2 = "Power"; + byte[] learnedData = [0x01, 0x02, 0x03, 0x04]; + string expectedBase64 = Convert.ToBase64String(learnedData); BroadlinkSettings settings = new() { LearnPollInterval = 0 }; MockDefinitionService @@ -430,45 +432,41 @@ public void BroadlinkCommandService_ProgramAsync_WhileLearningAlreadyInProgress_ IRCommand firstCommand = MockDefinitionService.Object.GetCommands().First(c => c.Name == commandName); IRCommand secondCommand = MockDefinitionService.Object.GetCommands().First(c => c.Name == commandName2); - // First learning cycle: ShowMessageAsync blocks (body not called), semaphore stays acquired - TaskCompletionSource firstTaskBlocker = new(); - MockModalMessageService - .Setup(x => x.ShowMessageAsync( - Phrases.Broadlink_ProgrammingCommand(commandName), - It.IsAny>(), - It.IsAny(), - It.IsAny())) - .Returns(delegate (string message, Func action, bool keepAlive, CancellationToken ct) + // EnterLearningModeAsync: first call (Mute) blocks until its CT fires; second call (Power) completes immediately + int enterLearningCallCount = 0; + MockConnection + .Setup(x => x.EnterLearningModeAsync(It.IsAny())) + .Returns(ct => { - // Return blocking task without running body, simulating a long-running learning cycle. - // The semaphore is held for the duration since the body's finally never runs. - return firstTaskBlocker.Task; + if (Interlocked.Increment(ref enterLearningCallCount) == 1) + { + TaskCompletionSource tcs = new(); + ct.Register(() => tcs.TrySetCanceled(ct)); + return tcs.Task; + } + return Task.CompletedTask; }) - .Verifiable(Times.Once); + .Verifiable(Times.Exactly(2)); - // Second command's ShowMessageAsync must never be called — rejected before reaching it - MockModalMessageService - .Setup(x => x.ShowMessageAsync( - Phrases.Broadlink_ProgrammingCommand(commandName2), - It.IsAny>(), - It.IsAny(), - It.IsAny())) - .Verifiable(Times.Never); + // First command's ShowMessageAsync runs body (so learnCts is linked and can be cancelled) + Expect_ModalMessageService_ShowMessage(Phrases.Broadlink_ProgrammingCommand(commandName)); + // Second command's ShowMessageAsync runs body normally + Expect_ModalMessageService_ShowMessage(Phrases.Broadlink_ProgrammingCommand(commandName2)); - // Act – start first learning (semaphore acquired, modal is "showing") + Expect_Connection_CheckLearnedData(learnedData); + Expect_PersistSettings_Set($"IRData:{commandName2}", expectedBase64); + + // Act – start first learning; it blocks at EnterLearningModeAsync Task firstTask = firstCommand.ProgramAsync!(default); - // Start second learning immediately — semaphore is held so it should be rejected + // Start second learning; it cancels the first and waits for the semaphore Task secondTask = secondCommand.ProgramAsync!(default); - // Assert – second task completes immediately without modal or device interaction - secondTask.Should().BeCompleteWithin(TimeSpan.FromMilliseconds(200), - because: "a concurrent ProgramAsync call while learning is in progress should return immediately"); - - // Unblock first task and wait for it to finish - firstTaskBlocker.SetResult(); - firstTask.Should().BeCompleteWithin(TimeSpan.FromSeconds(1), - because: "the first ProgramAsync call should complete once the modal task resolves"); + // Assert – first task is cancelled (preempted), second task completes after learning + firstTask.Should().BeCanceledWithin(TimeSpan.FromSeconds(5), + because: "the first learning cycle should be cancelled when a new button is clicked"); + secondTask.Should().BeCompleteWithin(TimeSpan.FromSeconds(5), + because: "the second command should complete its full learning cycle after preempting the first"); MockLogger.VerifyMessages(messageLogger => { @@ -477,10 +475,84 @@ public void BroadlinkCommandService_ProgramAsync_WhileLearningAlreadyInProgress_ messageLogger.BroadlinkCommandService_Authenticated(IPEndPoint.Parse("10.20.30.40:1234")); messageLogger.BroadlinkCommandService_Ready(); messageLogger.CommandService_Programming(firstCommand); + messageLogger.BroadlinkCommandService_EnteringLearningMode(firstCommand); messageLogger.CommandService_Programming(secondCommand); - messageLogger.BroadlinkCommandService_LearningAlreadyInProgress(secondCommand); + messageLogger.BroadlinkCommandService_CancellingLearningForNewCommand(secondCommand); + messageLogger.CommandService_ProgramCancelled(firstCommand); + messageLogger.BroadlinkCommandService_EnteringLearningMode(secondCommand); + messageLogger.BroadlinkCommandService_PollingForLearnedData(secondCommand); + messageLogger.BroadlinkCommandService_LearnedDataReceived(secondCommand, learnedData.Length); messageLogger.CommandService_Programmed(secondCommand); - messageLogger.CommandService_Programmed(firstCommand); + }); + } + + [TestMethod] + public void BroadlinkCommandService_ProgramAsync_DuplicateClick_CancelsPreviousAndStartsSelf() + { + // Arrange — same command clicked twice while its own learning is in progress + const string commandName = "Mute"; + byte[] learnedData = [0x01, 0x02, 0x03, 0x04]; + string expectedBase64 = Convert.ToBase64String(learnedData); + BroadlinkSettings settings = new() { LearnPollInterval = 0 }; + IRCommand command = SetupAndInitializeWithCommand(commandName, settings: settings); + + // EnterLearningModeAsync: first call blocks until cancelled; second completes immediately + int enterLearningCallCount = 0; + MockConnection + .Setup(x => x.EnterLearningModeAsync(It.IsAny())) + .Returns(ct => + { + if (Interlocked.Increment(ref enterLearningCallCount) == 1) + { + TaskCompletionSource tcs = new(); + ct.Register(() => tcs.TrySetCanceled(ct)); + return tcs.Task; + } + return Task.CompletedTask; + }) + .Verifiable(Times.Exactly(2)); + + // Both calls use the same message; expect it to be shown twice + MockModalMessageService + .Setup(x => x.ShowMessageAsync( + Phrases.Broadlink_ProgrammingCommand(commandName), + It.IsAny>(), + It.IsAny(), + It.IsAny())) + .Returns(delegate (string msg, Func action, bool keepAlive, CancellationToken ct) + { + return action(ct); + }) + .Verifiable(Times.Exactly(2)); + + Expect_Connection_CheckLearnedData(learnedData); + Expect_PersistSettings_Set($"IRData:{commandName}", expectedBase64); + + // Act – start learning, then click the same button again + Task firstTask = command.ProgramAsync!(default); + Task secondTask = command.ProgramAsync!(default); + + // Assert – first is cancelled; second completes successfully + firstTask.Should().BeCanceledWithin(TimeSpan.FromSeconds(5), + because: "a duplicate click cancels the in-progress learning and starts fresh"); + secondTask.Should().BeCompleteWithin(TimeSpan.FromSeconds(5), + because: "the second click should complete its own learning cycle"); + + MockLogger.VerifyMessages(messageLogger => + { + messageLogger.BroadlinkCommandService_SearchingForDevice(); + messageLogger.BroadlinkCommandService_Authenticating(IPEndPoint.Parse("10.20.30.40:1234")); + messageLogger.BroadlinkCommandService_Authenticated(IPEndPoint.Parse("10.20.30.40:1234")); + messageLogger.BroadlinkCommandService_Ready(); + messageLogger.CommandService_Programming(command); + messageLogger.BroadlinkCommandService_EnteringLearningMode(command); + messageLogger.CommandService_Programming(command); + messageLogger.BroadlinkCommandService_CancellingLearningForNewCommand(command); + messageLogger.CommandService_ProgramCancelled(command); + messageLogger.BroadlinkCommandService_EnteringLearningMode(command); + messageLogger.BroadlinkCommandService_PollingForLearnedData(command); + messageLogger.BroadlinkCommandService_LearnedDataReceived(command, learnedData.Length); + messageLogger.CommandService_Programmed(command); }); } diff --git a/test/AdaptiveRemote.EndToEndTests.Host.Wpf/Features/Shared/ProgrammingMode.feature b/test/AdaptiveRemote.EndToEndTests.Host.Wpf/Features/Shared/ProgrammingMode.feature index 6fc50b6d..18b681ae 100644 --- a/test/AdaptiveRemote.EndToEndTests.Host.Wpf/Features/Shared/ProgrammingMode.feature +++ b/test/AdaptiveRemote.EndToEndTests.Host.Wpf/Features/Shared/ProgrammingMode.feature @@ -107,3 +107,37 @@ Scenario: Programming a programmable command is cancelled And I should see the 'Power' button is enabled And I should not see any error messages in the logs +Scenario: Programming a command is preempted by clicking a different programmable button + Given the application is in the Ready phase + When I click on the 'Learn' button + Then I should see the 'Mute' button is enabled + And I should see the 'Mute' button is not programmed + And I should see the 'Power' button is enabled + And I should see the 'Power' button is programmed + + # Start learning Mute + When I click on the 'Mute' button + Then I should see a modal message containing + """ +

Programming 'Mute'

+

Point your remote at the Broadlink device and press Mute.

+ """ + + # Click Power while Mute's modal is showing — should cancel Mute and start Power + When I click on the 'Power' button + Then I should see a modal message containing + """ +

Programming 'Power'

+

Point your remote at the Broadlink device and press Power.

+ """ + + # Complete Power's learning cycle + When I send the IR signal for Power to the Broadlink device + Then I should see the 'Power' button is programmed + And I should not see a modal message + + # Verify Mute was not programmed + When I click on the 'Learn' button + Then I should see the 'Mute' button is not programmed + And I should not see any error messages in the logs + From 09e1ac66da51df2a32f33cded8333066fb311aa0 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 19:12:59 -0700 Subject: [PATCH 10/13] GH-120: Fix ServiceFixture failing to find repo root in git worktrees Directory.Exists() returns false when .git is a file (git worktree layout). Path.Exists() accepts both files and directories, so it correctly finds the repo root in worktree checkouts, fixing all 29 AdaptiveRemote.Backend.ApiTests that were failing with "Could not find repository root (no .git directory found)". --- .../Backend/ServiceFixture.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/AdaptiveRemote.EndtoEndTests.TestServices/Backend/ServiceFixture.cs b/test/AdaptiveRemote.EndtoEndTests.TestServices/Backend/ServiceFixture.cs index 359575a0..7faccf55 100644 --- a/test/AdaptiveRemote.EndtoEndTests.TestServices/Backend/ServiceFixture.cs +++ b/test/AdaptiveRemote.EndtoEndTests.TestServices/Backend/ServiceFixture.cs @@ -48,7 +48,7 @@ public void StartService() // Find the repository root by looking for the .git directory string currentDir = Directory.GetCurrentDirectory(); string? repoRoot = currentDir; - while (repoRoot != null && !Directory.Exists(Path.Combine(repoRoot, ".git"))) + while (repoRoot != null && !Path.Exists(Path.Combine(repoRoot, ".git"))) { repoRoot = Directory.GetParent(repoRoot)?.FullName; } From 3e04ee53ce0a31812d918f7ec7d2a64f4f4b9db1 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 19:30:23 -0700 Subject: [PATCH 11/13] GH-120: Fix _currentLearnCts not cleared when WaitAsync throws Moved WaitAsync inside the try block with a bool acquired guard so Interlocked.CompareExchange always runs in finally on every exit path. Without this, an OperationCanceledException from WaitAsync (e.g. outer CT fires before semaphore acquired) would leave _currentLearnCts pointing to a disposed CTS, causing ObjectDisposedException on the next ProgramAsync call. --- .../Services/Broadlink/BroadlinkCommandService.cs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs b/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs index 4a278dff..6f10bbba 100644 --- a/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs +++ b/src/AdaptiveRemote.App/Services/Broadlink/BroadlinkCommandService.cs @@ -101,10 +101,11 @@ protected override Command.ExecuteDelegate CreateHandler(IRCommand command) await previousCts.CancelAsync(); } - await _learningSemaphore.WaitAsync(cancellationToken); - + bool acquired = false; try { + await _learningSemaphore.WaitAsync(cancellationToken); + acquired = true; await _modalMessageService.ShowMessageAsync(message, async ct => { using CancellationTokenSource timeoutCts = new(learnTimeout); @@ -147,7 +148,10 @@ await _modalMessageService.ShowMessageAsync(message, async ct => finally { Interlocked.CompareExchange(ref _currentLearnCts, null, learnCts); - _learningSemaphore.Release(); + if (acquired) + { + _learningSemaphore.Release(); + } } }; } From 0aa50cdf6bbfc38d523eb2b6d536d8d5fe1315a3 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 19:31:26 -0700 Subject: [PATCH 12/13] GH-120: Add times overload to Expect_ModalMessageService_ShowMessage Added an optional Times? parameter (defaulting to Times.Once()) so callers needing a different verification count don't have to bypass the helper. Updated DuplicateClick test to use Expect_ModalMessageService_ShowMessage with Times.Exactly(2) instead of raw MockModalMessageService.Setup(). --- .../Broadlink/BroadlinkCommandServiceTests.cs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs b/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs index 9ef4311b..8fad7c38 100644 --- a/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs +++ b/test/AdaptiveRemote.App.Tests/Services/Broadlink/BroadlinkCommandServiceTests.cs @@ -513,17 +513,7 @@ public void BroadlinkCommandService_ProgramAsync_DuplicateClick_CancelsPreviousA .Verifiable(Times.Exactly(2)); // Both calls use the same message; expect it to be shown twice - MockModalMessageService - .Setup(x => x.ShowMessageAsync( - Phrases.Broadlink_ProgrammingCommand(commandName), - It.IsAny>(), - It.IsAny(), - It.IsAny())) - .Returns(delegate (string msg, Func action, bool keepAlive, CancellationToken ct) - { - return action(ct); - }) - .Verifiable(Times.Exactly(2)); + Expect_ModalMessageService_ShowMessage(Phrases.Broadlink_ProgrammingCommand(commandName), Times.Exactly(2)); Expect_Connection_CheckLearnedData(learnedData); Expect_PersistSettings_Set($"IRData:{commandName}", expectedBase64); @@ -676,7 +666,7 @@ private void Expect_Connection_EnterLearningMode() .WithStandardTaskBehavior() .Verifiable(Times.Once); - private void Expect_ModalMessageService_ShowMessage(string expectedMessage) + private void Expect_ModalMessageService_ShowMessage(string expectedMessage, Times? times = null) => MockModalMessageService .Setup(x => x.ShowMessageAsync(expectedMessage, It.IsAny>(), It.IsAny(), It.IsAny())) .Returns(delegate (string message, Func action, bool keepAlive, CancellationToken ct) @@ -684,7 +674,7 @@ private void Expect_ModalMessageService_ShowMessage(string expectedMessage) Assert.AreEqual(expectedMessage, message, "Modal message should have the expected text"); return action(ct); }) - .Verifiable(Times.Once); + .Verifiable(times ?? Times.Once()); private void Expect_Connection_CheckLearnedData(params byte[]?[] returnSequence) { From 4be913dcaac9f0438ac9274d1b7124a3d182a279 Mon Sep 17 00:00:00 2001 From: Joe Davis Date: Fri, 22 May 2026 19:59:44 -0700 Subject: [PATCH 13/13] GH-120: Fix race: HandleEnterLearningAsync cleared data queued by test The modal appears before EnterLearningModeAsync completes, so the test could call ProvideLearnedData before the device's HandleEnterLearningAsync lock ran. If the lock then ran after, it cleared the queued data and all subsequent polls returned "no data available", leaving the modal open indefinitely. Removing the clear is correct: HandleCheckLearnedDataAsync already clears _pendingLearnedData when it is consumed, so the clear in HandleEnterLearningAsync served no purpose except to create this race. --- .../SimulatedBroadlink/SimulatedBroadlinkDevice.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/test/AdaptiveRemote.EndtoEndTests.TestServices/SimulatedBroadlink/SimulatedBroadlinkDevice.cs b/test/AdaptiveRemote.EndtoEndTests.TestServices/SimulatedBroadlink/SimulatedBroadlinkDevice.cs index 1857e2e1..248c9bfc 100644 --- a/test/AdaptiveRemote.EndtoEndTests.TestServices/SimulatedBroadlink/SimulatedBroadlinkDevice.cs +++ b/test/AdaptiveRemote.EndtoEndTests.TestServices/SimulatedBroadlink/SimulatedBroadlinkDevice.cs @@ -388,7 +388,6 @@ private async Task HandleEnterLearningAsync(DecodedPacket packet, IPEndPoint rem lock (_learningLock) { _isInLearningMode = true; - _pendingLearnedData = null; // Clear any previous learned data } // Record the packet