From f3481d5aaff1645ec1804bc520fee472fc28a966 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 8 Apr 2026 00:30:14 +0000 Subject: [PATCH 01/13] Add eval, dataset-qa, and unlearning CLI sub-commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement three new typer sub-apps following the skills.py pattern: - `hirundo eval run/list/check` — launch/list/monitor LLM behavior eval runs via LlmBehaviorEval.launch_eval_run / check_run_by_id / list_runs - `hirundo dataset-qa run/list/check` — launch/list/monitor Dataset QA runs via QADataset.launch_qa_run / check_run_by_id / list_runs - `hirundo unlearning run/list/check` — launch/list/monitor LLM unlearning runs via LlmUnlearningRun.launch / check_run_by_id / list Each `run` command accepts --wait/--no-wait (default: wait) to either stream tqdm progress or immediately return the run ID for scripting. Closes #220 https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/cli.py | 7 ++ hirundo/cli_dataset_qa.py | 96 +++++++++++++++++++++++ hirundo/cli_eval.py | 138 ++++++++++++++++++++++++++++++++ hirundo/cli_unlearning.py | 161 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 402 insertions(+) create mode 100644 hirundo/cli_dataset_qa.py create mode 100644 hirundo/cli_eval.py create mode 100644 hirundo/cli_unlearning.py diff --git a/hirundo/cli.py b/hirundo/cli.py index 6714e596..76fbc439 100644 --- a/hirundo/cli.py +++ b/hirundo/cli.py @@ -10,6 +10,9 @@ from rich.table import Table from hirundo._env import API_HOST, EnvLocation +from hirundo.cli_dataset_qa import dataset_qa_app +from hirundo.cli_eval import eval_app +from hirundo.cli_unlearning import unlearning_app docs = "sphinx" in sys.modules hirundo_epilog = ( @@ -26,6 +29,10 @@ epilog=hirundo_epilog, ) +app.add_typer(eval_app, name="eval") +app.add_typer(dataset_qa_app, name="dataset-qa") +app.add_typer(unlearning_app, name="unlearning") + def _upsert_env(dotenv_filepath: str | Path, var_name: str, var_value: str): """ diff --git a/hirundo/cli_dataset_qa.py b/hirundo/cli_dataset_qa.py new file mode 100644 index 00000000..c47f1224 --- /dev/null +++ b/hirundo/cli_dataset_qa.py @@ -0,0 +1,96 @@ +""" +CLI sub-app for Dataset QA commands. + +Commands: + hirundo dataset-qa run - Launch a Dataset QA run + hirundo dataset-qa list - List Dataset QA runs + hirundo dataset-qa check - Check the status of a Dataset QA run +""" + +import sys +from typing import Annotated + +import typer +from rich.console import Console +from rich.table import Table + +docs = "sphinx" in sys.modules +dataset_qa_epilog = ( + None + if docs + else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." +) + +console = Console() + +dataset_qa_app = typer.Typer( + name="dataset-qa", + no_args_is_help=True, + rich_markup_mode="rich", + epilog=dataset_qa_epilog, + help="Launch and monitor Dataset QA runs.", +) + + +@dataset_qa_app.command("run", epilog=dataset_qa_epilog) +def dataset_qa_run( + dataset_id: Annotated[int, typer.Argument(help="ID of the dataset to run QA on.")], + wait: Annotated[ + bool, + typer.Option("--wait/--no-wait", help="Wait for the run to complete and stream progress."), + ] = True, +): + """ + Launch a Dataset QA run on the dataset with the given ID. + """ + from hirundo.dataset_qa import QADataset + + run_id = QADataset.launch_qa_run(dataset_id) + console.print(f"Dataset QA run started. Run ID: [bold]{run_id}[/bold]") + + if wait: + QADataset.check_run_by_id(run_id) + else: + console.print( + "Use [bold]hirundo dataset-qa check[/bold] [italic][/italic] to monitor progress." + ) + + +@dataset_qa_app.command("list", epilog=dataset_qa_epilog) +def dataset_qa_list( + archived: Annotated[ + bool, + typer.Option("--archived/--no-archived", help="Include archived runs."), + ] = False, +): + """ + List Dataset QA runs. + """ + from hirundo.dataset_qa import QADataset + + runs = QADataset.list_runs(archived=archived) + + table = Table(title="Dataset QA Runs:", expand=True) + for col in ("Dataset Name", "Run ID", "Status", "Created At", "Run Args"): + table.add_column(col, overflow="fold") + for run in runs: + table.add_row( + str(run.name), + str(run.run_id), + str(run.status), + run.created_at.isoformat(), + run.run_args.model_dump_json() if run.run_args else None, + ) + console.print(table) + + +@dataset_qa_app.command("check", epilog=dataset_qa_epilog) +def dataset_qa_check( + run_id: Annotated[str, typer.Argument(help="The run ID to check.")], +): + """ + Check the status of a Dataset QA run and stream progress. + """ + from hirundo.dataset_qa import QADataset + + QADataset.check_run_by_id(run_id) diff --git a/hirundo/cli_eval.py b/hirundo/cli_eval.py new file mode 100644 index 00000000..ab019a0f --- /dev/null +++ b/hirundo/cli_eval.py @@ -0,0 +1,138 @@ +""" +CLI sub-app for LLM behavior evaluation commands. + +Commands: + hirundo eval run - Launch an LLM behavior evaluation run + hirundo eval list - List evaluation runs + hirundo eval check - Check the status of an evaluation run +""" + +import sys +from typing import Annotated, Optional + +import typer +from rich.console import Console +from rich.table import Table + +docs = "sphinx" in sys.modules +eval_epilog = ( + None + if docs + else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." +) + +console = Console() + +eval_app = typer.Typer( + name="eval", + no_args_is_help=True, + rich_markup_mode="rich", + epilog=eval_epilog, + help="Launch and monitor LLM behavior evaluation runs.", +) + + +@eval_app.command("run", epilog=eval_epilog) +def eval_run( + preset: Annotated[ + str, + typer.Option( + "--preset", + help="Evaluation preset. One of: BBQ_BIAS, BBQ_UNBIAS, UNQOVER_BIAS, HALU_EVAL, MED_HALLU, INJECTION_EVAL", + ), + ], + model_id: Annotated[ + Optional[int], + typer.Option("--model-id", help="ID of the LLM model to evaluate."), + ] = None, + source_run_id: Annotated[ + Optional[str], + typer.Option("--source-run-id", help="ID of the unlearning run to evaluate."), + ] = None, + name: Annotated[ + Optional[str], + typer.Option("--name", help="Optional name for this evaluation run."), + ] = None, + wait: Annotated[ + bool, + typer.Option("--wait/--no-wait", help="Wait for the run to complete and stream progress."), + ] = True, +): + """ + Launch an LLM behavior evaluation run. + + Either --model-id or --source-run-id must be provided. + """ + from hirundo.llm_behavior_eval import EvalRunInfo, LlmBehaviorEval, ModelOrRun, PresetType + + if model_id is None and source_run_id is None: + console.print("[red]Error: either --model-id or --source-run-id must be provided.[/red]") + raise typer.Exit(code=1) + if model_id is not None and source_run_id is not None: + console.print("[red]Error: only one of --model-id or --source-run-id may be provided.[/red]") + raise typer.Exit(code=1) + + try: + preset_type = PresetType(preset.upper()) + except ValueError: + valid = ", ".join(p.value for p in PresetType) + console.print(f"[red]Invalid preset '{preset}'. Valid options: {valid}[/red]") + raise typer.Exit(code=1) + + model_or_run = ModelOrRun.MODEL if model_id is not None else ModelOrRun.RUN + run_info = EvalRunInfo( + model_id=model_id, + source_run_id=source_run_id, + preset_type=preset_type, + name=name, + ) + + run_id = LlmBehaviorEval.launch_eval_run(model_or_run, run_info) + console.print(f"Eval run started. Run ID: [bold]{run_id}[/bold]") + + if wait: + LlmBehaviorEval.check_run_by_id(run_id) + else: + console.print( + "Use [bold]hirundo eval check[/bold] [italic][/italic] to monitor progress." + ) + + +@eval_app.command("list", epilog=eval_epilog) +def eval_list( + archived: Annotated[ + bool, + typer.Option("--archived/--no-archived", help="Include archived runs."), + ] = False, +): + """ + List LLM behavior evaluation runs. + """ + from hirundo.llm_behavior_eval import LlmBehaviorEval + + runs = LlmBehaviorEval.list_runs(archived=archived) + + table = Table(title="Eval Runs:", expand=True) + for col in ("Run ID", "Name", "Status", "Preset", "Created At"): + table.add_column(col, overflow="fold") + for run in runs: + table.add_row( + str(run.run_id), + str(run.name), + str(run.status), + run.preset_type.value if run.preset_type else None, + run.created_at.isoformat(), + ) + console.print(table) + + +@eval_app.command("check", epilog=eval_epilog) +def eval_check( + run_id: Annotated[str, typer.Argument(help="The run ID to check.")], +): + """ + Check the status of an LLM behavior evaluation run and stream progress. + """ + from hirundo.llm_behavior_eval import LlmBehaviorEval + + LlmBehaviorEval.check_run_by_id(run_id) diff --git a/hirundo/cli_unlearning.py b/hirundo/cli_unlearning.py new file mode 100644 index 00000000..6be6fed2 --- /dev/null +++ b/hirundo/cli_unlearning.py @@ -0,0 +1,161 @@ +""" +CLI sub-app for LLM unlearning commands. + +Commands: + hirundo unlearning run - Launch an LLM unlearning run + hirundo unlearning list - List LLM unlearning runs + hirundo unlearning check - Check the status of an LLM unlearning run +""" + +import sys +from typing import Annotated, Optional + +import typer +from rich.console import Console +from rich.table import Table + +docs = "sphinx" in sys.modules +unlearning_epilog = ( + None + if docs + else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." +) + +console = Console() + +unlearning_app = typer.Typer( + name="unlearning", + no_args_is_help=True, + rich_markup_mode="rich", + epilog=unlearning_epilog, + help="Launch and monitor LLM unlearning runs.", +) + + +@unlearning_app.command("run", epilog=unlearning_epilog) +def unlearning_run( + model_id: Annotated[int, typer.Argument(help="ID of the LLM model to unlearn.")], + bias_type: Annotated[ + Optional[str], + typer.Option( + "--bias-type", + help="Bias type for unlearning. One of: ALL, RACE, NATIONALITY, GENDER, PHYSICAL_APPEARANCE, RELIGION, AGE", + ), + ] = None, + hallucination_type: Annotated[ + Optional[str], + typer.Option( + "--hallucination-type", + help="Hallucination type for unlearning. One of: GENERAL, MEDICAL, LEGAL, DEFENSE", + ), + ] = None, + name: Annotated[ + Optional[str], + typer.Option("--name", help="Optional name for this unlearning run."), + ] = None, + wait: Annotated[ + bool, + typer.Option("--wait/--no-wait", help="Wait for the run to complete and stream progress."), + ] = True, +): + """ + Launch an LLM unlearning run. + + Exactly one of --bias-type or --hallucination-type must be provided. + """ + from hirundo.llm_bias_type import BBQBiasType + from hirundo.unlearning_llm import ( + BiasBehavior, + DefaultUtility, + HallucinationBehavior, + HallucinationType, + LlmRunInfo, + LlmUnlearningRun, + ) + + if bias_type is None and hallucination_type is None: + console.print( + "[red]Error: either --bias-type or --hallucination-type must be provided.[/red]" + ) + raise typer.Exit(code=1) + if bias_type is not None and hallucination_type is not None: + console.print( + "[red]Error: only one of --bias-type or --hallucination-type may be provided.[/red]" + ) + raise typer.Exit(code=1) + + if bias_type is not None: + try: + bias_type_enum = BBQBiasType(bias_type.upper()) + except ValueError: + valid = ", ".join(b.value for b in BBQBiasType) + console.print( + f"[red]Invalid bias type '{bias_type}'. Valid options: {valid}[/red]" + ) + raise typer.Exit(code=1) + target_behavior = BiasBehavior(bias_type=bias_type_enum) + else: + try: + hallucination_type_enum = HallucinationType(hallucination_type.upper()) + except ValueError: + valid = ", ".join(h.value for h in HallucinationType) + console.print( + f"[red]Invalid hallucination type '{hallucination_type}'. Valid options: {valid}[/red]" + ) + raise typer.Exit(code=1) + target_behavior = HallucinationBehavior(hallucination_type=hallucination_type_enum) + + run_info = LlmRunInfo( + name=name, + target_behaviors=[target_behavior], + target_utilities=[DefaultUtility()], + ) + + run_id = LlmUnlearningRun.launch(model_id, run_info) + console.print(f"Unlearning run started. Run ID: [bold]{run_id}[/bold]") + + if wait: + LlmUnlearningRun.check_run_by_id(run_id) + else: + console.print( + "Use [bold]hirundo unlearning check[/bold] [italic][/italic] to monitor progress." + ) + + +@unlearning_app.command("list", epilog=unlearning_epilog) +def unlearning_list( + archived: Annotated[ + bool, + typer.Option("--archived/--no-archived", help="Include archived runs."), + ] = False, +): + """ + List LLM unlearning runs. + """ + from hirundo.unlearning_llm import LlmUnlearningRun + + runs = LlmUnlearningRun.list(archived=archived) + + table = Table(title="Unlearning Runs:", expand=True) + for col in ("Name", "Run ID", "Status", "Created At"): + table.add_column(col, overflow="fold") + for run in runs: + table.add_row( + str(run.name), + str(run.run_id), + str(run.status), + run.created_at.isoformat(), + ) + console.print(table) + + +@unlearning_app.command("check", epilog=unlearning_epilog) +def unlearning_check( + run_id: Annotated[str, typer.Argument(help="The run ID to check.")], +): + """ + Check the status of an LLM unlearning run and stream progress. + """ + from hirundo.unlearning_llm import LlmUnlearningRun + + LlmUnlearningRun.check_run_by_id(run_id) From 7b375da6420812a7ec4c0cfb80b29bffa4468ec4 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 10 Apr 2026 00:27:20 +0000 Subject: [PATCH 02/13] Deduplicate CLI boilerplate into _cli_common module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract repeated docs/epilog/console/Typer setup from the three CLI sub-apps into hirundo/_cli_common.py: - hirundo_epilog, docs, console — computed/instantiated once - make_app() — factory for sub-app Typer instances - validate_enum() — replaces copy-paste try/except enum coercion Remove verbose module docstrings that duplicated typer --help output. Remove local Console() instantiation from cli.py list_runs(). https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/_cli_common.py | 32 ++++++++++++++++++++++ hirundo/cli.py | 11 +------- hirundo/cli_dataset_qa.py | 34 ++++-------------------- hirundo/cli_eval.py | 42 +++++------------------------ hirundo/cli_unlearning.py | 56 +++++++-------------------------------- 5 files changed, 53 insertions(+), 122 deletions(-) create mode 100644 hirundo/_cli_common.py diff --git a/hirundo/_cli_common.py b/hirundo/_cli_common.py new file mode 100644 index 00000000..c564588f --- /dev/null +++ b/hirundo/_cli_common.py @@ -0,0 +1,32 @@ +import sys + +import typer +from rich.console import Console + +docs = "sphinx" in sys.modules +hirundo_epilog = ( + None + if docs + else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." +) + +console = Console() + + +def make_app(name: str, help: str) -> typer.Typer: + return typer.Typer( + name=name, + no_args_is_help=True, + rich_markup_mode="rich", + epilog=hirundo_epilog, + help=help, + ) + + +def validate_enum(value: str, enum_cls, label: str): + try: + return enum_cls(value.upper()) + except ValueError: + valid = ", ".join(e.value for e in enum_cls) + console.print(f"[red]Invalid {label} '{value}'. Valid options: {valid}[/red]") + raise typer.Exit(code=1) diff --git a/hirundo/cli.py b/hirundo/cli.py index 76fbc439..24e8bd60 100644 --- a/hirundo/cli.py +++ b/hirundo/cli.py @@ -1,26 +1,18 @@ import os import re -import sys from pathlib import Path from typing import Annotated from urllib.parse import urlparse import typer -from rich.console import Console from rich.table import Table +from hirundo._cli_common import console, docs, hirundo_epilog from hirundo._env import API_HOST, EnvLocation from hirundo.cli_dataset_qa import dataset_qa_app from hirundo.cli_eval import eval_app from hirundo.cli_unlearning import unlearning_app -docs = "sphinx" in sys.modules -hirundo_epilog = ( - None - if docs - else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." -) - app = typer.Typer( name="hirundo", @@ -217,7 +209,6 @@ def list_runs(): runs = QADataset.list_runs() - console = Console() table = Table( title="Runs:", expand=True, diff --git a/hirundo/cli_dataset_qa.py b/hirundo/cli_dataset_qa.py index c47f1224..45e43a95 100644 --- a/hirundo/cli_dataset_qa.py +++ b/hirundo/cli_dataset_qa.py @@ -1,38 +1,14 @@ -""" -CLI sub-app for Dataset QA commands. - -Commands: - hirundo dataset-qa run - Launch a Dataset QA run - hirundo dataset-qa list - List Dataset QA runs - hirundo dataset-qa check - Check the status of a Dataset QA run -""" - -import sys from typing import Annotated import typer -from rich.console import Console from rich.table import Table -docs = "sphinx" in sys.modules -dataset_qa_epilog = ( - None - if docs - else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." -) - -console = Console() +from hirundo._cli_common import console, hirundo_epilog, make_app -dataset_qa_app = typer.Typer( - name="dataset-qa", - no_args_is_help=True, - rich_markup_mode="rich", - epilog=dataset_qa_epilog, - help="Launch and monitor Dataset QA runs.", -) +dataset_qa_app = make_app("dataset-qa", "Launch and monitor Dataset QA runs.") -@dataset_qa_app.command("run", epilog=dataset_qa_epilog) +@dataset_qa_app.command("run", epilog=hirundo_epilog) def dataset_qa_run( dataset_id: Annotated[int, typer.Argument(help="ID of the dataset to run QA on.")], wait: Annotated[ @@ -56,7 +32,7 @@ def dataset_qa_run( ) -@dataset_qa_app.command("list", epilog=dataset_qa_epilog) +@dataset_qa_app.command("list", epilog=hirundo_epilog) def dataset_qa_list( archived: Annotated[ bool, @@ -84,7 +60,7 @@ def dataset_qa_list( console.print(table) -@dataset_qa_app.command("check", epilog=dataset_qa_epilog) +@dataset_qa_app.command("check", epilog=hirundo_epilog) def dataset_qa_check( run_id: Annotated[str, typer.Argument(help="The run ID to check.")], ): diff --git a/hirundo/cli_eval.py b/hirundo/cli_eval.py index ab019a0f..e54fa737 100644 --- a/hirundo/cli_eval.py +++ b/hirundo/cli_eval.py @@ -1,38 +1,14 @@ -""" -CLI sub-app for LLM behavior evaluation commands. - -Commands: - hirundo eval run - Launch an LLM behavior evaluation run - hirundo eval list - List evaluation runs - hirundo eval check - Check the status of an evaluation run -""" - -import sys from typing import Annotated, Optional import typer -from rich.console import Console from rich.table import Table -docs = "sphinx" in sys.modules -eval_epilog = ( - None - if docs - else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." -) - -console = Console() +from hirundo._cli_common import console, hirundo_epilog, make_app, validate_enum -eval_app = typer.Typer( - name="eval", - no_args_is_help=True, - rich_markup_mode="rich", - epilog=eval_epilog, - help="Launch and monitor LLM behavior evaluation runs.", -) +eval_app = make_app("eval", "Launch and monitor LLM behavior evaluation runs.") -@eval_app.command("run", epilog=eval_epilog) +@eval_app.command("run", epilog=hirundo_epilog) def eval_run( preset: Annotated[ str, @@ -72,13 +48,7 @@ def eval_run( console.print("[red]Error: only one of --model-id or --source-run-id may be provided.[/red]") raise typer.Exit(code=1) - try: - preset_type = PresetType(preset.upper()) - except ValueError: - valid = ", ".join(p.value for p in PresetType) - console.print(f"[red]Invalid preset '{preset}'. Valid options: {valid}[/red]") - raise typer.Exit(code=1) - + preset_type = validate_enum(preset, PresetType, "preset") model_or_run = ModelOrRun.MODEL if model_id is not None else ModelOrRun.RUN run_info = EvalRunInfo( model_id=model_id, @@ -98,7 +68,7 @@ def eval_run( ) -@eval_app.command("list", epilog=eval_epilog) +@eval_app.command("list", epilog=hirundo_epilog) def eval_list( archived: Annotated[ bool, @@ -126,7 +96,7 @@ def eval_list( console.print(table) -@eval_app.command("check", epilog=eval_epilog) +@eval_app.command("check", epilog=hirundo_epilog) def eval_check( run_id: Annotated[str, typer.Argument(help="The run ID to check.")], ): diff --git a/hirundo/cli_unlearning.py b/hirundo/cli_unlearning.py index 6be6fed2..d920f36a 100644 --- a/hirundo/cli_unlearning.py +++ b/hirundo/cli_unlearning.py @@ -1,38 +1,14 @@ -""" -CLI sub-app for LLM unlearning commands. - -Commands: - hirundo unlearning run - Launch an LLM unlearning run - hirundo unlearning list - List LLM unlearning runs - hirundo unlearning check - Check the status of an LLM unlearning run -""" - -import sys from typing import Annotated, Optional import typer -from rich.console import Console from rich.table import Table -docs = "sphinx" in sys.modules -unlearning_epilog = ( - None - if docs - else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information." -) +from hirundo._cli_common import console, hirundo_epilog, make_app, validate_enum -console = Console() +unlearning_app = make_app("unlearning", "Launch and monitor LLM unlearning runs.") -unlearning_app = typer.Typer( - name="unlearning", - no_args_is_help=True, - rich_markup_mode="rich", - epilog=unlearning_epilog, - help="Launch and monitor LLM unlearning runs.", -) - -@unlearning_app.command("run", epilog=unlearning_epilog) +@unlearning_app.command("run", epilog=hirundo_epilog) def unlearning_run( model_id: Annotated[int, typer.Argument(help="ID of the LLM model to unlearn.")], bias_type: Annotated[ @@ -85,25 +61,11 @@ def unlearning_run( raise typer.Exit(code=1) if bias_type is not None: - try: - bias_type_enum = BBQBiasType(bias_type.upper()) - except ValueError: - valid = ", ".join(b.value for b in BBQBiasType) - console.print( - f"[red]Invalid bias type '{bias_type}'. Valid options: {valid}[/red]" - ) - raise typer.Exit(code=1) - target_behavior = BiasBehavior(bias_type=bias_type_enum) + target_behavior = BiasBehavior(bias_type=validate_enum(bias_type, BBQBiasType, "bias type")) else: - try: - hallucination_type_enum = HallucinationType(hallucination_type.upper()) - except ValueError: - valid = ", ".join(h.value for h in HallucinationType) - console.print( - f"[red]Invalid hallucination type '{hallucination_type}'. Valid options: {valid}[/red]" - ) - raise typer.Exit(code=1) - target_behavior = HallucinationBehavior(hallucination_type=hallucination_type_enum) + target_behavior = HallucinationBehavior( + hallucination_type=validate_enum(hallucination_type, HallucinationType, "hallucination type") + ) run_info = LlmRunInfo( name=name, @@ -122,7 +84,7 @@ def unlearning_run( ) -@unlearning_app.command("list", epilog=unlearning_epilog) +@unlearning_app.command("list", epilog=hirundo_epilog) def unlearning_list( archived: Annotated[ bool, @@ -149,7 +111,7 @@ def unlearning_list( console.print(table) -@unlearning_app.command("check", epilog=unlearning_epilog) +@unlearning_app.command("check", epilog=hirundo_epilog) def unlearning_check( run_id: Annotated[str, typer.Argument(help="The run ID to check.")], ): From 43afadcae526b1d174c42aba6f97ac6d79933a5c Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 21:43:27 +0000 Subject: [PATCH 03/13] Fix CI: ruff, basedpyright, and module-docs failures ruff: - Rename make_app `help` param to `help_text` (A002 builtin shadow) - raise typer.Exit(code=1) from None in validate_enum (B904) - Remove unused Optional imports now replaced by X | None (F401) - Fix import ordering (I001, auto-fixed) - Convert Optional[X] to X | None (UP045, auto-fixed) basedpyright: - Use elif hallucination_type is not None + exhaustive else to let pyright narrow the str | None type before passing to validate_enum module-docs: - Add .rst files for the three new public CLI modules https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- docs/hirundo.cli_dataset_qa.rst | 10 ++++++++++ docs/hirundo.cli_eval.rst | 10 ++++++++++ docs/hirundo.cli_unlearning.rst | 10 ++++++++++ hirundo/_cli_common.py | 6 +++--- hirundo/cli_eval.py | 15 ++++++++++----- hirundo/cli_unlearning.py | 12 +++++++----- 6 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 docs/hirundo.cli_dataset_qa.rst create mode 100644 docs/hirundo.cli_eval.rst create mode 100644 docs/hirundo.cli_unlearning.rst diff --git a/docs/hirundo.cli_dataset_qa.rst b/docs/hirundo.cli_dataset_qa.rst new file mode 100644 index 00000000..2144fdf0 --- /dev/null +++ b/docs/hirundo.cli_dataset_qa.rst @@ -0,0 +1,10 @@ +.. meta:: + :http-equiv=Content-Security-Policy: default-src 'self', frame-ancestors 'none' + +hirundo.cli_dataset_qa module +============================= + +.. automodule:: hirundo.cli_dataset_qa + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/hirundo.cli_eval.rst b/docs/hirundo.cli_eval.rst new file mode 100644 index 00000000..85c429f3 --- /dev/null +++ b/docs/hirundo.cli_eval.rst @@ -0,0 +1,10 @@ +.. meta:: + :http-equiv=Content-Security-Policy: default-src 'self', frame-ancestors 'none' + +hirundo.cli_eval module +======================= + +.. automodule:: hirundo.cli_eval + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/hirundo.cli_unlearning.rst b/docs/hirundo.cli_unlearning.rst new file mode 100644 index 00000000..1b54735e --- /dev/null +++ b/docs/hirundo.cli_unlearning.rst @@ -0,0 +1,10 @@ +.. meta:: + :http-equiv=Content-Security-Policy: default-src 'self', frame-ancestors 'none' + +hirundo.cli_unlearning module +============================= + +.. automodule:: hirundo.cli_unlearning + :members: + :undoc-members: + :show-inheritance: diff --git a/hirundo/_cli_common.py b/hirundo/_cli_common.py index c564588f..154bc891 100644 --- a/hirundo/_cli_common.py +++ b/hirundo/_cli_common.py @@ -13,13 +13,13 @@ console = Console() -def make_app(name: str, help: str) -> typer.Typer: +def make_app(name: str, help_text: str) -> typer.Typer: return typer.Typer( name=name, no_args_is_help=True, rich_markup_mode="rich", epilog=hirundo_epilog, - help=help, + help=help_text, ) @@ -29,4 +29,4 @@ def validate_enum(value: str, enum_cls, label: str): except ValueError: valid = ", ".join(e.value for e in enum_cls) console.print(f"[red]Invalid {label} '{value}'. Valid options: {valid}[/red]") - raise typer.Exit(code=1) + raise typer.Exit(code=1) from None diff --git a/hirundo/cli_eval.py b/hirundo/cli_eval.py index e54fa737..ce2416ce 100644 --- a/hirundo/cli_eval.py +++ b/hirundo/cli_eval.py @@ -1,4 +1,4 @@ -from typing import Annotated, Optional +from typing import Annotated import typer from rich.table import Table @@ -18,15 +18,15 @@ def eval_run( ), ], model_id: Annotated[ - Optional[int], + int | None, typer.Option("--model-id", help="ID of the LLM model to evaluate."), ] = None, source_run_id: Annotated[ - Optional[str], + str | None, typer.Option("--source-run-id", help="ID of the unlearning run to evaluate."), ] = None, name: Annotated[ - Optional[str], + str | None, typer.Option("--name", help="Optional name for this evaluation run."), ] = None, wait: Annotated[ @@ -39,7 +39,12 @@ def eval_run( Either --model-id or --source-run-id must be provided. """ - from hirundo.llm_behavior_eval import EvalRunInfo, LlmBehaviorEval, ModelOrRun, PresetType + from hirundo.llm_behavior_eval import ( + EvalRunInfo, + LlmBehaviorEval, + ModelOrRun, + PresetType, + ) if model_id is None and source_run_id is None: console.print("[red]Error: either --model-id or --source-run-id must be provided.[/red]") diff --git a/hirundo/cli_unlearning.py b/hirundo/cli_unlearning.py index d920f36a..4c228af1 100644 --- a/hirundo/cli_unlearning.py +++ b/hirundo/cli_unlearning.py @@ -1,4 +1,4 @@ -from typing import Annotated, Optional +from typing import Annotated import typer from rich.table import Table @@ -12,21 +12,21 @@ def unlearning_run( model_id: Annotated[int, typer.Argument(help="ID of the LLM model to unlearn.")], bias_type: Annotated[ - Optional[str], + str | None, typer.Option( "--bias-type", help="Bias type for unlearning. One of: ALL, RACE, NATIONALITY, GENDER, PHYSICAL_APPEARANCE, RELIGION, AGE", ), ] = None, hallucination_type: Annotated[ - Optional[str], + str | None, typer.Option( "--hallucination-type", help="Hallucination type for unlearning. One of: GENERAL, MEDICAL, LEGAL, DEFENSE", ), ] = None, name: Annotated[ - Optional[str], + str | None, typer.Option("--name", help="Optional name for this unlearning run."), ] = None, wait: Annotated[ @@ -62,10 +62,12 @@ def unlearning_run( if bias_type is not None: target_behavior = BiasBehavior(bias_type=validate_enum(bias_type, BBQBiasType, "bias type")) - else: + elif hallucination_type is not None: target_behavior = HallucinationBehavior( hallucination_type=validate_enum(hallucination_type, HallucinationType, "hallucination type") ) + else: + raise typer.Exit(code=1) from None run_info = LlmRunInfo( name=name, From d235f6bf9c44dd667f5b730bda438ce374069340 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 21:43:42 +0000 Subject: [PATCH 04/13] Remove extra blank line in cli.py (linter) https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hirundo/cli.py b/hirundo/cli.py index 24e8bd60..ea4d841e 100644 --- a/hirundo/cli.py +++ b/hirundo/cli.py @@ -13,7 +13,6 @@ from hirundo.cli_eval import eval_app from hirundo.cli_unlearning import unlearning_app - app = typer.Typer( name="hirundo", no_args_is_help=True, From f1a4e0f5a901f40e712e8c6e95356a6b5197f6fc Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 21:47:56 +0000 Subject: [PATCH 05/13] Fix ruff format failures https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/cli_dataset_qa.py | 4 +++- hirundo/cli_eval.py | 12 +++++++++--- hirundo/cli_unlearning.py | 12 +++++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/hirundo/cli_dataset_qa.py b/hirundo/cli_dataset_qa.py index 45e43a95..89cf5209 100644 --- a/hirundo/cli_dataset_qa.py +++ b/hirundo/cli_dataset_qa.py @@ -13,7 +13,9 @@ def dataset_qa_run( dataset_id: Annotated[int, typer.Argument(help="ID of the dataset to run QA on.")], wait: Annotated[ bool, - typer.Option("--wait/--no-wait", help="Wait for the run to complete and stream progress."), + typer.Option( + "--wait/--no-wait", help="Wait for the run to complete and stream progress." + ), ] = True, ): """ diff --git a/hirundo/cli_eval.py b/hirundo/cli_eval.py index ce2416ce..2fdf2c2b 100644 --- a/hirundo/cli_eval.py +++ b/hirundo/cli_eval.py @@ -31,7 +31,9 @@ def eval_run( ] = None, wait: Annotated[ bool, - typer.Option("--wait/--no-wait", help="Wait for the run to complete and stream progress."), + typer.Option( + "--wait/--no-wait", help="Wait for the run to complete and stream progress." + ), ] = True, ): """ @@ -47,10 +49,14 @@ def eval_run( ) if model_id is None and source_run_id is None: - console.print("[red]Error: either --model-id or --source-run-id must be provided.[/red]") + console.print( + "[red]Error: either --model-id or --source-run-id must be provided.[/red]" + ) raise typer.Exit(code=1) if model_id is not None and source_run_id is not None: - console.print("[red]Error: only one of --model-id or --source-run-id may be provided.[/red]") + console.print( + "[red]Error: only one of --model-id or --source-run-id may be provided.[/red]" + ) raise typer.Exit(code=1) preset_type = validate_enum(preset, PresetType, "preset") diff --git a/hirundo/cli_unlearning.py b/hirundo/cli_unlearning.py index 4c228af1..987cbf4a 100644 --- a/hirundo/cli_unlearning.py +++ b/hirundo/cli_unlearning.py @@ -31,7 +31,9 @@ def unlearning_run( ] = None, wait: Annotated[ bool, - typer.Option("--wait/--no-wait", help="Wait for the run to complete and stream progress."), + typer.Option( + "--wait/--no-wait", help="Wait for the run to complete and stream progress." + ), ] = True, ): """ @@ -61,10 +63,14 @@ def unlearning_run( raise typer.Exit(code=1) if bias_type is not None: - target_behavior = BiasBehavior(bias_type=validate_enum(bias_type, BBQBiasType, "bias type")) + target_behavior = BiasBehavior( + bias_type=validate_enum(bias_type, BBQBiasType, "bias type") + ) elif hallucination_type is not None: target_behavior = HallucinationBehavior( - hallucination_type=validate_enum(hallucination_type, HallucinationType, "hallucination type") + hallucination_type=validate_enum( + hallucination_type, HallucinationType, "hallucination type" + ) ) else: raise typer.Exit(code=1) from None From 23e9cc8bff1b2e229ba40e183efd295ac3e2eac0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 21:52:56 +0000 Subject: [PATCH 06/13] Validate run_id in check commands to prevent path traversal run_id flows into cache_dir / f"{run_id}.zip" in unzip.py. A malicious value like ../../evil would escape ~/.hirundo/cache/. Add validate_run_id() to _cli_common which rejects anything outside [a-zA-Z0-9_-] (covers UUIDs and integer IDs). Apply it in the check command of each sub-app before passing run_id to the SDK. https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/_cli_common.py | 13 +++++++++++++ hirundo/cli_dataset_qa.py | 4 ++-- hirundo/cli_eval.py | 10 ++++++++-- hirundo/cli_unlearning.py | 10 ++++++++-- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/hirundo/_cli_common.py b/hirundo/_cli_common.py index 154bc891..183ee4e0 100644 --- a/hirundo/_cli_common.py +++ b/hirundo/_cli_common.py @@ -1,8 +1,11 @@ +import re import sys import typer from rich.console import Console +_RUN_ID_RE = re.compile(r"^[a-zA-Z0-9_-]+$") + docs = "sphinx" in sys.modules hirundo_epilog = ( None @@ -23,6 +26,16 @@ def make_app(name: str, help_text: str) -> typer.Typer: ) +def validate_run_id(run_id: str) -> str: + if not _RUN_ID_RE.match(run_id): + console.print( + f"[red]Invalid run ID '{run_id}'. " + "Run IDs may only contain alphanumeric characters, hyphens, and underscores.[/red]" + ) + raise typer.Exit(code=1) from None + return run_id + + def validate_enum(value: str, enum_cls, label: str): try: return enum_cls(value.upper()) diff --git a/hirundo/cli_dataset_qa.py b/hirundo/cli_dataset_qa.py index 89cf5209..0954323b 100644 --- a/hirundo/cli_dataset_qa.py +++ b/hirundo/cli_dataset_qa.py @@ -3,7 +3,7 @@ import typer from rich.table import Table -from hirundo._cli_common import console, hirundo_epilog, make_app +from hirundo._cli_common import console, hirundo_epilog, make_app, validate_run_id dataset_qa_app = make_app("dataset-qa", "Launch and monitor Dataset QA runs.") @@ -71,4 +71,4 @@ def dataset_qa_check( """ from hirundo.dataset_qa import QADataset - QADataset.check_run_by_id(run_id) + QADataset.check_run_by_id(validate_run_id(run_id)) diff --git a/hirundo/cli_eval.py b/hirundo/cli_eval.py index 2fdf2c2b..5e4b49e0 100644 --- a/hirundo/cli_eval.py +++ b/hirundo/cli_eval.py @@ -3,7 +3,13 @@ import typer from rich.table import Table -from hirundo._cli_common import console, hirundo_epilog, make_app, validate_enum +from hirundo._cli_common import ( + console, + hirundo_epilog, + make_app, + validate_enum, + validate_run_id, +) eval_app = make_app("eval", "Launch and monitor LLM behavior evaluation runs.") @@ -116,4 +122,4 @@ def eval_check( """ from hirundo.llm_behavior_eval import LlmBehaviorEval - LlmBehaviorEval.check_run_by_id(run_id) + LlmBehaviorEval.check_run_by_id(validate_run_id(run_id)) diff --git a/hirundo/cli_unlearning.py b/hirundo/cli_unlearning.py index 987cbf4a..955aaefd 100644 --- a/hirundo/cli_unlearning.py +++ b/hirundo/cli_unlearning.py @@ -3,7 +3,13 @@ import typer from rich.table import Table -from hirundo._cli_common import console, hirundo_epilog, make_app, validate_enum +from hirundo._cli_common import ( + console, + hirundo_epilog, + make_app, + validate_enum, + validate_run_id, +) unlearning_app = make_app("unlearning", "Launch and monitor LLM unlearning runs.") @@ -128,4 +134,4 @@ def unlearning_check( """ from hirundo.unlearning_llm import LlmUnlearningRun - LlmUnlearningRun.check_run_by_id(run_id) + LlmUnlearningRun.check_run_by_id(validate_run_id(run_id)) From 36663cfa646f71f4d306518d47ef369761f16544 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 22:04:14 +0000 Subject: [PATCH 07/13] refactor(cli): extract wait_or_notify, print_runs_table helpers; surface cached_zip_path Adds two shared helpers to _cli_common to eliminate copy-paste across the three CLI sub-apps: wait_or_notify centralises the wait/notify branching, and print_runs_table centralises Rich table construction. Also captures and prints results.cached_zip_path for dataset-qa and eval check/run commands, matching the legacy check-run behaviour. https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/_cli_common.py | 29 ++++++++++++++++++++++- hirundo/cli_dataset_qa.py | 50 ++++++++++++++++++++++----------------- hirundo/cli_eval.py | 43 +++++++++++++++++---------------- hirundo/cli_unlearning.py | 35 +++++++++++++-------------- 4 files changed, 94 insertions(+), 63 deletions(-) diff --git a/hirundo/_cli_common.py b/hirundo/_cli_common.py index 183ee4e0..24d9738f 100644 --- a/hirundo/_cli_common.py +++ b/hirundo/_cli_common.py @@ -1,8 +1,11 @@ import re import sys +from collections.abc import Callable +from typing import Any import typer from rich.console import Console +from rich.table import Table _RUN_ID_RE = re.compile(r"^[a-zA-Z0-9_-]+$") @@ -36,10 +39,34 @@ def validate_run_id(run_id: str) -> str: return run_id -def validate_enum(value: str, enum_cls, label: str): +def validate_enum(value: str, enum_cls: type, label: str) -> Any: try: return enum_cls(value.upper()) except ValueError: valid = ", ".join(e.value for e in enum_cls) console.print(f"[red]Invalid {label} '{value}'. Valid options: {valid}[/red]") raise typer.Exit(code=1) from None + + +def wait_or_notify( + run_id: str, check_fn: Callable[[str], Any], cmd_name: str, wait: bool +) -> Any: + if wait: + return check_fn(run_id) + console.print( + f"Use [bold]hirundo {cmd_name} check[/bold] [italic][/italic] to monitor progress." + ) + return None + + +def print_runs_table( + title: str, + columns: tuple[str, ...], + rows: list[tuple[str | None, ...]], +) -> None: + table = Table(title=title, expand=True) + for col in columns: + table.add_column(col, overflow="fold") + for row in rows: + table.add_row(*row) + console.print(table) diff --git a/hirundo/cli_dataset_qa.py b/hirundo/cli_dataset_qa.py index 0954323b..50c7b2e9 100644 --- a/hirundo/cli_dataset_qa.py +++ b/hirundo/cli_dataset_qa.py @@ -1,9 +1,15 @@ from typing import Annotated import typer -from rich.table import Table -from hirundo._cli_common import console, hirundo_epilog, make_app, validate_run_id +from hirundo._cli_common import ( + console, + hirundo_epilog, + make_app, + print_runs_table, + validate_run_id, + wait_or_notify, +) dataset_qa_app = make_app("dataset-qa", "Launch and monitor Dataset QA runs.") @@ -26,12 +32,9 @@ def dataset_qa_run( run_id = QADataset.launch_qa_run(dataset_id) console.print(f"Dataset QA run started. Run ID: [bold]{run_id}[/bold]") - if wait: - QADataset.check_run_by_id(run_id) - else: - console.print( - "Use [bold]hirundo dataset-qa check[/bold] [italic][/italic] to monitor progress." - ) + results = wait_or_notify(run_id, QADataset.check_run_by_id, "dataset-qa", wait) + if results is not None: + console.print(f"Run results saved to {results.cached_zip_path}") @dataset_qa_app.command("list", epilog=hirundo_epilog) @@ -47,19 +50,20 @@ def dataset_qa_list( from hirundo.dataset_qa import QADataset runs = QADataset.list_runs(archived=archived) - - table = Table(title="Dataset QA Runs:", expand=True) - for col in ("Dataset Name", "Run ID", "Status", "Created At", "Run Args"): - table.add_column(col, overflow="fold") - for run in runs: - table.add_row( - str(run.name), - str(run.run_id), - str(run.status), - run.created_at.isoformat(), - run.run_args.model_dump_json() if run.run_args else None, - ) - console.print(table) + print_runs_table( + "Dataset QA Runs:", + ("Dataset Name", "Run ID", "Status", "Created At", "Run Args"), + [ + ( + str(run.name), + str(run.run_id), + str(run.status), + run.created_at.isoformat(), + run.run_args.model_dump_json() if run.run_args else None, + ) + for run in runs + ], + ) @dataset_qa_app.command("check", epilog=hirundo_epilog) @@ -71,4 +75,6 @@ def dataset_qa_check( """ from hirundo.dataset_qa import QADataset - QADataset.check_run_by_id(validate_run_id(run_id)) + results = QADataset.check_run_by_id(validate_run_id(run_id)) + if results is not None: + console.print(f"Run results saved to {results.cached_zip_path}") diff --git a/hirundo/cli_eval.py b/hirundo/cli_eval.py index 5e4b49e0..4889fc2c 100644 --- a/hirundo/cli_eval.py +++ b/hirundo/cli_eval.py @@ -1,14 +1,15 @@ from typing import Annotated import typer -from rich.table import Table from hirundo._cli_common import ( console, hirundo_epilog, make_app, + print_runs_table, validate_enum, validate_run_id, + wait_or_notify, ) eval_app = make_app("eval", "Launch and monitor LLM behavior evaluation runs.") @@ -77,12 +78,9 @@ def eval_run( run_id = LlmBehaviorEval.launch_eval_run(model_or_run, run_info) console.print(f"Eval run started. Run ID: [bold]{run_id}[/bold]") - if wait: - LlmBehaviorEval.check_run_by_id(run_id) - else: - console.print( - "Use [bold]hirundo eval check[/bold] [italic][/italic] to monitor progress." - ) + results = wait_or_notify(run_id, LlmBehaviorEval.check_run_by_id, "eval", wait) + if results is not None: + console.print(f"Run results saved to {results.cached_zip_path}") @eval_app.command("list", epilog=hirundo_epilog) @@ -98,19 +96,20 @@ def eval_list( from hirundo.llm_behavior_eval import LlmBehaviorEval runs = LlmBehaviorEval.list_runs(archived=archived) - - table = Table(title="Eval Runs:", expand=True) - for col in ("Run ID", "Name", "Status", "Preset", "Created At"): - table.add_column(col, overflow="fold") - for run in runs: - table.add_row( - str(run.run_id), - str(run.name), - str(run.status), - run.preset_type.value if run.preset_type else None, - run.created_at.isoformat(), - ) - console.print(table) + print_runs_table( + "Eval Runs:", + ("Run ID", "Name", "Status", "Preset", "Created At"), + [ + ( + str(run.run_id), + str(run.name), + str(run.status), + run.preset_type.value if run.preset_type else None, + run.created_at.isoformat(), + ) + for run in runs + ], + ) @eval_app.command("check", epilog=hirundo_epilog) @@ -122,4 +121,6 @@ def eval_check( """ from hirundo.llm_behavior_eval import LlmBehaviorEval - LlmBehaviorEval.check_run_by_id(validate_run_id(run_id)) + results = LlmBehaviorEval.check_run_by_id(validate_run_id(run_id)) + if results is not None: + console.print(f"Run results saved to {results.cached_zip_path}") diff --git a/hirundo/cli_unlearning.py b/hirundo/cli_unlearning.py index 955aaefd..c7df1495 100644 --- a/hirundo/cli_unlearning.py +++ b/hirundo/cli_unlearning.py @@ -1,14 +1,15 @@ from typing import Annotated import typer -from rich.table import Table from hirundo._cli_common import ( console, hirundo_epilog, make_app, + print_runs_table, validate_enum, validate_run_id, + wait_or_notify, ) unlearning_app = make_app("unlearning", "Launch and monitor LLM unlearning runs.") @@ -90,12 +91,7 @@ def unlearning_run( run_id = LlmUnlearningRun.launch(model_id, run_info) console.print(f"Unlearning run started. Run ID: [bold]{run_id}[/bold]") - if wait: - LlmUnlearningRun.check_run_by_id(run_id) - else: - console.print( - "Use [bold]hirundo unlearning check[/bold] [italic][/italic] to monitor progress." - ) + wait_or_notify(run_id, LlmUnlearningRun.check_run_by_id, "unlearning", wait) @unlearning_app.command("list", epilog=hirundo_epilog) @@ -111,18 +107,19 @@ def unlearning_list( from hirundo.unlearning_llm import LlmUnlearningRun runs = LlmUnlearningRun.list(archived=archived) - - table = Table(title="Unlearning Runs:", expand=True) - for col in ("Name", "Run ID", "Status", "Created At"): - table.add_column(col, overflow="fold") - for run in runs: - table.add_row( - str(run.name), - str(run.run_id), - str(run.status), - run.created_at.isoformat(), - ) - console.print(table) + print_runs_table( + "Unlearning Runs:", + ("Name", "Run ID", "Status", "Created At"), + [ + ( + str(run.name), + str(run.run_id), + str(run.status), + run.created_at.isoformat(), + ) + for run in runs + ], + ) @unlearning_app.command("check", epilog=hirundo_epilog) From 63c27230b7ec36c26c4369bb2fbe70c45003cda0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 22:05:49 +0000 Subject: [PATCH 08/13] fix(cli): use run.run_id instead of run.id in legacy list-runs table The "Run ID" column was displaying the numeric DB id (run.id) instead of the string run identifier (run.run_id), inconsistent with the dataset-qa list command. https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hirundo/cli.py b/hirundo/cli.py index ea4d841e..3c84cb7e 100644 --- a/hirundo/cli.py +++ b/hirundo/cli.py @@ -227,7 +227,7 @@ def list_runs(): for run in runs: table.add_row( str(run.name), - str(run.id), + str(run.run_id), str(run.status), run.created_at.isoformat(), run.run_args.model_dump_json() if run.run_args else None, From cc2e8bfdc845b31694e260d9218aa388572cda47 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 22:06:36 +0000 Subject: [PATCH 09/13] fix(cli): validate --source-run-id in eval run command Applies the same validate_run_id() check used by eval check/unlearning check so invalid run ID formats are rejected consistently before launching. https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/cli_eval.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hirundo/cli_eval.py b/hirundo/cli_eval.py index 4889fc2c..6ddc7863 100644 --- a/hirundo/cli_eval.py +++ b/hirundo/cli_eval.py @@ -66,6 +66,9 @@ def eval_run( ) raise typer.Exit(code=1) + if source_run_id is not None: + source_run_id = validate_run_id(source_run_id) + preset_type = validate_enum(preset, PresetType, "preset") model_or_run = ModelOrRun.MODEL if model_id is not None else ModelOrRun.RUN run_info = EvalRunInfo( From 67dc58a019a59395068cf8db3511ebdf04584cb7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 22:12:51 +0000 Subject: [PATCH 10/13] fix(cli): use fullmatch for run ID validation to reject trailing newlines re.match with $ allows a single trailing newline through; fullmatch requires the entire string to consist of only the allowed characters. https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/_cli_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hirundo/_cli_common.py b/hirundo/_cli_common.py index 24d9738f..a306ba66 100644 --- a/hirundo/_cli_common.py +++ b/hirundo/_cli_common.py @@ -30,7 +30,7 @@ def make_app(name: str, help_text: str) -> typer.Typer: def validate_run_id(run_id: str) -> str: - if not _RUN_ID_RE.match(run_id): + if not _RUN_ID_RE.fullmatch(run_id): console.print( f"[red]Invalid run ID '{run_id}'. " "Run IDs may only contain alphanumeric characters, hyphens, and underscores.[/red]" From 474d44afd1bec7cb602c2fa6171beda374e6c510 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 22:14:59 +0000 Subject: [PATCH 11/13] fix(cli): type validate_enum enum_cls as type[Enum] for basedpyright bare `type` is not iterable; `type[Enum]` lets basedpyright know the class supports iteration via EnumMeta. https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/_cli_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hirundo/_cli_common.py b/hirundo/_cli_common.py index a306ba66..0c1e1b2c 100644 --- a/hirundo/_cli_common.py +++ b/hirundo/_cli_common.py @@ -1,6 +1,7 @@ import re import sys from collections.abc import Callable +from enum import Enum from typing import Any import typer @@ -39,7 +40,7 @@ def validate_run_id(run_id: str) -> str: return run_id -def validate_enum(value: str, enum_cls: type, label: str) -> Any: +def validate_enum(value: str, enum_cls: type[Enum], label: str) -> Any: try: return enum_cls(value.upper()) except ValueError: From 95812ac67919ef014cda46c8d8d02930abb4d014 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 9 May 2026 22:17:57 +0000 Subject: [PATCH 12/13] refactor(cli): use print_runs_table in legacy list-runs command Replaces inline Rich Table construction with the shared print_runs_table helper, removing the last duplication between list-runs and dataset-qa list. https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/cli.py | 41 ++++++++++++++--------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/hirundo/cli.py b/hirundo/cli.py index 3c84cb7e..1260205a 100644 --- a/hirundo/cli.py +++ b/hirundo/cli.py @@ -5,9 +5,8 @@ from urllib.parse import urlparse import typer -from rich.table import Table -from hirundo._cli_common import console, docs, hirundo_epilog +from hirundo._cli_common import docs, hirundo_epilog, print_runs_table from hirundo._env import API_HOST, EnvLocation from hirundo.cli_dataset_qa import dataset_qa_app from hirundo.cli_eval import eval_app @@ -207,32 +206,20 @@ def list_runs(): from hirundo.dataset_qa import QADataset runs = QADataset.list_runs() - - table = Table( - title="Runs:", - expand=True, - ) - cols = ( - "Dataset name", - "Run ID", - "Status", - "Created At", - "Run Args", + print_runs_table( + "Runs:", + ("Dataset name", "Run ID", "Status", "Created At", "Run Args"), + [ + ( + str(run.name), + str(run.run_id), + str(run.status), + run.created_at.isoformat(), + run.run_args.model_dump_json() if run.run_args else None, + ) + for run in runs + ], ) - for col in cols: - table.add_column( - col, - overflow="fold", - ) - for run in runs: - table.add_row( - str(run.name), - str(run.run_id), - str(run.status), - run.created_at.isoformat(), - run.run_args.model_dump_json() if run.run_args else None, - ) - console.print(table) typer_click_object = typer.main.get_command(app) From 9a4feff51c1f7a2014fe3bcd81ab6a754959ac1c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 18:20:08 +0000 Subject: [PATCH 13/13] refactor(cli): clean up duplication, naming, and add unit tests - Rename loop variable `e` -> `member` in validate_enum - Delegate legacy list-runs to dataset_qa_list to eliminate duplicate table rendering - Add validate_run_id + None guard to legacy check-run for consistency - Add tests/test_cli_common.py covering validate_run_id (valid/invalid cases) and wait_or_notify (both wait=True/False branches) https://claude.ai/code/session_01QGjQJb8G8DM8UaYt66Jewt --- hirundo/_cli_common.py | 2 +- hirundo/cli.py | 27 +++++------------------ tests/test_cli_common.py | 47 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 tests/test_cli_common.py diff --git a/hirundo/_cli_common.py b/hirundo/_cli_common.py index 0c1e1b2c..c2557fb7 100644 --- a/hirundo/_cli_common.py +++ b/hirundo/_cli_common.py @@ -44,7 +44,7 @@ def validate_enum(value: str, enum_cls: type[Enum], label: str) -> Any: try: return enum_cls(value.upper()) except ValueError: - valid = ", ".join(e.value for e in enum_cls) + valid = ", ".join(member.value for member in enum_cls) console.print(f"[red]Invalid {label} '{value}'. Valid options: {valid}[/red]") raise typer.Exit(code=1) from None diff --git a/hirundo/cli.py b/hirundo/cli.py index 1260205a..0744df49 100644 --- a/hirundo/cli.py +++ b/hirundo/cli.py @@ -6,9 +6,9 @@ import typer -from hirundo._cli_common import docs, hirundo_epilog, print_runs_table +from hirundo._cli_common import docs, hirundo_epilog, validate_run_id from hirundo._env import API_HOST, EnvLocation -from hirundo.cli_dataset_qa import dataset_qa_app +from hirundo.cli_dataset_qa import dataset_qa_app, dataset_qa_list from hirundo.cli_eval import eval_app from hirundo.cli_unlearning import unlearning_app @@ -194,8 +194,9 @@ def check_run( """ from hirundo.dataset_qa import QADataset - results = QADataset.check_run_by_id(run_id) - print(f"Run results saved to {results.cached_zip_path}") + results = QADataset.check_run_by_id(validate_run_id(run_id)) + if results is not None: + print(f"Run results saved to {results.cached_zip_path}") @app.command("list-runs", epilog=hirundo_epilog) @@ -203,23 +204,7 @@ def list_runs(): """ List all runs available. """ - from hirundo.dataset_qa import QADataset - - runs = QADataset.list_runs() - print_runs_table( - "Runs:", - ("Dataset name", "Run ID", "Status", "Created At", "Run Args"), - [ - ( - str(run.name), - str(run.run_id), - str(run.status), - run.created_at.isoformat(), - run.run_args.model_dump_json() if run.run_args else None, - ) - for run in runs - ], - ) + dataset_qa_list(archived=False) typer_click_object = typer.main.get_command(app) diff --git a/tests/test_cli_common.py b/tests/test_cli_common.py new file mode 100644 index 00000000..3b18d59d --- /dev/null +++ b/tests/test_cli_common.py @@ -0,0 +1,47 @@ +from unittest.mock import MagicMock, patch + +import hirundo._cli_common as cli_common # noqa: E402 +import pytest +import typer +from hirundo._cli_common import validate_run_id, wait_or_notify + + +class TestValidateRunId: + def test_valid_id_returned_unchanged(self): + assert validate_run_id("abc-123_XYZ") == "abc-123_XYZ" + + @pytest.mark.parametrize( + "bad_id", ["run/id", "run\\id", "run id", "run\nid", "run.id", ""] + ) + def test_invalid_id_exits(self, bad_id): + with pytest.raises(typer.Exit) as exc: + validate_run_id(bad_id) + assert exc.value.exit_code == 1 + + def test_invalid_id_prints_message(self, bad_id="bad id"): + with ( + patch.object(cli_common.console, "print") as mock_print, + pytest.raises(typer.Exit), + ): + validate_run_id(bad_id) + output = mock_print.call_args[0][0] + assert "bad id" in output + assert "may only contain" in output + + +class TestWaitOrNotify: + def test_wait_true_calls_check_fn_and_returns_result(self): + check_fn = MagicMock(return_value="result") + assert wait_or_notify("run-1", check_fn, "dataset-qa", wait=True) == "result" + check_fn.assert_called_once_with("run-1") + + def test_wait_false_returns_none_without_calling_check_fn(self): + check_fn = MagicMock() + assert wait_or_notify("run-1", check_fn, "dataset-qa", wait=False) is None + check_fn.assert_not_called() + + def test_wait_false_prints_check_hint(self): + with patch.object(cli_common.console, "print") as mock_print: + wait_or_notify("run-1", MagicMock(), "dataset-qa", wait=False) + output = mock_print.call_args[0][0] + assert "dataset-qa check" in output