Hirundo-io · benglewis · Feb 4, 2026 · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026
@@ -1,2 +1,2 @@
 watch_file uv.lock
-uv sync --all-extras && source .venv/bin/activate
+uv sync --group dev && source .venv/bin/activate
@@ -75,4 +75,4 @@ repos:
     hooks:
       - id: uv-lock
       - id: uv-sync
-        args: ["--extra", "dev", "--extra", "docs", "--extra", "pandas", "--extra", "polars", "--extra", "transformers"]
+        args: ["--group", "dev"]
@@ -0,0 +1,47 @@
+# Repository Guidelines
+
+## Instructions
+
+- Always use Context7 when I need code generation, setup or configuration steps, or
+  library/API documentation. This means you should automatically use the Context7 MCP
+  tools to resolve the library ID and get library docs without me having to explicitly ask.
+
+## Project Structure & Module Organization
+
+- `hirundo/` holds the SDK source (CLI entry point is `hirundo.cli:app`).
+- `tests/` contains pytest-based test coverage.
+- `docs/` and `source/` contain Sphinx documentation assets.
+- `notebooks/` and `on_prem_test_notebook.ipynb` provide example workflows.
+- `requirements/` stores compiled dependency sets (for dev, docs, pandas, polars, transformers).
+
+## Build, Test, and Development Commands
+
+- `uv sync --group dev`: fast dependency sync including the `dev` dependency group.
+- `ruff check` / `ruff format`: lint and auto-format (run before PRs).
+- `pytest`: run the test suite.
+- `python -m build`: build the package artifacts.
+- `pre-commit install`: enable git hooks (optional, but recommended).
+
+## Coding Style & Naming Conventions
+
+- Python 3.10+ codebase, 4-space indentation, line length 88 (Ruff defaults).
+- Follow Ruff linting rules (`pyproject.toml`), with tests allowing `assert` usage.
+- Prefer descriptive names; avoid short, cryptic identifiers in new code.
+- Avoid 1-3 character variable names in new or refactored code. Use descriptive names
+  even in small scopes.
+
+## Testing Guidelines
+
+- Frameworks: `pytest` and `pytest-asyncio`.
+- Place tests in `tests/`; name files `test_*.py`.
+- Run locally with `pytest` before opening a PR (CI runs lint + integration tests).
+
+## Commit & Pull Request Guidelines
+
+- Recent commit history favors `SDK-<id>: <summary>` (e.g., `SDK-78: Migrate to basedpyright`).
+- Include issue/PR references when available (e.g., `(#190)`).
+- PRs should describe changes clearly and confirm `ruff check` and `ruff format` passed.
+
+## Security & Configuration Tips
+
+- Supported Python versions: CPython 3.10–3.13.
@@ -3,6 +3,7 @@
 The Hirundo Python SDK lets you:
 
 - Launch and monitor LLM behavior unlearning runs.
+- Run LLM behavior evaluations for bias, hallucination, and prompt injection.
 - Run dataset QA for ML datasets (classification, object detection, and more).
 - Fetch QA results as `pandas` or `polars` DataFrames.
 
@@ -22,7 +23,7 @@ pip install hirundo
 Optional extras:
 
 - LLM behavior unlearning (Transformers + PEFT): `pip install hirundo[transformers]`
-- Dataset QA results as DataFrames: `pip install hirundo[pandas]` or `pip install hirundo[polars]`
+- Dataset QA or LLM behavior eval results as DataFrames: `pip install hirundo[pandas]` or `pip install hirundo[polars]`
 
 If you want to install from source, clone this repository and run:
 
@@ -40,120 +41,11 @@ hirundo setup
 
 This writes `API_KEY` (and optionally `API_HOST`) to `.env` in the current directory or `~/.hirundo.conf`.
 
-## Quickstart: LLM behavior unlearning
-
-Make sure you have the `transformers` extra installed (`pip install hirundo[transformers]`).
-
-```python
-from hirundo import (
-    BiasRunInfo,
-    BiasType,
-    HuggingFaceTransformersModel,
-    LlmModel,
-    LlmUnlearningRun,
-)
-
-llm = LlmModel(
-    model_name="Nemotron-Flash-1B",
-    model_source=HuggingFaceTransformersModel(
-        model_name="nvidia/Nemotron-Flash-1B",
-    ),
-)
-llm_id = llm.create()
-
-run_id = LlmUnlearningRun.launch(
-    llm_id,
-    BiasRunInfo(bias_type=BiasType.ALL),
-)
-
-result = LlmUnlearningRun.check_run(run_id)
-new_adapter = llm.get_hf_pipeline_for_run(run_id)
-```
-
-## Quickstart: Dataset QA
-
-### Classification
-
-```python
-import json
-import os
-
-from hirundo import (
-    HirundoCSV,
-    LabelingType,
-    QADataset,
-    StorageConfig,
-    StorageGCP,
-    StorageTypes,
-)
-
-gcp_bucket = StorageGCP(
-    bucket_name="cifar100bucket",
-    project="Hirundo-global",
-    credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
-)
-
-test_dataset = QADataset(
-    name="TEST-GCP cifar 100 classification dataset",
-    labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
-    storage_config=StorageConfig(
-        name="cifar100bucket",
-        type=StorageTypes.GCP,
-        gcp=gcp_bucket,
-    ),
-    data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
-    labeling_info=HirundoCSV(
-        csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
-    ),
-    classes=cifar100_classes,
-)
-
-test_dataset.run_qa()
-results = test_dataset.check_run()
-print(results)
-```
+## Quickstart examples
 
-### Object detection
-
-```python
-from hirundo import (
-    GitRepo,
-    HirundoCSV,
-    LabelingType,
-    QADataset,
-    StorageConfig,
-    StorageGit,
-    StorageTypes,
-)
-
-git_storage = StorageGit(
-    repo=GitRepo(
-        name="BDD-100k-validation-dataset",
-        repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
-    ),
-    branch="main",
-)
-
-test_dataset = QADataset(
-    name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
-    labeling_type=LabelingType.OBJECT_DETECTION,
-    storage_config=StorageConfig(
-        name="BDD-100k-validation-dataset",
-        type=StorageTypes.GIT,
-        git=git_storage,
-    ),
-    data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
-    labeling_info=HirundoCSV(
-        csv_url=git_storage.get_url(
-            path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
-        ),
-    ),
-)
-
-test_dataset.run_qa()
-results = test_dataset.check_run()
-print(results)
-```
+The full quickstart examples now live in the Sphinx docs so they can be linted,
+formatted, and type-checked as real Python files. See the examples embedded in
+`docs/index.rst`, which are sourced from `docs/*.py` files.
 
 ## Supported dataset storage
 

@@ -0,0 +1,41 @@
+"""Examples for docs/index.rst literalinclude blocks."""
+
+from hirundo import (
+    GitRepo,
+    HirundoCSV,
+    LabelingType,
+    QADataset,
+    StorageConfig,
+    StorageGit,
+    StorageTypes,
+)
+
+git_storage = StorageGit(
+    repo=GitRepo(
+        name="BDD-100k-validation-dataset",
+        repository_url=(
+            "https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only"
+        ),
+    ),
+    branch="main",
+)
+
+test_dataset = QADataset(
+    name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
+    labeling_type=LabelingType.OBJECT_DETECTION,
+    storage_config=StorageConfig(
+        name="BDD-100k-validation-dataset",
+        type=StorageTypes.GIT,
+        git=git_storage,
+    ),
+    data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
+    labeling_info=HirundoCSV(
+        csv_url=git_storage.get_url(
+            path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
+        ),
+    ),
+)
+
+test_dataset.run_qa()
+results = test_dataset.check_run()
+print(results)
@@ -0,0 +1,10 @@
+.. meta::
+   :http-equiv=Content-Security-Policy: default-src 'self', frame-ancestors 'none'
+
+hirundo.llm_behavior_eval module
+================================
+
+.. automodule:: hirundo.llm_behavior_eval
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,10 @@
+.. meta::
+   :http-equiv=Content-Security-Policy: default-src 'self', frame-ancestors 'none'
+
+hirundo.llm_behavior_eval_results module
+========================================
+
+.. automodule:: hirundo.llm_behavior_eval_results
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,10 @@
+.. meta::
+   :http-equiv=Content-Security-Policy: default-src 'self', frame-ancestors 'none'
+
+hirundo.llm_bias_type module
+=============================
+
+.. automodule:: hirundo.llm_bias_type
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -17,6 +17,9 @@ Submodules
    hirundo.git
    hirundo.labeling
    hirundo.logger
+   hirundo.llm_behavior_eval
+   hirundo.llm_behavior_eval_results
+   hirundo.llm_bias_type
    hirundo.storage
    hirundo.unlearning_llm
    hirundo.unzip

@@ -13,6 +13,7 @@ Welcome to the ``hirundo`` client library documentation. This SDK connects to th
 Hirundo platform and provides APIs for:
 
 - LLM behavior unlearning runs (reducing bias, prompt injections and other unwanted behaviors).
+- LLM behavior eval runs (measuring bias, hallucination, prompt injection, and more).
 - Dataset QA for machine learning datasets.
 
 Getting started
@@ -45,6 +46,17 @@ Example:
 .. literalinclude:: llm_unlearning_example.py
    :language: python
 
+LLM behavior eval
+-----------------
+
+Run standardized evaluations over an LLM or an unlearning run to quantify
+behavior changes (bias, hallucination, prompt injections, and more).
+
+Example:
+
+.. literalinclude:: llm_behavior_eval_example.py
+   :language: python
+
 Dataset QA
 ----------
 
@@ -63,9 +75,14 @@ Supported storage backends include:
 - Google Cloud Storage (GCS)
 - Git repositories with LFS (GitHub, Hugging Face)
 
-Example:
+Classification example:
+
+.. literalinclude:: dataset_qa_classification_example.py
+   :language: python
+
+Object detection example:
 
-.. literalinclude:: dataset_qa_example.py
+.. literalinclude:: dataset_qa_object_detection_example.py
    :language: python
 
 API reference

@@ -0,0 +1,32 @@
+"""Examples for docs/index.rst literalinclude blocks."""
+
+from hirundo import (
+    BBQBiasType,
+    EvalRunInfo,
+    HuggingFaceTransformersModel,
+    LlmBehaviorEval,
+    LlmModel,
+    ModelOrRun,
+    PresetType,
+)
+
+llm = LlmModel(
+    model_name="Nemotron-Flash-1B",
+    model_source=HuggingFaceTransformersModel(
+        model_name="nvidia/Nemotron-Flash-1B",
+    ),
+)
+llm_id = llm.create()
+
+run_id = LlmBehaviorEval.launch_eval_run(
+    ModelOrRun.MODEL,
+    EvalRunInfo(
+        name="Nemotron BBQ bias eval",
+        model_id=llm_id,
+        preset_type=PresetType.BBQ_BIAS,
+        bias_type=BBQBiasType.ALL,
+    ),
+)
+
+results = LlmBehaviorEval.check_run_by_id(run_id)
+print(results.summary_brief)
@@ -1,8 +1,8 @@
 """Examples for docs/index.rst literalinclude blocks."""
 
 from hirundo import (
+    BBQBiasType,
     BiasRunInfo,
-    BiasType,
     HuggingFaceTransformersModel,
     LlmModel,
     LlmUnlearningRun,
@@ -17,7 +17,7 @@
 llm_id = llm.create()
 run_id = LlmUnlearningRun.launch(
     llm_id,
-    BiasRunInfo(bias_type=BiasType.ALL),
+    BiasRunInfo(bias_type=BBQBiasType.ALL),
 )
 result = LlmUnlearningRun.check_run(run_id)
 new_adapter = llm.get_hf_pipeline_for_run(run_id)