Merged
1 change: 1 addition & 0 deletions .gitignore
@@ -456,3 +456,4 @@ pyrightconfig.json
# Generated API documentation (built by tooling/docs-autogen/)
docs/docs/api/
docs/docs/api-reference.mdx
.venv-docs-autogen/
3 changes: 1 addition & 2 deletions AGENTS.md
@@ -89,10 +89,9 @@ Tests/examples automatically skip if system lacks required resources. Heavy exam
## 4. Coding Standards
- **Types required** on all core functions
- **Docstrings are prompts** — be specific, the LLM reads them
- **Google-style docstrings**
- **Google-style docstrings** — `Args:` on the **class docstring only**; `__init__` gets a single summary sentence. Add `Attributes:` only when a stored value differs in type/behaviour from its constructor input (type transforms, computed values, class constants). See CONTRIBUTING.md for a full example.
- **Ruff** for linting/formatting
- Use `...` in `@generative` function bodies
- Prefer primitives over classes
- **Friendly Dependency Errors**: Wrap optional backend imports in `try/except ImportError` with a helpful message (e.g., "Please pip install mellea[hf]"). See `mellea/stdlib/session.py` for examples.
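A minimal sketch of the pattern; `require_backend` is a hypothetical helper for illustration, and the real wrappers live in `mellea/stdlib/session.py`:

```python
import importlib


def require_backend(module_name: str, extra: str):
    """Import an optional backend module, raising a friendly error if missing."""
    try:
        return importlib.import_module(module_name)
    except ImportError as exc:
        # Re-raise with an actionable install hint instead of a bare ImportError.
        raise ImportError(
            f"Missing optional dependency '{module_name}'. "
            f"Please pip install mellea[{extra}]"
        ) from exc
```

Chaining with `from exc` preserves the original traceback so users can still debug genuinely broken installs.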

62 changes: 59 additions & 3 deletions CONTRIBUTING.md
@@ -132,21 +132,77 @@ Use **[Google-style docstrings](https://google.github.io/styleguide/pyguide.html
```python
def extract_entities(text: str, entity_types: list[str]) -> dict[str, list[str]]:
"""Extract named entities from text.

Args:
text: The input text to analyze.
entity_types: List of entity types to extract (e.g., ["PERSON", "ORG"]).

Returns:
Dictionary mapping entity types to lists of extracted entities.

Example:
>>> extract_entities("Alice works at IBM", ["PERSON", "ORG"])
{"PERSON": ["Alice"], "ORG": ["IBM"]}
"""
...
```

#### Class and `__init__` docstrings

Place `Args:` on the **class docstring only**. The `__init__` docstring should be a
single summary sentence with no `Args:` section. This keeps hover docs clean in IDEs
and ensures the docs pipeline (which skips `__init__`) publishes the full parameter
list.

```python
class MyComponent(Component[str]):
"""A component that does something useful.

Args:
name (str): Human-readable label for this component.
max_tokens (int): Upper bound on generated tokens.
"""

def __init__(self, name: str, max_tokens: int = 256) -> None:
"""Initialize MyComponent with a name and token budget."""
self.name = name
self.max_tokens = max_tokens
```

Add an `Attributes:` section on the class docstring **only** when a stored attribute
differs in type or behaviour from the constructor input — for example, when a `str`
argument is wrapped into a `CBlock`, or when a class-level constant is relevant to
callers. Pure-echo entries that repeat `Args:` verbatim should be omitted.
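For instance, a type-transforming attribute might be documented like this (a minimal, hypothetical sketch: `CBlock` below is a local stand-in for mellea's content wrapper, and `GoalComponent` is invented for illustration):

```python
class CBlock:
    """Minimal stand-in for mellea's CBlock content wrapper (illustrative only)."""

    def __init__(self, text: str) -> None:
        self.text = text


class GoalComponent:
    """A component whose goal is stored as a CBlock.

    Args:
        goal (str): Natural-language goal for this component.

    Attributes:
        goal (CBlock): The ``goal`` argument wrapped into a ``CBlock``;
            the stored type differs from the constructor input, so it
            earns an ``Attributes:`` entry.
    """

    def __init__(self, goal: str) -> None:
        """Initialize GoalComponent with a goal string."""
        self.goal = CBlock(goal)
```

The `Attributes:` entry here is justified because callers reading `component.goal` would otherwise expect a `str`.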

#### Validating docstrings

Run the coverage and quality audit to check your changes before committing:

```bash
# Build fresh API docs then audit quality (documented symbols only)
uv run python tooling/docs-autogen/generate-ast.py
uv run python tooling/docs-autogen/audit_coverage.py \
--quality --no-methods --docs-dir docs/docs/api
```

Key checks the audit enforces:

| Check | Meaning |
|-------|---------|
| `no_class_args` | Class has typed `__init__` params but no `Args:` on the class docstring |
| `duplicate_init_args` | `Args:` appears in both the class and `__init__` docstrings (Option C violation) |
| `no_args` | Standalone function has params but no `Args:` section |
| `no_returns` | Function has a non-trivial return annotation but no `Returns:` section |
| `param_mismatch` | `Args:` documents names not present in the actual signature |
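As a rough sketch of what the `duplicate_init_args` check flags (the real audit lives in `tooling/docs-autogen/audit_coverage.py`; the `has_duplicate_init_args` helper below is hypothetical):

```python
import inspect


class Duplicated:
    """A widget.

    Args:
        size (int): Widget size.
    """

    def __init__(self, size: int) -> None:
        """Initialize.

        Args:
            size (int): Widget size.
        """
        self.size = size


class Clean:
    """A widget.

    Args:
        size (int): Widget size.
    """

    def __init__(self, size: int) -> None:
        """Initialize Clean with a size."""
        self.size = size


def has_duplicate_init_args(cls) -> bool:
    """Return True when Args: appears in both the class and __init__ docstrings."""
    cls_doc = inspect.getdoc(cls) or ""
    init_doc = inspect.getdoc(cls.__init__) or ""
    return "Args:" in cls_doc and "Args:" in init_doc
```

`Duplicated` would be flagged; `Clean` follows the convention above and passes.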

**IDE hover verification** — open any of these existing classes in VS Code and hover
over the class name or a constructor call to confirm the hover card shows `Args:` once
with no duplication:

- `ReactInitiator` ([mellea/stdlib/components/react.py](mellea/stdlib/components/react.py)) — `Args:` + `Attributes:` (`goal: str → CBlock` transform)
- `BaseSamplingStrategy` ([mellea/stdlib/sampling/base.py](mellea/stdlib/sampling/base.py)) — `Args:` only, no `Attributes:` (pure-echo removed)
- `TokenToFloat` ([mellea/formatters/granite/intrinsics/output.py](mellea/formatters/granite/intrinsics/output.py)) — `Attributes:` for `YAML_NAME` class constant

### Code Style

- **Ruff** for linting and formatting
52 changes: 49 additions & 3 deletions cli/alora/commands.py
@@ -1,3 +1,12 @@
"""Typer sub-application for the ``m alora`` command group.

Provides three commands: ``train`` (fine-tune a base causal language model on a JSONL
dataset to produce a LoRA or aLoRA adapter), ``upload`` (push adapter weights to
Hugging Face Hub, optionally packaging the adapter as an intrinsic with an
``io.yaml`` configuration), and ``add-readme`` (use an LLM to auto-generate and
upload an ``INTRINSIC_README.md`` for the trained adapter).
"""

import json
import os
import tempfile
@@ -22,7 +31,21 @@ def alora_train(
max_length: int = typer.Option(1024, help="Max sequence length"),
grad_accum: int = typer.Option(4, help="Gradient accumulation steps"),
):
"""Train an aLoRA or LoRA model on your dataset."""
"""Train an aLoRA or LoRA model on your dataset.

Args:
datafile: JSONL file with item/label pairs for training.
basemodel: Base model ID or path.
outfile: Path to save adapter weights.
promptfile: Path to load the prompt format file.
adapter: Adapter type; ``"alora"`` or ``"lora"``.
device: Device to train on: ``"auto"``, ``"cpu"``, ``"cuda"``, or ``"mps"``.
epochs: Number of training epochs.
learning_rate: Learning rate for the optimizer.
batch_size: Per-device training batch size.
max_length: Maximum sequence length.
grad_accum: Number of gradient accumulation steps.
"""
from cli.alora.train import train_model

train_model(
@@ -56,7 +79,17 @@ def alora_upload(
"processing if the model is invoked as an intrinsic.",
),
):
"""Upload trained adapter to remote model registry."""
"""Upload trained adapter to remote model registry.

Args:
weight_path: Path to saved adapter weights directory.
name: Destination model name on Hugging Face Hub
(e.g. ``"acme/carbchecker-alora"``).
intrinsic: If ``True``, the adapter implements an intrinsic and an
``io.yaml`` file must also be provided.
io_yaml: Path to the ``io.yaml`` file configuring input/output processing
when the model is invoked as an intrinsic.
"""
from cli.alora.intrinsic_uploader import upload_intrinsic
from cli.alora.upload import upload_model

@@ -105,7 +138,20 @@ def alora_add_readme(
"processing if the model is invoked as an intrinsic.",
),
):
"""Generate and upload an INTRINSIC_README.md for a trained adapter."""
"""Generate and upload an INTRINSIC_README.md for a trained adapter.

Args:
datafile: JSONL file with item/label pairs used to train the adapter.
basemodel: Base model ID or path.
promptfile: Path to the prompt format file, or ``None``.
name: Destination model name on Hugging Face Hub.
hints: Path to a file containing additional domain hints, or ``None``.
io_yaml: Path to the ``io.yaml`` intrinsic configuration file, or ``None``.

Raises:
OSError: If no Hugging Face authentication token is found.
SystemExit: If the user declines to upload the generated README.
"""
from huggingface_hub import HfFolder, create_repo, upload_file

from cli.alora.readme_generator import generate_readme
39 changes: 39 additions & 0 deletions cli/alora/intrinsic_uploader.py
@@ -1,3 +1,12 @@
"""Upload a trained adapter to Hugging Face Hub in the intrinsic directory layout.

Creates or updates a private Hugging Face repository and uploads adapter weights
into a ``<intrinsic_name>/<base_model>/<adapter_type>`` sub-directory, together with
the required ``io.yaml`` configuration file. If an ``INTRINSIC_README.md`` exists in
the weight directory it is also uploaded as the repository's root ``README.md``.
Requires an authenticated Hugging Face token obtained via ``huggingface-cli login``.
"""

import os
import shutil
import tempfile
@@ -15,6 +24,36 @@ def upload_intrinsic(
io_yaml: str,
private: bool = True,
):
"""Upload an adapter to Hugging Face Hub using the intrinsic directory layout.

Creates or updates a private Hugging Face repository and uploads adapter
weights into a ``<intrinsic_name>/<base_model>/<adapter_type>`` sub-directory,
together with the ``io.yaml`` configuration file. If an
``INTRINSIC_README.md`` exists in the weight directory it is also uploaded
as the repository root ``README.md``.

Args:
weight_path (str): Local directory containing the adapter weights
(output of ``save_pretrained``).
model_name (str): Target Hugging Face repository name in
``"<userid>/<intrinsic_name>"`` format (e.g. ``"acme/carbchecker-alora"``).
base_model (str): Base model ID or path (e.g.
``"ibm-granite/granite-3.3-2b-instruct"``). Must contain at most
one ``"/"`` separator.
type (Literal["lora", "alora"]): Adapter type, used as the leaf
directory name in the repository layout.
io_yaml (str): Path to the ``io.yaml`` configuration file for
intrinsic input/output processing.
private (bool): Whether the repository should be private. Currently
only ``True`` is supported.

Raises:
AssertionError: If ``weight_path`` or ``io_yaml`` do not exist, if
``private`` is ``False``, if ``base_model`` contains more than one
``"/"`` separator, or if ``model_name`` does not contain exactly
one ``"/"`` separator.
OSError: If no Hugging Face authentication token is found.
"""
try:
assert os.path.exists(weight_path)
assert os.path.exists(io_yaml)
51 changes: 48 additions & 3 deletions cli/alora/readme_generator.py
@@ -1,3 +1,13 @@
"""LLM-assisted generator for adapter intrinsic README files.

Uses a ``MelleaSession`` with rejection sampling to derive README template variables
from a JSONL training dataset — including a high-level description, the inferred
Python argument list, and Jinja2-renderable sample rows. Validates the generated
output with deterministic requirements (correct naming conventions, syntactically
valid argument lists) before rendering the final ``INTRINSIC_README.md`` via a
Jinja2 template.
"""

import ast
import json
import os
@@ -12,6 +22,18 @@


class ReadmeTemplateVars(BaseModel):
"""Pydantic model holding all variables required to render the intrinsic README template.

Attributes:
high_level_description (str): A 2-3 sentence description of what the intrinsic adapter does.
dataset_description (str): Brief description of the training dataset contents and format.
userid (str): HuggingFace user ID (the namespace portion of the model name).
intrinsic_name (str): Short snake_case identifier for the intrinsic (e.g. ``"carbchecker"``).
intrinsic_name_camelcase (str): CamelCase version of ``intrinsic_name`` (e.g. ``"CarbChecker"``).
arglist (str): Python function argument list with type hints (e.g. ``"description: str"``).
arglist_without_type_annotations (str): Argument list without type hints (e.g. ``"description"``).
"""

high_level_description: str
dataset_description: str
userid: str
@@ -119,8 +141,20 @@ def make_readme_jinja_dict(
"""Generate all template variables for the intrinsic README using an LLM.

Loads the first five lines of the JSONL dataset, determines the input structure,
and uses m.instruct with deterministic requirements and rejection sampling to
and uses ``m.instruct`` with deterministic requirements and rejection sampling to
generate README template variables.

Args:
m: Active ``MelleaSession`` to use for LLM generation.
dataset_path: Path to the JSONL training dataset file.
base_model: Base model ID or path used to train the adapter.
prompt_file: Path to the prompt format file (empty string if not provided).
name: Destination model name on Hugging Face Hub
(e.g. ``"acme/carbchecker-alora"``).
hints: Optional string of additional domain hints to include in the prompt.

Returns:
Dict of Jinja2 template variables for rendering the ``INTRINSIC_README.md``.
"""
# Load first 5 lines of the dataset.
samples = []
@@ -260,8 +294,19 @@ def generate_readme(
) -> str:
"""Generate an INTRINSIC_README.md file from the dataset and template.

Creates a MelleaSession, uses the LLM to generate template variables,
renders the Jinja template, and writes the result to output_path.
Creates a ``MelleaSession``, uses the LLM to generate template variables,
renders the Jinja template, and writes the result to ``output_path``.

Args:
dataset_path: Path to the JSONL training dataset file.
base_model: Base model ID or path used to train the adapter.
prompt_file: Path to the prompt format file, or ``None``.
output_path: Destination path for the generated README file.
name: Destination model name on Hugging Face Hub.
hints: Optional string of additional domain hints for the LLM.

Returns:
The path to the written output file (same as ``output_path``).
"""
from jinja2 import Environment, FileSystemLoader
