From d38b340bc0778f8b169cd47a502b6fe8caafc17a Mon Sep 17 00:00:00 2001 From: Noza23 Date: Sun, 16 Mar 2025 16:18:42 +0100 Subject: [PATCH 1/2] feat: add readme --- README.md | 112 ++++++++++++++++++++++++++++++++++++++++ dataserious/__init__.py | 3 +- 2 files changed, 114 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fc66078..33826da 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,115 @@ # DataSerious + ![Tests](https://github.com/Noza23/dataserious/actions/workflows/tests.yaml/badge.svg) [![codecov](https://codecov.io/gh/Noza23/dataserious/graph/badge.svg?token=m9yHQyL0sQ)](https://codecov.io/gh/Noza23/dataserious) + +`dataserious` is a Python package that enhances dataclasses with type validation, serialization, and search space generation. It builds on top of the standard `dataclasses` module to provide additional functionality for configuration management. Its only dependency is `pyyaml`, which is used for YAML support. + +## Features + +- **Type Validation**: Ensures that the attributes of the dataclass instances match their type annotations. +- **Serialization**: Supports serialization to and from JSON and YAML formats. +- **Search Space Generation**: Generates search trees for hyperparameter tuning in grid and random search. + +## Installation + +You can install `dataserious` with YAML support using pip. + +```sh +pip install "dataserious[yaml] @ git+https://github.com/Noza23/dataserious.git" +``` +## Usage + +### Defining a Configuration Class + +To define a configuration class, inherit from `BaseConfig` and use `ConfigField` for fields that require additional metadata. 
+ +```python +from dataserious import BaseConfig, ConfigField + +class ModelConfig(BaseConfig): + name: str + """Name of the model.""" + n_layers: int = ConfigField(searchable=True, description="Number of layers in the model.") + n_heads: int = ConfigField(searchable=True) + """Number of heads in the model.""" +``` + +### Creating Template Configurations +```python +ModelConfig.to_schema() +ModelConfig.schema_to_yaml("schema.yaml") +``` +```yaml +name: 'str: Name of the model.' +n_layers: 'int: Number of layers in the model.' +n_heads: 'int: Number of heads in the model.' +``` + +### Loading and Saving Configurations + +You can load and save configurations in JSON and YAML formats. + +```python +config = ModelConfig(name="GPT", n_layers=12, n_heads=12) +config.to_yaml("config.yaml") +loaded_config = ModelConfig.from_yaml("config.yaml") +``` + +### Generating Search Trees + +Generate search trees for hyperparameter tuning. + +```python +print(config.to_search_tree()) +config.search_tree_to_yaml("search_tree.yaml") +``` + +```yaml +n_layers: +- int +n_heads: +- int +``` + +### Serving Grid and Random Search Spaces + +1. Filled out search tree *test.yaml*: + ```yaml + n_layers: + - 12 + - 24 + - 36 + n_heads: + - 12 + - 24 + - 36 + ``` +2. Loading the search tree and generating search spaces: + ```python + configs = config.get_configs_grid_from_path("test.yaml") + configs_random = config.get_configs_random_from_path("test.yaml", n=2, seed=42) + ``` + +3. Resulting Configs: + ```python + [print(json.dumps(config.to_dict(), indent=4)) for config in configs] + print("\nRandom configs:") + [print(json.dumps(config.to_dict(), indent=4)) for config in configs_random] + ``` + + +### Validating Configurations + +Ensure that the configurations match the expected types. + +```python +try: + config = ModelConfig(name="GPT", n_layers="twelve", n_heads=12) +except TypeError as e: + print(e) +``` + +## Contributing + +Contributions are welcome! 
Please read the contributing guidelines for more information. diff --git a/dataserious/__init__.py b/dataserious/__init__.py index a4823c8..c387964 100644 --- a/dataserious/__init__.py +++ b/dataserious/__init__.py @@ -1,5 +1,6 @@ """Python dataclasses enhanced with type validation and serialization.""" from .base import BaseConfig +from .fields import ConfigField -__all__ = ["BaseConfig"] +__all__ = ["BaseConfig", "ConfigField"] From fc4e24bdcd318b15837436787536445cb485a504 Mon Sep 17 00:00:00 2001 From: Noza23 Date: Sun, 16 Mar 2025 16:22:41 +0100 Subject: [PATCH 2/2] fix: contributions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 33826da..2a10af1 100644 --- a/README.md +++ b/README.md @@ -112,4 +112,4 @@ except TypeError as e: ## Contributing -Contributions are welcome! Please read the contributing guidelines for more information. +Contributions are welcome!