From c02460509217ce70200ab8843a234e9186a12880 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:36:45 +0000
Subject: [PATCH 1/3] Bump authlib in /requirements in the pip group across 1
 directory

Bumps the pip group with 1 update in the /requirements directory: [authlib](https://github.com/authlib/authlib).


Updates `authlib` from 1.3.2 to 1.6.5
- [Release notes](https://github.com/authlib/authlib/releases)
- [Changelog](https://github.com/authlib/authlib/blob/main/docs/changelog.rst)
- [Commits](https://github.com/authlib/authlib/compare/v1.3.2...v1.6.5)

---
updated-dependencies:
- dependency-name: authlib
  dependency-version: 1.6.5
  dependency-type: direct:development
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 requirements/dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/dev.txt b/requirements/dev.txt
index d3a3437f..993be821 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -8,7 +8,7 @@ anyio==4.4.0
     # via
     #   -c requirements/requirements.txt
     #   httpx
-authlib==1.3.2
+authlib==1.6.5
     # via safety
 backports-tarfile==1.2.0
     # via

From 4f9de58da0e1e4e1ac8bd8480ea3fb4a1f156bf8 Mon Sep 17 00:00:00 2001
From: Ben Lewis <blewis@hirundo.io>
Date: Mon, 13 Oct 2025 21:35:50 +0300
Subject: [PATCH 2/3] chore: bump authlib to 1.6.5 (#170)

---
 pyproject.toml | 1 +
 uv.lock        | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6699df90..b8a84375 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,6 +67,7 @@ dev = [
     "virtualenv>=20.6.6",
     #  ⬆️ Needed for `pre-commit` and locking version for `safety-cli`
     #  version fixes vulnerability GHSA-rqc4-2hc7-8c8v
+    "authlib>=1.6.5",
     "ruff>=0.12.0",
     "bumpver",
     "platformdirs>=4.3.6",
diff --git a/uv.lock b/uv.lock
index 7c40ac24..b53859a4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -63,14 +63,14 @@ wheels = [
 
 [[package]]
 name = "authlib"
-version = "1.6.1"
+version = "1.6.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "cryptography" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8e/a1/d8d1c6f8bc922c0b87ae0d933a8ed57be1bef6970894ed79c2852a153cd3/authlib-1.6.1.tar.gz", hash = "sha256:4dffdbb1460ba6ec8c17981a4c67af7d8af131231b5a36a88a1e8c80c111cdfd", size = 159988 }
+sdist = { url = "https://files.pythonhosted.org/packages/cd/3f/1d3bbd0bf23bdd99276d4def22f29c27a914067b4cf66f753ff9b8bbd0f3/authlib-1.6.5.tar.gz", hash = "sha256:6aaf9c79b7cc96c900f0b284061691c5d4e61221640a948fe690b556a6d6d10b", size = 164553 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f9/58/cc6a08053f822f98f334d38a27687b69c6655fb05cd74a7a5e70a2aeed95/authlib-1.6.1-py2.py3-none-any.whl", hash = "sha256:e9d2031c34c6309373ab845afc24168fe9e93dc52d252631f52642f21f5ed06e", size = 239299 },
+    { url = "https://files.pythonhosted.org/packages/f8/aa/5082412d1ee302e9e7d80b6949bc4d2a8fa1149aaab610c5fc24709605d6/authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a", size = 243608 },
 ]
 
 [[package]]

From 804c2f640732e43351882e58b97f1c67c33f48b4 Mon Sep 17 00:00:00 2001
From: Ben Lewis <blewis@hirundo.io>
Date: Wed, 15 Oct 2025 18:12:51 +0300
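Subject: [PATCH 3/3] Fix issues with API since domain was renamed to modality

Rename the `Domain` enum to `ModalityType`, the `QADataset.domain` field to
`QADataset.modality`, and `DOMAIN_TO_SUPPORTED_LABELING_TYPES` to
`MODALITY_TO_SUPPORTED_LABELING_TYPES`; the validation error messages now
say "modality" as well.

`Domain` is no longer exported from `hirundo`, so callers must import
`ModalityType` and pass `modality=` instead of `domain=`. The speech-to-text
tests and the on-prem audio notebooks are updated accordingly.

on_prem_audio_sanity_test_notebook.ipynb is also re-serialized with 1-space
JSON indentation, which accounts for most of its line churn.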

---
 hirundo/__init__.py                           |   4 +-
 hirundo/dataset_qa.py                         |  27 +-
 on-prem/on_prem_audio_ar_test_notebook.ipynb  |   4 +-
 ...on_prem_audio_he_small_test_notebook.ipynb |   4 +-
 on-prem/on_prem_audio_he_test_notebook.ipynb  |   4 +-
 .../on_prem_audio_sanity_test_notebook.ipynb  | 346 +++++++++---------
 tests/speech-to-text/sanity_stt_git_test.py   |   4 +-
 tests/speech-to-text/stt_git_test.py          |   4 +-
 8 files changed, 200 insertions(+), 197 deletions(-)

diff --git a/hirundo/__init__.py b/hirundo/__init__.py
index f0b85d54..96c51f2e 100644
--- a/hirundo/__init__.py
+++ b/hirundo/__init__.py
@@ -5,8 +5,8 @@
 )
 from .dataset_qa import (
     ClassificationRunArgs,
-    Domain,
     HirundoError,
+    ModalityType,
     ObjectDetectionRunArgs,
     QADataset,
     RunArgs,
@@ -43,7 +43,7 @@
     "KeylabsObjSegImages",
     "KeylabsObjSegVideo",
     "QADataset",
-    "Domain",
+    "ModalityType",
     "RunArgs",
     "ClassificationRunArgs",
     "ObjectDetectionRunArgs",
diff --git a/hirundo/dataset_qa.py b/hirundo/dataset_qa.py
index fcdeea68..7360460c 100644
--- a/hirundo/dataset_qa.py
+++ b/hirundo/dataset_qa.py
@@ -128,27 +128,27 @@ class AugmentationName(str, Enum):
     GAUSSIAN_BLUR = "GaussianBlur"
 
 
-class Domain(str, Enum):
+class ModalityType(str, Enum):
     RADAR = "RADAR"
     VISION = "VISION"
     SPEECH = "SPEECH"
     TABULAR = "TABULAR"
 
 
-DOMAIN_TO_SUPPORTED_LABELING_TYPES = {
-    Domain.RADAR: [
+MODALITY_TO_SUPPORTED_LABELING_TYPES = {
+    ModalityType.RADAR: [
         LabelingType.SINGLE_LABEL_CLASSIFICATION,
         LabelingType.OBJECT_DETECTION,
     ],
-    Domain.VISION: [
+    ModalityType.VISION: [
         LabelingType.SINGLE_LABEL_CLASSIFICATION,
         LabelingType.OBJECT_DETECTION,
         LabelingType.OBJECT_SEGMENTATION,
         LabelingType.SEMANTIC_SEGMENTATION,
         LabelingType.PANOPTIC_SEGMENTATION,
     ],
-    Domain.SPEECH: [LabelingType.SPEECH_TO_TEXT],
-    Domain.TABULAR: [LabelingType.SINGLE_LABEL_CLASSIFICATION],
+    ModalityType.SPEECH: [LabelingType.SPEECH_TO_TEXT],
+    ModalityType.TABULAR: [LabelingType.SINGLE_LABEL_CLASSIFICATION],
 }
 
 
@@ -206,9 +206,9 @@ class QADataset(BaseModel):
     For audio datasets, this field is ignored.
     If no value is provided, all augmentations are applied to vision datasets.
     """
-    domain: Domain = Domain.VISION
+    modality: ModalityType = ModalityType.VISION
     """
-    Used to define the domain of the dataset.
+    Used to define the modality of the dataset.
-    Defaults to Image.
+    Defaults to `ModalityType.VISION`.
     """
 
@@ -221,13 +221,16 @@ class QADataset(BaseModel):
 
     @model_validator(mode="after")
     def validate_dataset(self):
-        if self.domain not in DOMAIN_TO_SUPPORTED_LABELING_TYPES:
+        if self.modality not in MODALITY_TO_SUPPORTED_LABELING_TYPES:
             raise ValueError(
-                f"Domain {self.domain} is not supported. Supported domains are: {list(DOMAIN_TO_SUPPORTED_LABELING_TYPES.keys())}"
+                f"Modality {self.modality} is not supported. Supported modalities are: {list(MODALITY_TO_SUPPORTED_LABELING_TYPES.keys())}"
             )
-        if self.labeling_type not in DOMAIN_TO_SUPPORTED_LABELING_TYPES[self.domain]:
+        if (
+            self.labeling_type
+            not in MODALITY_TO_SUPPORTED_LABELING_TYPES[self.modality]
+        ):
             raise ValueError(
-                f"Labeling type {self.labeling_type} is not supported for domain {self.domain}. Supported labeling types are: {DOMAIN_TO_SUPPORTED_LABELING_TYPES[self.domain]}"
+                f"Labeling type {self.labeling_type} is not supported for modality {self.modality}. Supported labeling types are: {MODALITY_TO_SUPPORTED_LABELING_TYPES[self.modality]}"
             )
         if self.storage_config is None and self.storage_config_id is None:
             raise ValueError(
diff --git a/on-prem/on_prem_audio_ar_test_notebook.ipynb b/on-prem/on_prem_audio_ar_test_notebook.ipynb
index 45eacffa..f4874c5d 100644
--- a/on-prem/on_prem_audio_ar_test_notebook.ipynb
+++ b/on-prem/on_prem_audio_ar_test_notebook.ipynb
@@ -10,11 +10,11 @@
     "import os\n",
     "\n",
     "from hirundo import (\n",
-    "    Domain,\n",
     "    GitPlainAuth,\n",
     "    GitRepo,\n",
     "    HirundoCSV,\n",
     "    LabelingType,\n",
+    "    ModalityType,\n",
     "    QADataset,\n",
     "    StorageConfig,\n",
     "    StorageGit,\n",
@@ -67,7 +67,7 @@
     ")\n",
     "test_dataset = QADataset(\n",
     "    name=f\"TEST-STT-MASC-dataset{unique_id}\",\n",
-    "    domain=Domain.SPEECH,\n",
+    "    modality=ModalityType.SPEECH,\n",
     "    labeling_type=LabelingType.SPEECH_TO_TEXT,\n",
     "    language=\"ar\",\n",
     "    storage_config=StorageConfig(\n",
diff --git a/on-prem/on_prem_audio_he_small_test_notebook.ipynb b/on-prem/on_prem_audio_he_small_test_notebook.ipynb
index 7546037d..60724719 100644
--- a/on-prem/on_prem_audio_he_small_test_notebook.ipynb
+++ b/on-prem/on_prem_audio_he_small_test_notebook.ipynb
@@ -10,11 +10,11 @@
     "import os\n",
     "\n",
     "from hirundo import (\n",
-    "    Domain,\n",
     "    GitPlainAuth,\n",
     "    GitRepo,\n",
     "    HirundoCSV,\n",
     "    LabelingType,\n",
+    "    ModalityType,\n",
     "    QADataset,\n",
     "    StorageConfig,\n",
     "    StorageGit,\n",
@@ -51,7 +51,7 @@
     ")\n",
     "test_dataset = QADataset(\n",
     "    name=f\"TEST-STT-RoboShaulGolden-dataset{unique_id}\",\n",
-    "    domain=Domain.SPEECH,\n",
+    "    modality=ModalityType.SPEECH,\n",
     "    labeling_type=LabelingType.SPEECH_TO_TEXT,\n",
     "    language=\"he\",\n",
     "    storage_config=StorageConfig(\n",
diff --git a/on-prem/on_prem_audio_he_test_notebook.ipynb b/on-prem/on_prem_audio_he_test_notebook.ipynb
index 636fa5e4..b2f30df9 100644
--- a/on-prem/on_prem_audio_he_test_notebook.ipynb
+++ b/on-prem/on_prem_audio_he_test_notebook.ipynb
@@ -10,11 +10,11 @@
     "import os\n",
     "\n",
     "from hirundo import (\n",
-    "    Domain,\n",
     "    GitPlainAuth,\n",
     "    GitRepo,\n",
     "    HirundoCSV,\n",
     "    LabelingType,\n",
+    "    ModalityType,\n",
     "    QADataset,\n",
     "    StorageConfig,\n",
     "    StorageGit,\n",
@@ -51,7 +51,7 @@
     ")\n",
     "test_dataset = QADataset(\n",
     "    name=f\"TEST-STT-RoboShaul-dataset{unique_id}\",\n",
-    "    domain=Domain.SPEECH,\n",
+    "    modality=ModalityType.SPEECH,\n",
     "    labeling_type=LabelingType.SPEECH_TO_TEXT,\n",
     "    language=\"he\",\n",
     "    storage_config=StorageConfig(\n",
diff --git a/on-prem/on_prem_audio_sanity_test_notebook.ipynb b/on-prem/on_prem_audio_sanity_test_notebook.ipynb
index c41a880e..1315f244 100644
--- a/on-prem/on_prem_audio_sanity_test_notebook.ipynb
+++ b/on-prem/on_prem_audio_sanity_test_notebook.ipynb
@@ -1,181 +1,181 @@
 {
-  "cells": [
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/Users/blewis/Programming/hirundo-python-sdk/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-            "  from .autonotebook import tqdm as notebook_tqdm\n",
-            "Deleted storage integration with ID: 9\n",
-            "Created storage integration with ID: 11\n",
-            "Created dataset with ID: 8\n",
-            "Started the run with ID: 2f7e0a41-75c4-4568-95fc-a177411c5162\n",
-            "Dataset QA run completed successfully: 100%|██████████| 100.0/100.0 [08:35<00:00,  5.16s/it]      "
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "suspects=     audio_segment_id           audio_path  \\\n",
-            "0                   6  wavs/automatic_0012   \n",
-            "1                  20  wavs/automatic_0017   \n",
-            "2                  15  wavs/automatic_0018   \n",
-            "3                  36  wavs/automatic_0020   \n",
-            "4                  37  wavs/automatic_0010   \n",
-            "..                ...                  ...   \n",
-            "120                32  wavs/automatic_0004   \n",
-            "121                34  wavs/automatic_0012   \n",
-            "122                39  wavs/automatic_0017   \n",
-            "123                39  wavs/automatic_0017   \n",
-            "124                 3  wavs/automatic_0017   \n",
-            "\n",
-            "                                            transcript  \\\n",
-            "0                 ‏וגם אם לא תהיה פה השעה בסופו של דבר   \n",
-            "1    ‏פרשה שהתחילה בחקירה שנפתחה יום אחד ב2017 בעצם...   \n",
-            "2    ‏ללשכת ראש הממשלה הדבר הזה נולד רק דרך אילן יש...   \n",
-            "3                                                   ‏ה   \n",
-            "4    ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור...   \n",
-            "..                                                 ...   \n",
-            "120  ‏היום אנחנו יוצאים אה נו לא אני גאון אני יושב ...   \n",
-            "121  ‏הסיפור הזה נותן לנו הצצה לדברים שאנחנו אף פעם...   \n",
-            "122  ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ...   \n",
-            "123  ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ...   \n",
-            "124  ‏הוא האיש האחראי לזה שתיק 4000 בכלל קיים ובא ל...   \n",
-            "\n",
-            "     suspect_region_start_index  suspect_region_end_index  \\\n",
-            "0                             0                         6   \n",
-            "1                             0                        12   \n",
-            "2                             0                         9   \n",
-            "3                             0                        -2   \n",
-            "4                           138                       157   \n",
-            "..                          ...                       ...   \n",
-            "120                          20                        32   \n",
-            "121                           0                        10   \n",
-            "122                          17                        33   \n",
-            "123                          58                        -2   \n",
-            "124                           0                         8   \n",
-            "\n",
-            "                                        suspect_region  suspect_level  \\\n",
-            "0               ‏וגם אם » לא תהיה פה השעה בסופו של דבר            1.0   \n",
-            "1    ‏פרשה שהתחילה » בחקירה שנפתחה יום אחד ב2017 בע...            1.0   \n",
-            "2    ‏ללשכת ראש » הממשלה הדבר הזה נולד רק דרך אילן ...            1.0   \n",
-            "3                                                   ‏ה            1.0   \n",
-            "4    ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור...            1.0   \n",
-            "..                                                 ...            ...   \n",
-            "120  ‏היום אנחנו יוצאים אה « נו לא אני » גאון אני י...            0.5   \n",
-            "121  ‏הסיפור הזה » נותן לנו הצצה לדברים שאנחנו אף פ...            0.5   \n",
-            "122  ‏כי במקור זו הייתה « פרשה שקראו לה » פרשת בזק ...            0.5   \n",
-            "123  ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ...            0.5   \n",
-            "124  ‏הוא האיש » האחראי לזה שתיק 4000 בכלל קיים ובא...            0.5   \n",
-            "\n",
-            "     suspect_score  rank  start_time  end_time  \n",
-            "0         1.000000     1        0.00      2.96  \n",
-            "1         0.946653     2       58.64     68.00  \n",
-            "2         0.794076     3        8.00     16.00  \n",
-            "3         0.741773     4        0.00      1.00  \n",
-            "4         0.728718     5       17.00     34.00  \n",
-            "..             ...   ...         ...       ...  \n",
-            "120       0.001446   121        0.00      5.20  \n",
-            "121       0.001355   122        2.96      6.64  \n",
-            "122       0.001330   123       50.96     58.64  \n",
-            "123       0.001073   124       50.96     58.64  \n",
-            "124       0.001006   125       44.48     50.96  \n",
-            "\n",
-            "[125 rows x 11 columns] warnings_and_errors=Empty DataFrame\n",
-            "Columns: [audio_path, status]\n",
-            "Index: []\n"
-          ]
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "\n"
-          ]
-        }
-      ],
-      "source": [
-        "import logging\n",
-        "import os\n",
-        "from pathlib import Path\n",
-        "\n",
-        "from hirundo import (\n",
-        "    Domain,\n",
-        "    HirundoCSV,\n",
-        "    LabelingType,\n",
-        "    QADataset,\n",
-        "    StorageConfig,\n",
-        "    StorageTypes,\n",
-        ")\n",
-        "from pydantic_core import Url\n",
-        "\n",
-        "logger = logging.getLogger(__name__)\n",
-        "\n",
-        "unique_id = os.getenv(\"UNIQUE_ID\", \"\").replace(\".\", \"-\").replace(\"/\", \"-\")\n",
-        "\n",
-        "run_ids = [run.run_id for run in QADataset.list_runs() if run.run_id]\n",
-        "for run_id in run_ids:\n",
-        "    QADataset.cancel_by_id(run_id)\n",
-        "dataset_ids = [dataset.id for dataset in QADataset.list_datasets()]\n",
-        "for dataset_id in dataset_ids:\n",
-        "    QADataset.delete_by_id(dataset_id)\n",
-        "storage_ids = [storage_config.id for storage_config in StorageConfig.list()]\n",
-        "for storage_id in storage_ids:\n",
-        "    StorageConfig.delete_by_id(storage_id)\n",
-        "\n",
-        "test_dataset = QADataset(\n",
-        "    name=f\"TEST-STT-RoboShaulTiny-dataset{unique_id}\",\n",
-        "    domain=Domain.SPEECH,\n",
-        "    labeling_type=LabelingType.SPEECH_TO_TEXT,\n",
-        "    language=\"he\",\n",
-        "    storage_config=StorageConfig(\n",
-        "        name=f\"STT-RoboShaulTiny-dataset{unique_id}\",\n",
-        "        type=StorageTypes.LOCAL,\n",
-        "    ),\n",
-        "    data_root_url=Url(Path(\"/datasets/RoboShaulTiny/wavs\").as_uri()),\n",
-        "    labeling_info=HirundoCSV(\n",
-        "        csv_url=Url(Path(\"/datasets/RoboShaulTiny/meta.csv\").as_uri()),\n",
-        "    ),\n",
-        ")\n",
-        "\n",
-        "test_dataset.run_qa()\n",
-        "results = test_dataset.check_run()\n",
-        "print(results)"
-      ]
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/blewis/Programming/hirundo-python-sdk/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n",
+      "Deleted storage integration with ID: 9\n",
+      "Created storage integration with ID: 11\n",
+      "Created dataset with ID: 8\n",
+      "Started the run with ID: 2f7e0a41-75c4-4568-95fc-a177411c5162\n",
+      "Dataset QA run completed successfully: 100%|██████████| 100.0/100.0 [08:35<00:00,  5.16s/it]      "
+     ]
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": []
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": ".venv",
-      "language": "python",
-      "name": "python3"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "suspects=     audio_segment_id           audio_path  \\\n",
+      "0                   6  wavs/automatic_0012   \n",
+      "1                  20  wavs/automatic_0017   \n",
+      "2                  15  wavs/automatic_0018   \n",
+      "3                  36  wavs/automatic_0020   \n",
+      "4                  37  wavs/automatic_0010   \n",
+      "..                ...                  ...   \n",
+      "120                32  wavs/automatic_0004   \n",
+      "121                34  wavs/automatic_0012   \n",
+      "122                39  wavs/automatic_0017   \n",
+      "123                39  wavs/automatic_0017   \n",
+      "124                 3  wavs/automatic_0017   \n",
+      "\n",
+      "                                            transcript  \\\n",
+      "0                 ‏וגם אם לא תהיה פה השעה בסופו של דבר   \n",
+      "1    ‏פרשה שהתחילה בחקירה שנפתחה יום אחד ב2017 בעצם...   \n",
+      "2    ‏ללשכת ראש הממשלה הדבר הזה נולד רק דרך אילן יש...   \n",
+      "3                                                   ‏ה   \n",
+      "4    ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור...   \n",
+      "..                                                 ...   \n",
+      "120  ‏היום אנחנו יוצאים אה נו לא אני גאון אני יושב ...   \n",
+      "121  ‏הסיפור הזה נותן לנו הצצה לדברים שאנחנו אף פעם...   \n",
+      "122  ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ...   \n",
+      "123  ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ...   \n",
+      "124  ‏הוא האיש האחראי לזה שתיק 4000 בכלל קיים ובא ל...   \n",
+      "\n",
+      "     suspect_region_start_index  suspect_region_end_index  \\\n",
+      "0                             0                         6   \n",
+      "1                             0                        12   \n",
+      "2                             0                         9   \n",
+      "3                             0                        -2   \n",
+      "4                           138                       157   \n",
+      "..                          ...                       ...   \n",
+      "120                          20                        32   \n",
+      "121                           0                        10   \n",
+      "122                          17                        33   \n",
+      "123                          58                        -2   \n",
+      "124                           0                         8   \n",
+      "\n",
+      "                                        suspect_region  suspect_level  \\\n",
+      "0               ‏וגם אם » לא תהיה פה השעה בסופו של דבר            1.0   \n",
+      "1    ‏פרשה שהתחילה » בחקירה שנפתחה יום אחד ב2017 בע...            1.0   \n",
+      "2    ‏ללשכת ראש » הממשלה הדבר הזה נולד רק דרך אילן ...            1.0   \n",
+      "3                                                   ‏ה            1.0   \n",
+      "4    ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור...            1.0   \n",
+      "..                                                 ...            ...   \n",
+      "120  ‏היום אנחנו יוצאים אה « נו לא אני » גאון אני י...            0.5   \n",
+      "121  ‏הסיפור הזה » נותן לנו הצצה לדברים שאנחנו אף פ...            0.5   \n",
+      "122  ‏כי במקור זו הייתה « פרשה שקראו לה » פרשת בזק ...            0.5   \n",
+      "123  ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ...            0.5   \n",
+      "124  ‏הוא האיש » האחראי לזה שתיק 4000 בכלל קיים ובא...            0.5   \n",
+      "\n",
+      "     suspect_score  rank  start_time  end_time  \n",
+      "0         1.000000     1        0.00      2.96  \n",
+      "1         0.946653     2       58.64     68.00  \n",
+      "2         0.794076     3        8.00     16.00  \n",
+      "3         0.741773     4        0.00      1.00  \n",
+      "4         0.728718     5       17.00     34.00  \n",
+      "..             ...   ...         ...       ...  \n",
+      "120       0.001446   121        0.00      5.20  \n",
+      "121       0.001355   122        2.96      6.64  \n",
+      "122       0.001330   123       50.96     58.64  \n",
+      "123       0.001073   124       50.96     58.64  \n",
+      "124       0.001006   125       44.48     50.96  \n",
+      "\n",
+      "[125 rows x 11 columns] warnings_and_errors=Empty DataFrame\n",
+      "Columns: [audio_path, status]\n",
+      "Index: []\n"
+     ]
     },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.9.19"
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
     }
+   ],
+   "source": [
+    "import logging\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "from hirundo import (\n",
+    "    HirundoCSV,\n",
+    "    LabelingType,\n",
+    "    ModalityType,\n",
+    "    QADataset,\n",
+    "    StorageConfig,\n",
+    "    StorageTypes,\n",
+    ")\n",
+    "from pydantic_core import Url\n",
+    "\n",
+    "logger = logging.getLogger(__name__)\n",
+    "\n",
+    "unique_id = os.getenv(\"UNIQUE_ID\", \"\").replace(\".\", \"-\").replace(\"/\", \"-\")\n",
+    "\n",
+    "run_ids = [run.run_id for run in QADataset.list_runs() if run.run_id]\n",
+    "for run_id in run_ids:\n",
+    "    QADataset.cancel_by_id(run_id)\n",
+    "dataset_ids = [dataset.id for dataset in QADataset.list_datasets()]\n",
+    "for dataset_id in dataset_ids:\n",
+    "    QADataset.delete_by_id(dataset_id)\n",
+    "storage_ids = [storage_config.id for storage_config in StorageConfig.list()]\n",
+    "for storage_id in storage_ids:\n",
+    "    StorageConfig.delete_by_id(storage_id)\n",
+    "\n",
+    "test_dataset = QADataset(\n",
+    "    name=f\"TEST-STT-RoboShaulTiny-dataset{unique_id}\",\n",
+    "    modality=ModalityType.SPEECH,\n",
+    "    labeling_type=LabelingType.SPEECH_TO_TEXT,\n",
+    "    language=\"he\",\n",
+    "    storage_config=StorageConfig(\n",
+    "        name=f\"STT-RoboShaulTiny-dataset{unique_id}\",\n",
+    "        type=StorageTypes.LOCAL,\n",
+    "    ),\n",
+    "    data_root_url=Url(Path(\"/datasets/RoboShaulTiny/wavs\").as_uri()),\n",
+    "    labeling_info=HirundoCSV(\n",
+    "        csv_url=Url(Path(\"/datasets/RoboShaulTiny/meta.csv\").as_uri()),\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "test_dataset.run_qa()\n",
+    "results = test_dataset.check_run()\n",
+    "print(results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
   },
-  "nbformat": 4,
-  "nbformat_minor": 2
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.19"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
 }
diff --git a/tests/speech-to-text/sanity_stt_git_test.py b/tests/speech-to-text/sanity_stt_git_test.py
index 8c7bff95..104d5f5f 100644
--- a/tests/speech-to-text/sanity_stt_git_test.py
+++ b/tests/speech-to-text/sanity_stt_git_test.py
@@ -3,11 +3,11 @@
 
 import pytest
 from hirundo import (
-    Domain,
     GitPlainAuth,
     GitRepo,
     HirundoCSV,
     LabelingType,
+    ModalityType,
     QADataset,
     StorageConfig,
     StorageGit,
@@ -35,7 +35,7 @@
 )
 test_dataset = QADataset(
     name=f"TEST-STT-RoboShaulTiny-dataset{unique_id}",
-    domain=Domain.SPEECH,
+    modality=ModalityType.SPEECH,
     labeling_type=LabelingType.SPEECH_TO_TEXT,
     language="he",
     storage_config=StorageConfig(
diff --git a/tests/speech-to-text/stt_git_test.py b/tests/speech-to-text/stt_git_test.py
index eeaaf2f9..6a12a95f 100644
--- a/tests/speech-to-text/stt_git_test.py
+++ b/tests/speech-to-text/stt_git_test.py
@@ -3,11 +3,11 @@
 
 import pytest
 from hirundo import (
-    Domain,
     GitPlainAuth,
     GitRepo,
     HirundoCSV,
     LabelingType,
+    ModalityType,
     QADataset,
     StorageConfig,
     StorageGit,
@@ -35,7 +35,7 @@
 )
 test_dataset = QADataset(
     name=f"TEST-STT-MASC-dataset{unique_id}",
-    domain=Domain.SPEECH,
+    modality=ModalityType.SPEECH,
     labeling_type=LabelingType.SPEECH_TO_TEXT,
     language="ar",
     storage_config=StorageConfig(