From c02460509217ce70200ab8843a234e9186a12880 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:36:45 +0000 Subject: [PATCH 1/3] Bump authlib in /requirements in the pip group across 1 directory Bumps the pip group with 1 update in the /requirements directory: [authlib](https://github.com/authlib/authlib). Updates `authlib` from 1.3.2 to 1.6.5 - [Release notes](https://github.com/authlib/authlib/releases) - [Changelog](https://github.com/authlib/authlib/blob/main/docs/changelog.rst) - [Commits](https://github.com/authlib/authlib/compare/v1.3.2...v1.6.5) --- updated-dependencies: - dependency-name: authlib dependency-version: 1.6.5 dependency-type: direct:development dependency-group: pip ... Signed-off-by: dependabot[bot] --- requirements/dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/dev.txt b/requirements/dev.txt index d3a3437f..993be821 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -8,7 +8,7 @@ anyio==4.4.0 # via # -c requirements/requirements.txt # httpx -authlib==1.3.2 +authlib==1.6.5 # via safety backports-tarfile==1.2.0 # via From 4f9de58da0e1e4e1ac8bd8480ea3fb4a1f156bf8 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Mon, 13 Oct 2025 21:35:50 +0300 Subject: [PATCH 2/3] chore: bump authlib to 1.6.5 (#170) --- pyproject.toml | 1 + uv.lock | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6699df90..b8a84375 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ dev = [ "virtualenv>=20.6.6", # ⬆️ Needed for `pre-commit` and locking version for `safety-cli` # version fixes vulnerability GHSA-rqc4-2hc7-8c8v + "authlib>=1.6.5", "ruff>=0.12.0", "bumpver", "platformdirs>=4.3.6", diff --git a/uv.lock b/uv.lock index 7c40ac24..b53859a4 100644 --- a/uv.lock +++ b/uv.lock @@ -63,14 +63,14 @@ wheels = [ [[package]] name = "authlib" -version = "1.6.1" +version = "1.6.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8e/a1/d8d1c6f8bc922c0b87ae0d933a8ed57be1bef6970894ed79c2852a153cd3/authlib-1.6.1.tar.gz", hash = "sha256:4dffdbb1460ba6ec8c17981a4c67af7d8af131231b5a36a88a1e8c80c111cdfd", size = 159988 } +sdist = { url = "https://files.pythonhosted.org/packages/cd/3f/1d3bbd0bf23bdd99276d4def22f29c27a914067b4cf66f753ff9b8bbd0f3/authlib-1.6.5.tar.gz", hash = "sha256:6aaf9c79b7cc96c900f0b284061691c5d4e61221640a948fe690b556a6d6d10b", size = 164553 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/58/cc6a08053f822f98f334d38a27687b69c6655fb05cd74a7a5e70a2aeed95/authlib-1.6.1-py2.py3-none-any.whl", hash = "sha256:e9d2031c34c6309373ab845afc24168fe9e93dc52d252631f52642f21f5ed06e", size = 239299 }, + { url = "https://files.pythonhosted.org/packages/f8/aa/5082412d1ee302e9e7d80b6949bc4d2a8fa1149aaab610c5fc24709605d6/authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a", size = 243608 }, ] [[package]] From 804c2f640732e43351882e58b97f1c67c33f48b4 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Wed, 15 Oct 2025 18:12:51 +0300 Subject: [PATCH 3/3] Fix issues with API since domain was renamed to modality --- hirundo/__init__.py | 4 +- hirundo/dataset_qa.py | 27 +- on-prem/on_prem_audio_ar_test_notebook.ipynb | 4 +- ...on_prem_audio_he_small_test_notebook.ipynb | 4 +- on-prem/on_prem_audio_he_test_notebook.ipynb | 4 +- .../on_prem_audio_sanity_test_notebook.ipynb | 346 +++++++++--------- tests/speech-to-text/sanity_stt_git_test.py | 4 +- tests/speech-to-text/stt_git_test.py | 4 +- 8 files changed, 200 insertions(+), 197 deletions(-) diff --git a/hirundo/__init__.py b/hirundo/__init__.py index f0b85d54..96c51f2e 100644 --- a/hirundo/__init__.py +++ b/hirundo/__init__.py @@ -5,8 +5,8 @@ ) from .dataset_qa import ( ClassificationRunArgs, - Domain, HirundoError, + ModalityType, ObjectDetectionRunArgs, QADataset, RunArgs, @@ -43,7 +43,7 @@ "KeylabsObjSegImages", "KeylabsObjSegVideo", "QADataset", - "Domain", + "ModalityType", "RunArgs", "ClassificationRunArgs", "ObjectDetectionRunArgs", diff --git a/hirundo/dataset_qa.py b/hirundo/dataset_qa.py index fcdeea68..7360460c 100644 --- a/hirundo/dataset_qa.py +++ b/hirundo/dataset_qa.py @@ -128,27 +128,27 @@ class AugmentationName(str, Enum): GAUSSIAN_BLUR = "GaussianBlur" -class Domain(str, Enum): +class ModalityType(str, Enum): RADAR = "RADAR" VISION = "VISION" SPEECH = "SPEECH" TABULAR = "TABULAR" -DOMAIN_TO_SUPPORTED_LABELING_TYPES = { - Domain.RADAR: [ +MODALITY_TO_SUPPORTED_LABELING_TYPES = { + ModalityType.RADAR: [ LabelingType.SINGLE_LABEL_CLASSIFICATION, LabelingType.OBJECT_DETECTION, ], - Domain.VISION: [ + ModalityType.VISION: [ LabelingType.SINGLE_LABEL_CLASSIFICATION, LabelingType.OBJECT_DETECTION, LabelingType.OBJECT_SEGMENTATION, LabelingType.SEMANTIC_SEGMENTATION, LabelingType.PANOPTIC_SEGMENTATION, ], - Domain.SPEECH: [LabelingType.SPEECH_TO_TEXT], - Domain.TABULAR: [LabelingType.SINGLE_LABEL_CLASSIFICATION], + ModalityType.SPEECH: [LabelingType.SPEECH_TO_TEXT], + ModalityType.TABULAR: [LabelingType.SINGLE_LABEL_CLASSIFICATION], } @@ -206,9 +206,9 @@ class QADataset(BaseModel): For audio datasets, this field is ignored. If no value is provided, all augmentations are applied to vision datasets. """ - domain: Domain = Domain.VISION + modality: ModalityType = ModalityType.VISION """ - Used to define the domain of the dataset. + Used to define the modality of the dataset. Defaults to Image. """ @@ -221,13 +221,16 @@ class QADataset(BaseModel): @model_validator(mode="after") def validate_dataset(self): - if self.domain not in DOMAIN_TO_SUPPORTED_LABELING_TYPES: + if self.modality not in MODALITY_TO_SUPPORTED_LABELING_TYPES: raise ValueError( - f"Domain {self.domain} is not supported. Supported domains are: {list(DOMAIN_TO_SUPPORTED_LABELING_TYPES.keys())}" + f"Modality {self.modality} is not supported. Supported modalities are: {list(MODALITY_TO_SUPPORTED_LABELING_TYPES.keys())}" ) - if self.labeling_type not in DOMAIN_TO_SUPPORTED_LABELING_TYPES[self.domain]: + if ( + self.labeling_type + not in MODALITY_TO_SUPPORTED_LABELING_TYPES[self.modality] + ): raise ValueError( - f"Labeling type {self.labeling_type} is not supported for domain {self.domain}. Supported labeling types are: {DOMAIN_TO_SUPPORTED_LABELING_TYPES[self.domain]}" + f"Labeling type {self.labeling_type} is not supported for modality {self.modality}. Supported labeling types are: {MODALITY_TO_SUPPORTED_LABELING_TYPES[self.modality]}" ) if self.storage_config is None and self.storage_config_id is None: raise ValueError( diff --git a/on-prem/on_prem_audio_ar_test_notebook.ipynb b/on-prem/on_prem_audio_ar_test_notebook.ipynb index 45eacffa..f4874c5d 100644 --- a/on-prem/on_prem_audio_ar_test_notebook.ipynb +++ b/on-prem/on_prem_audio_ar_test_notebook.ipynb @@ -10,11 +10,11 @@ "import os\n", "\n", "from hirundo import (\n", - " Domain,\n", " GitPlainAuth,\n", " GitRepo,\n", " HirundoCSV,\n", " LabelingType,\n", + " ModalityType,\n", " QADataset,\n", " StorageConfig,\n", " StorageGit,\n", @@ -67,7 +67,7 @@ ")\n", "test_dataset = QADataset(\n", " name=f\"TEST-STT-MASC-dataset{unique_id}\",\n", - " domain=Domain.SPEECH,\n", + " modality=ModalityType.SPEECH,\n", " labeling_type=LabelingType.SPEECH_TO_TEXT,\n", " language=\"ar\",\n", " storage_config=StorageConfig(\n", diff --git a/on-prem/on_prem_audio_he_small_test_notebook.ipynb b/on-prem/on_prem_audio_he_small_test_notebook.ipynb index 7546037d..60724719 100644 --- a/on-prem/on_prem_audio_he_small_test_notebook.ipynb +++ b/on-prem/on_prem_audio_he_small_test_notebook.ipynb @@ -10,11 +10,11 @@ "import os\n", "\n", "from hirundo import (\n", - " Domain,\n", " GitPlainAuth,\n", " GitRepo,\n", " HirundoCSV,\n", " LabelingType,\n", + " ModalityType,\n", " QADataset,\n", " StorageConfig,\n", " StorageGit,\n", @@ -51,7 +51,7 @@ ")\n", "test_dataset = QADataset(\n", " name=f\"TEST-STT-RoboShaulGolden-dataset{unique_id}\",\n", - " domain=Domain.SPEECH,\n", + " modality=ModalityType.SPEECH,\n", " labeling_type=LabelingType.SPEECH_TO_TEXT,\n", " language=\"he\",\n", " storage_config=StorageConfig(\n", diff --git a/on-prem/on_prem_audio_he_test_notebook.ipynb b/on-prem/on_prem_audio_he_test_notebook.ipynb index 636fa5e4..b2f30df9 100644 --- a/on-prem/on_prem_audio_he_test_notebook.ipynb +++ b/on-prem/on_prem_audio_he_test_notebook.ipynb @@ -10,11 +10,11 @@ "import os\n", "\n", "from hirundo import (\n", - " Domain,\n", " GitPlainAuth,\n", " GitRepo,\n", " HirundoCSV,\n", " LabelingType,\n", + " ModalityType,\n", " QADataset,\n", " StorageConfig,\n", " StorageGit,\n", @@ -51,7 +51,7 @@ ")\n", "test_dataset = QADataset(\n", " name=f\"TEST-STT-RoboShaul-dataset{unique_id}\",\n", - " domain=Domain.SPEECH,\n", + " modality=ModalityType.SPEECH,\n", " labeling_type=LabelingType.SPEECH_TO_TEXT,\n", " language=\"he\",\n", " storage_config=StorageConfig(\n", diff --git a/on-prem/on_prem_audio_sanity_test_notebook.ipynb b/on-prem/on_prem_audio_sanity_test_notebook.ipynb index c41a880e..1315f244 100644 --- a/on-prem/on_prem_audio_sanity_test_notebook.ipynb +++ b/on-prem/on_prem_audio_sanity_test_notebook.ipynb @@ -1,181 +1,181 @@ { - "cells": [ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/blewis/Programming/hirundo-python-sdk/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "Deleted storage integration with ID: 9\n", - "Created storage integration with ID: 11\n", - "Created dataset with ID: 8\n", - "Started the run with ID: 2f7e0a41-75c4-4568-95fc-a177411c5162\n", - "Dataset QA run completed successfully: 100%|██████████| 100.0/100.0 [08:35<00:00, 5.16s/it] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "suspects= audio_segment_id audio_path \\\n", - "0 6 wavs/automatic_0012 \n", - "1 20 wavs/automatic_0017 \n", - "2 15 wavs/automatic_0018 \n", - "3 36 wavs/automatic_0020 \n", - "4 37 wavs/automatic_0010 \n", - ".. ... ... \n", - "120 32 wavs/automatic_0004 \n", - "121 34 wavs/automatic_0012 \n", - "122 39 wavs/automatic_0017 \n", - "123 39 wavs/automatic_0017 \n", - "124 3 wavs/automatic_0017 \n", - "\n", - " transcript \\\n", - "0 ‏וגם אם לא תהיה פה השעה בסופו של דבר \n", - "1 ‏פרשה שהתחילה בחקירה שנפתחה יום אחד ב2017 בעצם... \n", - "2 ‏ללשכת ראש הממשלה הדבר הזה נולד רק דרך אילן יש... \n", - "3 ‏ה \n", - "4 ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור... \n", - ".. ... \n", - "120 ‏היום אנחנו יוצאים אה נו לא אני גאון אני יושב ... \n", - "121 ‏הסיפור הזה נותן לנו הצצה לדברים שאנחנו אף פעם... \n", - "122 ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ... \n", - "123 ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ... \n", - "124 ‏הוא האיש האחראי לזה שתיק 4000 בכלל קיים ובא ל... \n", - "\n", - " suspect_region_start_index suspect_region_end_index \\\n", - "0 0 6 \n", - "1 0 12 \n", - "2 0 9 \n", - "3 0 -2 \n", - "4 138 157 \n", - ".. ... ... \n", - "120 20 32 \n", - "121 0 10 \n", - "122 17 33 \n", - "123 58 -2 \n", - "124 0 8 \n", - "\n", - " suspect_region suspect_level \\\n", - "0 ‏וגם אם » לא תהיה פה השעה בסופו של דבר 1.0 \n", - "1 ‏פרשה שהתחילה » בחקירה שנפתחה יום אחד ב2017 בע... 1.0 \n", - "2 ‏ללשכת ראש » הממשלה הדבר הזה נולד רק דרך אילן ... 1.0 \n", - "3 ‏ה 1.0 \n", - "4 ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור... 1.0 \n", - ".. ... ... \n", - "120 ‏היום אנחנו יוצאים אה « נו לא אני » גאון אני י... 0.5 \n", - "121 ‏הסיפור הזה » נותן לנו הצצה לדברים שאנחנו אף פ... 0.5 \n", - "122 ‏כי במקור זו הייתה « פרשה שקראו לה » פרשת בזק ... 0.5 \n", - "123 ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ... 0.5 \n", - "124 ‏הוא האיש » האחראי לזה שתיק 4000 בכלל קיים ובא... 0.5 \n", - "\n", - " suspect_score rank start_time end_time \n", - "0 1.000000 1 0.00 2.96 \n", - "1 0.946653 2 58.64 68.00 \n", - "2 0.794076 3 8.00 16.00 \n", - "3 0.741773 4 0.00 1.00 \n", - "4 0.728718 5 17.00 34.00 \n", - ".. ... ... ... ... \n", - "120 0.001446 121 0.00 5.20 \n", - "121 0.001355 122 2.96 6.64 \n", - "122 0.001330 123 50.96 58.64 \n", - "123 0.001073 124 50.96 58.64 \n", - "124 0.001006 125 44.48 50.96 \n", - "\n", - "[125 rows x 11 columns] warnings_and_errors=Empty DataFrame\n", - "Columns: [audio_path, status]\n", - "Index: []\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "import logging\n", - "import os\n", - "from pathlib import Path\n", - "\n", - "from hirundo import (\n", - " Domain,\n", - " HirundoCSV,\n", - " LabelingType,\n", - " QADataset,\n", - " StorageConfig,\n", - " StorageTypes,\n", - ")\n", - "from pydantic_core import Url\n", - "\n", - "logger = logging.getLogger(__name__)\n", - "\n", - "unique_id = os.getenv(\"UNIQUE_ID\", \"\").replace(\".\", \"-\").replace(\"/\", \"-\")\n", - "\n", - "run_ids = [run.run_id for run in QADataset.list_runs() if run.run_id]\n", - "for run_id in run_ids:\n", - " QADataset.cancel_by_id(run_id)\n", - "dataset_ids = [dataset.id for dataset in QADataset.list_datasets()]\n", - "for dataset_id in dataset_ids:\n", - " QADataset.delete_by_id(dataset_id)\n", - "storage_ids = [storage_config.id for storage_config in StorageConfig.list()]\n", - "for storage_id in storage_ids:\n", - " StorageConfig.delete_by_id(storage_id)\n", - "\n", - "test_dataset = QADataset(\n", - " name=f\"TEST-STT-RoboShaulTiny-dataset{unique_id}\",\n", - " domain=Domain.SPEECH,\n", - " labeling_type=LabelingType.SPEECH_TO_TEXT,\n", - " language=\"he\",\n", - " storage_config=StorageConfig(\n", - " name=f\"STT-RoboShaulTiny-dataset{unique_id}\",\n", - " type=StorageTypes.LOCAL,\n", - " ),\n", - " data_root_url=Url(Path(\"/datasets/RoboShaulTiny/wavs\").as_uri()),\n", - " labeling_info=HirundoCSV(\n", - " csv_url=Url(Path(\"/datasets/RoboShaulTiny/meta.csv\").as_uri()),\n", - " ),\n", - ")\n", - "\n", - "test_dataset.run_qa()\n", - "results = test_dataset.check_run()\n", - "print(results)" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/blewis/Programming/hirundo-python-sdk/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "Deleted storage integration with ID: 9\n", + "Created storage integration with ID: 11\n", + "Created dataset with ID: 8\n", + "Started the run with ID: 2f7e0a41-75c4-4568-95fc-a177411c5162\n", + "Dataset QA run completed successfully: 100%|██████████| 100.0/100.0 [08:35<00:00, 5.16s/it] " + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" + "name": "stdout", + "output_type": "stream", + "text": [ + "suspects= audio_segment_id audio_path \\\n", + "0 6 wavs/automatic_0012 \n", + "1 20 wavs/automatic_0017 \n", + "2 15 wavs/automatic_0018 \n", + "3 36 wavs/automatic_0020 \n", + "4 37 wavs/automatic_0010 \n", + ".. ... ... \n", + "120 32 wavs/automatic_0004 \n", + "121 34 wavs/automatic_0012 \n", + "122 39 wavs/automatic_0017 \n", + "123 39 wavs/automatic_0017 \n", + "124 3 wavs/automatic_0017 \n", + "\n", + " transcript \\\n", + "0 ‏וגם אם לא תהיה פה השעה בסופו של דבר \n", + "1 ‏פרשה שהתחילה בחקירה שנפתחה יום אחד ב2017 בעצם... \n", + "2 ‏ללשכת ראש הממשלה הדבר הזה נולד רק דרך אילן יש... \n", + "3 ‏ה \n", + "4 ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור... \n", + ".. ... \n", + "120 ‏היום אנחנו יוצאים אה נו לא אני גאון אני יושב ... \n", + "121 ‏הסיפור הזה נותן לנו הצצה לדברים שאנחנו אף פעם... \n", + "122 ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ... \n", + "123 ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ... \n", + "124 ‏הוא האיש האחראי לזה שתיק 4000 בכלל קיים ובא ל... \n", + "\n", + " suspect_region_start_index suspect_region_end_index \\\n", + "0 0 6 \n", + "1 0 12 \n", + "2 0 9 \n", + "3 0 -2 \n", + "4 138 157 \n", + ".. ... ... \n", + "120 20 32 \n", + "121 0 10 \n", + "122 17 33 \n", + "123 58 -2 \n", + "124 0 8 \n", + "\n", + " suspect_region suspect_level \\\n", + "0 ‏וגם אם » לא תהיה פה השעה בסופו של דבר 1.0 \n", + "1 ‏פרשה שהתחילה » בחקירה שנפתחה יום אחד ב2017 בע... 1.0 \n", + "2 ‏ללשכת ראש » הממשלה הדבר הזה נולד רק דרך אילן ... 1.0 \n", + "3 ‏ה 1.0 \n", + "4 ‏הפרשה הזאת בעצם שואלת האם לחץ להטות את הסיכור... 1.0 \n", + ".. ... ... \n", + "120 ‏היום אנחנו יוצאים אה « נו לא אני » גאון אני י... 0.5 \n", + "121 ‏הסיפור הזה » נותן לנו הצצה לדברים שאנחנו אף פ... 0.5 \n", + "122 ‏כי במקור זו הייתה « פרשה שקראו לה » פרשת בזק ... 0.5 \n", + "123 ‏כי במקור זו הייתה פרשה שקראנו לה פרשת בזק שענ... 0.5 \n", + "124 ‏הוא האיש » האחראי לזה שתיק 4000 בכלל קיים ובא... 0.5 \n", + "\n", + " suspect_score rank start_time end_time \n", + "0 1.000000 1 0.00 2.96 \n", + "1 0.946653 2 58.64 68.00 \n", + "2 0.794076 3 8.00 16.00 \n", + "3 0.741773 4 0.00 1.00 \n", + "4 0.728718 5 17.00 34.00 \n", + ".. ... ... ... ... \n", + "120 0.001446 121 0.00 5.20 \n", + "121 0.001355 122 2.96 6.64 \n", + "122 0.001330 123 50.96 58.64 \n", + "123 0.001073 124 50.96 58.64 \n", + "124 0.001006 125 44.48 50.96 \n", + "\n", + "[125 rows x 11 columns] warnings_and_errors=Empty DataFrame\n", + "Columns: [audio_path, status]\n", + "Index: []\n" + ] }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.19" + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] } + ], + "source": [ + "import logging\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "from hirundo import (\n", + " HirundoCSV,\n", + " LabelingType,\n", + " ModalityType,\n", + " QADataset,\n", + " StorageConfig,\n", + " StorageTypes,\n", + ")\n", + "from pydantic_core import Url\n", + "\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "unique_id = os.getenv(\"UNIQUE_ID\", \"\").replace(\".\", \"-\").replace(\"/\", \"-\")\n", + "\n", + "run_ids = [run.run_id for run in QADataset.list_runs() if run.run_id]\n", + "for run_id in run_ids:\n", + " QADataset.cancel_by_id(run_id)\n", + "dataset_ids = [dataset.id for dataset in QADataset.list_datasets()]\n", + "for dataset_id in dataset_ids:\n", + " QADataset.delete_by_id(dataset_id)\n", + "storage_ids = [storage_config.id for storage_config in StorageConfig.list()]\n", + "for storage_id in storage_ids:\n", + " StorageConfig.delete_by_id(storage_id)\n", + "\n", + "test_dataset = QADataset(\n", + " name=f\"TEST-STT-RoboShaulTiny-dataset{unique_id}\",\n", + " modality=ModalityType.SPEECH,\n", + " labeling_type=LabelingType.SPEECH_TO_TEXT,\n", + " language=\"he\",\n", + " storage_config=StorageConfig(\n", + " name=f\"STT-RoboShaulTiny-dataset{unique_id}\",\n", + " type=StorageTypes.LOCAL,\n", + " ),\n", + " data_root_url=Url(Path(\"/datasets/RoboShaulTiny/wavs\").as_uri()),\n", + " labeling_info=HirundoCSV(\n", + " csv_url=Url(Path(\"/datasets/RoboShaulTiny/meta.csv\").as_uri()),\n", + " ),\n", + ")\n", + "\n", + "test_dataset.run_qa()\n", + "results = test_dataset.check_run()\n", + "print(results)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 2 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/tests/speech-to-text/sanity_stt_git_test.py b/tests/speech-to-text/sanity_stt_git_test.py index 8c7bff95..104d5f5f 100644 --- a/tests/speech-to-text/sanity_stt_git_test.py +++ b/tests/speech-to-text/sanity_stt_git_test.py @@ -3,11 +3,11 @@ import pytest from hirundo import ( - Domain, GitPlainAuth, GitRepo, HirundoCSV, LabelingType, + ModalityType, QADataset, StorageConfig, StorageGit, @@ -35,7 +35,7 @@ ) test_dataset = QADataset( name=f"TEST-STT-RoboShaulTiny-dataset{unique_id}", - domain=Domain.SPEECH, + modality=ModalityType.SPEECH, labeling_type=LabelingType.SPEECH_TO_TEXT, language="he", storage_config=StorageConfig( diff --git a/tests/speech-to-text/stt_git_test.py b/tests/speech-to-text/stt_git_test.py index eeaaf2f9..6a12a95f 100644 --- a/tests/speech-to-text/stt_git_test.py +++ b/tests/speech-to-text/stt_git_test.py @@ -3,11 +3,11 @@ import pytest from hirundo import ( - Domain, GitPlainAuth, GitRepo, HirundoCSV, LabelingType, + ModalityType, QADataset, StorageConfig, StorageGit, @@ -35,7 +35,7 @@ ) test_dataset = QADataset( name=f"TEST-STT-MASC-dataset{unique_id}", - domain=Domain.SPEECH, + modality=ModalityType.SPEECH, labeling_type=LabelingType.SPEECH_TO_TEXT, language="ar", storage_config=StorageConfig(