From d9724660e9f8e414d002f8f1a4f8d91c7ef382dd Mon Sep 17 00:00:00 2001 From: Lukas Bindreiter Date: Fri, 6 Mar 2026 10:38:56 +0100 Subject: [PATCH 1/2] Add query support for multiple collections --- CHANGELOG.md | 6 + tilebox-datasets/tests/test_timeseries.py | 62 +++++ .../recordings/find_s2_datapoint.rpcs.bin | 4 +- .../recordings/query_sentinel2.rpcs.bin | 4 +- .../query_sentinel2_paging.rpcs.bin | 4 +- .../s2_datapoint_not_found.rpcs.bin | 4 +- .../tilebox/datasets/aio/dataset.py | 214 +++++++++++++++--- .../datasets/datasets/v1/data_access_pb2.py | 36 +-- .../datasets/datasets/v1/data_access_pb2.pyi | 12 +- .../datasets/datasets/v1/datasets_pb2.py | 62 ++--- .../datasets/datasets/v1/datasets_pb2.pyi | 22 +- .../datasets/datasets/v1/datasets_pb2_grpc.py | 43 ---- tilebox-datasets/tilebox/datasets/service.py | 11 +- .../tilebox/datasets/sync/dataset.py | 213 ++++++++++++++--- 14 files changed, 530 insertions(+), 167 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bfbe1ce..1fa4987 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `tilebox-datasets`: Added dataset-level `find` and `query` methods on both sync and async `DatasetClient` to query + across multiple collections. + + ## [0.49.0] - 2026-02-19 ### Added diff --git a/tilebox-datasets/tests/test_timeseries.py b/tilebox-datasets/tests/test_timeseries.py index 133d9b4..e1b874e 100644 --- a/tilebox-datasets/tests/test_timeseries.py +++ b/tilebox-datasets/tests/test_timeseries.py @@ -198,6 +198,68 @@ def test_timeseries_dataset_collection_find_not_found() -> None: mocked.collection.find("14eb91a2-a42f-421f-9397-1dab577f05a9") +@settings(max_examples=1) +@given(example_datapoints(generated_fields=True, missing_fields=True)) +def test_timeseries_dataset_find_multiple_collections(expected_datapoint: ExampleDatapoint) -> None: + """Test that DatasetClient.find() supports querying by mixed collection reference types.""" + dataset, service = _mocked_dataset() + + named_collection = CollectionInfo(Collection(uuid4(), "named-collection"), None, None) + other_collection = CollectionInfo(Collection(uuid4(), "other-collection"), None, None) + + service.get_collections.return_value = Promise.resolve([named_collection, other_collection]) + message = AnyMessage(example_dataset_type_url(), expected_datapoint.SerializeToString()) + service.query_by_id.return_value = Promise.resolve(message) + + datapoint_id = uuid_message_to_uuid(expected_datapoint.id) + datapoint = dataset.find( + datapoint_id, + [ + named_collection.collection.name, + ], + ) + + assert isinstance(datapoint, xr.Dataset) + service.get_collections.assert_called_once_with(dataset._dataset.id, True, True) + service.query_by_id.assert_called_once_with( + dataset._dataset.id, + [ + named_collection.collection.id, + ], + datapoint_id, + False, + ) + + +@settings(max_examples=1) +@given(pages=paginated_query_results()) +def test_timeseries_dataset_query_multiple_collections(pages: list[QueryResultPage]) -> None: + """Test that DatasetClient.query() forwards all selected collection ids to the backend query endpoint.""" + dataset, service = _mocked_dataset() + + named_collection = CollectionInfo(Collection(uuid4(), "named-collection"), None, None) + other_collection = CollectionInfo(Collection(uuid4(), "other-collection"), None, None) + + service.get_collections.return_value = Promise.resolve([named_collection, other_collection]) + service.query.side_effect = [Promise.resolve(page) for page in pages] + + interval = TimeInterval(datetime.now(), datetime.now() + timedelta(days=1)) + queried = dataset.query( + collections=[ + named_collection.collection.name, + ], + temporal_extent=interval, + ) + + _assert_datapoints_match(queried, pages) + service.get_collections.assert_called_once_with(dataset._dataset.id, True, True) + first_call_args = service.query.call_args_list[0][0] + assert first_call_args[0] == dataset._dataset.id + assert first_call_args[1] == [ + named_collection.collection.id, + ] + + @patch("tilebox.datasets.sync.pagination.tqdm") @patch("tilebox.datasets.progress.tqdm") @settings(deadline=1000, max_examples=3) # increase deadline to 1s to not timeout because of the progress bar diff --git a/tilebox-datasets/tests/testdata/recordings/find_s2_datapoint.rpcs.bin b/tilebox-datasets/tests/testdata/recordings/find_s2_datapoint.rpcs.bin index 2a90653..a38850d 100644 --- a/tilebox-datasets/tests/testdata/recordings/find_s2_datapoint.rpcs.bin +++ b/tilebox-datasets/tests/testdata/recordings/find_s2_datapoint.rpcs.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5234a799245c656e37e16e249f6212b0c7a48021d9a2fbc7a672375d7354a57c -size 10004 +oid sha256:7de1b3958bcc1c8aecd8170b71433153cf5b705d0b0c7ea1f424bd78b9e8f66f +size 10056 diff --git a/tilebox-datasets/tests/testdata/recordings/query_sentinel2.rpcs.bin b/tilebox-datasets/tests/testdata/recordings/query_sentinel2.rpcs.bin index 066a6bd..5b86fc0 100644 --- a/tilebox-datasets/tests/testdata/recordings/query_sentinel2.rpcs.bin +++ b/tilebox-datasets/tests/testdata/recordings/query_sentinel2.rpcs.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef04121a4e99f85a25513d0933823b1c271a367b575dece8959be080c85e456d -size 903956 +oid sha256:a964dcaf44e3ef38283e4906269781d287e313e8ebd6834a3dda22ef76539e43 +size 904012 diff --git a/tilebox-datasets/tests/testdata/recordings/query_sentinel2_paging.rpcs.bin b/tilebox-datasets/tests/testdata/recordings/query_sentinel2_paging.rpcs.bin index 3feb51d..84bfe9e 100644 --- a/tilebox-datasets/tests/testdata/recordings/query_sentinel2_paging.rpcs.bin +++ b/tilebox-datasets/tests/testdata/recordings/query_sentinel2_paging.rpcs.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5fee0adc442645fb623f35d133681f34003690f582d532f486ecd91f59cc67b -size 873354 +oid sha256:5ffb02e44b4e9a312cdb3edc8881e5bcffaf21ac28d4df973e1f20ba882c7dfa +size 875182 diff --git a/tilebox-datasets/tests/testdata/recordings/s2_datapoint_not_found.rpcs.bin b/tilebox-datasets/tests/testdata/recordings/s2_datapoint_not_found.rpcs.bin index 746d3fa..a4164cf 100644 --- a/tilebox-datasets/tests/testdata/recordings/s2_datapoint_not_found.rpcs.bin +++ b/tilebox-datasets/tests/testdata/recordings/s2_datapoint_not_found.rpcs.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7428cc2c23ba142cdc148053f5edf9aa5d7f630cf0139c69c13701c516f78037 -size 8560 +oid sha256:da90443beb49a4e0d84fd06f08f0affab5ea703f4ce7e65585c59f3ce421a101 +size 8584 diff --git a/tilebox-datasets/tilebox/datasets/aio/dataset.py b/tilebox-datasets/tilebox/datasets/aio/dataset.py index ff5abfa..9706c73 100644 --- a/tilebox-datasets/tilebox/datasets/aio/dataset.py +++ b/tilebox-datasets/tilebox/datasets/aio/dataset.py @@ -12,7 +12,7 @@ from _tilebox.grpc.aio.producer_consumer import async_producer_consumer from _tilebox.grpc.error import ArgumentError, NotFoundError from tilebox.datasets.aio.pagination import with_progressbar, with_time_progress_callback, with_time_progressbar -from tilebox.datasets.data.collection import CollectionInfo +from tilebox.datasets.data.collection import Collection, CollectionInfo from tilebox.datasets.data.data_access import QueryFilters, SpatialFilter, SpatialFilterLike from tilebox.datasets.data.datapoint import QueryResultPage from tilebox.datasets.data.datasets import Dataset @@ -139,6 +139,122 @@ async def delete_collection(self, collection: "str | UUID | CollectionClient") - await self._service.delete_collection(self._dataset.id, collection_id) + async def find( + self, + datapoint_id: str | UUID, + collections: "list[str] | list[UUID] | list[Collection] | list[CollectionInfo] | list[CollectionClient] | None" = None, + skip_data: bool = False, + ) -> xr.Dataset: + """ + Find a specific datapoint in one of the specified collections by its id. + + Args: + datapoint_id: The id of the datapoint to find. + collections: The collections to search in. Supports collection names, ids or collection objects. + If not specified, all collections in the dataset are searched. + skip_data: Whether to skip the actual data of the datapoint. If True, only + datapoint metadata is returned. + + Returns: + The datapoint as an xarray dataset. + """ + collection_ids = await self._collection_ids(collections) + try: + datapoint = await self._service.query_by_id( + self._dataset.id, + collection_ids, + as_uuid(datapoint_id), + skip_data, + ) + except ArgumentError: + raise ValueError(f"Invalid datapoint id: {datapoint_id} is not a valid UUID") from None + except NotFoundError: + raise NotFoundError(f"No such datapoint {datapoint_id}") from None + + message_type = get_message_type(datapoint.type_url) + data = message_type.FromString(datapoint.value) + + converter = MessageToXarrayConverter(initial_capacity=1) + converter.convert(data) + return converter.finalize("time", skip_empty_fields=skip_data).isel(time=0) + + async def query( + self, + *, + collections: "list[str] | list[UUID] | list[Collection] | list[CollectionInfo] | list[CollectionClient] | dict[str, CollectionClient] | None", + temporal_extent: TimeIntervalLike, + spatial_extent: SpatialFilterLike | None = None, + skip_data: bool = False, + show_progress: bool | ProgressCallback = False, + ) -> xr.Dataset: + """ + Query datapoints in the specified collections and temporal extent. + + Args: + collections: The collections to query in. Supports collection names, ids or collection objects. + If not specified, all collections in the dataset are queried. + temporal_extent: The temporal extent to query data for. (Required) + spatial_extent: The spatial extent to query data in. (Optional) + skip_data: Whether to skip the actual data of the datapoint. If True, only + datapoint metadata is returned. + show_progress: Whether to show a progress bar while loading the data. + If a callable is specified it is used as callback to report progress percentages. + + Returns: + Matching datapoints in the given temporal and spatial extent as an xarray dataset. + """ + if temporal_extent is None: + raise ValueError("A temporal_extent for your query must be specified") + + collection_ids = await self._collection_ids(collections) + pages = _iter_query_pages( + self._service, + self._dataset.id, + collection_ids, + temporal_extent, + spatial_extent, + skip_data, + dataset_name=self.name, + show_progress=show_progress, + ) + return await _convert_to_dataset(pages, skip_empty_fields=skip_data) + + async def _collection_id(self, collection: "UUID | Collection | CollectionInfo | CollectionClient") -> UUID: + if isinstance(collection, CollectionClient): + return collection._collection.id + if isinstance(collection, CollectionInfo): + return collection.collection.id + if isinstance(collection, Collection): + return collection.id + return collection + + async def _collection_ids( + self, + collections: "list[str] | list[UUID] | list[Collection] | list[CollectionInfo] | list[CollectionClient] | dict[str, CollectionClient] | None", + ) -> list[UUID]: + if collections is None: + return [] + + all_collections: list[CollectionInfo] = await self._service.get_collections(self._dataset.id, True, True) + # find all valid collection names and ids + collections_by_name = {c.collection.name: c.collection.id for c in all_collections} + valid_collection_ids = {c.collection.id for c in all_collections} + + collection_ids: list[UUID] = [] + for collection in collections: + if isinstance(collection, str): + try: + collection_ids.append(collections_by_name[collection]) + except KeyError: + raise ValueError(f"Collection {collection} not found in dataset {self.name}") from None + else: + collection_id = await self._collection_id(collection) + if collection_id not in valid_collection_ids: + raise ValueError(f"Collection {collection_id} is not part of the dataset {self.name}") + collection_ids.append(collection_id) + + return collection_ids + def __repr__(self) -> str: return f"{self.name} [Timeseries Dataset]: {self._dataset.summary}" @@ -221,7 +337,7 @@ async def find(self, datapoint_id: str | UUID, skip_data: bool = False) -> xr.Da """ try: datapoint = await self._dataset._service.query_by_id( - [self._collection.id], as_uuid(datapoint_id), skip_data + self._dataset._dataset.id, [self._collection.id], as_uuid(datapoint_id), skip_data ) except ArgumentError: raise ValueError(f"Invalid datapoint id: {datapoint_id} is not a valid UUID") from None @@ -259,8 +375,14 @@ async def _find_interval( filters = QueryFilters(temporal_extent=IDInterval.parse(datapoint_id_interval, end_inclusive=end_inclusive)) async def request(page: PaginationProtocol) -> QueryResultPage: - query_page = Pagination(page.limit, page.starting_after) - return await self._dataset._service.query([self._collection.id], filters, skip_data, query_page) + return await _query_page( + self._dataset._service, + self._dataset._dataset.id, + [self._collection.id], + filters, + skip_data, + page, + ) initial_page = Pagination() pages = paginated_request(request, initial_page) @@ -350,7 +472,16 @@ async def query( if temporal_extent is None: raise ValueError("A temporal_extent for your query must be specified") - pages = self._iter_pages(temporal_extent, spatial_extent, skip_data, show_progress=show_progress) + pages = _iter_query_pages( + self._dataset._service, + self._dataset._dataset.id, + [self._collection.id], + temporal_extent, + spatial_extent, + skip_data, + dataset_name=self._dataset.name, + show_progress=show_progress, + ) return await _convert_to_dataset(pages, skip_empty_fields=skip_data) async def _iter_pages( @@ -361,29 +492,19 @@ async def _iter_pages( show_progress: bool | ProgressCallback = False, page_size: int | None = None, ) -> AsyncIterator[QueryResultPage]: - time_interval = TimeInterval.parse(temporal_extent) - filters = QueryFilters(time_interval, SpatialFilter.parse(spatial_extent) if spatial_extent else None) - - request = partial(self._query_page, filters, skip_data) - - initial_page = Pagination(limit=page_size) - pages = paginated_request(request, initial_page) - - if callable(show_progress): - pages = with_time_progress_callback(pages, time_interval, show_progress) - elif show_progress: - message = f"Fetching {self._dataset.name}" - pages = with_time_progressbar(pages, time_interval, message) - - async for page in pages: + async for page in _iter_query_pages( + self._dataset._service, + self._dataset._dataset.id, + [self._collection.id], + temporal_extent, + spatial_extent, + skip_data, + dataset_name=self._dataset.name, + show_progress=show_progress, + page_size=page_size, + ): yield page - async def _query_page( - self, filters: QueryFilters, skip_data: bool, page: PaginationProtocol | None = None - ) -> QueryResultPage: - query_page = Pagination(page.limit, page.starting_after) if page else Pagination() - return await self._dataset._service.query([self._collection.id], filters, skip_data, query_page) - async def ingest( self, data: IngestionData, @@ -477,6 +598,47 @@ async def delete(self, datapoints: DatapointIDs, *, show_progress: bool | Progre return num_deleted +async def _query_page( # noqa: PLR0913 + service: TileboxDatasetService, + dataset_id: UUID, + collection_ids: list[UUID] | None, + filters: QueryFilters, + skip_data: bool, + page: PaginationProtocol | None = None, +) -> QueryResultPage: + query_page = Pagination(page.limit, page.starting_after) if page else Pagination() + return await service.query(dataset_id, collection_ids or [], filters, skip_data, query_page) + + +async def _iter_query_pages( # noqa: PLR0913 + service: TileboxDatasetService, + dataset_id: UUID, + collection_ids: list[UUID] | None, + temporal_extent: TimeIntervalLike, + spatial_extent: SpatialFilterLike | None = None, + skip_data: bool = False, + *, + dataset_name: str, + show_progress: bool | ProgressCallback = False, + page_size: int | None = None, +) -> AsyncIterator[QueryResultPage]: + time_interval = TimeInterval.parse(temporal_extent) + filters = QueryFilters(time_interval, SpatialFilter.parse(spatial_extent) if spatial_extent else None) + + request = partial(_query_page, service, dataset_id, collection_ids, filters, skip_data) + + initial_page = Pagination(limit=page_size) + pages = paginated_request(request, initial_page) + + if callable(show_progress): + pages = with_time_progress_callback(pages, time_interval, show_progress) + elif show_progress: + pages = with_time_progressbar(pages, time_interval, f"Fetching {dataset_name}") + + async for page in pages: + yield page + + async def _convert_to_dataset(pages: AsyncIterator[QueryResultPage], skip_empty_fields: bool = False) -> xr.Dataset: """ Convert an async iterator of QueryResultPages into a single xarray Dataset diff --git a/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.py b/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.py index dc2d153..43f1a6b 100644 --- a/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.py +++ b/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.py @@ -29,7 +29,7 @@ from tilebox.datasets.tilebox.v1 import query_pb2 as tilebox_dot_v1_dot_query__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x64\x61tasets/v1/data_access.proto\x12\x0b\x64\x61tasets.v1\x1a\x1b\x62uf/validate/validate.proto\x1a\x16\x64\x61tasets/v1/core.proto\x1a\"datasets/v1/well_known_types.proto\x1a\x13tilebox/v1/id.proto\x1a\x16tilebox/v1/query.proto\"\xbd\x02\n\x1cGetDatasetForIntervalRequest\x12#\n\rcollection_id\x18\x01 \x01(\tR\x0c\x63ollectionId\x12=\n\rtime_interval\x18\x02 \x01(\x0b\x32\x18.tilebox.v1.TimeIntervalR\x0ctimeInterval\x12\x45\n\x12\x64\x61tapoint_interval\x18\x06 \x01(\x0b\x32\x16.tilebox.v1.IDIntervalR\x11\x64\x61tapointInterval\x12\x38\n\x04page\x18\x03 \x01(\x0b\x32\x1d.datasets.v1.LegacyPaginationB\x05\xaa\x01\x02\x08\x01R\x04page\x12\x1b\n\tskip_data\x18\x04 \x01(\x08R\x08skipData\x12\x1b\n\tskip_meta\x18\x05 \x01(\x08R\x08skipMeta\"k\n\x17GetDatapointByIdRequest\x12#\n\rcollection_id\x18\x01 \x01(\tR\x0c\x63ollectionId\x12\x0e\n\x02id\x18\x02 \x01(\tR\x02id\x12\x1b\n\tskip_data\x18\x03 \x01(\x08R\x08skipData\"\x9a\x01\n\x10QueryByIDRequest\x12\x41\n\x0e\x63ollection_ids\x18\x01 \x03(\x0b\x32\x0e.tilebox.v1.IDB\n\xbaH\x07\x92\x01\x04\x08\x01\x10\x64R\rcollectionIds\x12&\n\x02id\x18\x02 \x01(\x0b\x32\x0e.tilebox.v1.IDB\x06\xbaH\x03\xc8\x01\x01R\x02id\x12\x1b\n\tskip_data\x18\x03 \x01(\x08R\x08skipData\"\x83\x02\n\x0cQueryFilters\x12=\n\rtime_interval\x18\x01 \x01(\x0b\x32\x18.tilebox.v1.TimeIntervalR\x0ctimeInterval\x12\x45\n\x12\x64\x61tapoint_interval\x18\x02 \x01(\x0b\x32\x16.tilebox.v1.IDIntervalR\x11\x64\x61tapointInterval\x12\x41\n\x0espatial_extent\x18\x03 \x01(\x0b\x32\x1a.datasets.v1.SpatialFilterR\rspatialExtent:*\xbaH\'\"%\n\rtime_interval\n\x12\x64\x61tapoint_interval\x10\x01\"\xe5\x01\n\rSpatialFilter\x12\x39\n\x08geometry\x18\x01 \x01(\x0b\x32\x15.datasets.v1.GeometryB\x06\xbaH\x03\xc8\x01\x01R\x08geometry\x12<\n\x04mode\x18\x02 \x01(\x0e\x32\x1e.datasets.v1.SpatialFilterModeB\x08\xbaH\x05\x82\x01\x02\x10\x01R\x04mode\x12[\n\x11\x63oordinate_system\x18\x03 \x01(\x0e\x32$.datasets.v1.SpatialCoordinateSystemB\x08\xbaH\x05\x82\x01\x02\x10\x01R\x10\x63oordinateSystem\"\xd6\x01\n\x0cQueryRequest\x12\x41\n\x0e\x63ollection_ids\x18\x01 \x03(\x0b\x32\x0e.tilebox.v1.IDB\n\xbaH\x07\x92\x01\x04\x08\x01\x10\x64R\rcollectionIds\x12\x33\n\x07\x66ilters\x18\x02 \x01(\x0b\x32\x19.datasets.v1.QueryFiltersR\x07\x66ilters\x12\x31\n\x04page\x18\x03 \x01(\x0b\x32\x16.tilebox.v1.PaginationB\x05\xaa\x01\x02\x08\x01R\x04page\x12\x1b\n\tskip_data\x18\x04 \x01(\x08R\x08skipData\"{\n\x0fQueryResultPage\x12,\n\x04\x64\x61ta\x18\x01 \x01(\x0b\x32\x18.datasets.v1.RepeatedAnyR\x04\x64\x61ta\x12:\n\tnext_page\x18\x02 \x01(\x0b\x32\x16.tilebox.v1.PaginationB\x05\xaa\x01\x02\x08\x01R\x08nextPage*~\n\x11SpatialFilterMode\x12#\n\x1fSPATIAL_FILTER_MODE_UNSPECIFIED\x10\x00\x12\"\n\x1eSPATIAL_FILTER_MODE_INTERSECTS\x10\x01\x12 \n\x1cSPATIAL_FILTER_MODE_CONTAINS\x10\x02*\x96\x01\n\x17SpatialCoordinateSystem\x12)\n%SPATIAL_COORDINATE_SYSTEM_UNSPECIFIED\x10\x00\x12\'\n#SPATIAL_COORDINATE_SYSTEM_CARTESIAN\x10\x01\x12\'\n#SPATIAL_COORDINATE_SYSTEM_SPHERICAL\x10\x02\x32\xcd\x02\n\x11\x44\x61taAccessService\x12`\n\x15GetDatasetForInterval\x12).datasets.v1.GetDatasetForIntervalRequest\x1a\x1a.datasets.v1.DatapointPage\"\x00\x12R\n\x10GetDatapointByID\x12$.datasets.v1.GetDatapointByIdRequest\x1a\x16.datasets.v1.Datapoint\"\x00\x12>\n\tQueryByID\x12\x1d.datasets.v1.QueryByIDRequest\x1a\x10.datasets.v1.Any\"\x00\x12\x42\n\x05Query\x12\x19.datasets.v1.QueryRequest\x1a\x1c.datasets.v1.QueryResultPage\"\x00\x42t\n\x0f\x63om.datasets.v1B\x0f\x44\x61taAccessProtoP\x01\xa2\x02\x03\x44XX\xaa\x02\x0b\x44\x61tasets.V1\xca\x02\x0b\x44\x61tasets\\V1\xe2\x02\x17\x44\x61tasets\\V1\\GPBMetadata\xea\x02\x0c\x44\x61tasets::V1\x92\x03\x02\x08\x02\x62\x08\x65\x64itionsp\xe8\x07') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1d\x64\x61tasets/v1/data_access.proto\x12\x0b\x64\x61tasets.v1\x1a\x1b\x62uf/validate/validate.proto\x1a\x16\x64\x61tasets/v1/core.proto\x1a\"datasets/v1/well_known_types.proto\x1a\x13tilebox/v1/id.proto\x1a\x16tilebox/v1/query.proto\"\xbd\x02\n\x1cGetDatasetForIntervalRequest\x12#\n\rcollection_id\x18\x01 \x01(\tR\x0c\x63ollectionId\x12=\n\rtime_interval\x18\x02 \x01(\x0b\x32\x18.tilebox.v1.TimeIntervalR\x0ctimeInterval\x12\x45\n\x12\x64\x61tapoint_interval\x18\x06 \x01(\x0b\x32\x16.tilebox.v1.IDIntervalR\x11\x64\x61tapointInterval\x12\x38\n\x04page\x18\x03 \x01(\x0b\x32\x1d.datasets.v1.LegacyPaginationB\x05\xaa\x01\x02\x08\x01R\x04page\x12\x1b\n\tskip_data\x18\x04 \x01(\x08R\x08skipData\x12\x1b\n\tskip_meta\x18\x05 \x01(\x08R\x08skipMeta\"k\n\x17GetDatapointByIdRequest\x12#\n\rcollection_id\x18\x01 \x01(\tR\x0c\x63ollectionId\x12\x0e\n\x02id\x18\x02 \x01(\tR\x02id\x12\x1b\n\tskip_data\x18\x03 \x01(\x08R\x08skipData\"\xc9\x01\n\x10QueryByIDRequest\x12-\n\ndataset_id\x18\x04 \x01(\x0b\x32\x0e.tilebox.v1.IDR\tdatasetId\x12\x41\n\x0e\x63ollection_ids\x18\x01 \x03(\x0b\x32\x0e.tilebox.v1.IDB\n\xbaH\x07\x92\x01\x04\x08\x00\x10\x64R\rcollectionIds\x12&\n\x02id\x18\x02 \x01(\x0b\x32\x0e.tilebox.v1.IDB\x06\xbaH\x03\xc8\x01\x01R\x02id\x12\x1b\n\tskip_data\x18\x03 \x01(\x08R\x08skipData\"\x83\x02\n\x0cQueryFilters\x12=\n\rtime_interval\x18\x01 \x01(\x0b\x32\x18.tilebox.v1.TimeIntervalR\x0ctimeInterval\x12\x45\n\x12\x64\x61tapoint_interval\x18\x02 \x01(\x0b\x32\x16.tilebox.v1.IDIntervalR\x11\x64\x61tapointInterval\x12\x41\n\x0espatial_extent\x18\x03 \x01(\x0b\x32\x1a.datasets.v1.SpatialFilterR\rspatialExtent:*\xbaH\'\"%\n\rtime_interval\n\x12\x64\x61tapoint_interval\x10\x01\"\xe5\x01\n\rSpatialFilter\x12\x39\n\x08geometry\x18\x01 \x01(\x0b\x32\x15.datasets.v1.GeometryB\x06\xbaH\x03\xc8\x01\x01R\x08geometry\x12<\n\x04mode\x18\x02 \x01(\x0e\x32\x1e.datasets.v1.SpatialFilterModeB\x08\xbaH\x05\x82\x01\x02\x10\x01R\x04mode\x12[\n\x11\x63oordinate_system\x18\x03 \x01(\x0e\x32$.datasets.v1.SpatialCoordinateSystemB\x08\xbaH\x05\x82\x01\x02\x10\x01R\x10\x63oordinateSystem\"\x85\x02\n\x0cQueryRequest\x12-\n\ndataset_id\x18\x05 \x01(\x0b\x32\x0e.tilebox.v1.IDR\tdatasetId\x12\x41\n\x0e\x63ollection_ids\x18\x01 \x03(\x0b\x32\x0e.tilebox.v1.IDB\n\xbaH\x07\x92\x01\x04\x08\x00\x10\x64R\rcollectionIds\x12\x33\n\x07\x66ilters\x18\x02 \x01(\x0b\x32\x19.datasets.v1.QueryFiltersR\x07\x66ilters\x12\x31\n\x04page\x18\x03 \x01(\x0b\x32\x16.tilebox.v1.PaginationB\x05\xaa\x01\x02\x08\x01R\x04page\x12\x1b\n\tskip_data\x18\x04 \x01(\x08R\x08skipData\"{\n\x0fQueryResultPage\x12,\n\x04\x64\x61ta\x18\x01 \x01(\x0b\x32\x18.datasets.v1.RepeatedAnyR\x04\x64\x61ta\x12:\n\tnext_page\x18\x02 \x01(\x0b\x32\x16.tilebox.v1.PaginationB\x05\xaa\x01\x02\x08\x01R\x08nextPage*~\n\x11SpatialFilterMode\x12#\n\x1fSPATIAL_FILTER_MODE_UNSPECIFIED\x10\x00\x12\"\n\x1eSPATIAL_FILTER_MODE_INTERSECTS\x10\x01\x12 \n\x1cSPATIAL_FILTER_MODE_CONTAINS\x10\x02*\x96\x01\n\x17SpatialCoordinateSystem\x12)\n%SPATIAL_COORDINATE_SYSTEM_UNSPECIFIED\x10\x00\x12\'\n#SPATIAL_COORDINATE_SYSTEM_CARTESIAN\x10\x01\x12\'\n#SPATIAL_COORDINATE_SYSTEM_SPHERICAL\x10\x02\x32\xcd\x02\n\x11\x44\x61taAccessService\x12`\n\x15GetDatasetForInterval\x12).datasets.v1.GetDatasetForIntervalRequest\x1a\x1a.datasets.v1.DatapointPage\"\x00\x12R\n\x10GetDatapointByID\x12$.datasets.v1.GetDatapointByIdRequest\x1a\x16.datasets.v1.Datapoint\"\x00\x12>\n\tQueryByID\x12\x1d.datasets.v1.QueryByIDRequest\x1a\x10.datasets.v1.Any\"\x00\x12\x42\n\x05Query\x12\x19.datasets.v1.QueryRequest\x1a\x1c.datasets.v1.QueryResultPage\"\x00\x42t\n\x0f\x63om.datasets.v1B\x0f\x44\x61taAccessProtoP\x01\xa2\x02\x03\x44XX\xaa\x02\x0b\x44\x61tasets.V1\xca\x02\x0b\x44\x61tasets\\V1\xe2\x02\x17\x44\x61tasets\\V1\\GPBMetadata\xea\x02\x0c\x44\x61tasets::V1\x92\x03\x02\x08\x02\x62\x08\x65\x64itionsp\xe8\x07') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -40,7 +40,7 @@ _globals['_GETDATASETFORINTERVALREQUEST'].fields_by_name['page']._loaded_options = None _globals['_GETDATASETFORINTERVALREQUEST'].fields_by_name['page']._serialized_options = b'\252\001\002\010\001' _globals['_QUERYBYIDREQUEST'].fields_by_name['collection_ids']._loaded_options = None - _globals['_QUERYBYIDREQUEST'].fields_by_name['collection_ids']._serialized_options = b'\272H\007\222\001\004\010\001\020d' + _globals['_QUERYBYIDREQUEST'].fields_by_name['collection_ids']._serialized_options = b'\272H\007\222\001\004\010\000\020d' _globals['_QUERYBYIDREQUEST'].fields_by_name['id']._loaded_options = None _globals['_QUERYBYIDREQUEST'].fields_by_name['id']._serialized_options = b'\272H\003\310\001\001' _globals['_QUERYFILTERS']._loaded_options = None @@ -52,29 +52,29 @@ _globals['_SPATIALFILTER'].fields_by_name['coordinate_system']._loaded_options = None _globals['_SPATIALFILTER'].fields_by_name['coordinate_system']._serialized_options = b'\272H\005\202\001\002\020\001' _globals['_QUERYREQUEST'].fields_by_name['collection_ids']._loaded_options = None - _globals['_QUERYREQUEST'].fields_by_name['collection_ids']._serialized_options = b'\272H\007\222\001\004\010\001\020d' + _globals['_QUERYREQUEST'].fields_by_name['collection_ids']._serialized_options = b'\272H\007\222\001\004\010\000\020d' _globals['_QUERYREQUEST'].fields_by_name['page']._loaded_options = None _globals['_QUERYREQUEST'].fields_by_name['page']._serialized_options = b'\252\001\002\010\001' _globals['_QUERYRESULTPAGE'].fields_by_name['next_page']._loaded_options = None _globals['_QUERYRESULTPAGE'].fields_by_name['next_page']._serialized_options = b'\252\001\002\010\001' - _globals['_SPATIALFILTERMODE']._serialized_start=1602 - _globals['_SPATIALFILTERMODE']._serialized_end=1728 - _globals['_SPATIALCOORDINATESYSTEM']._serialized_start=1731 - _globals['_SPATIALCOORDINATESYSTEM']._serialized_end=1881 + _globals['_SPATIALFILTERMODE']._serialized_start=1696 + _globals['_SPATIALFILTERMODE']._serialized_end=1822 + _globals['_SPATIALCOORDINATESYSTEM']._serialized_start=1825 + _globals['_SPATIALCOORDINATESYSTEM']._serialized_end=1975 _globals['_GETDATASETFORINTERVALREQUEST']._serialized_start=181 _globals['_GETDATASETFORINTERVALREQUEST']._serialized_end=498 _globals['_GETDATAPOINTBYIDREQUEST']._serialized_start=500 _globals['_GETDATAPOINTBYIDREQUEST']._serialized_end=607 _globals['_QUERYBYIDREQUEST']._serialized_start=610 - _globals['_QUERYBYIDREQUEST']._serialized_end=764 - _globals['_QUERYFILTERS']._serialized_start=767 - _globals['_QUERYFILTERS']._serialized_end=1026 - _globals['_SPATIALFILTER']._serialized_start=1029 - _globals['_SPATIALFILTER']._serialized_end=1258 - _globals['_QUERYREQUEST']._serialized_start=1261 - _globals['_QUERYREQUEST']._serialized_end=1475 - _globals['_QUERYRESULTPAGE']._serialized_start=1477 - _globals['_QUERYRESULTPAGE']._serialized_end=1600 - _globals['_DATAACCESSSERVICE']._serialized_start=1884 - _globals['_DATAACCESSSERVICE']._serialized_end=2217 + _globals['_QUERYBYIDREQUEST']._serialized_end=811 + _globals['_QUERYFILTERS']._serialized_start=814 + _globals['_QUERYFILTERS']._serialized_end=1073 + _globals['_SPATIALFILTER']._serialized_start=1076 + _globals['_SPATIALFILTER']._serialized_end=1305 + _globals['_QUERYREQUEST']._serialized_start=1308 + _globals['_QUERYREQUEST']._serialized_end=1569 + _globals['_QUERYRESULTPAGE']._serialized_start=1571 + _globals['_QUERYRESULTPAGE']._serialized_end=1694 + _globals['_DATAACCESSSERVICE']._serialized_start=1978 + _globals['_DATAACCESSSERVICE']._serialized_end=2311 # @@protoc_insertion_point(module_scope) diff --git a/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.pyi b/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.pyi index fab8d67..fdf5211 100644 --- a/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.pyi +++ b/tilebox-datasets/tilebox/datasets/datasets/v1/data_access_pb2.pyi @@ -57,14 +57,16 @@ class GetDatapointByIdRequest(_message.Message): def __init__(self, collection_id: _Optional[str] = ..., id: _Optional[str] = ..., skip_data: bool = ...) -> None: ... class QueryByIDRequest(_message.Message): - __slots__ = ("collection_ids", "id", "skip_data") + __slots__ = ("dataset_id", "collection_ids", "id", "skip_data") + DATASET_ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_IDS_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] SKIP_DATA_FIELD_NUMBER: _ClassVar[int] + dataset_id: _id_pb2.ID collection_ids: _containers.RepeatedCompositeFieldContainer[_id_pb2.ID] id: _id_pb2.ID skip_data: bool - def __init__(self, collection_ids: _Optional[_Iterable[_Union[_id_pb2.ID, _Mapping]]] = ..., id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ..., skip_data: bool = ...) -> None: ... + def __init__(self, dataset_id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ..., collection_ids: _Optional[_Iterable[_Union[_id_pb2.ID, _Mapping]]] = ..., id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ..., skip_data: bool = ...) -> None: ... class QueryFilters(_message.Message): __slots__ = ("time_interval", "datapoint_interval", "spatial_extent") @@ -87,16 +89,18 @@ class SpatialFilter(_message.Message): def __init__(self, geometry: _Optional[_Union[_well_known_types_pb2.Geometry, _Mapping]] = ..., mode: _Optional[_Union[SpatialFilterMode, str]] = ..., coordinate_system: _Optional[_Union[SpatialCoordinateSystem, str]] = ...) -> None: ... class QueryRequest(_message.Message): - __slots__ = ("collection_ids", "filters", "page", "skip_data") + __slots__ = ("dataset_id", "collection_ids", "filters", "page", "skip_data") + DATASET_ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_IDS_FIELD_NUMBER: _ClassVar[int] FILTERS_FIELD_NUMBER: _ClassVar[int] PAGE_FIELD_NUMBER: _ClassVar[int] SKIP_DATA_FIELD_NUMBER: _ClassVar[int] + dataset_id: _id_pb2.ID collection_ids: _containers.RepeatedCompositeFieldContainer[_id_pb2.ID] filters: QueryFilters page: _query_pb2.Pagination skip_data: bool - def __init__(self, collection_ids: _Optional[_Iterable[_Union[_id_pb2.ID, _Mapping]]] = ..., filters: _Optional[_Union[QueryFilters, _Mapping]] = ..., page: _Optional[_Union[_query_pb2.Pagination, _Mapping]] = ..., skip_data: bool = ...) -> None: ... + def __init__(self, dataset_id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ..., collection_ids: _Optional[_Iterable[_Union[_id_pb2.ID, _Mapping]]] = ..., filters: _Optional[_Union[QueryFilters, _Mapping]] = ..., page: _Optional[_Union[_query_pb2.Pagination, _Mapping]] = ..., skip_data: bool = ...) -> None: ... class QueryResultPage(_message.Message): __slots__ = ("data", "next_page") diff --git a/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.py b/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.py index 3deb217..9ec5ac1 100644 --- a/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.py +++ b/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.py @@ -28,7 +28,7 @@ from tilebox.datasets.tilebox.v1 import id_pb2 as tilebox_dot_v1_dot_id__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x64\x61tasets/v1/datasets.proto\x12\x0b\x64\x61tasets.v1\x1a\x1b\x62uf/validate/validate.proto\x1a\x16\x64\x61tasets/v1/core.proto\x1a\x1e\x64\x61tasets/v1/dataset_type.proto\x1a\x13tilebox/v1/id.proto\"\xa9\x01\n\x14\x43reateDatasetRequest\x12\x1b\n\x04name\x18\x01 \x01(\tB\x07\xbaH\x04r\x02\x10\x01R\x04name\x12\x34\n\x04type\x18\x02 \x01(\x0b\x32\x18.datasets.v1.DatasetTypeB\x06\xbaH\x03\xc8\x01\x01R\x04type\x12\x18\n\x07summary\x18\x03 \x01(\tR\x07summary\x12$\n\tcode_name\x18\x04 \x01(\tB\x07\xbaH\x04r\x02\x10\x01R\x08\x63odeName\"\x86\x01\n\x11GetDatasetRequest\x12>\n\x04slug\x18\x01 \x01(\tB*\xbaH\'r%2#^[a-zA-Z0-9_]+(?:\\.[a-zA-Z0-9_]+)*$R\x04slug\x12\x1e\n\x02id\x18\x02 \x01(\x0b\x32\x0e.tilebox.v1.IDR\x02id:\x11\xbaH\x0e\"\x0c\n\x04slug\n\x02id\x10\x01\"\xb2\x01\n\x14UpdateDatasetRequest\x12&\n\x02id\x18\x01 \x01(\x0b\x32\x0e.tilebox.v1.IDB\x06\xbaH\x03\xc8\x01\x01R\x02id\x12\x1b\n\x04name\x18\x02 \x01(\tB\x07\xbaH\x04r\x02\x10\x01R\x04name\x12\x34\n\x04type\x18\x03 \x01(\x0b\x32\x18.datasets.v1.DatasetTypeB\x06\xbaH\x03\xc8\x01\x01R\x04type\x12\x1f\n\x07summary\x18\x04 \x01(\tB\x05\xaa\x01\x02\x08\x01R\x07summary\"t\n\nClientInfo\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12 \n\x0b\x65nvironment\x18\x02 \x01(\tR\x0b\x65nvironment\x12\x30\n\x08packages\x18\x03 \x03(\x0b\x32\x14.datasets.v1.PackageR\x08packages\"7\n\x07Package\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x18\n\x07version\x18\x02 \x01(\tR\x07version\"k\n\x1fUpdateDatasetDescriptionRequest\x12&\n\x02id\x18\x01 \x01(\x0b\x32\x0e.tilebox.v1.IDB\x06\xbaH\x03\xc8\x01\x01R\x02id\x12 \n\x0b\x64\x65scription\x18\x02 \x01(\tR\x0b\x64\x65scription\">\n\x14\x44\x65leteDatasetRequest\x12&\n\x02id\x18\x01 \x01(\x0b\x32\x0e.tilebox.v1.IDB\x06\xbaH\x03\xc8\x01\x01R\x02id\"1\n\x15\x44\x65leteDatasetResponse\x12\x18\n\x07trashed\x18\x01 \x01(\x08R\x07trashed\"O\n\x13ListDatasetsRequest\x12\x38\n\x0b\x63lient_info\x18\x01 \x01(\x0b\x32\x17.datasets.v1.ClientInfoR\nclientInfo\"\x86\x02\n\x14ListDatasetsResponse\x12\x30\n\x08\x64\x61tasets\x18\x01 \x03(\x0b\x32\x14.datasets.v1.DatasetR\x08\x64\x61tasets\x12\x31\n\x06groups\x18\x02 \x03(\x0b\x32\x19.datasets.v1.DatasetGroupR\x06groups\x12%\n\x0eserver_message\x18\x03 \x01(\tR\rserverMessage\x12%\n\x0eowned_datasets\x18\x04 \x01(\x03R\rownedDatasets\x12;\n\x16maximum_owned_datasets\x18\x05 \x01(\x03\x42\x05\xaa\x01\x02\x08\x01R\x14maximumOwnedDatasets2\x81\x04\n\x0e\x44\x61tasetService\x12J\n\rCreateDataset\x12!.datasets.v1.CreateDatasetRequest\x1a\x14.datasets.v1.Dataset\"\x00\x12\x44\n\nGetDataset\x12\x1e.datasets.v1.GetDatasetRequest\x1a\x14.datasets.v1.Dataset\"\x00\x12J\n\rUpdateDataset\x12!.datasets.v1.UpdateDatasetRequest\x1a\x14.datasets.v1.Dataset\"\x00\x12`\n\x18UpdateDatasetDescription\x12,.datasets.v1.UpdateDatasetDescriptionRequest\x1a\x14.datasets.v1.Dataset\"\x00\x12X\n\rDeleteDataset\x12!.datasets.v1.DeleteDatasetRequest\x1a\".datasets.v1.DeleteDatasetResponse\"\x00\x12U\n\x0cListDatasets\x12 .datasets.v1.ListDatasetsRequest\x1a!.datasets.v1.ListDatasetsResponse\"\x00\x42r\n\x0f\x63om.datasets.v1B\rDatasetsProtoP\x01\xa2\x02\x03\x44XX\xaa\x02\x0b\x44\x61tasets.V1\xca\x02\x0b\x44\x61tasets\\V1\xe2\x02\x17\x44\x61tasets\\V1\\GPBMetadata\xea\x02\x0c\x44\x61tasets::V1\x92\x03\x02\x08\x02\x62\x08\x65\x64itionsp\xe8\x07') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1a\x64\x61tasets/v1/datasets.proto\x12\x0b\x64\x61tasets.v1\x1a\x1b\x62uf/validate/validate.proto\x1a\x16\x64\x61tasets/v1/core.proto\x1a\x1e\x64\x61tasets/v1/dataset_type.proto\x1a\x13tilebox/v1/id.proto\"\xfe\x01\n\x14\x43reateDatasetRequest\x12 \n\x04name\x18\x01 \x01(\tB\x0c\xbaH\tr\x04\x10\x01\x18\x64\xc8\x01\x01R\x04name\x12\x34\n\x04type\x18\x02 \x01(\x0b\x32\x18.datasets.v1.DatasetTypeB\x06\xbaH\x03\xc8\x01\x01R\x04type\x12#\n\x07summary\x18\x03 \x01(\tB\t\xbaH\x06r\x04\x18\xd0\x86\x03R\x07summary\x12<\n\tcode_name\x18\x04 \x01(\tB\x1f\xbaH\x1cr\x17\x10\x01\x18\x64\x32\x11^[a-z][a-z0-9_]*$\xc8\x01\x01R\x08\x63odeName\x12+\n\x0b\x64\x65scription\x18\x05 \x01(\tB\t\xbaH\x06r\x04\x18\xc0\x84=R\x0b\x64\x65scription\"\x86\x01\n\x11GetDatasetRequest\x12>\n\x04slug\x18\x01 \x01(\tB*\xbaH\'r%2#^[a-zA-Z0-9_]+(?:\\.[a-zA-Z0-9_]+)*$R\x04slug\x12\x1e\n\x02id\x18\x02 \x01(\x0b\x32\x0e.tilebox.v1.IDR\x02id:\x11\xbaH\x0e\"\x0c\n\x04slug\n\x02id\x10\x01\"\xb3\x02\n\x14UpdateDatasetRequest\x12&\n\x02id\x18\x01 \x01(\x0b\x32\x0e.tilebox.v1.IDB\x06\xbaH\x03\xc8\x01\x01R\x02id\x12\"\n\x04name\x18\x02 \x01(\tB\x0e\xaa\x01\x02\x08\x01\xbaH\x06r\x04\x10\x01\x18\x64R\x04name\x12\x33\n\x04type\x18\x03 \x01(\x0b\x32\x18.datasets.v1.DatasetTypeB\x05\xaa\x01\x02\x08\x01R\x04type\x12(\n\x07summary\x18\x04 \x01(\tB\x0e\xaa\x01\x02\x08\x01\xbaH\x06r\x04\x18\xd0\x86\x03R\x07summary\x12>\n\tcode_name\x18\x05 \x01(\tB!\xaa\x01\x02\x08\x01\xbaH\x19r\x17\x10\x01\x18\x64\x32\x11^[a-z][a-z0-9_]*$R\x08\x63odeName\x12\x30\n\x0b\x64\x65scription\x18\x06 \x01(\tB\x0e\xaa\x01\x02\x08\x01\xbaH\x06r\x04\x18\xc0\x84=R\x0b\x64\x65scription\"t\n\nClientInfo\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12 \n\x0b\x65nvironment\x18\x02 \x01(\tR\x0b\x65nvironment\x12\x30\n\x08packages\x18\x03 \x03(\x0b\x32\x14.datasets.v1.PackageR\x08packages\"7\n\x07Package\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x18\n\x07version\x18\x02 \x01(\tR\x07version\">\n\x14\x44\x65leteDatasetRequest\x12&\n\x02id\x18\x01 \x01(\x0b\x32\x0e.tilebox.v1.IDB\x06\xbaH\x03\xc8\x01\x01R\x02id\"1\n\x15\x44\x65leteDatasetResponse\x12\x18\n\x07trashed\x18\x01 \x01(\x08R\x07trashed\"O\n\x13ListDatasetsRequest\x12\x38\n\x0b\x63lient_info\x18\x01 \x01(\x0b\x32\x17.datasets.v1.ClientInfoR\nclientInfo\"\x86\x02\n\x14ListDatasetsResponse\x12\x30\n\x08\x64\x61tasets\x18\x01 \x03(\x0b\x32\x14.datasets.v1.DatasetR\x08\x64\x61tasets\x12\x31\n\x06groups\x18\x02 \x03(\x0b\x32\x19.datasets.v1.DatasetGroupR\x06groups\x12%\n\x0eserver_message\x18\x03 \x01(\tR\rserverMessage\x12%\n\x0eowned_datasets\x18\x04 \x01(\x03R\rownedDatasets\x12;\n\x16maximum_owned_datasets\x18\x05 \x01(\x03\x42\x05\xaa\x01\x02\x08\x01R\x14maximumOwnedDatasets2\x9f\x03\n\x0e\x44\x61tasetService\x12J\n\rCreateDataset\x12!.datasets.v1.CreateDatasetRequest\x1a\x14.datasets.v1.Dataset\"\x00\x12\x44\n\nGetDataset\x12\x1e.datasets.v1.GetDatasetRequest\x1a\x14.datasets.v1.Dataset\"\x00\x12J\n\rUpdateDataset\x12!.datasets.v1.UpdateDatasetRequest\x1a\x14.datasets.v1.Dataset\"\x00\x12X\n\rDeleteDataset\x12!.datasets.v1.DeleteDatasetRequest\x1a\".datasets.v1.DeleteDatasetResponse\"\x00\x12U\n\x0cListDatasets\x12 .datasets.v1.ListDatasetsRequest\x1a!.datasets.v1.ListDatasetsResponse\"\x00\x42r\n\x0f\x63om.datasets.v1B\rDatasetsProtoP\x01\xa2\x02\x03\x44XX\xaa\x02\x0b\x44\x61tasets.V1\xca\x02\x0b\x44\x61tasets\\V1\xe2\x02\x17\x44\x61tasets\\V1\\GPBMetadata\xea\x02\x0c\x44\x61tasets::V1\x92\x03\x02\x08\x02\x62\x08\x65\x64itionsp\xe8\x07') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -37,11 +37,15 @@ _globals['DESCRIPTOR']._loaded_options = None _globals['DESCRIPTOR']._serialized_options = b'\n\017com.datasets.v1B\rDatasetsProtoP\001\242\002\003DXX\252\002\013Datasets.V1\312\002\013Datasets\\V1\342\002\027Datasets\\V1\\GPBMetadata\352\002\014Datasets::V1\222\003\002\010\002' _globals['_CREATEDATASETREQUEST'].fields_by_name['name']._loaded_options = None - _globals['_CREATEDATASETREQUEST'].fields_by_name['name']._serialized_options = b'\272H\004r\002\020\001' + _globals['_CREATEDATASETREQUEST'].fields_by_name['name']._serialized_options = b'\272H\tr\004\020\001\030d\310\001\001' _globals['_CREATEDATASETREQUEST'].fields_by_name['type']._loaded_options = None _globals['_CREATEDATASETREQUEST'].fields_by_name['type']._serialized_options = b'\272H\003\310\001\001' + _globals['_CREATEDATASETREQUEST'].fields_by_name['summary']._loaded_options = None + _globals['_CREATEDATASETREQUEST'].fields_by_name['summary']._serialized_options = b'\272H\006r\004\030\320\206\003' _globals['_CREATEDATASETREQUEST'].fields_by_name['code_name']._loaded_options = None - _globals['_CREATEDATASETREQUEST'].fields_by_name['code_name']._serialized_options = b'\272H\004r\002\020\001' + _globals['_CREATEDATASETREQUEST'].fields_by_name['code_name']._serialized_options = b'\272H\034r\027\020\001\030d2\021^[a-z][a-z0-9_]*$\310\001\001' + _globals['_CREATEDATASETREQUEST'].fields_by_name['description']._loaded_options = None + _globals['_CREATEDATASETREQUEST'].fields_by_name['description']._serialized_options = b'\272H\006r\004\030\300\204=' _globals['_GETDATASETREQUEST'].fields_by_name['slug']._loaded_options = None _globals['_GETDATASETREQUEST'].fields_by_name['slug']._serialized_options = b'\272H\'r%2#^[a-zA-Z0-9_]+(?:\\.[a-zA-Z0-9_]+)*$' _globals['_GETDATASETREQUEST']._loaded_options = None @@ -49,37 +53,37 @@ _globals['_UPDATEDATASETREQUEST'].fields_by_name['id']._loaded_options = None _globals['_UPDATEDATASETREQUEST'].fields_by_name['id']._serialized_options = b'\272H\003\310\001\001' _globals['_UPDATEDATASETREQUEST'].fields_by_name['name']._loaded_options = None - _globals['_UPDATEDATASETREQUEST'].fields_by_name['name']._serialized_options = b'\272H\004r\002\020\001' + _globals['_UPDATEDATASETREQUEST'].fields_by_name['name']._serialized_options = b'\252\001\002\010\001\272H\006r\004\020\001\030d' _globals['_UPDATEDATASETREQUEST'].fields_by_name['type']._loaded_options = None - _globals['_UPDATEDATASETREQUEST'].fields_by_name['type']._serialized_options = b'\272H\003\310\001\001' + _globals['_UPDATEDATASETREQUEST'].fields_by_name['type']._serialized_options = b'\252\001\002\010\001' _globals['_UPDATEDATASETREQUEST'].fields_by_name['summary']._loaded_options = None - _globals['_UPDATEDATASETREQUEST'].fields_by_name['summary']._serialized_options = b'\252\001\002\010\001' - _globals['_UPDATEDATASETDESCRIPTIONREQUEST'].fields_by_name['id']._loaded_options = None - _globals['_UPDATEDATASETDESCRIPTIONREQUEST'].fields_by_name['id']._serialized_options = b'\272H\003\310\001\001' + _globals['_UPDATEDATASETREQUEST'].fields_by_name['summary']._serialized_options = b'\252\001\002\010\001\272H\006r\004\030\320\206\003' + _globals['_UPDATEDATASETREQUEST'].fields_by_name['code_name']._loaded_options = None + _globals['_UPDATEDATASETREQUEST'].fields_by_name['code_name']._serialized_options = b'\252\001\002\010\001\272H\031r\027\020\001\030d2\021^[a-z][a-z0-9_]*$' + _globals['_UPDATEDATASETREQUEST'].fields_by_name['description']._loaded_options = None + _globals['_UPDATEDATASETREQUEST'].fields_by_name['description']._serialized_options = b'\252\001\002\010\001\272H\006r\004\030\300\204=' _globals['_DELETEDATASETREQUEST'].fields_by_name['id']._loaded_options = None _globals['_DELETEDATASETREQUEST'].fields_by_name['id']._serialized_options = b'\272H\003\310\001\001' _globals['_LISTDATASETSRESPONSE'].fields_by_name['maximum_owned_datasets']._loaded_options = None _globals['_LISTDATASETSRESPONSE'].fields_by_name['maximum_owned_datasets']._serialized_options = b'\252\001\002\010\001' _globals['_CREATEDATASETREQUEST']._serialized_start=150 - _globals['_CREATEDATASETREQUEST']._serialized_end=319 - _globals['_GETDATASETREQUEST']._serialized_start=322 - _globals['_GETDATASETREQUEST']._serialized_end=456 - _globals['_UPDATEDATASETREQUEST']._serialized_start=459 - _globals['_UPDATEDATASETREQUEST']._serialized_end=637 - _globals['_CLIENTINFO']._serialized_start=639 - _globals['_CLIENTINFO']._serialized_end=755 - _globals['_PACKAGE']._serialized_start=757 - _globals['_PACKAGE']._serialized_end=812 - _globals['_UPDATEDATASETDESCRIPTIONREQUEST']._serialized_start=814 - _globals['_UPDATEDATASETDESCRIPTIONREQUEST']._serialized_end=921 - _globals['_DELETEDATASETREQUEST']._serialized_start=923 - _globals['_DELETEDATASETREQUEST']._serialized_end=985 - _globals['_DELETEDATASETRESPONSE']._serialized_start=987 - _globals['_DELETEDATASETRESPONSE']._serialized_end=1036 - _globals['_LISTDATASETSREQUEST']._serialized_start=1038 - _globals['_LISTDATASETSREQUEST']._serialized_end=1117 - _globals['_LISTDATASETSRESPONSE']._serialized_start=1120 - _globals['_LISTDATASETSRESPONSE']._serialized_end=1382 - _globals['_DATASETSERVICE']._serialized_start=1385 - _globals['_DATASETSERVICE']._serialized_end=1898 + _globals['_CREATEDATASETREQUEST']._serialized_end=404 + _globals['_GETDATASETREQUEST']._serialized_start=407 + _globals['_GETDATASETREQUEST']._serialized_end=541 + _globals['_UPDATEDATASETREQUEST']._serialized_start=544 + _globals['_UPDATEDATASETREQUEST']._serialized_end=851 + _globals['_CLIENTINFO']._serialized_start=853 + _globals['_CLIENTINFO']._serialized_end=969 + _globals['_PACKAGE']._serialized_start=971 + _globals['_PACKAGE']._serialized_end=1026 + _globals['_DELETEDATASETREQUEST']._serialized_start=1028 + _globals['_DELETEDATASETREQUEST']._serialized_end=1090 + _globals['_DELETEDATASETRESPONSE']._serialized_start=1092 + _globals['_DELETEDATASETRESPONSE']._serialized_end=1141 + _globals['_LISTDATASETSREQUEST']._serialized_start=1143 + _globals['_LISTDATASETSREQUEST']._serialized_end=1222 + _globals['_LISTDATASETSRESPONSE']._serialized_start=1225 + _globals['_LISTDATASETSRESPONSE']._serialized_end=1487 + _globals['_DATASETSERVICE']._serialized_start=1490 + _globals['_DATASETSERVICE']._serialized_end=1905 # @@protoc_insertion_point(module_scope) diff --git a/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.pyi b/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.pyi index 6eda097..fe64924 100644 --- a/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.pyi +++ b/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2.pyi @@ -11,16 +11,18 @@ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor class CreateDatasetRequest(_message.Message): - __slots__ = ("name", "type", "summary", "code_name") + __slots__ = ("name", "type", "summary", "code_name", "description") NAME_FIELD_NUMBER: _ClassVar[int] TYPE_FIELD_NUMBER: _ClassVar[int] SUMMARY_FIELD_NUMBER: _ClassVar[int] CODE_NAME_FIELD_NUMBER: _ClassVar[int] + DESCRIPTION_FIELD_NUMBER: _ClassVar[int] name: str type: _dataset_type_pb2.DatasetType summary: str code_name: str - def __init__(self, name: _Optional[str] = ..., type: _Optional[_Union[_dataset_type_pb2.DatasetType, _Mapping]] = ..., summary: _Optional[str] = ..., code_name: _Optional[str] = ...) -> None: ... + description: str + def __init__(self, name: _Optional[str] = ..., type: _Optional[_Union[_dataset_type_pb2.DatasetType, _Mapping]] = ..., summary: _Optional[str] = ..., code_name: _Optional[str] = ..., description: _Optional[str] = ...) -> None: ... class GetDatasetRequest(_message.Message): __slots__ = ("slug", "id") @@ -31,16 +33,20 @@ class GetDatasetRequest(_message.Message): def __init__(self, slug: _Optional[str] = ..., id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ...) -> None: ... class UpdateDatasetRequest(_message.Message): - __slots__ = ("id", "name", "type", "summary") + __slots__ = ("id", "name", "type", "summary", "code_name", "description") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] TYPE_FIELD_NUMBER: _ClassVar[int] SUMMARY_FIELD_NUMBER: _ClassVar[int] + CODE_NAME_FIELD_NUMBER: _ClassVar[int] + DESCRIPTION_FIELD_NUMBER: _ClassVar[int] id: _id_pb2.ID name: str type: _dataset_type_pb2.DatasetType summary: str - def __init__(self, id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ..., name: _Optional[str] = ..., type: _Optional[_Union[_dataset_type_pb2.DatasetType, _Mapping]] = ..., summary: _Optional[str] = ...) -> None: ... + code_name: str + description: str + def __init__(self, id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ..., name: _Optional[str] = ..., type: _Optional[_Union[_dataset_type_pb2.DatasetType, _Mapping]] = ..., summary: _Optional[str] = ..., code_name: _Optional[str] = ..., description: _Optional[str] = ...) -> None: ... class ClientInfo(_message.Message): __slots__ = ("name", "environment", "packages") @@ -60,14 +66,6 @@ class Package(_message.Message): version: str def __init__(self, name: _Optional[str] = ..., version: _Optional[str] = ...) -> None: ... -class UpdateDatasetDescriptionRequest(_message.Message): - __slots__ = ("id", "description") - ID_FIELD_NUMBER: _ClassVar[int] - DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - id: _id_pb2.ID - description: str - def __init__(self, id: _Optional[_Union[_id_pb2.ID, _Mapping]] = ..., description: _Optional[str] = ...) -> None: ... - class DeleteDatasetRequest(_message.Message): __slots__ = ("id",) ID_FIELD_NUMBER: _ClassVar[int] diff --git a/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2_grpc.py b/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2_grpc.py index 5729c02..3d0a4c6 100644 --- a/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2_grpc.py +++ b/tilebox-datasets/tilebox/datasets/datasets/v1/datasets_pb2_grpc.py @@ -31,11 +31,6 @@ def __init__(self, channel): request_serializer=datasets_dot_v1_dot_datasets__pb2.UpdateDatasetRequest.SerializeToString, response_deserializer=datasets_dot_v1_dot_core__pb2.Dataset.FromString, _registered_method=True) - self.UpdateDatasetDescription = channel.unary_unary( - '/datasets.v1.DatasetService/UpdateDatasetDescription', - request_serializer=datasets_dot_v1_dot_datasets__pb2.UpdateDatasetDescriptionRequest.SerializeToString, - response_deserializer=datasets_dot_v1_dot_core__pb2.Dataset.FromString, - _registered_method=True) self.DeleteDataset = channel.unary_unary( '/datasets.v1.DatasetService/DeleteDataset', request_serializer=datasets_dot_v1_dot_datasets__pb2.DeleteDatasetRequest.SerializeToString, @@ -70,12 +65,6 @@ def UpdateDataset(self, request, context): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def UpdateDatasetDescription(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - def DeleteDataset(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) @@ -106,11 +95,6 @@ def add_DatasetServiceServicer_to_server(servicer, server): request_deserializer=datasets_dot_v1_dot_datasets__pb2.UpdateDatasetRequest.FromString, response_serializer=datasets_dot_v1_dot_core__pb2.Dataset.SerializeToString, ), - 'UpdateDatasetDescription': grpc.unary_unary_rpc_method_handler( - servicer.UpdateDatasetDescription, - request_deserializer=datasets_dot_v1_dot_datasets__pb2.UpdateDatasetDescriptionRequest.FromString, - response_serializer=datasets_dot_v1_dot_core__pb2.Dataset.SerializeToString, - ), 'DeleteDataset': grpc.unary_unary_rpc_method_handler( servicer.DeleteDataset, request_deserializer=datasets_dot_v1_dot_datasets__pb2.DeleteDatasetRequest.FromString, @@ -214,33 +198,6 @@ def UpdateDataset(request, metadata, _registered_method=True) - @staticmethod - def UpdateDatasetDescription(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/datasets.v1.DatasetService/UpdateDatasetDescription', - datasets_dot_v1_dot_datasets__pb2.UpdateDatasetDescriptionRequest.SerializeToString, - datasets_dot_v1_dot_core__pb2.Dataset.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - @staticmethod def DeleteDataset(request, target, diff --git a/tilebox-datasets/tilebox/datasets/service.py b/tilebox-datasets/tilebox/datasets/service.py index e9ca4dd..3c1b5cb 100644 --- a/tilebox-datasets/tilebox/datasets/service.py +++ b/tilebox-datasets/tilebox/datasets/service.py @@ -197,8 +197,15 @@ def get_collection_by_name( CollectionInfo.from_message, ) - def query_by_id(self, collection_ids: list[UUID], datapoint_id: UUID, skip_data: bool) -> Promise[AnyMessage]: + def query_by_id( + self, + dataset_id: UUID, + collection_ids: list[UUID], + datapoint_id: UUID, + skip_data: bool, + ) -> Promise[AnyMessage]: req = QueryByIDRequest( + dataset_id=must_uuid_to_uuid_message(dataset_id), collection_ids=list(map(must_uuid_to_uuid_message, collection_ids)), id=must_uuid_to_uuid_message(datapoint_id), skip_data=skip_data, @@ -207,12 +214,14 @@ def query_by_id(self, collection_ids: list[UUID], datapoint_id: UUID, skip_data: def query( self, + dataset_id: UUID, collection_ids: list[UUID], filters: QueryFilters, skip_data: bool, page: Pagination | None = None, ) -> Promise[QueryResultPage]: req = QueryRequest( + dataset_id=must_uuid_to_uuid_message(dataset_id), collection_ids=list(map(must_uuid_to_uuid_message, collection_ids)), filters=filters.to_message(), page=page.to_message() if page is not None else None, diff --git a/tilebox-datasets/tilebox/datasets/sync/dataset.py b/tilebox-datasets/tilebox/datasets/sync/dataset.py index 45ec3b0..7f95fa6 100644 --- a/tilebox-datasets/tilebox/datasets/sync/dataset.py +++ b/tilebox-datasets/tilebox/datasets/sync/dataset.py @@ -10,7 +10,7 @@ from _tilebox.grpc.pagination import Pagination as PaginationProtocol from _tilebox.grpc.pagination import paginated_request from _tilebox.grpc.producer_consumer import concurrent_producer_consumer -from tilebox.datasets.data.collection import CollectionInfo +from tilebox.datasets.data.collection import Collection, CollectionInfo from tilebox.datasets.data.data_access import QueryFilters, SpatialFilter, SpatialFilterLike from tilebox.datasets.data.datapoint import QueryResultPage from tilebox.datasets.data.datasets import Dataset @@ -140,6 +140,122 @@ def delete_collection(self, collection: "str | UUID | CollectionClient") -> None self._service.delete_collection(self._dataset.id, collection_id).get() + def find( + self, + datapoint_id: str | UUID, + collections: "list[str] | list[UUID] | list[Collection] | list[CollectionInfo] | list[CollectionClient] | None" = None, + skip_data: bool = False, + ) -> xr.Dataset: + """ + Find a specific datapoint in one of the specified collections by its id. + + Args: + datapoint_id: The id of the datapoint to find. + collections: The collections to search in. Supports collection names, ids or collection objects. + If not specified, all collections in the dataset are searched. + skip_data: Whether to skip the actual data of the datapoint. If True, only + datapoint metadata is returned. + + Returns: + The datapoint as an xarray dataset. + """ + collection_ids = self._collection_ids(collections) + try: + datapoint = self._service.query_by_id( + self._dataset.id, + collection_ids, + as_uuid(datapoint_id), + skip_data, + ).get() + except ArgumentError: + raise ValueError(f"Invalid datapoint id: {datapoint_id} is not a valid UUID") from None + except NotFoundError: + raise NotFoundError(f"No such datapoint {datapoint_id}") from None + + message_type = get_message_type(datapoint.type_url) + data = message_type.FromString(datapoint.value) + + converter = MessageToXarrayConverter(initial_capacity=1) + converter.convert(data) + return converter.finalize("time", skip_empty_fields=skip_data).isel(time=0) + + def query( + self, + *, + collections: "list[str] | list[UUID] | list[Collection] | list[CollectionInfo] | list[CollectionClient] | dict[str, CollectionClient] | None" = None, + temporal_extent: TimeIntervalLike, + spatial_extent: SpatialFilterLike | None = None, + skip_data: bool = False, + show_progress: bool | ProgressCallback = False, + ) -> xr.Dataset: + """ + Query datapoints in the specified collections and temporal extent. + + Args: + collections: The collections to query in. Supports collection names, ids or collection objects. + If not specified, all collections in the dataset are queried. + temporal_extent: The temporal extent to query data for. (Required) + spatial_extent: The spatial extent to query data in. (Optional) + skip_data: Whether to skip the actual data of the datapoint. If True, only + datapoint metadata is returned. + show_progress: Whether to show a progress bar while loading the data. + If a callable is specified it is used as callback to report progress percentages. + + Returns: + Matching datapoints in the given temporal and spatial extent as an xarray dataset. + """ + if temporal_extent is None: + raise ValueError("A temporal_extent for your query must be specified") + + collection_ids = self._collection_ids(collections) + pages = _iter_query_pages( + self._service, + self._dataset.id, + collection_ids, + temporal_extent, + spatial_extent, + skip_data, + dataset_name=self.name, + show_progress=show_progress, + ) + return _convert_to_dataset(pages, skip_empty_fields=skip_data) + + def _collection_id(self, collection: "UUID | Collection | CollectionInfo | CollectionClient") -> UUID: + if isinstance(collection, CollectionClient): + return collection._collection.id + if isinstance(collection, CollectionInfo): + return collection.collection.id + if isinstance(collection, Collection): + return collection.id + return collection + + def _collection_ids( + self, + collections: "list[str] | list[UUID] | list[Collection] | list[CollectionInfo] | list[CollectionClient] | dict[str, CollectionClient] | None", + ) -> list[UUID]: + if collections is None: + return [] + + all_collections: list[CollectionInfo] = self._service.get_collections(self._dataset.id, True, True).get() + # find all valid collection names and ids + collections_by_name = {c.collection.name: c.collection.id for c in all_collections} + valid_collection_ids = {c.collection.id for c in all_collections} + + collection_ids: list[UUID] = [] + for collection in collections: + if isinstance(collection, str): + try: + collection_ids.append(collections_by_name[collection]) + except KeyError: + raise ValueError(f"Collection {collection} not found in dataset {self.name}") from None + else: + collection_id = self._collection_id(collection) + if collection_id not in valid_collection_ids: + raise ValueError(f"Collection {collection_id} is not part of the dataset {self.name}") + collection_ids.append(collection_id) + + return collection_ids + def __repr__(self) -> str: return f"{self.name} [Timeseries Dataset]: {self._dataset.summary}" @@ -215,7 +331,7 @@ def find(self, datapoint_id: str | UUID, skip_data: bool = False) -> xr.Dataset: """ try: datapoint = self._dataset._service.query_by_id( - [self._collection.id], as_uuid(datapoint_id), skip_data + self._dataset._dataset.id, [self._collection.id], as_uuid(datapoint_id), skip_data ).get() except ArgumentError: raise ValueError(f"Invalid datapoint id: {datapoint_id} is not a valid UUID") from None @@ -253,8 +369,14 @@ def _find_interval( filters = QueryFilters(temporal_extent=IDInterval.parse(datapoint_id_interval, end_inclusive=end_inclusive)) def request(page: PaginationProtocol) -> QueryResultPage: - query_page = Pagination(page.limit, page.starting_after) - return self._dataset._service.query([self._collection.id], filters, skip_data, query_page).get() + return _query_page( + self._dataset._service, + self._dataset._dataset.id, + [self._collection.id], + filters, + skip_data, + page, + ) initial_page = Pagination() pages = paginated_request(request, initial_page) @@ -343,7 +465,16 @@ def query( if temporal_extent is None: raise ValueError("A temporal_extent for your query must be specified") - pages = self._iter_pages(temporal_extent, spatial_extent, skip_data, show_progress=show_progress) + pages = _iter_query_pages( + self._dataset._service, + self._dataset._dataset.id, + [self._collection.id], + temporal_extent, + spatial_extent, + skip_data, + dataset_name=self._dataset.name, + show_progress=show_progress, + ) return _convert_to_dataset(pages, skip_empty_fields=skip_data) def _iter_pages( @@ -354,27 +485,17 @@ def _iter_pages( show_progress: bool | ProgressCallback = False, page_size: int | None = None, ) -> Iterator[QueryResultPage]: - time_interval = TimeInterval.parse(temporal_extent) - filters = QueryFilters(time_interval, SpatialFilter.parse(spatial_extent) if spatial_extent else None) - - request = partial(self._query_page, filters, skip_data) - - initial_page = Pagination(limit=page_size) - pages = paginated_request(request, initial_page) - - if callable(show_progress): - pages = with_time_progress_callback(pages, time_interval, show_progress) - elif show_progress: - message = f"Fetching {self._dataset.name}" - pages = with_time_progressbar(pages, time_interval, message) - - yield from pages - - def _query_page( - self, filters: QueryFilters, skip_data: bool, page: PaginationProtocol | None = None - ) -> QueryResultPage: - query_page = Pagination(page.limit, page.starting_after) if page else Pagination() - return self._dataset._service.query([self._collection.id], filters, skip_data, query_page).get() + return _iter_query_pages( + self._dataset._service, + self._dataset._dataset.id, + [self._collection.id], + temporal_extent, + spatial_extent, + skip_data, + dataset_name=self._dataset.name, + show_progress=show_progress, + page_size=page_size, + ) def ingest( self, @@ -469,6 +590,46 @@ def delete(self, datapoints: DatapointIDs, *, show_progress: bool | ProgressCall return num_deleted +def _query_page( # noqa: PLR0913 + service: TileboxDatasetService, + dataset_id: UUID, + collection_ids: list[UUID] | None, + filters: QueryFilters, + skip_data: bool, + page: PaginationProtocol | None = None, +) -> QueryResultPage: + query_page = Pagination(page.limit, page.starting_after) if page else Pagination() + return service.query(dataset_id, collection_ids or [], filters, skip_data, query_page).get() + + +def _iter_query_pages( # noqa: PLR0913 + service: TileboxDatasetService, + dataset_id: UUID, + collection_ids: list[UUID] | None, + temporal_extent: TimeIntervalLike, + spatial_extent: SpatialFilterLike | None = None, + skip_data: bool = False, + *, + dataset_name: str, + show_progress: bool | ProgressCallback = False, + page_size: int | None = None, +) -> Iterator[QueryResultPage]: + time_interval = TimeInterval.parse(temporal_extent) + filters = QueryFilters(time_interval, SpatialFilter.parse(spatial_extent) if spatial_extent else None) + + request = partial(_query_page, service, dataset_id, collection_ids, filters, skip_data) + + initial_page = Pagination(limit=page_size) + pages = paginated_request(request, initial_page) + + if callable(show_progress): + pages = with_time_progress_callback(pages, time_interval, show_progress) + elif show_progress: + pages = with_time_progressbar(pages, time_interval, f"Fetching {dataset_name}") + + yield from pages + + def _convert_to_dataset(pages: Iterator[QueryResultPage], skip_empty_fields: bool = False) -> xr.Dataset: """ Convert an iterator of QueryResultPages into a single xarray Dataset From e98020e8ff5b050ee036bc514f9d630363fd1322 Mon Sep 17 00:00:00 2001 From: Lukas Bindreiter Date: Fri, 6 Mar 2026 10:47:23 +0100 Subject: [PATCH 2/2] Prepare release v0.50 --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fa4987..f42ee30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.50.0] - 2026-03-06 + ### Added - `tilebox-datasets`: Added dataset-level `find` and `query` methods on both sync and async `DatasetClient` to query @@ -339,7 +341,8 @@ the first client that does not cache data (since it's already on the local file - Released under the [MIT](https://opensource.org/license/mit) license. - Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc` -[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.49.0...HEAD +[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.50.0...HEAD +[0.50.0]: https://github.com/tilebox/tilebox-python/compare/v0.49.0...v0.50.0 [0.49.0]: https://github.com/tilebox/tilebox-python/compare/v0.48.0...v0.49.0 [0.48.0]: https://github.com/tilebox/tilebox-python/compare/v0.47.0...v0.48.0 [0.47.0]: https://github.com/tilebox/tilebox-python/compare/v0.46.0...v0.47.0