Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ Release History

Unreleased Changes
------------------
* Added an optional ``spatial`` attribute for tables, archives, and
collections. The ``spatial`` attribute for rasters and vectors remains
required. Spatial information for Collections represents the union of the
bounding boxes of all items within a collection. Table and archive resources
have no spatial information by default, but the attributes can be set manually.
https://github.com/natcap/geometamaker/issues/93
* The Natural Capital Project changed its name to the Natural Capital Alliance.
References to the old name and website URL have been updated to reflect
this change. https://github.com/natcap/geometamaker/issues/115
Expand Down
33 changes: 22 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,40 @@ how to do the same thing, if possible, using the CLI.
```python
import geometamaker

# For a vector:
data_path = 'data/watershed_gura.shp'
resource = geometamaker.describe(data_path)
vector_resource = geometamaker.describe(data_path)

resource.set_title('My Dataset')
resource.set_description('all about my dataset')
resource.set_keywords(['hydrology', 'watersheds'])
vector_resource.set_title('My Dataset')
vector_resource.set_description('all about my dataset')
vector_resource.set_keywords(['hydrology', 'watersheds'])

# For a vector:
resource.set_field_description(
vector_resource.set_field_description(
'field_name', # the name of an actual field in the vector's table
description='something about the field',
units='mm')
vector_resource.write()

# or for a raster:
# For a raster:
data_path = 'data/dem.tif'
resource = geometamaker.describe(data_path)
resource.set_band_description(
raster_resource = geometamaker.describe(data_path)
raster_resource.set_band_description(
1, # a raster band index, starting at 1
description='something about the band',
units='mm')
raster_resource.write()


resource.write()
# For a CSV:
data_path = 'data/table.csv'
table_resource = geometamaker.describe(data_path)
table_resource.set_field_description(
'field_name', # the name of an actual field in the table
description='something about the field',
units='mm')
# A table does not have inherent spatial information, but the
# property may be set manually:
table_resource.set_spatial(raster_resource.spatial)
table_resource.write()
```
For a complete list of methods and attributes:
https://geometamaker.readthedocs.io/en/latest/index.html
Expand Down
4 changes: 2 additions & 2 deletions docs/environment-rtd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ name: env-readthedocs
channels:
- conda-forge
dependencies:
- python=3.10
- python=3.13
- pip
- frictionless
- fsspec
- gdal>=3
- numpy
- pygeoprocessing>=2.4.2
- pygeoprocessing>=2.4.5
- pyyaml
- sphinx_rtd_theme
- myst-parser
Expand Down
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import sys
import sphinx.ext.apidoc
from pkg_resources import get_distribution
from importlib.metadata import version

sys.path.insert(0, os.path.abspath('../../src'))

Expand Down Expand Up @@ -81,5 +81,5 @@ def process_module_specific_docstrings(app, what, name, obj, options, lines):
os.path.join(DOCS_SOURCE_DIR, '..', '..', 'src'),
])

release = get_distribution('geometamaker').version
release = version('geometamaker')
version = '.'.join(release.split('.')[:2])
71 changes: 63 additions & 8 deletions src/geometamaker/geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,20 @@ def _wkt_to_epsg_units_string(wkt_string):
return crs_string, units_string


def _epsg_to_wkt_units_string(epsg_code):
    """Look up the WKT definition and unit name for an EPSG code.

    Args:
        epsg_code (int): numeric EPSG code, e.g. 4326.

    Returns:
        tuple: 2-tuple of strings ``(wkt_string, units_string)``. Both
        default to ``'unknown'`` if the code cannot be interpreted as a
        coordinate reference system.
    """
    wkt_string = 'unknown'
    units_string = 'unknown'
    try:
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(epsg_code)
        wkt_string = srs.ExportToWkt()
        # GetAttrValue returns None when the CRS has no UNIT node;
        # keep the 'unknown' fallback instead of propagating None.
        units_string = srs.GetAttrValue('UNIT', 0) or 'unknown'
    except RuntimeError:
        LOGGER.warning(
            f'EPSG: {epsg_code} cannot be interpreted as a coordinate reference system')
    return wkt_string, units_string


def _list_files_with_depth(directory, depth, exclude_regex=None,
exclude_hidden=True):
"""List files in directory up to depth
Expand Down Expand Up @@ -504,6 +518,8 @@ def describe_collection(directory, depth=numpy.iinfo(numpy.int16).max,
root_ext_map, root_list = _group_files_by_root(file_list)

items = []
collection_crs_set = set()
item_spatial_list = []

for root in root_list:
extensions = root_ext_map[root]
Expand All @@ -518,29 +534,67 @@ def describe_collection(directory, depth=numpy.iinfo(numpy.int16).max,
for ext in extensions:
filepath = os.path.join(directory, f'{root}{ext}')
try:
this_desc = describe(filepath, **kwargs)
item_resource = describe(filepath, **kwargs)
if item_resource.spatial is not None:
collection_crs_set.add(item_resource.spatial.crs)
item_spatial_list.append(item_resource.spatial)

except ValueError:
# if file type isn't supported by geometamaker, e.g. pdf
# or if trying to describe a dir
this_desc = None
item_resource = None

if describe_files and this_desc:
this_desc.write(backup=backup)
if describe_files and item_resource:
item_resource.write(backup=backup)

if ext and os.path.exists(filepath + '.yml'):
metadata_yml = f'{root}{ext}' + '.yml'
else:
metadata_yml = ''

this_resource = models.CollectionItemSchema(
collection_item = models.CollectionItemSchema(
path=f'{root}{ext}',
description=this_desc.description if this_desc else '',
description=item_resource.description if item_resource else '',
metadata=metadata_yml
)
items.append(this_resource)
items.append(collection_item)

total_bytes, last_modified, uid = _get_collection_size_time_uid(directory)

spatial = None
if len(collection_crs_set) == 1:
collection_bbox = pygeoprocessing.merge_bounding_box_list(
[list(spatial.bounding_box) for spatial in item_spatial_list],
'union')
spatial = models.SpatialSchema(
bounding_box=models.BoundingBox(*collection_bbox),
crs=item_spatial_list[0].crs,
crs_units=item_spatial_list[0].crs_units)

if len(collection_crs_set) > 1:
wgs84_bbox_list = []
target_projection_wkt, crs_units = _epsg_to_wkt_units_string(4326)
try:
for spatial in item_spatial_list:
base_projection_wkt, crs_units = _epsg_to_wkt_units_string(
int(spatial.crs.split(':')[1]))
bbox = pygeoprocessing.transform_bounding_box(
bounding_box=list(spatial.bounding_box),
base_projection_wkt=base_projection_wkt,
target_projection_wkt=target_projection_wkt)
wgs84_bbox_list.append(bbox)
collection_bbox = pygeoprocessing.merge_bounding_box_list(
wgs84_bbox_list, 'union')
spatial = models.SpatialSchema(
bounding_box=models.BoundingBox(*collection_bbox),
crs='EPSG:4326',
crs_units=crs_units)
except (ValueError, RuntimeError) as error:
# transform_bounding_box can raise a ValueError
LOGGER.error(error)
LOGGER.warning(
f'Cannot define spatial attribute for Collection {directory}')

resource = models.CollectionResource(
path=directory,
type='collection',
Expand All @@ -549,7 +603,8 @@ def describe_collection(directory, depth=numpy.iinfo(numpy.int16).max,
bytes=total_bytes,
last_modified=last_modified,
items=items,
uid=uid
uid=uid,
spatial=spatial
)

# Check if there is existing metadata for the collection
Expand Down
35 changes: 33 additions & 2 deletions src/geometamaker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _deep_update_dict(self_dict, other_dict):
"""
for k, v in other_dict.items():
if k in self_dict:
if isinstance(v, collections.abc.Mapping):
if self_dict[k] is not None and isinstance(v, collections.abc.Mapping):
self_dict[k] = _deep_update_dict(self_dict[k], v)
else:
if v is not None and (v or isinstance(v, numbers.Number)):
Expand Down Expand Up @@ -56,10 +56,20 @@ class BoundingBox:
xmax: float
ymax: float

def __iter__(self):
    """Yield coordinates in (xmin, ymin, xmax, ymax) order."""
    yield self.xmin
    yield self.ymin
    yield self.xmax
    yield self.ymax

def to_list(self):
    """Return the bounding box as a list: [xmin, ymin, xmax, ymax]."""
    return [self.xmin, self.ymin, self.xmax, self.ymax]


class SpatialSchema(Parent):
"""Class for keeping track of spatial info."""

model_config = ConfigDict(frozen=True)

bounding_box: BoundingBox
"""Spatial extent [xmin, ymin, xmax, ymax]."""
crs: str
Expand Down Expand Up @@ -435,6 +445,8 @@ class BaseResource(BaseMetadata):
"""The title of the resource."""
url: str = ''
"""A URL where the resource is available."""
spatial: SpatialSchema | None = None
"""An object for describing spatial properties of the resource."""

@classmethod
def load(cls, filepath):
Expand Down Expand Up @@ -740,13 +752,21 @@ def set_field_description(self, name, title=None, description=None,
self.data_model.set_field_description(
name, title, description, units, type)

def set_spatial(self, spatial: SpatialSchema):
    """Set spatial properties of a resource.

    Args:
        spatial (SpatialSchema): object describing the spatial extent
            (bounding box) and coordinate reference system to assign
            to this resource.
    """
    self.spatial = spatial


class ArchiveResource(Resource):
"""Class for metadata for an archive resource."""

compression: str = ''
"""The compression method used to create the archive."""

def set_spatial(self, spatial: SpatialSchema):
    """Set spatial properties of a resource.

    Args:
        spatial (SpatialSchema): object describing the spatial extent
            (bounding box) and coordinate reference system to assign
            to this resource.
    """
    self.spatial = spatial


class CollectionItemSchema(Parent):
"""Class for metadata for collection items."""
Expand All @@ -759,7 +779,14 @@ class CollectionItemSchema(Parent):


class CollectionResource(BaseResource):
"""Class for metadata for a collection resource."""
"""Class for metadata for a collection resource.

In the spatial properties of a collection, the bounding box is the
union of the bounding boxes of all the items in the collection.
If all items share the same CRS, the collection's bounding box
will match that CRS. If items use more than one distinct CRS,
bounding boxes are transformed to WGS84 before unioning.
"""

items: list[CollectionItemSchema] = Field(default_factory=list)
"""Files in collection."""
Expand All @@ -773,6 +800,10 @@ def _default_metadata_path(self):
"""Add -metadata tag"""
return f'{self.path}-metadata.yml'

def set_spatial(self, spatial: SpatialSchema):
    """Set spatial properties of a resource.

    Args:
        spatial (SpatialSchema): object describing the spatial extent
            (bounding box) and coordinate reference system to assign
            to this resource.
    """
    self.spatial = spatial


class VectorResource(Resource):
"""Class for metadata for a vector resource."""
Expand Down
57 changes: 56 additions & 1 deletion tests/test_geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def test_describe_csv(self):
self.assertEqual(resource.get_field_description('Strings').type, 'string')
self.assertEqual(resource.get_field_description('Ints').type, 'integer')
self.assertEqual(resource.get_field_description('Reals').type, 'number')
self.assertIsNone(resource.spatial)

title = 'title'
description = 'some abstract'
Expand All @@ -160,11 +161,18 @@ def test_describe_csv(self):
field_names[1],
units=units)

spatial = geometamaker.models.SpatialSchema(
bounding_box=geometamaker.models.BoundingBox(0, 0, 2, 2),
crs='EPSG:4326',
crs_units='degree')
resource.set_spatial(spatial)

field = [field for field in resource.data_model.fields
if field.name == field_names[1]][0]
self.assertEqual(field.title, title)
self.assertEqual(field.description, description)
self.assertEqual(field.units, units)
self.assertEqual(resource.spatial, spatial)

def test_describe_bad_csv(self):
"""MetadataControl: CSV with extra item in row does not fail."""
Expand Down Expand Up @@ -870,6 +878,54 @@ def test_describe_collection_with_shapefile(self):
self.assertTrue(os.path.exists(os.path.join(
self.workspace_dir, f'{root_name}.csv.yml')))

def test_describe_collection_spatial_single_crs(self):
    """Test describe_collection spatial section is populated.

    When every item in the collection shares one CRS, the collection's
    spatial attribute should report that same CRS (no reprojection).
    """
    import geometamaker

    collection_path = os.path.join(self.workspace_dir, "collection")
    os.mkdir(collection_path)

    raster_path = os.path.join(collection_path, 'raster.tif')
    create_raster(numpy.int16, raster_path, projection_epsg=3857)

    resource = geometamaker.describe_collection(collection_path)
    # Single-CRS collections keep the item's CRS and units as-is.
    self.assertEqual(resource.spatial.crs, 'EPSG:3857')
    self.assertEqual(resource.spatial.crs_units, 'metre')
    self.assertEqual(resource.spatial.bounding_box.to_list(), [0, 0, 2, 2])

def test_describe_collection_spatial_multiple_crs(self):
    """Test describe_collection spatial section represents union.

    When items use different CRSes, bounding boxes are transformed to
    WGS84 (EPSG:4326) and the collection reports their union.
    """
    import geometamaker

    collection_path = os.path.join(self.workspace_dir, "collection")
    os.mkdir(collection_path)

    raster_path = os.path.join(collection_path, 'raster.tif')
    create_raster(numpy.int16, raster_path, projection_epsg=3857,
                  origin=(0, 0))
    raster2_path = os.path.join(collection_path, 'raster2.tif')
    create_raster(numpy.int16, raster2_path, projection_epsg=4326,
                  origin=(2, 2))

    resource = geometamaker.describe_collection(collection_path)
    # Mixed-CRS collections are reported in WGS84.
    self.assertEqual(resource.spatial.crs, 'EPSG:4326')
    self.assertEqual(resource.spatial.crs_units, 'degree')
    self.assertEqual(resource.spatial.bounding_box.to_list(), [0, 0, 4, 4])

def test_describe_collection_spatial_no_crs(self):
    """Test describe_collection spatial section is None.

    A collection containing only non-spatial items (here, a CSV)
    should have no spatial attribute.
    """
    import geometamaker

    collection_path = os.path.join(self.workspace_dir, "collection")
    os.mkdir(collection_path)

    csv_path = os.path.join(collection_path, 'table.csv')
    with open(csv_path, 'w') as file:
        file.write('a,b,c')

    resource = geometamaker.describe_collection(collection_path)
    self.assertIsNone(resource.spatial)

def test_describe_collection_with_depth(self):
"""Test describe_collection with depth and exclude_regex parameters"""
import geometamaker
Expand Down Expand Up @@ -959,7 +1015,6 @@ def test_describe_collection_preexisting_invalid_yml(self):
with open(csv_path, 'w') as file:
file.write('a,b,c')


# Describing a collection that already has an invalid yaml
# sidecar file should issue a warning.
with self.assertLogs('geometamaker', level='WARNING') as cm:
Expand Down