diff --git a/README.md b/README.md index 4db554f..66ddc16 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ -# NetCDF Metadata +# Dataset Profile ## Overview -A utility to describe the structure of NetCDF4 datasets. - -Reads a NetCDF4 file and reports the group structure and information -about any dimensions, variables, and attributes that are defined. +A utility to describe the structure of datasets in netCDF, GeoTIFF, +and ESRI Shapefile formats. ## Installation @@ -19,27 +17,45 @@ The optional test suite may be installed and run with: ```bash $ python -m pip install .[test] -$ pytest --cov=ncmetadata tests +$ pytest --cov=dsprofile tests ``` ## Usage ```bash -usage: ncmetadata [-h] [-o {category,group}] [-e ,,...] [-m] [-d] filename +usage: dsprofile [-h] {netcdf,geotiff,shape} ... + +Describes datasets in a variety of formats + +options: + -h, --help show this help message and exit + +Dataset formats: + {netcdf,geotiff,shape} + netcdf Extracts metadata from netCDF4 files + geotiff Extracts metadata from GeoTIFF files + shape Extracts metadata from ESRI Shape files +``` + +## NetCDF Options + +Reads a netCDF4 file and reports the group structure and information +about any dimensions, variables, and attributes that are defined. -Extracts metadata from netCDF4 files +```bash +usage: dsprofile netcdf [-h] [-o {category,group}] [-e ,,...] [-m] [-d] filename positional arguments: filename options: - -h, --help show this help message and exit - -o {category,group} --order-by {category,group} - (default group) + -h, --help show this help message and exit + -o {category,group}, --order-by {category,group} + (default group) -e ,,..., --exclude-groups ,,... 
- Exclude each of the named arguments - -m, --omit-metadata Output only netCDF file contents, not file metadata - -d, --omit-digest Do not include a hash digest in file metadata + Exclude each of the named arguments + -m, --omit-metadata Output only netCDF file contents, not file metadata + -d, --omit-digest Do not include a hash digest in file metadata ``` The `--order-by` option allows the resulting output to be arranged in one of two ways: @@ -53,11 +69,52 @@ The `--omit-digest` option prevents calculation of a SHA256 hash for the process This may be desirable for very large files or test workflows to avoid the potentially time-consuming hashing operation. -## Example +### NetCDF Example For example, to report on the contents of the netCDF4 file `test.nc` using the default output options... ```bash -$ ncmetadata test.nc +$ dsprofile netcdf test.nc +``` + +## GeoTIFF Options + +```bash +usage: dsprofile geotiff [-h] [-m] [-d] filename + +positional arguments: + filename + +options: + -h, --help show this help message and exit + -m, --omit-metadata Output only GeoTIFF file contents, not file metadata + -d, --omit-digest Do not include a hash digest in file metadata +``` + +## ESRI Shapefile Options + +```bash +usage: dsprofile shape [-h] [-m] [-d] filename + +positional arguments: + filename + +options: + -h, --help show this help message and exit + -m, --omit-metadata Output only Shape file contents, not file metadata + -d, --omit-digest Do not include a hash digest in file metadata +``` + +A Shapefile may be read by opening any of its components, for example... + +```bash +$ dsprofile shape shapefile.shp +``` +...is equivalent to... + +```bash +$ dsprofile shape shapefile.dbf ``` +Note, however, that where a hex digest of a hash is included in the output, +this will refer only to the file provided as a command-line argument. 
diff --git a/ncmetadata/__init__.py b/dsprofile/__init__.py similarity index 100% rename from ncmetadata/__init__.py rename to dsprofile/__init__.py diff --git a/dsprofile/lib/__init__.py b/dsprofile/lib/__init__.py new file mode 100644 index 0000000..f3b362c --- /dev/null +++ b/dsprofile/lib/__init__.py @@ -0,0 +1,9 @@ +from .reader import ( + Reader, + reader_type_map, + make_reader +) + +from .netcdf import NetCDFReader +from .tiff import GeoTIFFReader +from .shape import ShapefileReader diff --git a/dsprofile/lib/netcdf.py b/dsprofile/lib/netcdf.py new file mode 100644 index 0000000..7a0d988 --- /dev/null +++ b/dsprofile/lib/netcdf.py @@ -0,0 +1,194 @@ +#from dsprofile.util import ( +# read_dataset +#) +import pathlib +import sys +import weakref + +from collections.abc import Sequence + +from dsprofile.lib import Reader + +import netCDF4 as nc + + +exclude_groups = [] + + +def walk_groups_breadth_first(ds): + yield ds.groups.values() + for group in ds.groups.values(): + yield from walk_groups_breadth_first(group) + + +def walk_groups_depth_first(ds): + for group in ds.groups.values(): + yield from walk_groups_depth_first(group) + yield ds.groups.values() + + +def walk_groups_ordered(ds): + for group in ds.groups.values(): + if group.path in exclude_groups: + continue + yield from walk_groups_ordered(group) + yield ds + + +walk_func_map = { + "breadth": walk_groups_breadth_first, + "depth": walk_groups_depth_first, + "ordered": walk_groups_ordered +} + + +def walk_groups(ds, order="ordered"): + return walk_func_map[order](ds) + + +class NetCDFReader(Reader): + + format = "netcdf" + + def __init__(self, filename, order_by="group", exclude=None): + self.ds = self.__class__.read_dataset(filename) + self._finalizer = weakref.finalize(self, self.finalize_close, self.ds) + self.order_by = order_by + # Note that the order is significant here + # a str is a Sequence type + if not exclude: + self.exclude_groups = [] + elif isinstance(exclude, str): + self.exclude_groups 
= [exclude] + elif issubclass(type(exclude), Sequence): + self.exclude_groups = exclude + # TODO: Questionable... + global exclude_groups + exclude_groups = self.exclude_groups + + @staticmethod + def read_dataset(filename): + """ + Handle OSError, PermissionError, FileNotFoundError neatly + Inform neatly for non-netCDF4 files + Allow all other exceptions to raise unhandled + """ + + try: + ds = nc.Dataset(filename, 'r') + except (OSError, PermissionError, FileNotFoundError) as e: + print(f"{e.strerror} for file '{filename}'", file=sys.stderr) + sys.exit(1) + + if ds.data_model != "NETCDF4": + print(f"File '{filename}' has format '{ds.data_model}', " + f"not 'NETCDF4' as required", file=sys.stderr) + ds.close() + sys.exit(1) + + return ds + + @staticmethod + def finalize_close(ncdf): + if isinstance(ncdf, nc.Dataset) and ncdf.isopen(): + ncdf.close() + + def close(self): + if self._finalizer.alive: + self._finalizer() + + def gather_by_group(self): + """ + A categorisation of dimensions, variables, and + attributes defined in the Dataset argument, + ordered by the group to which they belong. + """ + dims = self.describe_dimensions() + ncvars = self.describe_variables() + attrs = self.describe_attributes() + by_group = {} + for group in walk_groups(self.ds): + by_group[group.path] = { + "dimensions": dims[group.path], + "variables": ncvars[group.path], + "attributes": attrs[group.path] + } + + return by_group + + def gather_by_type(self): + """ + A categorisation of dimensions, variables, and + attributes defined in the Dataset argument, + ordered by type. 
+ """ + return { + "dimensions": self.describe_dimensions(), + "variables": self.describe_variables(), + "attributes": self.describe_attributes() + } + + process_func_map = { + "category": gather_by_type, + "group": gather_by_group + } + + def describe_dimensions(self): + dimensions = {} + + for group in walk_groups(self.ds): + dimensions[group.path] = {d.name: {"size": d.size} for d in group.dimensions.values()} + + return dimensions + + + def describe_variables(self): + variables = {} + for group in walk_groups(self.ds): + variables[group.path] = {v.name: {"dtype": v.dtype.name, + "dimensions": v.dimensions, + "fill_value": str(v.get_fill_value())} + for v in group.variables.values()} + return variables + + def describe_attributes(self): + attrs = {} + for group in walk_groups(self.ds): + attrs[group.path] = {"group": [a for a in group.ncattrs()], + "vars": {v.name: [a for a in v.ncattrs()] for v in group.variables.values()} + } + return attrs + + def process(self): + return self.process_func_map[self.order_by](self) + + @classmethod + def build_subparser(cls, sp): + parser = sp.add_parser(cls.format, + help="Extracts metadata from netCDF4 files") + parser.add_argument("filename", type=pathlib.Path) + parser.add_argument("-o", "--order-by", choices=["category", "group"], + default="group", help="(default group)") + parser.add_argument("-e", "--exclude-groups", metavar=",,...", + help="Exclude each of the named arguments") + parser.add_argument("-m", "--omit-metadata", action="store_true", + help="Output only netCDF file contents, not file metadata") + parser.add_argument("-d", "--omit-digest", action="store_true", + help="Do not include a hash digest in file metadata") + return parser + + @classmethod + def handle_args(cls, args): + if args.filename.is_dir(): + print(f"A valid file is required not directory '{args.filename}'", + file=sys.stderr) + sys.exit(1) + + exclude = [] + if hasattr(args, "exclude_groups"): + exclude = args.exclude_groups.split(',') if 
args.exclude_groups else [] + + ctor_args = [args.filename, args.order_by, exclude] + ctor_kwargs = {} + + return ctor_args, ctor_kwargs diff --git a/dsprofile/lib/reader.py b/dsprofile/lib/reader.py new file mode 100644 index 0000000..1942c15 --- /dev/null +++ b/dsprofile/lib/reader.py @@ -0,0 +1,40 @@ +from abc import ( + ABC, + abstractmethod +) + + +reader_type_map = {} + + +class Reader(ABC): + """ + An abstract base for all Reader types. + """ + + subclass_type_key = "format" + + def __init_subclass__(cls, /, **kwargs): + super().__init_subclass__(**kwargs) + keyattr = __class__.subclass_type_key + reader_type = getattr(cls, keyattr, None) + if not reader_type or not isinstance(reader_type, str): + raise NotImplementedError(f"Reader subclass {cls.__qualname__} " + f"does not define a {reader_type} key") + reader_type_map[reader_type] = cls + + + @abstractmethod + def process(self): + pass + + @classmethod + @abstractmethod + def handle_args(cls, args): + pass + + +def make_reader(args): + cls = reader_type_map[args.command] + ctor_args, ctor_kwargs = cls.handle_args(args) + return cls(*ctor_args, **ctor_kwargs) diff --git a/dsprofile/lib/shape.py b/dsprofile/lib/shape.py new file mode 100644 index 0000000..af7a665 --- /dev/null +++ b/dsprofile/lib/shape.py @@ -0,0 +1,91 @@ +import pathlib +import sys +import weakref + +import fiona +from fiona import Collection + +from dsprofile.lib import Reader + + +class ShapefileReader(Reader): + + format = "shape" + + def __init__(self, filename): + super().__init__() + self.shp = self.__class__.read_dataset(filename) + self._finalizer = weakref.finalize(self, self.finalize_close, self.shp) + + @staticmethod + def finalize_close(shpinst): + if not isinstance(shpinst, Collection): + return + + if not shpinst.closed: + shpinst.close() + + def close(self): + if self._finalizer.alive: + self._finalizer() + + @classmethod + def build_subparser(cls, sp): + parser = sp.add_parser(cls.format, + help="Extracts metadata from ESRI 
Shape files") + parser.add_argument("filename", type=pathlib.Path) + parser.add_argument("-m", "--omit-metadata", action="store_true", + help="Output only Shape file contents, not file metadata") + parser.add_argument("-d", "--omit-digest", action="store_true", + help="Do not include a hash digest in file metadata") + return parser + + @classmethod + def handle_args(cls, args): + if args.filename.is_dir(): + print(f"A valid file is required not directory '{args.filename}'", + file=sys.stderr) + sys.exit(1) + + ctor_args = [args.filename] + ctor_kwargs = {} + + return ctor_args, ctor_kwargs + + def process(self): + output = { + "bounds": self.shp.bounds, + "features": [] + } + + units, factor = self.shp.crs.units_factor + output["units"] = units + output["factor"] = factor + + auth = self.shp.crs.to_authority() + if auth is not None: + if len(auth) == 2: + registry, code = auth + output["crs"] = f"{registry}:{code}" + elif len(auth) == 1: + output["crs"] = str(auth) + + for feat in self.shp: + fdata = { + "type": feat.geometry.type, + "coordinates": len(feat.geometry.coordinates[0]), + "properties": {k: v for k, v in feat.properties.items()} + } + output["features"].append(fdata) + + return output + + @staticmethod + def read_dataset(filename): + try: + shp = fiona.open(filename) + except fiona.errors.DriverError as e: + print(f"Unable to read '{filename}': {e}", file=sys.stderr) + sys.exit(1) + + return shp diff --git a/dsprofile/lib/tiff.py b/dsprofile/lib/tiff.py new file mode 100644 index 0000000..e9d5083 --- /dev/null +++ b/dsprofile/lib/tiff.py @@ -0,0 +1,78 @@ +import pathlib +import sys +import weakref + +from dsprofile.lib import Reader + +import rasterio as rio + + +class GeoTIFFReader(Reader): + + format = "geotiff" + + def __init__(self, filename): + super().__init__() + self.tif = rio.open(filename, 'r') + self._finalizer = weakref.finalize(self, self.finalize_close, self.tif) + + @staticmethod + def finalize_close(rioinst): + if not isinstance(rioinst, 
rio.io.DatasetReader): + return + + if not rioinst.closed: + rioinst.close() + + def close(self): + if self._finalizer.alive: + self._finalizer() + + @classmethod + def build_subparser(cls, sp): + parser = sp.add_parser(cls.format, + help="Extracts metadata from GeoTIFF files") + parser.add_argument("filename", type=pathlib.Path) + parser.add_argument("-m", "--omit-metadata", action="store_true", + help="Output only GeoTIFF file contents, not file metadata") + parser.add_argument("-d", "--omit-digest", action="store_true", + help="Do not include a hash digest in file metadata") + return parser + + @classmethod + def handle_args(cls, args): + if args.filename.is_dir(): + print(f"A valid file is required not directory '{args.filename}'", + file=sys.stderr) + sys.exit(1) + + ctor_args = [args.filename] + ctor_kwargs = {} + + return ctor_args, ctor_kwargs + + def process(self): + output = { + "shape": { + "width": self.tif.width, + "height": self.tif.height + }, + "bands": {idx: dtype for idx, dtype in zip(self.tif.indexes, self.tif.dtypes)}, + "bounds": { + "left": self.tif.bounds.left, + "bottom": self.tif.bounds.bottom, + "right": self.tif.bounds.right, + "top": self.tif.bounds.top + }, + "units": self.tif.crs.linear_units, + "lin_step": self.tif.res + } + auth = self.tif.crs.to_authority() + if auth is not None: + if len(auth) == 2: + registry, code = auth + output["crs"] = f"{registry}:{code}" + elif len(auth) == 1: + output["crs"] = str(auth) + + return output diff --git a/dsprofile/main.py b/dsprofile/main.py new file mode 100644 index 0000000..98a9898 --- /dev/null +++ b/dsprofile/main.py @@ -0,0 +1,53 @@ +import argparse +import json +import sys + + +from dsprofile.lib import ( + reader_type_map, + make_reader +) + +from dsprofile.util import make_file_profile + + +def parse_args(argv): + parser = argparse.ArgumentParser( + prog="dsprofile", + description="Describes datasets in a variety of formats", + epilog="TODO: attribution/repo/docs" + ) + + sp = 
parser.add_subparsers(title="Dataset formats", + dest="command") + for cls in reader_type_map.values(): + cls.build_subparser(sp) + + if len(argv) == 1: + parser.print_help() + parser.exit(0) + + args = parser.parse_args() + return args + + +def handle_args(args): + output = {} + if hasattr(args, "omit_metadata") and not args.omit_metadata: + output["metadata"] = make_file_profile(args) + + inst = make_reader(args) + #output["content"] = process_file(args.filename, args.order_by, exclude) + output["content"] = inst.process() + print(json.dumps(output, indent=2)) + + +def main(): + args = parse_args(sys.argv) + handle_args(args) + #reader_type_map[args.command].handle_args(args) + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/ncmetadata/util.py b/dsprofile/util.py similarity index 100% rename from ncmetadata/util.py rename to dsprofile/util.py diff --git a/ncmetadata/main.py b/ncmetadata/main.py deleted file mode 100644 index 7bc8fef..0000000 --- a/ncmetadata/main.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import json -import pathlib -import sys - -from ncmetadata.reader import process_file -from ncmetadata.util import make_file_profile - - -def parse_args(argv): - parser = argparse.ArgumentParser( - prog="ncmetadata", - description="Extracts metadata from netCDF4 files", - epilog="TODO: attribution/repo/docs" - ) - - parser.add_argument("filename", type=pathlib.Path) - parser.add_argument("-o", "--order-by", choices=["category", "group"], - default="group", help="(default group)") - parser.add_argument("-e", "--exclude-groups", metavar=",,...", - help="Exclude each of the named arguments") - parser.add_argument("-m", "--omit-metadata", action="store_true", - help="Output only netCDF file contents, not file metadata") - parser.add_argument("-d", "--omit-digest", action="store_true", - help="Do not include a hash digest in file metadata") - - return parser.parse_args() - - -def handle_args(args): - if args.filename.is_dir(): - print(f"A 
valid file is required not directory '{args.filename}'", - file=sys.stderr) - sys.exit(1) - - output = {} - if not args.omit_metadata: - output["metadata"] = make_file_profile(args) - - exclude = args.exclude_groups.split(',') if args.exclude_groups else [] - output["content"] = process_file(args.filename, args.order_by, exclude) - - print(json.dumps(output, indent=2)) - - -def main(): - args = parse_args(sys.argv) - handle_args(args) - sys.exit(0) - - -if __name__ == "__main__": - main() diff --git a/ncmetadata/reader.py b/ncmetadata/reader.py deleted file mode 100644 index 1908f91..0000000 --- a/ncmetadata/reader.py +++ /dev/null @@ -1,110 +0,0 @@ -from ncmetadata.util import ( - read_dataset -) - -exclude_groups = [] - - - -def walk_groups_breadth_first(ds): - yield ds.groups.values() - for group in ds.groups.values(): - yield from walk_groups_breadth_first(group) - - -def walk_groups_depth_first(ds): - for group in ds.groups.values(): - yield from walk_groups_depth_first(group) - yield ds.groups.values() - - -def walk_groups_ordered(ds): - for group in ds.groups.values(): - if group.path in exclude_groups: - continue - yield from walk_groups_ordered(group) - yield ds - - -walk_func_map = { - "breadth": walk_groups_breadth_first, - "depth": walk_groups_depth_first, - "ordered": walk_groups_ordered -} - - -def walk_groups(ds, order="ordered"): - return walk_func_map[order](ds) - - -def describe_dimensions(ds): - dimensions = {} - - for group in walk_groups(ds): - dimensions[group.path] = {d.name: {"size": d.size} for d in group.dimensions.values()} - - return dimensions - - -def describe_variables(ds): - variables = {} - for group in walk_groups(ds): - variables[group.path] = {v.name: {"dtype": v.dtype.name, - "dimensions": v.dimensions, - "fill_value": str(v.get_fill_value())} - for v in group.variables.values()} - return variables - - -def describe_attributes(ds): - attrs = {} - for group in walk_groups(ds): - attrs[group.path] = {"group": [a for a in 
group.ncattrs()], - "vars": {v.name: [a for a in v.ncattrs()] for v in group.variables.values()} - } - - return attrs - - -def gather_by_group(ds): - """ - A categorisation of dimensions, variables, and - attributes defined in the Dataset argument, - ordered by the group to which they belong. - """ - dims = describe_dimensions(ds) - ncvars = describe_variables(ds) - attrs = describe_attributes(ds) - by_group = {} - for group in walk_groups(ds): - by_group[group.path] = { - "dimensions": dims[group.path], - "variables": ncvars[group.path], - "attributes": attrs[group.path] - } - - return by_group - - -def gather_by_type(ds): - """ - A categorisation of dimensions, variables, and - attributes defined in the Dataset argument, - ordered by type. - """ - return { - "dimensions": describe_dimensions(ds), - "variables": describe_variables(ds), - "attributes": describe_attributes(ds) - } - -process_func_map = { - "category": gather_by_type, - "group": gather_by_group -} - -def process_file(filename, order_by, exclude): - global exclude_groups - exclude_groups = exclude - ds = read_dataset(filename) - return process_func_map[order_by](ds) diff --git a/setup.py b/setup.py index a47aa0f..21e4bd8 100644 --- a/setup.py +++ b/setup.py @@ -17,17 +17,21 @@ setup( - name = "ncmetadata", + name = "dsprofile", version = "0.1.0", - packages = find_packages(include=["ncmetadata", "ncmetadata.*"]), + packages = find_packages(include=["dsprofile", "dsprofile.*"]), install_requires = [ - "netCDF4" + "setuptools==68.1.2", # earthpy uses pkg_resources + "netCDF4", + "earthpy", # Includes GeoPandas, rasterio + "fiona", # ESRI Shapefile support + "pyproj" # CRS parsing ], extras_require = { "dev": dev_requires, "test": test_requires }, entry_points = { - "console_scripts": ["ncmetadata=ncmetadata.main:main"] + "console_scripts": ["dsprofile=dsprofile.main:main"] } ) diff --git a/tests/data/GeogToWGS84GeoKey5.tif b/tests/data/GeogToWGS84GeoKey5.tif new file mode 100755 index 0000000..e878c60 
Binary files /dev/null and b/tests/data/GeogToWGS84GeoKey5.tif differ diff --git a/tests/data/SJER_crop2.dbf b/tests/data/SJER_crop2.dbf new file mode 100755 index 0000000..4e7c869 Binary files /dev/null and b/tests/data/SJER_crop2.dbf differ diff --git a/tests/data/SJER_crop2.prj b/tests/data/SJER_crop2.prj new file mode 100755 index 0000000..02487dd --- /dev/null +++ b/tests/data/SJER_crop2.prj @@ -0,0 +1 @@ +PROJCS["WGS_1984_UTM_Zone_11N",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-117],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["Meter",1]] \ No newline at end of file diff --git a/tests/data/SJER_crop2.qpj b/tests/data/SJER_crop2.qpj new file mode 100755 index 0000000..e1e8714 --- /dev/null +++ b/tests/data/SJER_crop2.qpj @@ -0,0 +1 @@ +PROJCS["WGS 84 / UTM zone 11N",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-117],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32611"]] diff --git a/tests/data/SJER_crop2.shp b/tests/data/SJER_crop2.shp new file mode 100755 index 0000000..7b2952d Binary files /dev/null and b/tests/data/SJER_crop2.shp differ diff --git a/tests/data/SJER_crop2.shx b/tests/data/SJER_crop2.shx new file mode 100755 index 0000000..8ae2d0a Binary files /dev/null and b/tests/data/SJER_crop2.shx differ diff --git a/tests/test_geotiff.py 
b/tests/test_geotiff.py new file mode 100644 index 0000000..0024bfc --- /dev/null +++ b/tests/test_geotiff.py @@ -0,0 +1,63 @@ +import os + +import pytest +import rasterio as rio + +from dsprofile.lib.tiff import GeoTIFFReader + + +TEST_DATA_PATH = os.getenv("DSPROFILE_TEST_DATA_PATH", "tests/data") + + +@pytest.fixture +def geotiff_test_file(): + return { + "path": os.path.join(TEST_DATA_PATH, "GeogToWGS84GeoKey5.tif"), + "meta": { + "shape": { + "width": 101, + "height": 101 + }, + "bands": {1: "uint8"}, + "bounds": { + "left": 8.999654601821101, + "bottom": 51.9999732301211, + "right": 9.0024601573789, + "top": 52.0027787856789 + } + } + } + + +class TestGeoTIFF: + def test_reader_instance(self, geotiff_test_file): + """ + Can an instance of the GeoTIFFReader be created + with the expected default attributes? + """ + r = GeoTIFFReader(geotiff_test_file["path"]) + assert r.format == GeoTIFFReader.format + + def test_dataset_fileops(self, geotiff_test_file): + """ + Can a GeoTIFF file be opened correctly, and + does the finalizer correctly close the file + when manually invoked? + """ + r = GeoTIFFReader(geotiff_test_file["path"]) + assert isinstance(r.tif, rio.io.DatasetReader) + assert r._finalizer.alive + tref = r.tif + assert not tref.closed # Our tiff file is open... + r.close() # ...the finalizer is invoked... + assert tref.closed # ...so the file must be closed + + def test_read_dataset(self, geotiff_test_file): + """ + Are the expected metadata values correctly + retrieved from the test GeoTIFF file? 
+ """ + r = GeoTIFFReader(geotiff_test_file["path"]) + data = r.process() + for k, v in geotiff_test_file["meta"].items(): + assert data[k] == v diff --git a/tests/test_groups.py b/tests/test_netcdf.py similarity index 60% rename from tests/test_groups.py rename to tests/test_netcdf.py index 1b6b249..32b4266 100644 --- a/tests/test_groups.py +++ b/tests/test_netcdf.py @@ -2,14 +2,14 @@ import pytest -from ncmetadata.reader import ( - exclude_groups, - read_dataset, +from dsprofile.lib.netcdf import ( + NetCDFReader, walk_groups ) -TEST_DATA_PATH = os.getenv("NCDF_TEST_DATA_PATH", "tests/data") +TEST_DATA_PATH = os.getenv("DSPROFILE_TEST_DATA_PATH", "tests/data") + @pytest.fixture def synthetic_test_file(): @@ -28,20 +28,32 @@ def synthetic_test_file(): } -class TestGroups: +class TestNetCDF: + def test_reader_instance(self, synthetic_test_file): + """ + Can an instance of the NetCDFReader be created + and does it have the correct defaults? + """ + r = NetCDFReader(synthetic_test_file["path"]) + assert r.format == NetCDFReader.format + assert r.order_by == "group" + assert r.exclude_groups == [] + def test_read_dataset(self, synthetic_test_file): """ Can a netCDF4 file be opened correctly? """ - ds = read_dataset(synthetic_test_file["path"]) + ds = NetCDFReader.read_dataset(synthetic_test_file["path"]) + assert ds.data_model == "NETCDF4" def test_walk_groups(self, synthetic_test_file): """ Are groups correctly identified and appear in the expected order? """ - ds = read_dataset(synthetic_test_file["path"]) - groupnames = [group.path for group in walk_groups(ds)] + r = NetCDFReader(synthetic_test_file["path"]) + r.process() + groupnames = [group.path for group in walk_groups(r.ds)] for idx in range(len(groupnames)): assert groupnames[idx] == synthetic_test_file["groups"][idx] @@ -50,10 +62,9 @@ def test_exclude_groups(self, synthetic_test_file): Are group paths excluded from the search correctly omitted? 
""" - ds = read_dataset(synthetic_test_file["path"]) exclusion = "/top01/nest_b" - exclude_groups.append(exclusion) - groupnames = [group.path for group in walk_groups(ds)] + r = NetCDFReader(synthetic_test_file["path"], exclude=exclusion) + groupnames = [group.path for group in walk_groups(r.ds)] filtered_groups = [group for group in synthetic_test_file["groups"] if not group.startswith(exclusion)] for idx in range(len(groupnames)): assert groupnames[idx] == filtered_groups[idx] diff --git a/tests/test_setup.py b/tests/test_setup.py index 57298f6..5dccfba 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -1,11 +1,10 @@ -import os from packaging import version -import pytest - min_version_map = { - "netCDF4": version.parse("1.7.0") + "netCDF4": version.parse("1.7.0"), + "rasterio": version.parse("1.5.0"), + "fiona": version.parse("0.9.0") } @@ -18,4 +17,3 @@ def test_lib_versions(self): for libname, semver in min_version_map.items(): lib = importlib.import_module(libname) assert version.parse(lib.__version__) >= semver - diff --git a/tests/test_shapefile.py b/tests/test_shapefile.py new file mode 100644 index 0000000..95742a1 --- /dev/null +++ b/tests/test_shapefile.py @@ -0,0 +1,66 @@ +import os + +import pytest +from fiona import Collection +import numpy as np + +from dsprofile.lib import ShapefileReader + + +TEST_DATA_PATH = os.getenv("DSPROFILE_TEST_DATA_PATH", "tests/data") + + +@pytest.fixture +def shape_test_file(): + return { + "path": os.path.join(TEST_DATA_PATH, "SJER_crop2.shp"), + "meta": { + "bounds": [ + 255209.5107915717, + 4108471.237186788, + 257532.73265945335, + 4110975.960763098 + ], + "features": [ + { + "type": "Polygon", + "coordinates": 7, + "properties": { + "id": 1 + } + } + ], + "units": "metre", + "factor": 1.0, + "crs": "EPSG:32611" + } + } + + +class TestShapefile: + def test_instance(self, shape_test_file): + """ + Can an instance of the ShapefileReader be created + with the expected default attributes? 
+ """ + s = ShapefileReader(shape_test_file["path"]) + assert s.format == ShapefileReader.format + assert s.shp.driver.lower() == "esri shapefile" + + def test_read_dataset(self, shape_test_file): + """ + Are the expected metadata values correctly + retrieved from the test Shape file? + """ + r = ShapefileReader(shape_test_file["path"]) + assert isinstance(r.shp, Collection) + data = r.process() + + for key in shape_test_file["meta"]: + assert key in data + + assert np.allclose(data["bounds"], shape_test_file["meta"]["bounds"]) + assert len(data["features"]) == len(shape_test_file["meta"]["features"]) + direct_attrs = ("units", "factor", "crs") + for attr in direct_attrs: + assert data[attr] == shape_test_file["meta"][attr]