Skip to content

Commit 7c6bfaa

Browse files
authored
Merge pull request #180 from aodn/origin/python-311
Origin/python 311
2 parents ea2f11c + edcfd59 commit 7c6bfaa

8 files changed

Lines changed: 61 additions & 57 deletions

File tree

.github/workflows/test.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
runs-on: ubuntu-latest
1313
strategy:
1414
matrix:
15-
python-version: [ '3.8' ]
15+
python-version: [ '3.11' ]
1616
steps:
1717
- uses: actions/checkout@v2
1818
- name: Set up Python ${{ matrix.python-version }}
@@ -22,7 +22,6 @@ jobs:
2222
- name: Install dependencies
2323
run: |
2424
python -m pip install --upgrade pip
25-
pip install "numpy<1.19.0"
2625
pip install -r test_requirements.txt
2726
pip install pytest-cov
2827
- name: Test with pytest

aodntools/ncwriter/schema.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,57 @@
11
"""This module holds schema definitions for validating the various :py:class:`dicts` that make up parts of a
22
template, and also the helper functions necessary to validate an object against their respective schema.
33
"""
4-
54
import json
6-
75
import numpy as np
86
from jsonschema import validators, Draft4Validator, FormatChecker, ValidationError
97
from pkg_resources import resource_filename
108

9+
# helper function that will later be used to tell the schema validator how to validate objects of type "array"
10+
def is_array(checker, instance):
11+
return isinstance(instance, (list, np.ndarray))
1112

12-
# Create a new validator class (based on Draft4Validator) to allow templates to use
13-
# * Python types or numpy dtypes to specify variable data types; and
14-
# * numpy arrays to specify variable data.
15-
TemplateValidator = validators.create(meta_schema=Draft4Validator.META_SCHEMA,
16-
validators=Draft4Validator.VALIDATORS)
17-
format_checker = FormatChecker()
13+
# Extend the default type checker by redefining "array"
14+
# whenever a schema expects a value of type "array", it will now use the is_array function to check if the value is acceptable.
15+
custom_type_checker = Draft4Validator.TYPE_CHECKER.redefine("array", is_array)
1816

17+
# Create a custom validator that uses the new type checker.
18+
# any validation performed with CustomValidator will use the custom array checker
19+
CustomValidator = validators.extend(Draft4Validator, type_checker=custom_type_checker)
20+
format_checker = FormatChecker()
1921

22+
# Define a custom format checker
23+
# called when a JSON schema specifies that a value should have the format "datatype"
2024
@format_checker.checks('datatype')
2125
def is_python_datatype(value):
2226
"""Return whether the given value is a valid data type specification for a NetCDF variable"""
2327
if isinstance(value, np.dtype):
2428
return True
2529
if isinstance(value, type):
2630
return issubclass(value, np.number)
27-
2831
return False
2932

30-
31-
TYPES = {'array': (list, np.ndarray)}
32-
33+
# Load JSON schema file
3334
TEMPLATE_SCHEMA_JSON = resource_filename(__name__, 'template_schema.json')
3435
with open(TEMPLATE_SCHEMA_JSON) as f:
3536
TEMPLATE_SCHEMA = json.load(f)
36-
TemplateValidator.check_schema(TEMPLATE_SCHEMA)
3737

38-
template_validator = TemplateValidator(TEMPLATE_SCHEMA, types=TYPES, format_checker=format_checker)
38+
# Use the custom validator to check it is valid according to Draft 4 rules
39+
CustomValidator.check_schema(TEMPLATE_SCHEMA)
40+
41+
# ready-to-use validator that applies both custom type and format checks
42+
template_validator = CustomValidator(TEMPLATE_SCHEMA, format_checker=format_checker)
3943

4044

45+
# Validation checks
4146
def validate_template(t):
4247
template_validator.validate(t)
4348

44-
4549
def validate_dimensions(d):
4650
validate_template({'_dimensions': d})
4751

48-
4952
def validate_variables(v):
5053
validate_template({'_variables': v})
51-
52-
54+
5355
def validate_global_attributes(a):
5456
if hasattr(a, 'keys'):
5557
special = [k for k in a.keys() if k.startswith('_')]

aodntools/timeseries_products/common.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime, timezone
33

44
import numpy as np
5+
import xarray as xr
56

67
# Common date/time format strings
78
TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
@@ -179,7 +180,7 @@ def in_water_index(nc):
179180
"""
180181
time_deployment_start = np.datetime64(nc.attrs['time_deployment_start'][:-1])
181182
time_deployment_end = np.datetime64(nc.attrs['time_deployment_end'][:-1])
182-
TIME = nc['TIME'][:]
183+
TIME = nc['TIME'].values
183184
return (TIME >= time_deployment_start) & (TIME <= time_deployment_end)
184185

185186
def in_water(nc):
@@ -189,8 +190,11 @@ def in_water(nc):
189190
:param nc: xarray dataset
190191
:return: xarray dataset
191192
"""
192-
return nc.where(in_water_index(nc), drop=True)
193-
193+
condition = in_water_index(nc) # NumPy boolean array
194+
# Get the integer indices where condition is True.
195+
indices = np.nonzero(condition)[0]
196+
# Use positional indexing to select the TIME entries that satisfy the condition.
197+
return nc.isel(TIME=indices)
194198

195199
def current_utc_timestamp(format=TIMESTAMP_FORMAT):
196200
return datetime.now(timezone.utc).strftime(format)

aodntools/timeseries_products/hourly_timeseries.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,27 +30,27 @@ def check_files(file_list, site_code, parameter_names_accepted, input_dir=''):
3030
:param input_dir: base path where source files are stored
3131
:return: dictionary with the file name and list of failed tests, list good files chronologically ordered
3232
"""
33-
34-
file_list_dataframe = pd.DataFrame(columns=["url", "deployment_date"])
33+
rows = []
3534
error_dict = {}
3635

3736
for file in file_list:
3837
with xr.open_dataset(os.path.join(input_dir, file)) as nc:
3938
error_list = check_file(nc, site_code, parameter_names_accepted)
4039
if error_list:
41-
error_dict.update({file: error_list})
40+
error_dict[file] = error_list
4241
else:
43-
file_list_dataframe = file_list_dataframe.append({'url': file,
44-
'deployment_date': parse(nc.time_deployment_start)},
45-
ignore_index=True)
42+
rows.append({
43+
'url': file,
44+
'deployment_date': parse(nc.time_deployment_start)
45+
})
4646

47+
file_list_dataframe = pd.DataFrame(rows, columns=["url", "deployment_date"])
4748
file_list_dataframe = file_list_dataframe.sort_values(by='deployment_date')
48-
file_list = file_list_dataframe['url'].to_list()
49-
if file_list == []:
49+
sorted_files = file_list_dataframe['url'].to_list()
50+
if not sorted_files:
5051
raise NoInputFilesError("no valid input files to aggregate")
5152

52-
return file_list, error_dict
53-
53+
return sorted_files, error_dict
5454

5555

5656
def get_parameter_names(nc):
@@ -308,7 +308,7 @@ def PDresample_by_hour(df, function_dict, function_stats):
308308
df_data = pd.DataFrame(index=pd.DatetimeIndex([]))
309309
for variable in varnames:
310310
ds_var = df[variable]
311-
ds_var_resample = ds_var.resample('1H', base=0.5) # shift by half hour to centre bin on the hour
311+
ds_var_resample = ds_var.resample('1h', offset='30min') # shift by half hour to centre bin on the hour
312312
ds_var_mean = ds_var_resample.apply(function_dict[variable]).astype(np.float32)
313313
df_data = pd.concat([df_data, ds_var_mean], axis=1, sort=False)
314314
for stat_method in function_stats:
@@ -366,8 +366,6 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp
366366
variable_attribute_dictionary = json.load(json_file)['_variables']
367367

368368
df_data = pd.DataFrame()
369-
370-
371369
## create empty DF with dtypes
372370
metadata_df_types = [('source_file', str),
373371
('instrument_id', str),
@@ -380,6 +378,7 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp
380378
parameter_names_all = []
381379
applied_offset = []
382380
qc_count_all = {}
381+
metadata_rows = []
383382

384383
for file_index, file in enumerate(files_to_aggregate):
385384
print(file_index)
@@ -398,13 +397,16 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp
398397
qc_count = get_QCcount(nc_clean, qcflags)
399398
qc_count_all = update_QCcount(qc_count_all, qc_count)
400399
nc_clean = good_data_only(nc_clean, qcflags) # good quality data only
401-
df_metadata = df_metadata.append({'source_file': file,
402-
'instrument_id': utils.get_instrument_id(nc),
403-
'LONGITUDE': nc.LONGITUDE.squeeze().values,
404-
'LATITUDE': nc.LATITUDE.squeeze().values,
405-
'NOMINAL_DEPTH': get_nominal_depth(nc)},
406-
ignore_index=True)
407-
400+
401+
# Append a new row as a dictionary to the list.
402+
metadata_rows.append({
403+
'source_file': file,
404+
'instrument_id': utils.get_instrument_id(nc),
405+
'LONGITUDE': nc.LONGITUDE.squeeze().values,
406+
'LATITUDE': nc.LATITUDE.squeeze().values,
407+
'NOMINAL_DEPTH': get_nominal_depth(nc)
408+
})
409+
408410
# If TIME had out-of-range values before cleaning, nc_clean would now have a CFTimeIndex, which
409411
# breaks the resampling further down. Here we reset it to a DatetimeIndex as suggested here:
410412
# https://stackoverflow.com/questions/55786995/converting-cftime-datetimejulian-to-datetime/55787899#55787899
@@ -421,6 +423,7 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp
421423
df_temp['instrument_index'] = np.repeat(file_index, len(df_temp)).astype(np.int32)
422424
df_data = pd.concat([df_data, df_temp.reset_index()], ignore_index=True, sort=False)
423425

426+
df_metadata = pd.DataFrame(metadata_rows, columns=['source_file', 'instrument_id', 'LONGITUDE', 'LATITUDE', 'NOMINAL_DEPTH'])
424427
df_metadata.index.rename('INSTRUMENT', inplace=True)
425428
df_data.index.rename('OBSERVATION', inplace=True)
426429
## rename index to TIME

aodntools/timeseries_products/velocity_hourly_timeseries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def append_resampled_values(nc_cell, ds, slice_start, binning_functions):
5858
# shift the index forward 30min to centre the bins on the hour
5959
df_cell.index = df_cell.index + pd.Timedelta(minutes=30)
6060

61-
df_cell_1H = df_cell.resample('1H')
61+
df_cell_1H = df_cell.resample('1h')
6262
slice_end = len(df_cell_1H) + slice_start
6363

6464
# set binned timestamps

constraints.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +0,0 @@
1-
cftime<1.1.1;python_version=='3.5'
2-
netCDF4<1.5.4;python_version=='3.5'
3-
pandas<0.25.0;python_version=='3.5'
4-
xarray<0.14.0;python_version=='3.5'

examples/rottnest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@
4646
var_type = var['_datatype']
4747
for attr in ('valid_min', 'valid_max'):
4848
if attr in var:
49-
var[attr] = np.cast[var_type](var[attr])
49+
var[attr] = np.array(var[attr], dtype=var_type)
50+
5051

5152
# update range attributes
5253
template.add_extent_attributes()

setup.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from setuptools import setup, find_packages
22

33
INSTALL_REQUIRES = [
4-
'jsonschema>=2.6.0,<3.0.0',
5-
'numpy>=1.13.0',
6-
'netCDF4>=1.5.3',
7-
'pandas>=0.24.2',
8-
'xarray>=0.11.3'
4+
'jsonschema>=4.23.0',
5+
'numpy>=2.2.4',
6+
'netCDF4>=1.7.2',
7+
'pandas>=2.2.3',
8+
'xarray>=2023.1.0'
99
]
1010

1111
TESTS_REQUIRE = [
@@ -37,7 +37,7 @@
3737
author_email='projectofficers@emii.org.au',
3838
description='AODN data tools library',
3939
zip_safe=False,
40-
python_requires='>=3.5',
40+
python_requires='>=3.11, <3.12',
4141
install_requires=INSTALL_REQUIRES,
4242
tests_require=TESTS_REQUIRE,
4343
extras_require=EXTRAS_REQUIRE,
@@ -49,8 +49,7 @@
4949
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
5050
'Programming Language :: Python',
5151
'Programming Language :: Python :: 3',
52-
'Programming Language :: Python :: 3.5',
53-
'Programming Language :: Python :: 3.6',
52+
'Programming Language :: Python :: 3.11',
5453
'Programming Language :: Python :: Implementation :: CPython',
5554
]
5655
)

Comments: 0 commit comments.