From cb6a1622202c435784c23fb0a1be36f8bfec16c4 Mon Sep 17 00:00:00 2001 From: jucordero Date: Tue, 19 May 2026 20:00:32 +0100 Subject: [PATCH 1/7] Edited contributing guidelines, multiconstructor for yaml loader --- CONTRIBUTING.md | 4 ++-- agrifoodpy/pipeline/pipeline.py | 18 ++++++++++++++++++ docs/config_file.rst | 2 +- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5f31a3f..aebc387 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -54,8 +54,8 @@ for each new feature simplifies the development, review and merge processes by maintining logical separation. To create a feature branch: ```bash - git fetch agrifoodpy - git checkout -b agrifoodpy/main + git fetch afp + git checkout -b afp/main ``` ### Hack away! diff --git a/agrifoodpy/pipeline/pipeline.py b/agrifoodpy/pipeline/pipeline.py index 06e98b0..c664e53 100644 --- a/agrifoodpy/pipeline/pipeline.py +++ b/agrifoodpy/pipeline/pipeline.py @@ -10,6 +10,7 @@ import time import yaml import importlib +import builtins from ..utils.dict_utils import get_dict, set_dict class Pipeline(): @@ -47,6 +48,23 @@ def read(cls, filename): The pipeline object. """ + def dynamic_call_constructor(loader, suffix, node): + """Multi-constructor for arbitrary functions""" + func = cls._load_function(suffix) + + if isinstance(node, yaml.ScalarNode): + return func + elif isinstance(node, yaml.SequenceNode): + args = loader.construct_sequence(node) + return func(*args) + elif isinstance(node, yaml.MappingNode): + kwargs = loader.construct_mapping(node) + return func(**kwargs) + + # Register the multi-constructor for all tags starting with '!' + yaml.add_multi_constructor("!", dynamic_call_constructor, + Loader=yaml.FullLoader) + with open(filename, "r") as f: config = yaml.load(f, Loader=yaml.FullLoader) diff --git a/docs/config_file.rst b/docs/config_file.rst index 63eac7b..7e09384 100644 --- a/docs/config_file.rst +++ b/docs/config_file.rst @@ -4,7 +4,7 @@ Command line tool ================= The ``agrifoodpy`` command line tool allows you to run a pipeline of functions -defined in a configuration file. This is useful for automating workflows and +defined in a YAML configuration file. This is useful for automating workflows and reproducibility. You can specify the configuration file and an output file for the results. From 440f013144d3a432036b0c19d1c67cbd078266cd Mon Sep 17 00:00:00 2001 From: jucordero Date: Tue, 19 May 2026 20:28:51 +0100 Subject: [PATCH 2/7] whitelisted numpy and xarray --- agrifoodpy/pipeline/pipeline.py | 76 +++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/agrifoodpy/pipeline/pipeline.py b/agrifoodpy/pipeline/pipeline.py index c664e53..f60d972 100644 --- a/agrifoodpy/pipeline/pipeline.py +++ b/agrifoodpy/pipeline/pipeline.py @@ -10,7 +10,6 @@ import time import yaml import importlib -import builtins from ..utils.dict_utils import get_dict, set_dict class Pipeline(): @@ -33,6 +32,20 @@ def _load_function(path): module = importlib.import_module(module_path) return getattr(module, func_name) + @staticmethod + def _is_supported_yaml_function(path): + """Return True for dotted numpy/xarray function paths.""" + if not isinstance(path, str) or "." not in path: + return False + + module_path, _ = path.rsplit(".", 1) + return ( + module_path == "numpy" + or module_path.startswith("numpy.") + or module_path == "xarray" + or module_path.startswith("xarray.") + ) + @classmethod def read(cls, filename): """Read a pipeline configuration from a YAML file @@ -48,22 +61,51 @@ def read(cls, filename): The pipeline object. """ - def dynamic_call_constructor(loader, suffix, node): - """Multi-constructor for arbitrary functions""" - func = cls._load_function(suffix) - - if isinstance(node, yaml.ScalarNode): - return func - elif isinstance(node, yaml.SequenceNode): - args = loader.construct_sequence(node) - return func(*args) - elif isinstance(node, yaml.MappingNode): - kwargs = loader.construct_mapping(node) - return func(**kwargs) - - # Register the multi-constructor for all tags starting with '!' - yaml.add_multi_constructor("!", dynamic_call_constructor, - Loader=yaml.FullLoader) + def dynamic_call_constructor(package_name): + """Build a multi-constructor for supported package functions.""" + + def constructor(loader, suffix, node): + func_path = f"{package_name}.{suffix}" if suffix else package_name + + # Check if the function path is supported + if not cls._is_supported_yaml_function(func_path): + raise yaml.constructor.ConstructorError( + None, + None, + f"Unsupported YAML function tag '!{func_path}'.", + node.start_mark, + ) + + func = cls._load_function(func_path) + + if isinstance(node, yaml.ScalarNode): + return func + if isinstance(node, yaml.SequenceNode): + args = loader.construct_sequence(node) + return func(*args) + if isinstance(node, yaml.MappingNode): + kwargs = loader.construct_mapping(node) + return func(**kwargs) + + raise yaml.constructor.ConstructorError( + None, + None, + f"Unsupported YAML node type for '!{func_path}'.", + node.start_mark, + ) + + return constructor + + yaml.add_multi_constructor( + "!numpy.", + dynamic_call_constructor("numpy"), + Loader=yaml.FullLoader, + ) + yaml.add_multi_constructor( + "!xarray.", + dynamic_call_constructor("xarray"), + Loader=yaml.FullLoader, + ) with open(filename, "r") as f: config = yaml.load(f, Loader=yaml.FullLoader) From ff2ee09a5e06a146bcf4108605f6f380800deb61 Mon Sep 17 00:00:00 2001 From: jucordero Date: Wed, 20 May 2026 14:34:41 +0100 Subject: [PATCH 3/7] Unit tests --- agrifoodpy/pipeline/pipeline.py | 4 +- .../tests/data/test_config_numpy_array.yaml | 6 ++ .../data/test_config_numpy_array_kwargs.yaml | 6 ++ .../test_config_unsupported_function.yaml | 6 ++ .../data/test_config_xarray_dataarray.yaml | 6 ++ .../test_config_xarray_dataarray_kwargs.yaml | 6 ++ agrifoodpy/pipeline/tests/test_pipeline.py | 56 +++++++++++++++++++ 7 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 agrifoodpy/pipeline/tests/data/test_config_numpy_array.yaml create mode 100644 agrifoodpy/pipeline/tests/data/test_config_numpy_array_kwargs.yaml create mode 100644 agrifoodpy/pipeline/tests/data/test_config_unsupported_function.yaml create mode 100644 agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray.yaml create mode 100644 agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray_kwargs.yaml diff --git a/agrifoodpy/pipeline/pipeline.py b/agrifoodpy/pipeline/pipeline.py index f60d972..10c0e41 100644 --- a/agrifoodpy/pipeline/pipeline.py +++ b/agrifoodpy/pipeline/pipeline.py @@ -81,10 +81,10 @@ def constructor(loader, suffix, node): if isinstance(node, yaml.ScalarNode): return func if isinstance(node, yaml.SequenceNode): - args = loader.construct_sequence(node) + args = loader.construct_sequence(node, deep=True) return func(*args) if isinstance(node, yaml.MappingNode): - kwargs = loader.construct_mapping(node) + kwargs = loader.construct_mapping(node, deep=True) return func(**kwargs) raise yaml.constructor.ConstructorError( diff --git a/agrifoodpy/pipeline/tests/data/test_config_numpy_array.yaml b/agrifoodpy/pipeline/tests/data/test_config_numpy_array.yaml new file mode 100644 index 0000000..2a8fdcf --- /dev/null +++ b/agrifoodpy/pipeline/tests/data/test_config_numpy_array.yaml @@ -0,0 +1,6 @@ +nodes: + - function: agrifoodpy.utils.nodes.write_to_datablock + name: Numpy Array + params: + key: "test_numpy_array" + value: !numpy.array [[1, 2, 3]] \ No newline at end of file diff --git a/agrifoodpy/pipeline/tests/data/test_config_numpy_array_kwargs.yaml b/agrifoodpy/pipeline/tests/data/test_config_numpy_array_kwargs.yaml new file mode 100644 index 0000000..82ab817 --- /dev/null +++ b/agrifoodpy/pipeline/tests/data/test_config_numpy_array_kwargs.yaml @@ -0,0 +1,6 @@ +nodes: + - function: agrifoodpy.utils.nodes.write_to_datablock + name: Numpy Array + params: + key: "test_numpy_array" + value: !numpy.array {object: [1, 2, 3]} \ No newline at end of file diff --git a/agrifoodpy/pipeline/tests/data/test_config_unsupported_function.yaml b/agrifoodpy/pipeline/tests/data/test_config_unsupported_function.yaml new file mode 100644 index 0000000..995e9ed --- /dev/null +++ b/agrifoodpy/pipeline/tests/data/test_config_unsupported_function.yaml @@ -0,0 +1,6 @@ +nodes: + - function: agrifoodpy.utils.nodes.write_to_datablock + name: Unsupported Function + params: + key: "test_pandas_array" + value: !pandas.DataFrame {data: [1, 2, 3], columns: ["A", "B", "C"]} \ No newline at end of file diff --git a/agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray.yaml b/agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray.yaml new file mode 100644 index 0000000..71f9a71 --- /dev/null +++ b/agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray.yaml @@ -0,0 +1,6 @@ +nodes: + - function: agrifoodpy.utils.nodes.write_to_datablock + name: Xarray DataArray + params: + key: "test_value" + value: !xarray.DataArray [[1,2,3] , {Year: [2020, 2021, 2022]}, "Year"] \ No newline at end of file diff --git a/agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray_kwargs.yaml b/agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray_kwargs.yaml new file mode 100644 index 0000000..7da04f6 --- /dev/null +++ b/agrifoodpy/pipeline/tests/data/test_config_xarray_dataarray_kwargs.yaml @@ -0,0 +1,6 @@ +nodes: + - function: agrifoodpy.utils.nodes.write_to_datablock + name: Xarray DataArray + params: + key: "test_value" + value: !xarray.DataArray {data: [1, 2, 3], dims: ["Year"], coords: {Year: [2020, 2021, 2022]}} \ No newline at end of file diff --git a/agrifoodpy/pipeline/tests/test_pipeline.py b/agrifoodpy/pipeline/tests/test_pipeline.py index 413149f..0aa753f 100644 --- a/agrifoodpy/pipeline/tests/test_pipeline.py +++ b/agrifoodpy/pipeline/tests/test_pipeline.py @@ -1,5 +1,8 @@ from agrifoodpy.pipeline import Pipeline, standalone +import numpy as np +import xarray as xr import pytest +import os def test_init(): pipeline = Pipeline() @@ -359,3 +362,56 @@ def reserved_param_node(x, datablock=None): @pipeline_node(['wrong_key']) def unknown_input_node(right_key): pass + + +# Test reading YAML config with numpy array parameters and values +def test_read_yaml_numpy_array_(): + + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_numpy_array.yaml") + + pipeline = Pipeline.read(str(config_path)) + pipeline.run() + + assert np.array_equal(pipeline.params[0]['value'], np.array([1, 2, 3])) + assert np.array_equal(pipeline.datablock["test_numpy_array"], np.array([1, 2, 3])) + +def test_read_yaml_numpy_array_kwargs(): + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_numpy_array_kwargs.yaml") + + pipeline = Pipeline.read(str(config_path)) + pipeline.run() + + assert np.array_equal(pipeline.params[0]['value'], np.array([1, 2, 3])) + assert np.array_equal(pipeline.datablock["test_numpy_array"], np.array([1, 2, 3])) + +def test_read_yaml_xarray_dataarray(): + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_xarray_dataarray.yaml") + + pipeline = Pipeline.read(str(config_path)) + pipeline.run() + + expected_array = xr.DataArray([1, 2, 3], coords={"Year": [2020, 2021, 2022]}, dims=["Year"]) + xr.testing.assert_equal(pipeline.params[0]['value'], expected_array) + xr.testing.assert_equal(pipeline.datablock["test_value"], expected_array) + +def test_read_yaml_xarray_dataarray_kwargs(): + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_xarray_dataarray_kwargs.yaml") + + pipeline = Pipeline.read(str(config_path)) + pipeline.run() + + expected_array = xr.DataArray([1, 2, 3], coords={"Year": [2020, 2021, 2022]}, dims=["Year"]) + xr.testing.assert_equal(pipeline.params[0]['value'], expected_array) + xr.testing.assert_equal(pipeline.datablock["test_value"], expected_array) + +def test_read_yaml_unsupported_function(): + from yaml.constructor import ConstructorError + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_unsupported_function.yaml") + + with pytest.raises(ConstructorError): + pipeline = Pipeline.read(str(config_path)) \ No newline at end of file From ba5f67d21c834ceca26b713b32e91e304a6ef498 Mon Sep 17 00:00:00 2001 From: jucordero Date: Wed, 20 May 2026 14:56:07 +0100 Subject: [PATCH 4/7] Test credentials From 19af36e3182e42997f13d68caf46f71fa81ac6c1 Mon Sep 17 00:00:00 2001 From: jucordero Date: Wed, 20 May 2026 15:10:21 +0100 Subject: [PATCH 5/7] Check credentials again From b7416ff3ea460953fe1d56e9c7d99e9e073fe9ce Mon Sep 17 00:00:00 2001 From: jucordero Date: Wed, 20 May 2026 15:27:42 +0100 Subject: [PATCH 6/7] Check credentials again From 198e162165e2240b82f7a7f207abc6f3207ba53e Mon Sep 17 00:00:00 2001 From: jucordero Date: Wed, 20 May 2026 17:25:15 +0100 Subject: [PATCH 7/7] fixed unit tests indentation and names --- agrifoodpy/pipeline/tests/test_pipeline.py | 66 +++++++++++----------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/agrifoodpy/pipeline/tests/test_pipeline.py b/agrifoodpy/pipeline/tests/test_pipeline.py index 0aa753f..6f05ad2 100644 --- a/agrifoodpy/pipeline/tests/test_pipeline.py +++ b/agrifoodpy/pipeline/tests/test_pipeline.py @@ -365,53 +365,53 @@ def unknown_input_node(right_key): # Test reading YAML config with numpy array parameters and values -def test_read_yaml_numpy_array_(): - - script_dir = os.path.dirname(__file__) - config_path = os.path.join(script_dir, "data/test_config_numpy_array.yaml") +def test_read_yaml_numpy_array(): + + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_numpy_array.yaml") - pipeline = Pipeline.read(str(config_path)) - pipeline.run() + pipeline = Pipeline.read(str(config_path)) + pipeline.run() - assert np.array_equal(pipeline.params[0]['value'], np.array([1, 2, 3])) - assert np.array_equal(pipeline.datablock["test_numpy_array"], np.array([1, 2, 3])) + assert np.array_equal(pipeline.params[0]['value'], np.array([1, 2, 3])) + assert np.array_equal(pipeline.datablock["test_numpy_array"], np.array([1, 2, 3])) def test_read_yaml_numpy_array_kwargs(): - script_dir = os.path.dirname(__file__) - config_path = os.path.join(script_dir, "data/test_config_numpy_array_kwargs.yaml") + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_numpy_array_kwargs.yaml") - pipeline = Pipeline.read(str(config_path)) - pipeline.run() + pipeline = Pipeline.read(str(config_path)) + pipeline.run() - assert np.array_equal(pipeline.params[0]['value'], np.array([1, 2, 3])) - assert np.array_equal(pipeline.datablock["test_numpy_array"], np.array([1, 2, 3])) + assert np.array_equal(pipeline.params[0]['value'], np.array([1, 2, 3])) + assert np.array_equal(pipeline.datablock["test_numpy_array"], np.array([1, 2, 3])) def test_read_yaml_xarray_dataarray(): - script_dir = os.path.dirname(__file__) - config_path = os.path.join(script_dir, "data/test_config_xarray_dataarray.yaml") + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_xarray_dataarray.yaml") - pipeline = Pipeline.read(str(config_path)) - pipeline.run() + pipeline = Pipeline.read(str(config_path)) + pipeline.run() - expected_array = xr.DataArray([1, 2, 3], coords={"Year": [2020, 2021, 2022]}, dims=["Year"]) - xr.testing.assert_equal(pipeline.params[0]['value'], expected_array) - xr.testing.assert_equal(pipeline.datablock["test_value"], expected_array) + expected_array = xr.DataArray([1, 2, 3], coords={"Year": [2020, 2021, 2022]}, dims=["Year"]) + xr.testing.assert_equal(pipeline.params[0]['value'], expected_array) + xr.testing.assert_equal(pipeline.datablock["test_value"], expected_array) def test_read_yaml_xarray_dataarray_kwargs(): - script_dir = os.path.dirname(__file__) - config_path = os.path.join(script_dir, "data/test_config_xarray_dataarray_kwargs.yaml") + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_xarray_dataarray_kwargs.yaml") - pipeline = Pipeline.read(str(config_path)) - pipeline.run() + pipeline = Pipeline.read(str(config_path)) + pipeline.run() - expected_array = xr.DataArray([1, 2, 3], coords={"Year": [2020, 2021, 2022]}, dims=["Year"]) - xr.testing.assert_equal(pipeline.params[0]['value'], expected_array) - xr.testing.assert_equal(pipeline.datablock["test_value"], expected_array) + expected_array = xr.DataArray([1, 2, 3], coords={"Year": [2020, 2021, 2022]}, dims=["Year"]) + xr.testing.assert_equal(pipeline.params[0]['value'], expected_array) + xr.testing.assert_equal(pipeline.datablock["test_value"], expected_array) def test_read_yaml_unsupported_function(): - from yaml.constructor import ConstructorError - script_dir = os.path.dirname(__file__) - config_path = os.path.join(script_dir, "data/test_config_unsupported_function.yaml") + from yaml.constructor import ConstructorError + script_dir = os.path.dirname(__file__) + config_path = os.path.join(script_dir, "data/test_config_unsupported_function.yaml") - with pytest.raises(ConstructorError): - pipeline = Pipeline.read(str(config_path)) \ No newline at end of file + with pytest.raises(ConstructorError): + pipeline = Pipeline.read(str(config_path)) \ No newline at end of file