From ad5acad40630d7034790f4a1775baff12643d804 Mon Sep 17 00:00:00 2001 From: Inne Vanderkelen Date: Fri, 20 Mar 2026 14:59:42 +0100 Subject: [PATCH 1/5] adapt CCLM inputconvertor identifiers --- src/valenspy/input/converter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/valenspy/input/converter.py b/src/valenspy/input/converter.py index 75e0a62b..ecaf8483 100644 --- a/src/valenspy/input/converter.py +++ b/src/valenspy/input/converter.py @@ -153,6 +153,12 @@ def convert_input(self, data_sources: Path | list[Path] | xr.Dataset, metadata_i "CCLM": InputConverter("CCLM_lookup", CCLM_to_CF, metadata_info={"dataset": "CCLM"}), + "CCLM_monthly": InputConverter("CCLM_lookup", + CCLM_to_CF, + metadata_info={"dataset": "CCLM"}), + "CCLM_hourly": InputConverter("CCLM_lookup", + CCLM_to_CF, + metadata_info={"dataset": "CCLM"}), "ALARO_K": InputConverter("ALARO-SFX_K_lookup", ALARO_K_to_CF, metadata_info={"dataset": "ALARO_K"}), From 77120e0f8913f33cd23ce817e3bc7d5b159c78a4 Mon Sep 17 00:00:00 2001 From: kobebryant432 Date: Mon, 23 Mar 2026 16:08:10 +0100 Subject: [PATCH 2/5] add option to specify IC function as input_convertor in the yml files --- src/valenspy/input/esm_catalog_builder.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/valenspy/input/esm_catalog_builder.py b/src/valenspy/input/esm_catalog_builder.py index f894109f..7c532e22 100644 --- a/src/valenspy/input/esm_catalog_builder.py +++ b/src/valenspy/input/esm_catalog_builder.py @@ -176,8 +176,10 @@ def _process_dataset_for_catalog(self, dataset_name, dataset_info): regex = re.compile(dataset_root.as_posix() + r"/" + regex_pattern) dataset_meta_data = dataset_info.get("meta_data", {}) - - IC = INPUT_CONVERTORS.get(dataset_name, None) + if dataset_IC := dataset_info.get("input_convertor", None): + IC = INPUT_CONVERTORS.get(dataset_IC, None) #Use the specified ICs + else: + IC = INPUT_CONVERTORS.get(dataset_name, None) #Use the dataset name to find the IC if IC: 
CORDEX_variable_set = IC.cordex_variables variable_set = IC.raw_variables From 2dab88b55c5cd2a3a7c740acdc02ddbe99a8110f Mon Sep 17 00:00:00 2001 From: kobebryant432 Date: Mon, 23 Mar 2026 16:40:20 +0100 Subject: [PATCH 3/5] add option to have multiple patterns --- src/valenspy/input/esm_catalog_builder.py | 33 ++++++++++++++--------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/valenspy/input/esm_catalog_builder.py b/src/valenspy/input/esm_catalog_builder.py index 7c532e22..5616aa9e 100644 --- a/src/valenspy/input/esm_catalog_builder.py +++ b/src/valenspy/input/esm_catalog_builder.py @@ -110,10 +110,12 @@ def _validate_dataset_info(self): for dataset_name, dataset_info in self.datasets_info.items(): key_set = set(dataset_info.get("meta_data", {}).keys()) - pattern = dataset_info.get("pattern", None) - if pattern: - for key in re.findall(r"<(.*?)>", pattern): - key_set.add(key) + + if pattern := dataset_info.get("pattern", None): + if isinstance(pattern, str): + pattern = [pattern] + #Add keys present in all patterns to the key_set by extracting the identifiers in the pattern. 
+ key_set.update(set.intersection(*[set(re.findall(r"<(.*?)>", pat)) for pat in pattern])) # Check if all required identifiers are present for identifier in required_identifiers: @@ -172,14 +174,19 @@ def _process_dataset_for_catalog(self, dataset_name, dataset_info): """ dataset_root = Path(dataset_info.get("root")) - regex_pattern = create_named_regex(dataset_info.get("pattern", None)) - regex = re.compile(dataset_root.as_posix() + r"/" + regex_pattern) + + if pattern := dataset_info.get("pattern", None): + if isinstance(pattern, str): + pattern = [pattern] + + regex = [re.compile(dataset_root.as_posix() + r"/" + create_named_regex(pat)) for pat in pattern] dataset_meta_data = dataset_info.get("meta_data", {}) if dataset_IC := dataset_info.get("input_convertor", None): IC = INPUT_CONVERTORS.get(dataset_IC, None) #Use the specified ICs else: IC = INPUT_CONVERTORS.get(dataset_name, None) #Use the dataset name to find the IC + if IC: CORDEX_variable_set = IC.cordex_variables variable_set = IC.raw_variables @@ -190,13 +197,13 @@ def _process_dataset_for_catalog(self, dataset_name, dataset_info): for file in files: if file.endswith(".nc"): file_path = os.path.join(root, file) - if match := regex.match(file_path): - file_metadata = match.groupdict() - else: - #Add the skipped file to the skipped files dictionary (create the entry if it does not exist) - if dataset_name not in self.skipped_files: - self.skipped_files[dataset_name] = [] - self.skipped_files[dataset_name].append(file_path) + for reg in regex: + if match := reg.match(file_path): + file_metadata = match.groupdict() + break + + if not match: + self.skipped_files.setdefault(dataset_name, []).append(file_path) continue # Add the file path to the metadata From 47e732d7d3abb22c69c9fb7356a12f10c0aea7f1 Mon Sep 17 00:00:00 2001 From: kobebryant432 Date: Wed, 25 Mar 2026 10:49:54 +0100 Subject: [PATCH 4/5] added ERA5 hourly data to hortense input manager --- src/valenspy/ancilliary_data/dataset_info.yml | 5 ++++- 1 
file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/valenspy/ancilliary_data/dataset_info.yml b/src/valenspy/ancilliary_data/dataset_info.yml index 9dc1a9c6..ae49d519 100644 --- a/src/valenspy/ancilliary_data/dataset_info.yml +++ b/src/valenspy/ancilliary_data/dataset_info.yml @@ -40,7 +40,10 @@ hortense: ERA5: #Note that there is an aggregation variable here! What to do with this? This is messy as it can not be uniquely identified by the filename! root: /dodrio/scratch/projects/2022_200/external/era5/ - pattern: ///_era5____.nc + pattern: #Warning: patterns must be mutually exclusive so that the information in a path is parsed unambiguously. Patterns are tried in the order listed and the first match wins, so a path must never be able to match two patterns! + - ///_era5__monthly__.nc + - ///_era5__daily__.nc + - ///_era5__hourly_.nc meta_data: source_id: "ERA5" source_type: "reanalysis" From 6a5c1f26374483610c38ad638a23b91870659455 Mon Sep 17 00:00:00 2001 From: kobebryant432 Date: Wed, 25 Mar 2026 14:02:18 +0100 Subject: [PATCH 5/5] Clean up ICs --- src/valenspy/input/converter.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/valenspy/input/converter.py b/src/valenspy/input/converter.py index ecaf8483..75e0a62b 100644 --- a/src/valenspy/input/converter.py +++ b/src/valenspy/input/converter.py @@ -153,12 +153,6 @@ def convert_input(self, data_sources: Path | list[Path] | xr.Dataset, metadata_i "CCLM": InputConverter("CCLM_lookup", CCLM_to_CF, metadata_info={"dataset": "CCLM"}), - "CCLM_monthly": InputConverter("CCLM_lookup", - CCLM_to_CF, - metadata_info={"dataset": "CCLM"}), - "CCLM_hourly": InputConverter("CCLM_lookup", - CCLM_to_CF, - metadata_info={"dataset": "CCLM"}), "ALARO_K": InputConverter("ALARO-SFX_K_lookup", ALARO_K_to_CF, metadata_info={"dataset": "ALARO_K"}),