From 54f6402584176577f8553046a2802505e69df4bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?=
Date: Tue, 24 Mar 2026 10:46:07 +0100
Subject: [PATCH] add(fap): new model and transformation rules

---
 .../rdm/records/transform/models/fap.py   | 42 +++++++++++++++++++
 .../transform/xml_processing/rules/fap.py | 37 ++++++++++++++++
 cds_migrator_kit/rdm/streams.yaml         | 12 ++++++
 setup.cfg                                 |  5 +++
 4 files changed, 96 insertions(+)
 create mode 100644 cds_migrator_kit/rdm/records/transform/models/fap.py
 create mode 100644 cds_migrator_kit/rdm/records/transform/xml_processing/rules/fap.py

diff --git a/cds_migrator_kit/rdm/records/transform/models/fap.py b/cds_migrator_kit/rdm/records/transform/models/fap.py
new file mode 100644
index 00000000..f6039ef1
--- /dev/null
+++ b/cds_migrator_kit/rdm/records/transform/models/fap.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2026 CERN.
+#
+# CDS-RDM is free software; you can redistribute it and/or modify it under
+# the terms of the MIT License; see LICENSE file for more details.
+
+"""CDS-RDM FAP (Finance and Administrative Processes) model."""
+from cds_migrator_kit.rdm.records.transform.models.base_record import (
+    rdm_base_record_model,
+)
+from cds_migrator_kit.transform.overdo import CdsOverdo
+
+
+class FAPModel(CdsOverdo):
+    """Translation model for FAP records."""
+
+    __query__ = "980__:INTNOTEFAPPUBL"
+
+    __ignore_keys__ = {
+        "100__m",
+        "300__a",  # number of pages
+        "8564_8",
+        "8564_s",
+        "8564_x",
+        "8564_y",  # file description - done by files dump
+        "937__c",  # last modified by
+        "937__s",  # last modification date
+        "960__a",  # base number usually 12
+    }
+
+    _default_fields = {
+        # TODO: is this resource type correct?
+        "resource_type": {"id": "publication-report"},
+        "custom_fields": {},
+    }
+
+
+fap_model = FAPModel(
+    bases=(rdm_base_record_model,),
+    entry_point_group="cds_migrator_kit.migrator.rules.fap",
+)
diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/fap.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/fap.py
new file mode 100644
index 00000000..ea9c778c
--- /dev/null
+++ b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/fap.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2026 CERN.
+#
+# CDS-RDM is free software; you can redistribute it and/or modify it under
+# the terms of the MIT License; see LICENSE file for more details.
+
+"""CDS-RDM FAP (Finance and Administrative Processes) rules."""
+
+from dojson.errors import IgnoreKey
+from dojson.utils import for_each_value
+
+from cds_migrator_kit.errors import UnexpectedValue
+
+from ...models.fap import fap_model as model
+
+
+@model.over("collection", "^690C_")
+@for_each_value
+def collection(self, key, value):
+    """Validate the 690C_ collection tag, then drop it from the output."""
+    tag = value.get("a", "").strip().lower()
+    # TODO: can we drop them?
+    if tag not in ("cern", "intnote", "publfap"):
+        raise UnexpectedValue(subfield="a", value=value, field=key)
+    raise IgnoreKey("collection")
+
+
+@model.over("resource_type", "^980__", override=True)
+def resource_type(self, key, value):
+    """Check that 980__a is INTNOTEFAPPUBL; the rule itself emits no value."""
+    value = value.get("a")
+    if value:
+        value = value.strip().upper()
+    if value != "INTNOTEFAPPUBL":
+        raise UnexpectedValue("Unknown resource type (FAP)", field=key, value=value)
+    raise IgnoreKey("resource_type")
diff --git a/cds_migrator_kit/rdm/streams.yaml b/cds_migrator_kit/rdm/streams.yaml
index 19c5d2ca..87e52a0a 100644
--- a/cds_migrator_kit/rdm/streams.yaml
+++ b/cds_migrator_kit/rdm/streams.yaml
@@ -110,3 +110,15 @@ records:
         - "77617386-632c-4b86-8dd2-68de77ae0018"
     load:
       legacy_pids_to_redirect: cds_migrator_kit/rdm/data/hr_restricted/duplicated_pids.json
+  fap:
+    data_dir: cds_migrator_kit/rdm/data/fap
+    tmp_dir: cds_migrator_kit/rdm/tmp/fap
+    log_dir: cds_migrator_kit/rdm/log/fap
+    restricted: "True"
+    extract:
+      dirpath: cds_migrator_kit/rdm/data/fap/dump/
+    transform:
+      files_dump_dir: cds_migrator_kit/rdm/data/fap/files/
+      missing_users: cds_migrator_kit/rdm/data/users
+      communities_ids:
+        - ""
diff --git a/setup.cfg b/setup.cfg
index 803a3173..b4a737e0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -81,6 +81,7 @@ cds_migrator_kit.migrator.models =
     te = cds_migrator_kit.rdm.records.transform.models.te:te_model
     en = cds_migrator_kit.rdm.records.transform.models.en:en_model
     annual_rep = cds_migrator_kit.rdm.records.transform.models.annual_report:annual_rep_model
+    fap = cds_migrator_kit.rdm.records.transform.models.fap:fap_model
 cds_migrator_kit.migrator.rules.base =
     base = cds_migrator_kit.transform.xml_processing.rules.base
 cds_migrator_kit.migrator.rdm.rules.base =
@@ -158,6 +159,10 @@ cds_migrator_kit.migrator.rules.annual_rep =
     base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
     publication = cds_migrator_kit.rdm.records.transform.xml_processing.rules.publications
     annual_rep = cds_migrator_kit.rdm.records.transform.xml_processing.rules.annual_rep
+cds_migrator_kit.migrator.rules.fap =
+    base = cds_migrator_kit.transform.xml_processing.rules.base
+    base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
+    fap = cds_migrator_kit.rdm.records.transform.xml_processing.rules.fap
 cds_migrator_kit.migrator.rules.people =
     people = cds_migrator_kit.rdm.users.transform.xml_processing.rules.people
 invenio_pidstore.minters =