Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions cds_migrator_kit/rdm/records/transform/models/staff_association.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""CDS-RDM Staff Association model."""
from cds_migrator_kit.rdm.records.transform.models.base_record import (
rdm_base_record_model,
)
from cds_migrator_kit.transform.overdo import CdsOverdo


class StaffAssociationModel(CdsOverdo):
    """Translation model for Staff Association.

    Declares which legacy records belong to this model (``__query__``),
    which MARC keys are left untranslated (``__ignore_keys__``) and the
    default metadata values for these records (``_default_fields``).
    """

    # Legacy search query for this collection: records tagged BULLETINSTAFF,
    # minus those tagged as bulletin articles or bulletin issues (the leading
    # "-" presumably negates the term -- confirm against the query syntax).
    # NOTE: the continuation lines are part of the string literal, so any
    # leading whitespace added to them would end up in the query itself.
    __query__ = """980__:BULLETINSTAFF
-980__:CERN_BULLETIN_ARTICLE
-980__:CERN_BULLETIN_ISSUE
"""

    # MARC keys (tag + indicators + subfield) that are not migrated; the
    # reason for each is given inline. The list was initially copied from the
    # bulletin issue model.
    __ignore_keys__ = {
        "0248_a",
        "0248_p",
        "0248_q",
        "100__m",  # email of contributor
        "110__a",  # corporate author, always CERN, safe to ignore
        "300__a",  # number of pages
        "336__a",  # DM metadata
        "506__m",  # 2120833, ignored with confirmation from IR-ECO-CO
        "5831_2",  # DM tags 1054836
        "5831_5",  # DM tags
        "5831_a",  # DM tags
        "5831_c",  # DM tags
        "5831_f",  # DM tags
        "5831_i",  # DM tags
        "5831_k",  # DM tags
        "5831_u",  # DM tags
        "5831_3",  # DM tags
        "5831_6",  # DM tags
        "5831_n",  # DM tags
        "5831_b",  # DM tags
        "5831_o",  # DM tags
        "583__a",  # DM tags
        "583__c",  # DM tags
        "583__z",  # DM tags
        "590__b",  # staff association? value, redundant with language
        "594__a",  # display-only flag: related articles menu separator or not
        "650172",  # scheme of subjects
        "6531_9",  # scheme of keywords
        "691__a",  # draft/online values, redundant
        "700__m",  # email of contributor
        "773__p",  # title of the "CERN Bulletin" series
        "773__t",  # CERN Bulletin value, redundant
        "773__y",  # year, duplicate of 260
        "773__u",  # broken links on record 44920
        "787__i",  # one referenced record (video in 1755835, 1754359)
        "859__a",  # empty value
        "856__q",  # 619830 broken link
        "8560_f",  # contact email
        "8564_8",  # file id
        "8564_s",  # bibdoc id
        "8564_x",  # icon thumbnails sizes
        "8564_y",  # file description - done by files dump
        "8564_2",  # DM metadata
        "8564_q",  # DM metadata
        "8564_w",  # DM metadata
        "8564_z",  # DM metadata
        "8567_2",  # DM tags
        "8567_q",  # DM tags
        "8567_w",  # DM tags
        "8567_d",  # DM tags
        "906__m",  # edit rights, will be granted by the community
        "937__c",  # last modified by
        "937__s",  # last modification date
        "960__a",  # base number
        "961__a",  # CDS modification tag # TODO
        "961__b",  # CDS modification tag # TODO
        "961__c",  # CDS modification tag # TODO
        "961__h",  # CDS modification tag # TODO
        "961__l",  # CDS modification tag # TODO
        "961__x",  # CDS modification tag # TODO
        "981__a",  # duplicate record id
        "980__b",
        # NOTE(review): "246_1a" and "690C_a" were listed here but disabled;
        # presumably they must not be ignored because this model translates
        # the 246 and 690C fields -- confirm before re-adding them.
    }

    # Metadata values used when the record itself does not provide them
    # (presumably applied by the base transform -- confirm).
    _default_fields = {
        # TODO should we keep this custom field?
        "custom_fields": {"journal:journal": {"title": "CERN Bulletin"}},
        "creators": [{"person_or_org": {"type": "organizational", "name": "CERN"}}],
    }


# Module-level model instance referenced by the package entry points.
# It builds on the base RDM record model and takes its translation rules from
# the "cds_migrator_kit.migrator.rules.staff_association" entry point group.
staff_association_model = StaffAssociationModel(
    bases=(rdm_base_record_model,),
    entry_point_group="cds_migrator_kit.migrator.rules.staff_association",
)
Original file line number Diff line number Diff line change
Expand Up @@ -797,10 +797,25 @@ def related_identifiers_787(self, key, value):
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-conferencepaper"},
},
"corresponding video": {
"relation_type": {"id": "references"},
# TODO: do we need new resource type for video?
"resource_type": {"id": "audio"},
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we need to add a resource type for video to be able to create relations with videos. But this resource type should be only populated for the related records, we should not have it in the main resource type. Where does the link point: to old CDS or to the videos platform?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a digitized video record, in old CDS.

record: https://cds.cern.ch/record/1494034/
related record: https://cds.cern.ch/record/2271254

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this resource type should be only populated for the related records, we should not have it in the main resource type.

@kpsherva do you have any idea how we can add this resource type to only related records?

},
Comment on lines +800 to +804
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Example record: https://cds.cern.ch/record/1494034/

It's a digitized video record. What should be the resource_type? Do we need a new one?

"manuscript": {
"relation_type": {"id": "isderivedfrom"},
"resource_type": {"id": "publication-preprint"},
},
Comment on lines +805 to +808
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

if recid:
if description:
if description not in relation_map.keys():
raise UnexpectedValue(
f"Unexpected relation description {description}",
field=key,
value=value,
)
new_id = {
"identifier": recid,
"scheme": "cds",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def urls_bulletin_bis(self, key, value):


@model.over("custom_fields_journal", "(^916__)", override=True)
def issue_number(self, key, value):
def custom_fields_journal(self, key, value):
_custom_fields = self.get("custom_fields", {})

issue = value.get("z")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""CDS-RDM Staff Association rules."""

from cds_migrator_kit.errors import UnexpectedValue

from ...models.staff_association import staff_association_model as model
from .bulletin_issue import (
additional_descriptions,
additional_titles_bulletin,
bull_related_identifiers,
bulletin_report_number,
collection,
creators,
custom_fields_journal,
description,
imprint_info,
issue_number,
journal,
rel_identifiers,
subjects_bulletin,
translated_description,
urls_bulletin,
urls_bulletin_bis,
)

# Shared bulletin rules re-registered onto staff_association_model.
# Each row is (output field name, MARC tag pattern, override flag, rule) and
# the rows are registered strictly in the order listed.
_SHARED_BULLETIN_RULES = (
    ("creators", "^100__", True, creators),
    ("additional_titles", "(^246_[1_])", True, additional_titles_bulletin),
    ("description", "^520__", True, description),
    ("collection", "^690C_", True, collection),
    ("publication_date", "(^260__)", True, imprint_info),
    ("custom_fields", "(^773__)", False, journal),
    ("additional_descriptions", "(^500__)", False, additional_descriptions),
    ("additional_descriptions", "(^590__)", False, translated_description),
    ("subjects", "(^650[12_][7_])|(^6531_)", True, subjects_bulletin),
    ("url_identifiers", "^8564_", True, urls_bulletin),
    ("urls_bulletin", "^856__", False, urls_bulletin_bis),
    ("custom_fields_journal", "(^916__)", True, custom_fields_journal),
    ("bulletin_report_number", "(^037__)|(^088__)", True, bulletin_report_number),
    ("custom_fields", "(^925__)", False, issue_number),
    ("bull_related_identifiers_1", "(^941__)", False, bull_related_identifiers),
    ("bull_related_identifiers_2", "(^962__)", True, rel_identifiers),
)

for _field_name, _tag_pattern, _override, _rule in _SHARED_BULLETIN_RULES:
    # Only pass `override` when it was requested, so rules registered without
    # it go through exactly the same call as before.
    _options = {"override": True} if _override else {}
    model.over(_field_name, _tag_pattern, **_options)(_rule)


@model.over("resource_type", "^980__", override=True)
def resource_type(self, key, value):
    """Translate the 980__ collection tag into the record resource type.

    Reads subfield ``a`` when it is present in the field, otherwise subfield
    ``b``, and compares it case-insensitively.

    :param key: MARC key of the field being translated.
    :param value: mapping of the 980__ subfields.
    :returns: the resource type dict for ``bulletinstaff`` records.
    :raises UnexpectedValue: for any other (or missing) collection tag.
    """
    subfield_code = "a" if "a" in value else "b"
    collection_tag = value.get(subfield_code)
    if collection_tag:
        collection_tag = collection_tag.lower()

    if collection_tag != "bulletinstaff":
        raise UnexpectedValue(
            "Unknown resource type (STAFF ASSOCIATION)",
            field=key,
            value=collection_tag,
        )

    # TODO what is the resource type? (open review question: is
    # "publication-periodicalarticle" correct for staff association records?)
    return {"id": "publication-periodicalarticle"}
11 changes: 11 additions & 0 deletions cds_migrator_kit/rdm/streams.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,14 @@ records:
- "77617386-632c-4b86-8dd2-68de77ae0018"
load:
legacy_pids_to_redirect: cds_migrator_kit/rdm/data/hr_restricted/duplicated_pids.json
staff_association:
data_dir: cds_migrator_kit/rdm/data/staff_association
tmp_dir: cds_migrator_kit/rdm/tmp/staff_association
log_dir: cds_migrator_kit/rdm/log/staff_association
extract:
dirpath: cds_migrator_kit/rdm/data/staff_association/dump/
transform:
files_dump_dir: cds_migrator_kit/rdm/data/staff_association/files/
missing_users: cds_migrator_kit/rdm/data/users
communities_ids:
- "9ab1f6bd-b213-4bb7-9249-13b9665e453a"
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ cds_migrator_kit.migrator.models =
te = cds_migrator_kit.rdm.records.transform.models.te:te_model
en = cds_migrator_kit.rdm.records.transform.models.en:en_model
annual_rep = cds_migrator_kit.rdm.records.transform.models.annual_report:annual_rep_model
staff_association = cds_migrator_kit.rdm.records.transform.models.staff_association:staff_association_model
cds_migrator_kit.migrator.rules.base =
base = cds_migrator_kit.transform.xml_processing.rules.base
cds_migrator_kit.migrator.rdm.rules.base =
Expand Down Expand Up @@ -158,6 +159,10 @@ cds_migrator_kit.migrator.rules.annual_rep =
base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
publication = cds_migrator_kit.rdm.records.transform.xml_processing.rules.publications
annual_rep = cds_migrator_kit.rdm.records.transform.xml_processing.rules.annual_rep
cds_migrator_kit.migrator.rules.staff_association =
base = cds_migrator_kit.transform.xml_processing.rules.base
base_records = cds_migrator_kit.rdm.records.transform.xml_processing.rules.base
staff_association = cds_migrator_kit.rdm.records.transform.xml_processing.rules.staff_association
cds_migrator_kit.migrator.rules.people =
people = cds_migrator_kit.rdm.users.transform.xml_processing.rules.people
invenio_pidstore.minters =
Expand Down
Loading