-
Notifications
You must be signed in to change notification settings - Fork 10
add(staff-association): new model and transformation rules #391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,101 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # | ||
| # Copyright (C) 2026 CERN. | ||
| # | ||
| # CDS-RDM is free software; you can redistribute it and/or modify it under | ||
| # the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
| """CDS-RDM Staff Association model.""" | ||
| from cds_migrator_kit.rdm.records.transform.models.base_record import ( | ||
| rdm_base_record_model, | ||
| ) | ||
| from cds_migrator_kit.transform.overdo import CdsOverdo | ||
|
|
||
|
|
||
class StaffAssociationModel(CdsOverdo):
    """Overdo model used to translate Staff Association records.

    The class only carries configuration: the query identifying the records
    it applies to, the keys that are ignored, and the fields every translated
    record starts with.
    """

    # Records tagged BULLETINSTAFF; the two "-980__" terms presumably exclude
    # CERN Bulletin articles and issues (confirm against the query syntax).
    __query__ = """980__:BULLETINSTAFF
    -980__:CERN_BULLETIN_ARTICLE
    -980__:CERN_BULLETIN_ISSUE
    """

    # Keys ignored by this model. The list was copied from the bulletin issue
    # model.
    __ignore_keys__ = {
        "0248_a",
        "0248_p",
        "0248_q",
        "100__m",  # contributor email
        "110__a",  # corporate author: always CERN, safe to ignore
        "300__a",  # number of pages
        "336__a",  # DM metadata
        "506__m",  # record 2120833, ignored with confirmation from IR-ECO-CO
        "5831_2",  # DM tags (record 1054836)
        "5831_5",  # DM tags
        "5831_a",  # DM tags
        "5831_c",  # DM tags
        "5831_f",  # DM tags
        "5831_i",  # DM tags
        "5831_k",  # DM tags
        "5831_u",  # DM tags
        "5831_3",  # DM tags
        "5831_6",  # DM tags
        "5831_n",  # DM tags
        "5831_b",  # DM tags
        "5831_o",  # DM tags
        "583__a",  # DM tags
        "583__c",  # DM tags
        "583__z",  # DM tags
        "590__b",  # "staff association?" value, redundant with language
        # display feature: whether the related articles menu has a separator
        "594__a",
        "650172",  # subjects scheme
        "6531_9",  # keywords scheme
        "691__a",  # draft/online values, redundant
        "700__m",  # contributor email
        "773__p",  # title of the "CERN Bulletin" series
        "773__t",  # "CERN Bulletin" value, redundant
        "773__y",  # year, duplicates 260
        "773__u",  # broken links on record 44920
        "787__i",  # one referenced record (video in 1755835, 1754359)
        "859__a",  # empty value
        "856__q",  # broken link on record 619830
        "8560_f",  # contact email
        "8564_8",  # file id
        "8564_s",  # bibdoc id
        "8564_x",  # icon thumbnail sizes
        "8564_y",  # file description, handled by the files dump
        "8564_2",  # DM metadata
        "8564_q",  # DM metadata
        "8564_w",  # DM metadata
        "8564_z",  # DM metadata
        "8567_2",  # DM tags
        "8567_q",  # DM tags
        "8567_w",  # DM tags
        "8567_d",  # DM tags
        "906__m",  # edit rights, will be granted by the community
        "937__c",  # last modified by
        "937__s",  # last modification date
        "960__a",  # base number
        "961__a",  # CDS modification tag  # TODO
        "961__b",  # CDS modification tag  # TODO
        "961__c",  # CDS modification tag  # TODO
        "961__h",  # CDS modification tag  # TODO
        "961__l",  # CDS modification tag  # TODO
        "961__x",  # CDS modification tag  # TODO
        "981__a",  # duplicate record id
        "980__b",
        # Left commented out in the original, i.e. not ignored:
        # "246_1a",
        # "690C_a",
    }

    # Fields pre-populated on every record translated with this model.
    _default_fields = {
        # TODO should we keep this custom field?
        "custom_fields": {"journal:journal": {"title": "CERN Bulletin"}},
        "creators": [{"person_or_org": {"type": "organizational", "name": "CERN"}}],
    }
|
|
||
|
|
||
# Module-level model instance; the Staff Association rules module imports it
# and registers its translation rules on it.
staff_association_model = StaffAssociationModel(
    entry_point_group="cds_migrator_kit.migrator.rules.staff_association",
    bases=(rdm_base_record_model,),
)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -797,10 +797,25 @@ def related_identifiers_787(self, key, value): | |
| "relation_type": {"id": "references"}, | ||
| "resource_type": {"id": "publication-conferencepaper"}, | ||
| }, | ||
| "corresponding video": { | ||
| "relation_type": {"id": "references"}, | ||
| # TODO: do we need new resource type for video? | ||
| "resource_type": {"id": "audio"}, | ||
| }, | ||
|
Comment on lines
+800
to
+804
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Example record: https://cds.cern.ch/record/1494034/ It's a digitized video record. What should the resource_type be? Do we need a new one? |
||
| "manuscript": { | ||
| "relation_type": {"id": "isderivedfrom"}, | ||
| "resource_type": {"id": "publication-preprint"}, | ||
| }, | ||
|
Comment on lines
+805
to
+808
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Example record: https://cds.cern.ch/record/2846702 |
||
| } | ||
|
|
||
| if recid: | ||
| if description: | ||
| if description not in relation_map.keys(): | ||
| raise UnexpectedValue( | ||
| f"Unexpected relation description {description}", | ||
| field=key, | ||
| value=value, | ||
| ) | ||
| new_id = { | ||
| "identifier": recid, | ||
| "scheme": "cds", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # | ||
| # Copyright (C) 2026 CERN. | ||
| # | ||
| # CDS-RDM is free software; you can redistribute it and/or modify it under | ||
| # the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
| """CDS-RDM Staff Association rules.""" | ||
|
|
||
| from cds_migrator_kit.errors import UnexpectedValue | ||
|
|
||
| from ...models.staff_association import staff_association_model as model | ||
| from .bulletin_issue import ( | ||
| additional_descriptions, | ||
| additional_titles_bulletin, | ||
| bull_related_identifiers, | ||
| bulletin_report_number, | ||
| collection, | ||
| creators, | ||
| custom_fields_journal, | ||
| description, | ||
| imprint_info, | ||
| issue_number, | ||
| journal, | ||
| rel_identifiers, | ||
| subjects_bulletin, | ||
| translated_description, | ||
| urls_bulletin, | ||
| urls_bulletin_bis, | ||
| ) | ||
|
|
||
| # Re-register all shared bulletin rules onto staff_association_model | ||
# Shared bulletin rules re-registered on the staff association model.
# Each entry is (target field name, MARC tag pattern, override flag, rule);
# the order below is the registration order.
_SHARED_BULLETIN_RULES = (
    ("creators", "^100__", True, creators),
    ("additional_titles", "(^246_[1_])", True, additional_titles_bulletin),
    ("description", "^520__", True, description),
    ("collection", "^690C_", True, collection),
    ("publication_date", "(^260__)", True, imprint_info),
    ("custom_fields", "(^773__)", False, journal),
    ("additional_descriptions", "(^500__)", False, additional_descriptions),
    ("additional_descriptions", "(^590__)", False, translated_description),
    ("subjects", "(^650[12_][7_])|(^6531_)", True, subjects_bulletin),
    ("url_identifiers", "^8564_", True, urls_bulletin),
    ("urls_bulletin", "^856__", False, urls_bulletin_bis),
    ("custom_fields_journal", "(^916__)", True, custom_fields_journal),
    ("bulletin_report_number", "(^037__)|(^088__)", True, bulletin_report_number),
    ("custom_fields", "(^925__)", False, issue_number),
    ("bull_related_identifiers_1", "(^941__)", False, bull_related_identifiers),
    ("bull_related_identifiers_2", "(^962__)", True, rel_identifiers),
)

for _field_name, _tag_pattern, _overrides, _rule in _SHARED_BULLETIN_RULES:
    # Only pass ``override`` where it was requested, so the remaining
    # registrations keep relying on ``model.over``'s own default.
    _over_kwargs = {"override": True} if _overrides else {}
    model.over(_field_name, _tag_pattern, **_over_kwargs)(_rule)
|
|
||
|
|
||
@model.over("resource_type", "^980__", override=True)
def resource_type(self, key, value):
    """Translate the 980 field of a Staff Association record to a resource type.

    Subfield ``a`` is read when the field has one, otherwise subfield ``b``.
    The value is compared case-insensitively with ``bulletinstaff``.

    :param key: key of the field being translated (reported on error).
    :param value: mapping of subfield codes to values.
    :returns: ``{"id": "publication-periodicalarticle"}`` for ``bulletinstaff``.
    :raises UnexpectedValue: for any other value, or when the subfield is
        missing or empty.
    """
    subfield_code = "a" if "a" in value else "b"
    collection_tag = value.get(subfield_code)
    if collection_tag:
        collection_tag = collection_tag.lower()

    if collection_tag != "bulletinstaff":
        raise UnexpectedValue(
            "Unknown resource type (STAFF ASSOCIATION)",
            field=key,
            value=collection_tag,
        )

    # TODO what is the resource type? (open review question: is
    # "publication-periodicalarticle" correct for these records?)
    return {"id": "publication-periodicalarticle"}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we need to add a resource type for videos to be able to create relations with them. However, this resource type should only be populated for related records; we should not have it among the main resource types. Where does the link point: the old CDS or the videos platform?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is a digitized video record, in old CDS.
record: https://cds.cern.ch/record/1494034/
related record: https://cds.cern.ch/record/2271254
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kpsherva do you have any idea how we can add this resource type to only related records?