Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
class VideoLecture(CdsOverdo):
"""Translation Index for CERN Video Lectures."""

__query__ = "8567_.x:'Absolute master path' 8567_.d:/mnt/master_share* -980__.C:MIGRATED -980__.c:DELETED -5831_.a:digitized"
__query__ = "8567_.x:'Absolute master path' 8567_.d:/mnt/master_share* -980__.C:MIGRATED -980__.c:DELETED"

__ignore_keys__ = base_model.__ignore_keys__
# IMPLEMENTED
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def guess_dates(json_data, key, subkey=None):
### Returns:
- `set[str]`: A set of date strings.
"""
items = json_data.get(key, [])
items = get_values_in_json(json_data, key, type=list)
if subkey:
return {
item[subkey]["date"]
Expand Down Expand Up @@ -345,17 +345,30 @@ def reformat_date(json_data):
]

return None

# Check imprint date in 260 (digitized records have it)
imprint_dates = get_values_in_json(json_data, "imprint_date")
if len(imprint_dates) == 1:
return list(imprint_dates)
raise MissingRequiredField(
f"No valid date found in record: {json_data.get('recid')}.",
stage="transform",
)

def description(json_data):
"""Reformat the description for the cds-videos data model."""
if not json_data.get("description"):
candidates = json_data.get("description") or []
values = [v.strip() for v in candidates if v and v.strip()]
if len(values) > 1:
raise UnexpectedValue(
field="description",
value=values,
message=f"Multiple description values found: {values}"
)

if values:
return values[0]
else:
return json_data.get("title").get("title")
return json_data.get("description")

def format_contributors(json_data):
"""
Expand Down Expand Up @@ -394,7 +407,7 @@ def publication_date(json_data):

def notes(json_data):
"""Get the notes."""
notes = entry.get("notes")
notes = get_values_in_json(json_data, "notes", type=list)
if notes:
note_strings = [note.get("note") for note in notes]
return "\n".join(note_strings)
Expand Down Expand Up @@ -459,7 +472,7 @@ def get_report_number(json_data):
def get_keywords(json_data):
"""Return keywords."""
keywords = json_data.get("keywords", [])
subject_categories = json_data.get("subject_categories", [])
subject_categories = get_values_in_json(json_data, "subject_categories", type=list)
subject_indicators = json_data.get("subject_indicators", [])

all_keywords = [
Expand Down Expand Up @@ -578,7 +591,7 @@ def get_curation(json_data):
"""Return _curation."""
_curation = json_data.get("_curation", {})
# Add volumes
additional_titles = json_data.get("additional_titles", [])
additional_titles = get_values_in_json(json_data, "additional_titles", type=list)
volumes = [item["volume"] for item in additional_titles if "volume" in item]
if volumes:
_curation["volumes"] = volumes
Expand All @@ -594,7 +607,7 @@ def get_curation(json_data):

def get_additional_titles(json_data):
"""Return additional_titles."""
tag_246 = json_data.get("additional_titles", {})
tag_246 = get_values_in_json(json_data, "additional_titles", type=list)
_titles = [item for item in tag_246 if "title" in item]
additional_titles = []
for title_item in _titles:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,25 @@

from ...models.base import model
from ..quality.contributors import get_contributor
from ..quality.curation import transform_subfields


def append_transformed_subfields(self, key, value, field_name, subfield_name=None):
    """Append transformed subfields to an entry of ``self["_curation"]``.

    The result of ``transform_subfields(key, value)`` is appended to the list
    stored at ``_curation[field_name]`` or, when ``subfield_name`` is given,
    to the list stored at ``_curation[field_name][subfield_name]``.

    ``_curation`` is only written to when the resulting list is non-empty, so
    no empty list or dict is left behind when there is nothing to store.

    :param self: record being built; must already hold a ``"_curation"`` key.
    :param key: field key, forwarded unchanged to ``transform_subfields``.
    :param value: field value, forwarded unchanged to ``transform_subfields``.
    :param field_name: key under ``_curation`` that receives the values.
    :param subfield_name: optional nested key under ``field_name``.
    :raises KeyError: if ``self`` has no ``"_curation"`` entry.
    """
    curation = self["_curation"]
    transformed = transform_subfields(key, value)

    if subfield_name:
        # Look up without inserting: ``setdefault`` here would leave an empty
        # dict under ``field_name`` whenever nothing ends up being appended.
        container = curation.get(field_name, {})
        entries = container.get(subfield_name, [])
        entries.extend(transformed)
        if entries:
            container[subfield_name] = entries
            curation[field_name] = container
    else:
        entries = curation.get(field_name, [])
        entries.extend(transformed)
        if entries:
            curation[field_name] = entries


@model.over("legacy_recid", "^001")
Expand All @@ -46,10 +65,15 @@ def title(self, key, value):


@model.over("description", "^520__")
@for_each_value
def description(self, key, value):
    """Translate one 520 field value into a description.

    Subfield ``a`` is parsed as the description text. When subfield ``9`` or
    subfield ``8`` holds a non-blank value, the field is instead stored via
    ``append_transformed_subfields`` under ``legacy_marc_fields`` / ``520``
    and ``None`` is returned for this value.
    """
    # Parsed up front, before the subfield checks, as in the original flow.
    parsed_text = StringValue(value.get("a")).parse()

    source_marker = value.get("9", "").strip()
    curation_marker = value.get("8", "").strip()
    if not (curation_marker or source_marker):
        return parsed_text

    append_transformed_subfields(self, key, value, "legacy_marc_fields", "520")
    return None


Expand All @@ -71,7 +95,16 @@ def languages(self, key, value):

if not langs:
raise MissingRequiredField(field=key, subfield="a", value=raw_lang)

provenance = value.get("9", "").strip()
if provenance and provenance not in ["review", "CERN QA"]:
# checking if anything else stored in this field
raise UnexpectedValue(field=key, subfield="9", value=provenance)
# TODO: we need to add for_each_value to this rule but we need to keep the same behavior
# curation_field = value.get("8", "").strip()
# if curation_field:
# append_transformed_subfields(self, key, value, "legacy_marc_fields", "041")


self["additional_languages"].extend(langs[1:])
return langs[0]

Expand Down Expand Up @@ -145,10 +178,12 @@ def keywords(self, key, value):
"""Translates keywords from tag 6531."""
keyword = value.get("a", "").strip()
provenance = value.get("9", "").strip()
if provenance and provenance not in ["CERN", "review"]:
if provenance and provenance not in ["CERN", "review", "review Mar2021", "CERN QA"]:
# checking if anything else stored in this field
raise UnexpectedValue(field=key, subfield="9", value=provenance)

curation_field = value.get("8", "").strip()
if curation_field:
append_transformed_subfields(self, key, value, "legacy_marc_fields", "653")
if keyword:
return {"name": keyword}

Expand Down
Loading
Loading