CERNDocumentServer · zubeydecivelek · Mar 18, 2026
diff --git a/cds_migrator_kit/videos/weblecture_migration/transform/models/video_lecture.py b/cds_migrator_kit/videos/weblecture_migration/transform/models/video_lecture.py
@@ -27,7 +27,7 @@
 class VideoLecture(CdsOverdo):
     """Translation Index for CERN Video Lectures."""
 
-    __query__ = "8567_.x:'Absolute master path' 8567_.d:/mnt/master_share* -980__.C:MIGRATED -980__.c:DELETED -5831_.a:digitized"
+    __query__ = "8567_.x:'Absolute master path' 8567_.d:/mnt/master_share* -980__.C:MIGRATED -980__.c:DELETED"
 
     __ignore_keys__ = base_model.__ignore_keys__
     # IMPLEMENTED

diff --git a/cds_migrator_kit/videos/weblecture_migration/transform/transform.py b/cds_migrator_kit/videos/weblecture_migration/transform/transform.py
@@ -300,7 +300,7 @@ def guess_dates(json_data, key, subkey=None):
             ### Returns:
             - `set[str]`: A set of date strings.
             """
-            items = json_data.get(key, [])
+            items = get_values_in_json(json_data, key, type=list)
             if subkey:
                 return {
                     item[subkey]["date"]
@@ -345,17 +345,30 @@ def reformat_date(json_data):
                     ]
 
                 return None
-
+            # Check imprint date in 260 (digitized records have it)
+            imprint_dates = get_values_in_json(json_data, "imprint_date")
+            if len(imprint_dates) == 1:
+                return list(imprint_dates)
             raise MissingRequiredField(
                 f"No valid date found in record: {json_data.get('recid')}.",
                 stage="transform",
             )
 
         def description(json_data):
             """Reformat the description for the cds-videos data model."""
-            if not json_data.get("description"):
+            candidates = json_data.get("description") or []
+            values = [v.strip() for v in candidates if v and v.strip()]  # skip falsy/blank entries
+            if len(values) > 1:  # more than one non-blank description is ambiguous
+                raise UnexpectedValue(
+                    field="description",
+                    value=values,
+                    message=f"Multiple description values found: {values}"
+                )
+
+            if values:
+                return values[0]
+            else:  # nothing usable left: fall back to the record title
                 return json_data.get("title").get("title")
-            return json_data.get("description")
 
         def format_contributors(json_data):
             """
@@ -394,7 +407,7 @@ def publication_date(json_data):
 
         def notes(json_data):
             """Get the notes."""
-            notes = entry.get("notes")
+            notes = get_values_in_json(json_data, "notes", type=list)
             if notes:
                 note_strings = [note.get("note") for note in notes]
                 return "\n".join(note_strings)
@@ -459,7 +472,7 @@ def get_report_number(json_data):
         def get_keywords(json_data):
             """Return keywords."""
             keywords = json_data.get("keywords", [])
-            subject_categories = json_data.get("subject_categories", [])
+            subject_categories = get_values_in_json(json_data, "subject_categories", type=list)
             subject_indicators = json_data.get("subject_indicators", [])
 
             all_keywords = [
@@ -578,7 +591,7 @@ def get_curation(json_data):
             """Return _curation."""
             _curation = json_data.get("_curation", {})
             # Add volumes
-            additional_titles = json_data.get("additional_titles", [])
+            additional_titles = get_values_in_json(json_data, "additional_titles", type=list)
             volumes = [item["volume"] for item in additional_titles if "volume" in item]
             if volumes:
                 _curation["volumes"] = volumes
@@ -594,7 +607,7 @@ def get_curation(json_data):
 
         def get_additional_titles(json_data):
             """Return additional_titles."""
-            tag_246 = json_data.get("additional_titles", {})
+            tag_246 = get_values_in_json(json_data, "additional_titles", type=list)
             _titles = [item for item in tag_246 if "title" in item]
             additional_titles = []
             for title_item in _titles:

diff --git a/cds_migrator_kit/videos/weblecture_migration/transform/xml_processing/rules/base.py b/cds_migrator_kit/videos/weblecture_migration/transform/xml_processing/rules/base.py
@@ -26,6 +26,25 @@
 
 from ...models.base import model
 from ..quality.contributors import get_contributor
+from ..quality.curation import transform_subfields
+
+
+def append_transformed_subfields(self, key, value, field_name, subfield_name=None):
+    """Append the transformed subfields of a MARC field to ``_curation``.
+
+    With ``subfield_name`` the values are stored under
+    ``_curation[field_name][subfield_name]``; otherwise they extend the
+    ``_curation[field_name]`` list. Nothing is created when there are no values.
+    """
+    curation = self["_curation"]
+    transformed = list(transform_subfields(key, value))
+    if not transformed:
+        return  # do not leave empty containers behind in the curation data
+    if subfield_name:
+        legacy_fields = curation.setdefault(field_name, {})
+        legacy_fields.setdefault(subfield_name, []).extend(transformed)
+    else:
+        curation.setdefault(field_name, []).extend(transformed)
 
 
 @model.over("legacy_recid", "^001")
@@ -46,10 +65,15 @@ def title(self, key, value):
 
 
 @model.over("description", "^520__")
+@for_each_value
 def description(self, key, value):
     """Translates description."""
     description_text = StringValue(value.get("a")).parse()
-
+    provenance = value.get("9", "").strip()
+    curation_info = value.get("8", "").strip()
+    if curation_info or provenance:  # field carries a non-blank $8 and/or $9
+        append_transformed_subfields(self, key, value, "legacy_marc_fields", "520")
+        return None  # handed to _curation instead of being returned as description text
     return description_text
 
 
@@ -71,7 +95,16 @@ def languages(self, key, value):
 
     if not langs:
         raise MissingRequiredField(field=key, subfield="a", value=raw_lang)
-
+    provenance = value.get("9", "").strip()
+    if provenance and provenance not in ["review", "CERN QA"]:
+        # checking if anything else stored in this field
+        raise UnexpectedValue(field=key, subfield="9", value=provenance)
+    # TODO: add for_each_value to this rule (keeping its current behavior), then enable:
+    # curation_field = value.get("8", "").strip()
+    # if curation_field:
+    #     append_transformed_subfields(self, key, value, "legacy_marc_fields", "041")
+
+
     self["additional_languages"].extend(langs[1:])
     return langs[0]
 
@@ -145,10 +178,12 @@ def keywords(self, key, value):
     """Translates keywords from tag 6531."""
     keyword = value.get("a", "").strip()
     provenance = value.get("9", "").strip()
-    if provenance and provenance not in ["CERN", "review"]:
+    if provenance and provenance not in ["CERN", "review", "review Mar2021", "CERN QA"]:
         # checking if anything else stored in this field
         raise UnexpectedValue(field=key, subfield="9", value=provenance)
-
+    curation_field = value.get("8", "").strip()
+    if curation_field:
+        append_transformed_subfields(self, key, value, "legacy_marc_fields", "653")
     if keyword:
         return {"name": keyword}