diff --git a/invenio.cfg b/invenio.cfg index 007f57d2..a94abcdd 100644 --- a/invenio.cfg +++ b/invenio.cfg @@ -8,64 +8,59 @@ https://inveniordm.docs.cern.ch/reference/configuration/. """ import os - from copy import deepcopy from datetime import datetime, timedelta from cds_rdm import schemes +from cds_rdm.clc_sync.services.components import ClcSyncComponent +from cds_rdm.components import (CDSResourcePublication, + MintAlternateIdentifierComponent, + SubjectsValidationComponent) from cds_rdm.custom_fields import CUSTOM_FIELDS, CUSTOM_FIELDS_UI, NAMESPACES -from cds_rdm.permissions import ( - CDSCommunitiesPermissionPolicy, - CDSRDMRecordPermissionPolicy, - CDSRequestsPermissionPolicy, - CDSRDMPreservationSyncPermissionPolicy, - lock_edit_record_published_files, - CDSAuditLogPermissionPolicy -) from cds_rdm.files import storage_factory from cds_rdm.inspire_harvester.reader import InspireHTTPReader from cds_rdm.inspire_harvester.transformer import InspireJsonTransformer from cds_rdm.inspire_harvester.writer import InspireWriter -from invenio_app_rdm.config import STATS_EVENTS as _APP_RDM_STATS_EVENTS, \ - STATS_AGGREGATIONS as _APP_RDM_STATS_AGGREGATIONS, APP_RDM_ROUTES -from invenio_previewer.config import PREVIEWER_PREFERENCE as DEFAULT_PREVIEWER_PREFERENCE -from invenio_rdm_records.checks import requests as checks_requests -from invenio_rdm_records.config import (always_valid, RDM_RECORDS_PERSONORG_SCHEMES, - RDM_RECORDS_IDENTIFIERS_SCHEMES as RDM_RECORDS_RELATED_IDENTIFIERS_SCHEMES - ) -from invenio_records_resources.services.records.queryparser import ( - QueryParser, - SearchFieldTransformer, -) -from invenio_rdm_records.proxies import current_rdm_records_service as record_service -from invenio_rdm_records.services.components import DefaultRecordsComponents -from invenio_rdm_records.config import ( - RDM_PERSISTENT_IDENTIFIERS, - RDM_PARENT_PERSISTENT_IDENTIFIERS, - RDM_SEARCH -) -from invenio_preservation_sync.utils import preservation_info_render -from invenio_cern_sync.users.profile import CERNUserProfileSchema -from invenio_oauthclient.views.client import auto_redirect_login -from invenio_cern_sync.sso import cern_remote_app_name, cern_keycloak - -from invenio_vocabularies.config import \ - VOCABULARIES_NAMES_SCHEMES as DEFAULT_VOCABULARIES_NAMES_SCHEMES - +from cds_rdm.permissions import (CDSAuditLogPermissionPolicy, + CDSCommunitiesPermissionPolicy, + CDSRDMPreservationSyncPermissionPolicy, + CDSRDMRecordPermissionPolicy, + CDSRequestsPermissionPolicy, + lock_edit_record_published_files) +from cds_rdm.pids import validate_optional_doi_transitions +from cds_rdm.views import frontpage_view_function, inspire_link_render +from invenio_app_rdm.config import APP_RDM_RECORD_EXPORTERS as RECORD_EXPORTERS +from invenio_app_rdm.config import APP_RDM_ROUTES +from invenio_app_rdm.config import \ + STATS_AGGREGATIONS as _APP_RDM_STATS_AGGREGATIONS +from invenio_app_rdm.config import STATS_EVENTS as _APP_RDM_STATS_EVENTS from invenio_app_rdm.config import \ VOCABULARIES_DATASTREAM_READERS as DEFAULT_VOCABULARIES_DATASTREAM_READERS from invenio_app_rdm.config import \ - VOCABULARIES_DATASTREAM_TRANSFORMERS as DEFAULT_VOCABULARIES_DATASTREAM_TRANSFORMERS + VOCABULARIES_DATASTREAM_TRANSFORMERS as \ + DEFAULT_VOCABULARIES_DATASTREAM_TRANSFORMERS from invenio_app_rdm.config import \ VOCABULARIES_DATASTREAM_WRITERS as DEFAULT_VOCABULARIES_DATASTREAM_WRITERS -from cds_rdm.clc_sync.services.components import ClcSyncComponent -from cds_rdm.components import CDSResourcePublication -from cds_rdm.components import SubjectsValidationComponent -from cds_rdm.components import MintAlternateIdentifierComponent -from cds_rdm.pids import validate_optional_doi_transitions -from cds_rdm.views import frontpage_view_function, inspire_link_render - -from invenio_app_rdm.config import APP_RDM_RECORD_EXPORTERS as RECORD_EXPORTERS +from invenio_cern_sync.sso import cern_keycloak, cern_remote_app_name +from invenio_cern_sync.users.profile import CERNUserProfileSchema +from invenio_oauthclient.views.client import auto_redirect_login +from invenio_preservation_sync.utils import preservation_info_render +from invenio_previewer.config import \ + PREVIEWER_PREFERENCE as DEFAULT_PREVIEWER_PREFERENCE +from invenio_rdm_records.checks import requests as checks_requests +from invenio_rdm_records.config import (RDM_PARENT_PERSISTENT_IDENTIFIERS, + RDM_PERSISTENT_IDENTIFIERS) +from invenio_rdm_records.config import \ + RDM_RECORDS_IDENTIFIERS_SCHEMES as RDM_RECORDS_RELATED_IDENTIFIERS_SCHEMES +from invenio_rdm_records.config import (RDM_RECORDS_PERSONORG_SCHEMES, + RDM_SEARCH, always_valid) +from invenio_rdm_records.proxies import \ + current_rdm_records_service as record_service +from invenio_rdm_records.services.components import DefaultRecordsComponents +from invenio_records_resources.services.records.queryparser import ( + QueryParser, SearchFieldTransformer) +from invenio_vocabularies.config import \ + VOCABULARIES_NAMES_SCHEMES as DEFAULT_VOCABULARIES_NAMES_SCHEMES def _(x): # needed to avoid start time failure with lazy strings @@ -391,6 +386,24 @@ RDM_SEARCH = { "projects": "custom_fields.cern\:projects", "study": "custom_fields.cern\:studies", "studies": "custom_fields.cern\:studies", + "identifier": "metadata.identifiers.identifier", + "cdsrn": "metadata.identifiers.identifier", + "report_number": "metadata.identifiers.identifier", + "inspire": "metadata.related_identifiers.identifier", + "inis": "metadata.related_identifiers.identifier", + "indico": "metadata.related_identifiers.identifier", + "cds": "metadata.identifiers.identifier", + "aleph": "metadata.identifiers.identifier", + "doi": "pids.doi.identifier", + "language": "metadata.languages.id", + "languages": "metadata.languages.id", + "title": "metadata.title", + "publisher": "metadata.publisher", + "description": "metadata.description", + "publication_date": "metadata.publication_date", + "creator": "metadata.creators.person_or_org.name", + "creators": "metadata.creators.person_or_org.name", + }, tree_transformer_cls=SearchFieldTransformer, ), diff --git a/site/tests/test_views.py b/site/tests/test_views.py index 083d9898..480a61bf 100644 --- a/site/tests/test_views.py +++ b/site/tests/test_views.py @@ -6,10 +6,55 @@ # the terms of the GPL-2.0 License; see LICENSE file for more details. """Views tests.""" + +from copy import deepcopy + import pytest +from flask import current_app +from invenio_access.permissions import system_identity +from invenio_rdm_records.config import RDM_SEARCH as BASE_RDM_SEARCH +from invenio_rdm_records.proxies import current_rdm_records_service as service +from invenio_records_resources.services.records.queryparser import ( + QueryParser, + SearchFieldTransformer, +) from cds_rdm.views import get_linked_records_search_query +SEARCH_ALIAS_MAPPING = { + "identifier": "metadata.identifiers.identifier", + "cdsrn": "metadata.identifiers.identifier", + "report_number": "metadata.identifiers.identifier", + "inspire": "metadata.related_identifiers.identifier", + "inis": "metadata.related_identifiers.identifier", + "indico": "metadata.related_identifiers.identifier", + "cds": "metadata.identifiers.identifier", + "aleph": "metadata.identifiers.identifier", + "doi": "pids.doi.identifier", + "language": "metadata.languages.id", + "languages": "metadata.languages.id", + "title": "metadata.title", + "publisher": "metadata.publisher", + "description": "metadata.description", + "publication_date": "metadata.publication_date", + "creator": "metadata.creators.person_or_org.name", + "creators": "metadata.creators.person_or_org.name", +} + + +@pytest.fixture(scope="module") +def app_config(app_config): + """Test-local app config for search alias tests.""" + app_config["SQLALCHEMY_ENGINE_OPTIONS"] = {"connect_args": {}} + app_config["RDM_SEARCH"] = { + **deepcopy(BASE_RDM_SEARCH), + "query_parser_cls": QueryParser.factory( + mapping=SEARCH_ALIAS_MAPPING, + tree_transformer_cls=SearchFieldTransformer, + ), + } + return app_config + class MockRecord: """Mock record object for testing.""" @@ -277,4 +322,272 @@ def test_with_non_cds_identifiers(self): assert 'metadata.related_identifiers.scheme:cds AND metadata.related_identifiers.identifier:"abc12-def34"' in query assert 'metadata.related_identifiers.scheme:cds AND metadata.related_identifiers.identifier:"11111"' in query - assert 'metadata.related_identifiers.scheme:cds AND metadata.related_identifiers.identifier:"22222"' in query \ No newline at end of file + assert 'metadata.related_identifiers.scheme:cds AND metadata.related_identifiers.identifier:"22222"' in query + + def _create_and_publish_record( + self, + service, + identity, + minimal_restricted_record, + identifiers=None, + related_identifiers=None, + pids=None, + metadata_updates=None, + ): + """Create, publish, and refresh a record for search tests.""" + new_data = deepcopy(minimal_restricted_record) + + if pids: + new_data["pids"] = pids + + draft = service.create(identity, new_data) + + if identifiers is not None: + draft.data["metadata"]["identifiers"] = identifiers + + if related_identifiers is not None: + draft.data["metadata"]["related_identifiers"] = related_identifiers + + if metadata_updates: + draft.data["metadata"].update(metadata_updates) + + if ( + identifiers is not None + or related_identifiers is not None + or metadata_updates is not None + ): + draft = service.update_draft(identity, id_=draft.id, data=draft.data) + + record = service.publish(identity, id_=draft.id) + service.indexer.refresh() + return record + + + + @pytest.mark.parametrize( + ("alias_query", "expected_parsed_fragment", "create_kwargs"), + [ + ( + 'identifier:"IDENTIFIER-TEST-001"', + 'metadata.identifiers.identifier:"IDENTIFIER-TEST-001"', + { + "identifiers": [ + {"scheme": "cdsrn", "identifier": "IDENTIFIER-TEST-001"}, + ] + }, + ), + ( + 'cdsrn:"CERN-REPORT-001"', + 'metadata.identifiers.identifier:"CERN-REPORT-001"', + { + "identifiers": [ + {"scheme": "cdsrn", "identifier": "CERN-REPORT-001"}, + ] + }, + ), + ( + 'report_number:"CERN-REPORT-002"', + 'metadata.identifiers.identifier:"CERN-REPORT-002"', + { + "identifiers": [ + {"scheme": "cdsrn", "identifier": "CERN-REPORT-002"}, + ] + }, + ), + ( + 'inspire:"33333"', + 'metadata.related_identifiers.identifier:"33333"', + { + "related_identifiers": [ + { + "scheme": "inspire", + "identifier": "33333", + "relation_type": {"id": "isvariantformof"}, + "resource_type": {"id": "publication-other"}, + } + ] + }, + ), + ( + 'inis:"12345"', + 'metadata.related_identifiers.identifier:"12345"', + { + "related_identifiers": [ + { + "scheme": "inis", + "identifier": "12345", + "relation_type": {"id": "isvariantformof"}, + "resource_type": {"id": "publication-other"}, + } + ] + }, + ), + ( + 'indico:"12345"', + 'metadata.related_identifiers.identifier:"12345"', + { + "related_identifiers": [ + { + "scheme": "indico", + "identifier": "12345", + "relation_type": {"id": "isvariantformof"}, + "resource_type": {"id": "publication-other"}, + } + ] + }, + ), + ( + 'cds:"2633033"', + 'metadata.identifiers.identifier:"2633033"', + { + "identifiers": [ + {"scheme": "cds", "identifier": "2633033"}, + ] + }, + ), + ( + 'aleph:"000181238CER"', + 'metadata.identifiers.identifier:"000181238CER"', + { + "identifiers": [ + {"scheme": "aleph", "identifier": "000181238CER"}, + ] + }, + ), + ( + 'doi:"10.1234/test-doi-001"', + 'pids.doi.identifier', + { + "pids": { + "doi": { + "identifier": "10.1234/test-doi-001", + "provider": "external", + } + } + }, + ), + ( + 'language:"eng"', + 'metadata.languages.id:"eng"', + { + "metadata_updates": { + "languages": [{"id": "eng"}, {"id": "spa"}] + } + }, + ), + ( + 'languages:"spa"', + 'metadata.languages.id:"spa"', + { + "metadata_updates": { + "languages": [{"id": "eng"}, {"id": "spa"}] + } + }, + ), + ( + 'title:"French Courses"', + 'metadata.title:"French Courses"', + { + "metadata_updates": { + "title": "French Courses", + } + }, + ), + ( + 'publisher:"CERN"', + 'metadata.publisher:"CERN"', + { + "metadata_updates": { + "publisher": "CERN", + } + }, + ), + ( + 'description:"FrenchCourseAliasTest"', + 'metadata.description:"FrenchCourseAliasTest"', + { + "metadata_updates": { + "description": "FrenchCourseAliasTest", + } + }, + ), + ( + 'publication_date:"2012-11-28"', + 'metadata.publication_date:"2012-11-28"', + { + "metadata_updates": { + "publication_date": "2012-11-28", + } + }, + ), + ( + 'creator:"CERN"', + 'metadata.creators.person_or_org.name:"CERN"', + { + "metadata_updates": { + "creators": [ + { + "person_or_org": { + "type": "organizational", + "name": "CERN", + } + } + ] + } + }, + ), + ( + 'creators:"CERN"', + 'metadata.creators.person_or_org.name:"CERN"', + { + "metadata_updates": { + "creators": [ + { + "person_or_org": { + "type": "organizational", + "name": "CERN", + } + } + ] + } + }, + ), + ], + ) + def test_search_alias_returns_record( + self, + db, + location, + resource_type_v, + relation_type_v, + languages_v, + minimal_restricted_record, + search, + search_clear, + alias_query, + expected_parsed_fragment, + create_kwargs, + ): + record = self._create_and_publish_record( + service, + system_identity, + minimal_restricted_record, + identifiers=create_kwargs.get("identifiers"), + related_identifiers=create_kwargs.get("related_identifiers"), + pids=create_kwargs.get("pids"), + metadata_updates=create_kwargs.get("metadata_updates"), + ) + + parser = current_app.config["RDM_SEARCH"]["query_parser_cls"]() + parsed = str(parser.parse(alias_query)) + + assert expected_parsed_fragment in parsed + + result = service.search( + system_identity, + params={"q": alias_query}, + ) + + assert result.total == 1 + hit_ids = [hit["id"] for hit in result.hits] + assert record.id in hit_ids \ No newline at end of file