Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 57 additions & 44 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,64 +8,59 @@ https://inveniordm.docs.cern.ch/reference/configuration/.
"""

import os

from copy import deepcopy
from datetime import datetime, timedelta

from cds_rdm import schemes
from cds_rdm.clc_sync.services.components import ClcSyncComponent
from cds_rdm.components import (CDSResourcePublication,
MintAlternateIdentifierComponent,
SubjectsValidationComponent)
from cds_rdm.custom_fields import CUSTOM_FIELDS, CUSTOM_FIELDS_UI, NAMESPACES
from cds_rdm.permissions import (
CDSCommunitiesPermissionPolicy,
CDSRDMRecordPermissionPolicy,
CDSRequestsPermissionPolicy,
CDSRDMPreservationSyncPermissionPolicy,
lock_edit_record_published_files,
CDSAuditLogPermissionPolicy
)
from cds_rdm.files import storage_factory
from cds_rdm.inspire_harvester.reader import InspireHTTPReader
from cds_rdm.inspire_harvester.transformer import InspireJsonTransformer
from cds_rdm.inspire_harvester.writer import InspireWriter
from invenio_app_rdm.config import STATS_EVENTS as _APP_RDM_STATS_EVENTS, \
STATS_AGGREGATIONS as _APP_RDM_STATS_AGGREGATIONS, APP_RDM_ROUTES
from invenio_previewer.config import PREVIEWER_PREFERENCE as DEFAULT_PREVIEWER_PREFERENCE
from invenio_rdm_records.checks import requests as checks_requests
from invenio_rdm_records.config import (always_valid, RDM_RECORDS_PERSONORG_SCHEMES,
RDM_RECORDS_IDENTIFIERS_SCHEMES as RDM_RECORDS_RELATED_IDENTIFIERS_SCHEMES
)
from invenio_records_resources.services.records.queryparser import (
QueryParser,
SearchFieldTransformer,
)
from invenio_rdm_records.proxies import current_rdm_records_service as record_service
from invenio_rdm_records.services.components import DefaultRecordsComponents
from invenio_rdm_records.config import (
RDM_PERSISTENT_IDENTIFIERS,
RDM_PARENT_PERSISTENT_IDENTIFIERS,
RDM_SEARCH
)
from invenio_preservation_sync.utils import preservation_info_render
from invenio_cern_sync.users.profile import CERNUserProfileSchema
from invenio_oauthclient.views.client import auto_redirect_login
from invenio_cern_sync.sso import cern_remote_app_name, cern_keycloak

from invenio_vocabularies.config import \
VOCABULARIES_NAMES_SCHEMES as DEFAULT_VOCABULARIES_NAMES_SCHEMES

from cds_rdm.permissions import (CDSAuditLogPermissionPolicy,
CDSCommunitiesPermissionPolicy,
CDSRDMPreservationSyncPermissionPolicy,
CDSRDMRecordPermissionPolicy,
CDSRequestsPermissionPolicy,
lock_edit_record_published_files)
from cds_rdm.pids import validate_optional_doi_transitions
from cds_rdm.views import frontpage_view_function, inspire_link_render
from invenio_app_rdm.config import APP_RDM_RECORD_EXPORTERS as RECORD_EXPORTERS
from invenio_app_rdm.config import APP_RDM_ROUTES
from invenio_app_rdm.config import \
STATS_AGGREGATIONS as _APP_RDM_STATS_AGGREGATIONS
from invenio_app_rdm.config import STATS_EVENTS as _APP_RDM_STATS_EVENTS
from invenio_app_rdm.config import \
VOCABULARIES_DATASTREAM_READERS as DEFAULT_VOCABULARIES_DATASTREAM_READERS
from invenio_app_rdm.config import \
VOCABULARIES_DATASTREAM_TRANSFORMERS as DEFAULT_VOCABULARIES_DATASTREAM_TRANSFORMERS
VOCABULARIES_DATASTREAM_TRANSFORMERS as \
DEFAULT_VOCABULARIES_DATASTREAM_TRANSFORMERS
from invenio_app_rdm.config import \
VOCABULARIES_DATASTREAM_WRITERS as DEFAULT_VOCABULARIES_DATASTREAM_WRITERS
from cds_rdm.clc_sync.services.components import ClcSyncComponent
from cds_rdm.components import CDSResourcePublication
from cds_rdm.components import SubjectsValidationComponent
from cds_rdm.components import MintAlternateIdentifierComponent
from cds_rdm.pids import validate_optional_doi_transitions
from cds_rdm.views import frontpage_view_function, inspire_link_render

from invenio_app_rdm.config import APP_RDM_RECORD_EXPORTERS as RECORD_EXPORTERS
from invenio_cern_sync.sso import cern_keycloak, cern_remote_app_name
from invenio_cern_sync.users.profile import CERNUserProfileSchema
from invenio_oauthclient.views.client import auto_redirect_login
from invenio_preservation_sync.utils import preservation_info_render
from invenio_previewer.config import \
PREVIEWER_PREFERENCE as DEFAULT_PREVIEWER_PREFERENCE
from invenio_rdm_records.checks import requests as checks_requests
from invenio_rdm_records.config import (RDM_PARENT_PERSISTENT_IDENTIFIERS,
RDM_PERSISTENT_IDENTIFIERS)
from invenio_rdm_records.config import \
RDM_RECORDS_IDENTIFIERS_SCHEMES as RDM_RECORDS_RELATED_IDENTIFIERS_SCHEMES
from invenio_rdm_records.config import (RDM_RECORDS_PERSONORG_SCHEMES,
RDM_SEARCH, always_valid)
from invenio_rdm_records.proxies import \
current_rdm_records_service as record_service
from invenio_rdm_records.services.components import DefaultRecordsComponents
from invenio_records_resources.services.records.queryparser import (
QueryParser, SearchFieldTransformer)
from invenio_vocabularies.config import \
VOCABULARIES_NAMES_SCHEMES as DEFAULT_VOCABULARIES_NAMES_SCHEMES


def _(x): # needed to avoid start time failure with lazy strings
Expand Down Expand Up @@ -391,6 +386,24 @@ RDM_SEARCH = {
"projects": "custom_fields.cern\:projects",
"study": "custom_fields.cern\:studies",
"studies": "custom_fields.cern\:studies",
"identifier": "metadata.identifiers.identifier",
"cdsrn": "metadata.identifiers.identifier",
"report_number": "metadata.identifiers.identifier",
"inspire": "metadata.related_identifiers.identifier",
"inis": "metadata.related_identifiers.identifier",
"indico": "metadata.related_identifiers.identifier",
"cds": "metadata.identifiers.identifier",
Comment on lines +392 to +395
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both cds (legacy) and inspire identifier values are integers. How can we ensure that a search for cds:12345 returns only records whose cds identifier is 12345, and not also records whose inspire identifier is 12345?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is actually something I tried to handle earlier with an AND clause to enforce both the scheme and the identifier value.

The idea was that something like cds:12345 should translate to “find an identifier where scheme = cds AND value = 12345”, so we don’t get cross-matches with other identifier types.

However, the issue was in how the transformer builds the query. The AND clause was effectively applied across the whole record instead of within the same identifier entry. So it behaved like:

“record has some identifier with scheme = cds AND record has some identifier with value = 12345”

instead of enforcing both conditions on the same identifier object.

Because of that, a record with cds:263303 and inspire:12345 could still match a query like inspire:263303: the scheme condition (inspire) is satisfied by the inspire:12345 identifier, while the value condition (263303) is satisfied by the cds:263303 identifier.

So the issue wasn’t really with the idea of restricting by scheme, but with how the transformer applies those conditions. Right now the mapping only targets the value, so we don’t yet strictly guarantee scheme-level isolation.

"aleph": "metadata.identifiers.identifier",
"doi": "pids.doi.identifier",
"language": "metadata.languages.id",
"languages": "metadata.languages.id",
"title": "metadata.title",
"publisher": "metadata.publisher",
"description": "metadata.description",
"publication_date": "metadata.publication_date",
"creator": "metadata.creators.person_or_org.name",
"creators": "metadata.creators.person_or_org.name",

},
tree_transformer_cls=SearchFieldTransformer,
),
Expand Down
Loading
Loading