Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 81 additions & 11 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ For the full list of settings and their values, see
https://inveniordm.docs.cern.ch/reference/configuration/.
"""

import os

from copy import deepcopy
from datetime import datetime, timedelta

Expand All @@ -26,8 +24,10 @@ from cds_rdm.files import storage_factory
from cds_rdm.inspire_harvester.reader import InspireHTTPReader
from cds_rdm.inspire_harvester.transformer import InspireJsonTransformer
from cds_rdm.inspire_harvester.writer import InspireWriter
from cds_rdm.vcs.handlers import gitlab_account_info_serializer
from invenio_app_rdm.config import STATS_EVENTS as _APP_RDM_STATS_EVENTS, \
STATS_AGGREGATIONS as _APP_RDM_STATS_AGGREGATIONS, APP_RDM_ROUTES
from invenio_app_rdm.config import NOTIFICATIONS_BUILDERS
from invenio_previewer.config import PREVIEWER_PREFERENCE as DEFAULT_PREVIEWER_PREFERENCE
from invenio_rdm_records.checks import requests as checks_requests
from invenio_rdm_records.config import (always_valid, RDM_RECORDS_PERSONORG_SCHEMES,
Expand All @@ -39,11 +39,18 @@ from invenio_records_resources.services.records.queryparser import (
)
from invenio_rdm_records.proxies import current_rdm_records_service as record_service
from invenio_rdm_records.services.components import DefaultRecordsComponents
from invenio_rdm_records.services.components.vcs import VCSComponent
from invenio_rdm_records.services.vcs.release import RDMVCSRelease
from invenio_rdm_records.config import (
RDM_PERSISTENT_IDENTIFIERS,
RDM_PARENT_PERSISTENT_IDENTIFIERS,
RDM_SEARCH
)
from invenio_rdm_records.notifications.vcs import (
RepositoryReleaseCommunityRequiredNotificationBuilder,
RepositoryReleaseCommunitySubmittedNotificationBuilder,
RepositoryReleaseFailureNotificationBuilder,
RepositoryReleaseSuccessNotificationBuilder)
from invenio_preservation_sync.utils import preservation_info_render
from invenio_cern_sync.users.profile import CERNUserProfileSchema
from invenio_oauthclient.views.client import auto_redirect_login
Expand All @@ -52,8 +59,7 @@ from invenio_cern_sync.sso import cern_remote_app_name, cern_keycloak
from invenio_vocabularies.config import \
VOCABULARIES_NAMES_SCHEMES as DEFAULT_VOCABULARIES_NAMES_SCHEMES

from invenio_app_rdm.config import \
VOCABULARIES_DATASTREAM_READERS as DEFAULT_VOCABULARIES_DATASTREAM_READERS
from invenio_app_rdm.config import VOCABULARIES_DATASTREAM_READERS as DEFAULT_VOCABULARIES_DATASTREAM_READERS
from invenio_app_rdm.config import \
VOCABULARIES_DATASTREAM_TRANSFORMERS as DEFAULT_VOCABULARIES_DATASTREAM_TRANSFORMERS
from invenio_app_rdm.config import \
Expand All @@ -64,13 +70,16 @@ from cds_rdm.components import SubjectsValidationComponent
from cds_rdm.components import MintAlternateIdentifierComponent
from cds_rdm.pids import validate_optional_doi_transitions
from cds_rdm.views import frontpage_view_function, inspire_link_render
from invenio_vcs.contrib.github import GitHubProviderFactory
from invenio_vcs.contrib.gitlab import GitLabProviderFactory

from invenio_app_rdm.config import APP_RDM_RECORD_EXPORTERS as RECORD_EXPORTERS


def _(x):
    """Return ``x`` untouched.

    Identity function bound to the name ``_``; needed to avoid start time
    failure with lazy strings.
    """
    return x


# Flask
# =====
# See https://flask.palletsprojects.com/en/1.1.x/config/
Expand Down Expand Up @@ -225,10 +234,58 @@ DATACITE_PREFIX = "10.17181"
DATACITE_TEST_MODE = True
DATACITE_DATACENTER_SYMBOL = ""

# Invenio VCS
# ===========

# Actual values set via env vars.
GITLAB_APP_CREDENTIALS = {
"consumer_key": "CHANGEME",
"consumer_secret": "CHANGEME",
}

vcs_gitlab = GitLabProviderFactory(
id="gitlab",
name="GitLab",
base_url="https://gitlab.cern.ch",
# These options can be customised with the `VCS_PROVIDER_CONFIG_DICT` option, e.g. by passing it
# as an env var on OpenShift.
webhook_receiver_url="https://CHANGEME.cern.ch/api/hooks/receivers/gitlab/events/?access_token={token}",
config={"shared_validation_token": "CHANGEME"},
)

vcs_gitlab_oauth_remote_config = vcs_gitlab.oauth_remote_config
vcs_gitlab_oauth_remote_config["signup_handler"]["info_serializer"] = (
# Custom info serializer to ensure the GitLab user has the same CERN SSO ID
# as the current Invenio/CDS user
gitlab_account_info_serializer(
vcs_gitlab_oauth_remote_config["signup_handler"]["info_serializer"]
)
)
# Allow connecting GitLab accounts but not logging in with them
vcs_gitlab_oauth_remote_config["link_only"] = True

VCS_PROVIDERS = [vcs_gitlab]
VCS_PROVIDER_CONFIG_DICT = {
vcs_gitlab.id: {},
}
# Override the template so we can customise the explanatory text to be more CDS specific
VCS_TEMPLATE_INDEX = "cds_rdm/vcs/index.html"
VCS_RELEASE_CLASS = RDMVCSRelease

# Authentication - Invenio-Accounts and Invenio-OAuthclient
# =========================================================
# See: https://inveniordm.docs.cern.ch/customize/authentication/

OAUTHCLIENT_REMOTE_APPS = {
cern_remote_app_name: cern_keycloak.remote_app,
# The OAuth app must have the same ID as the VCS provider
vcs_gitlab.id: vcs_gitlab_oauth_remote_config,
}

OAUTHCLIENT_REST_REMOTE_APPS = {
vcs_gitlab.id: vcs_gitlab_oauth_remote_config,
}

# Invenio-Accounts
# ================
# See https://github.com/inveniosoftware/invenio-accounts/blob/master/invenio_accounts/config.py
Expand Down Expand Up @@ -266,13 +323,10 @@ SECURITY_SEND_REGISTER_EMAIL = False

# Invenio-CERN-Sync/CERN SSO
# ==========================
OAUTHCLIENT_REMOTE_APPS = {
cern_remote_app_name: cern_keycloak.remote_app,
}

CERN_APP_CREDENTIALS = {
"consumer_key": "CHANGE ME",
"consumer_secret": "CHANGE ME",
"consumer_key": "CHANGEME",
"consumer_secret": "CHANGEME",
}
CERN_SYNC_KEYCLOAK_BASE_URL = "https://auth.cern.ch/"
CERN_SYNC_AUTHZ_BASE_URL = "https://authorization-service-api.web.cern.ch/"
Expand Down Expand Up @@ -491,6 +545,7 @@ RDM_RECORDS_SERVICE_COMPONENTS = [
CDSResourcePublication,
ClcSyncComponent,
MintAlternateIdentifierComponent,
VCSComponent,
]

### Do not require DOIs for record and parent
Expand Down Expand Up @@ -601,6 +656,22 @@ _APP_RDM_STATS_AGGREGATIONS["record-view-agg"]["params"]["index_interval"] = "ye
STATS_EVENTS = _APP_RDM_STATS_EVENTS
STATS_AGGREGATIONS = _APP_RDM_STATS_AGGREGATIONS


# Invenio Notifications
# =====================

NOTIFICATIONS_BUILDERS = {
**NOTIFICATIONS_BUILDERS,
RepositoryReleaseSuccessNotificationBuilder.type: RepositoryReleaseSuccessNotificationBuilder,
RepositoryReleaseFailureNotificationBuilder.type: RepositoryReleaseFailureNotificationBuilder,
RepositoryReleaseCommunityRequiredNotificationBuilder.type: RepositoryReleaseCommunityRequiredNotificationBuilder,
RepositoryReleaseCommunitySubmittedNotificationBuilder.type: RepositoryReleaseCommunitySubmittedNotificationBuilder,
}
NOTIFICATIONS_GROUP_EMAIL_DOMAIN = "cern.ch"

# Miscellaneous
# =============

APP_RDM_RECORD_LANDING_PAGE_TEMPLATE = "cds_rdm/records/detail.html"

RDM_DETAIL_SIDE_BAR_MANAGE_ATTRIBUTES_EXTENSION_TEMPLATE = "cds_rdm/records/manage_menu.html"
Expand Down Expand Up @@ -643,7 +714,6 @@ APP_RDM_DETAIL_SIDE_BAR_TEMPLATES = [
"invenio_app_rdm/records/details/side_bar/technical_metadata.html",
]
AUDIT_LOGS_ENABLED = True
NOTIFICATIONS_GROUP_EMAIL_DOMAIN = "cern.ch"
RDM_ALLOW_OWNERS_REMOVE_COMMUNITY_FROM_RECORD = False

AUDIT_LOGS_PERMISSION_POLICY=CDSAuditLogPermissionPolicy
AUDIT_LOGS_PERMISSION_POLICY = CDSAuditLogPermissionPolicy
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ authors = [
license = "GPL-2.0"
requires-python = ">=3.9"
dependencies = [
"invenio-app-rdm[opensearch2]>=14.0.0b8.dev0",
"invenio-app-rdm[opensearch2]",
"invenio-cern-sync",
"invenio-preservation-sync==0.3.0",
"invenio-vcs",
"cds-rdm",
"lxml>=4.6.5",
"s3fs>=2024.6.1", # extra of invenio-vocabularies for ORCiD dump
Expand All @@ -21,6 +22,13 @@ dependencies = [
[tool.uv.sources]
cds-rdm = { workspace = true }
invenio-cern-sync = { git = "https://github.com/cerndocumentserver/invenio-cern-sync", rev = "v0.6.0" }
invenio-vcs = { git = "https://github.com/inveniosoftware/invenio-vcs", branch = "master" }
invenio-app-rdm = { git = "https://github.com/palkerecsenyi/invenio-app-rdm", branch = "vcs" }

[tool.uv]
override-dependencies = [
"invenio-rdm-records @ git+https://github.com/palkerecsenyi/invenio-rdm-records.git@vcs-combined",
]

[tool.uv.workspace]
members = [
Expand Down
38 changes: 38 additions & 0 deletions site/cds_rdm/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,41 @@ class RequestError(GroupSyncingError):
def __init__(self, url, error_details):
"""Initialise error."""
super().__init__(_(f"Request error on {url}.\n Error details: {error_details}"))


class KeycloakIdentityNotFoundError(Exception):
    """The user is missing the Keycloak (CERN SSO) OAuth identity."""

    def __init__(self, user_id: str) -> None:
        """Initialise error.

        :param user_id: ID of the CDS user for whom no CERN SSO identity was found.
        """
        # Interpolate *after* the gettext call. An f-string is evaluated before
        # ``_`` sees it, so the lookup key would already contain the user ID and
        # could never match a catalog entry (nor be extracted as a msgid).
        super().__init__(
            _("Could not find CERN SSO identity for user %(user_id)s")
            % {"user_id": user_id}
        )


class GitLabIdentityNotFoundError(Exception):
    """The GitLab user has no OpenID or Kerberos identity.

    Without one of those identities the GitLab account cannot be matched to
    the signed-in CDS user.
    """

    def __init__(self, user_id: str) -> None:
        """Initialise error.

        :param user_id: ID of the GitLab user that lacks a usable identity.
        """
        # Interpolate *after* the gettext call. An f-string is evaluated before
        # ``_`` sees it, so the lookup key would already contain the user ID and
        # could never match a catalog entry (nor be extracted as a msgid).
        super().__init__(
            _(
                "GitLab user %(user_id)s did not have CERN OpenID or Kerberos "
                "identity (LDAP-only accounts are not supported)"
            )
            % {"user_id": user_id}
        )


class KeycloakGitLabMismatchError(Exception):
    """The GitLab user has a different Keycloak ID to the signed in CDS user."""

    def __init__(
        self,
        gitlab_user_id: str,
        gl_cern_sso_id: str,
        cds_user_id: str,
        cds_cern_sso_id: str,
    ) -> None:
        """Initialise error.

        :param gitlab_user_id: ID of the GitLab user being connected.
        :param gl_cern_sso_id: CERN SSO identity found on the GitLab account.
        :param cds_user_id: ID of the currently signed-in CDS user.
        :param cds_cern_sso_id: CERN SSO identity of the CDS user.
        """
        # Interpolate *after* the gettext call. An f-string is evaluated before
        # ``_`` sees it, so the lookup key would already contain the IDs and
        # could never match a catalog entry (nor be extracted as a msgid).
        super().__init__(
            _(
                "GitLab user %(gitlab_user_id)s has a different CERN SSO identity "
                "(%(gl_cern_sso_id)s) to currently signed-in CDS user "
                "%(cds_user_id)s (%(cds_cern_sso_id)s)"
            )
            % {
                "gitlab_user_id": gitlab_user_id,
                "gl_cern_sso_id": gl_cern_sso_id,
                "cds_user_id": cds_user_id,
                "cds_cern_sso_id": cds_cern_sso_id,
            }
        )
55 changes: 55 additions & 0 deletions site/cds_rdm/templates/semantic-ui/cds_rdm/vcs/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{#
Copyright (C) 2026 CERN.

CDS-RDM is free software; you can redistribute it and/or modify it
under the terms of the GPL-2.0 License; see LICENSE file for more details.
#}

{%- extends "invenio_vcs/rdm-index.html" %}

{%- block get_started %}
<div class="ui grid">
<div class="sixteen wide centered column">
<h3 class="ui large header mt-10">
<i class="{{ vocabulary["icon"] }} icon" aria-hidden="true"></i>{{ _("Get started") }}
</h3>
</div>

<div class="three column stackable tablet-mobile row">
<div class="column">
<h4 class="ui medium header">1 {{ _("Flip the switch") }}</h4>
<div class="ui divider"></div>
<p>
{{ _('Select the repository you want to preserve, and toggle
the switch below to turn on automatic preservation of your software.') }}
</p>
</div>

<div class="column">
<h4 class="ui medium header">2 {{ _("Create a release") }}</h4>
<div class="ui divider"></div>
<p>
{{
_(
'Go to %(name)s and <a href="%(release_docs_link)s" target="_blank">create a release <i class="small icon external" aria-hidden="true"></i></a>. %(site_name)s will automatically download a .zip-ball of each new release and publish a record.',
name=vocabulary["name"],
site_name=config.THEME_SITENAME | default('System'),
release_docs_link=vocabulary["release_docs_link"]
)
}}
</p>
</div>

<div class="column">
<h4 class="ui medium header">3 {{ _("Customize the metadata") }}</h4>
<div class="ui divider"></div>
<p>
{{ _(
"On your first release, you'll need to manually assign a community to your %(repository_name)s by following the link in your email. You can also change other metadata or request a DOI.",
repository_name=vocabulary["repository_name"]
) }}
</p>
</div>
</div>
</div>
{%- endblock %}
79 changes: 79 additions & 0 deletions site/cds_rdm/vcs/handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it
# under the terms of the GPL-2.0 License; see LICENSE file for more details.

"""GitLab OAuth handler override."""

from __future__ import annotations

from flask_login import current_user
from invenio_cern_sync.sso import cern_remote_app_name
from invenio_oauthclient import current_oauthclient

from cds_rdm.errors import (
GitLabIdentityNotFoundError,
KeycloakGitLabMismatchError,
KeycloakIdentityNotFoundError,
)


def gitlab_account_info_serializer(original_serializer):
    """Wrap an OAuthClient ``info_serializer`` with a CERN identity check for GitLab.

    This ensures that users who are logged into CDS with CERN Keycloak OAuth
    are also logged into GitLab with the same CERN Keycloak account to prevent
    an account mismatch that could cause bugs or security issues.

    :param original_serializer: Serializer to delegate to once the check has
        passed; it is called with exactly the arguments the wrapper received.
    :returns: A callable with the signature ``(remote, resp, user_info, **kwargs)``.
    """

    def inner(remote, resp, user_info, **kwargs):
        """Validate the GitLab identity, then run the original serializer.

        :raises KeycloakIdentityNotFoundError: The signed-in CDS user has no
            ``keycloak_id`` stored on a CERN remote account.
        :raises GitLabIdentityNotFoundError: The GitLab user has no usable
            ``openid_connect`` or ``kerberos`` identity.
        :raises KeycloakGitLabMismatchError: The two identities differ.
        """
        # RemoteAccount only contains the application's OAuth Client ID so we
        # need to look up the CERN app's client ID to find the right account.
        cern_client_id = current_oauthclient.oauth.remote_apps.get(
            cern_remote_app_name
        ).consumer_key

        user_keycloak_id: str | None = None
        for remote_account in current_user.remote_accounts:
            if remote_account.client_id == cern_client_id:
                # This is the user's ID as stored in Keycloak, which is equivalent to the
                # CERN username of the person or their secondary account.
                user_keycloak_id = remote_account.extra_data.get("keycloak_id")

        # The error classes declare their ID arguments as ``str``.
        cds_user_id = str(current_user.id)

        if user_keycloak_id is None:
            # All non-administrative users are expected to have one.
            raise KeycloakIdentityNotFoundError(cds_user_id)

        gl_user_id = str(user_info["id"])

        # A missing or null "identities" entry is treated as "no identities" so
        # that the domain error below is raised instead of a bare
        # KeyError/TypeError.
        try:
            gl_identities = user_info["identities"] or []
        except KeyError:
            gl_identities = []

        gl_extern_uid: str | None = None
        for identity in gl_identities:
            if not isinstance(identity, dict):
                # Malformed entry: nothing to inspect.
                continue
            provider = identity.get("provider")
            extern_uid = identity.get("extern_uid")
            if not extern_uid:
                # An identity without an external UID cannot be compared.
                continue

            # On CERN GitLab, you have one GitLab account for each Keycloak account, with secondary accounts
            # being separate GitLab accounts. You can sign in to one account with either openid_connect or
            # kerberos, with the latter being used e.g. on CERN-provisioned computers.
            # If a user has only ever signed in on a CERN device they might only have the kerberos method
            # available, so we need to ensure we accept it.
            # NOTE(review): a reviewer questioned whether a kerberos-only GitLab
            # identity can really exist without a login through the GitLab
            # interface - confirm before relying on this branch.
            # NOTE(review): when both identities are present the last one listed
            # wins; they are presumably identical for a given account - verify.
            if provider == "openid_connect":
                gl_extern_uid = extern_uid
            elif provider == "kerberos":
                # {'provider': 'kerberos', 'extern_uid': 'username@CERN.CH', 'saml_provider_id': None}
                gl_extern_uid = extern_uid.removesuffix("@CERN.CH")

        if gl_extern_uid is None:
            raise GitLabIdentityNotFoundError(gl_user_id)

        if user_keycloak_id != gl_extern_uid:
            raise KeycloakGitLabMismatchError(
                gl_user_id, gl_extern_uid, cds_user_id, user_keycloak_id
            )

        # Continue with the rest of the account info serializer chain.
        return original_serializer(remote, resp, user_info, **kwargs)

    return inner
Loading