diff --git a/docs/customization/encrypted_model_fields.rst b/docs/customization/encrypted_model_fields.rst index cae7263bd..71328ea2e 100644 --- a/docs/customization/encrypted_model_fields.rst +++ b/docs/customization/encrypted_model_fields.rst @@ -80,12 +80,13 @@ As such, the model *should not define a* ``user`` *property of its own*. Some Explanations ----------------- -EncryptableModelMixin (`source `__) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The User's data is encrypted using (among other things) their password (i.e the -password they use to login to your TOM). When the User changes their password, -their encrypted data re-encrypted accordingly. The ``EncryptableModelMixin`` adds -method for this to your otherwise normal Django model. +EncryptableModelMixin +~~~~~~~~~~~~~~~~~~~~~ +An abstract Django model mixin that provides a standardized ``user`` OneToOneField. +Any model that stores encrypted data via ``EncryptedProperty`` should inherit from +this mixin. The ``user`` field ties encrypted data to its owner, allowing the +helper functions in ``session_utils`` to look up the user's Data Encryption Key +(DEK) and build the cipher needed for encryption and decryption. EncryptedProperty (`source `__) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tom_base/settings.py b/tom_base/settings.py index 487fdda77..defea960d 100644 --- a/tom_base/settings.py +++ b/tom_base/settings.py @@ -1,13 +1,17 @@ -""" -Django settings for tom_base project. +"""Django settings for the tom_base repository itself. + +THIS IS NOT YOUR TOM's `settings.py`. -Generated by 'django-admin startproject' using Django 2.0.6. +This file is used when running commands directly from the tom_base repo — +for example, ``python manage.py test`` within the tom_base repo. 
It is NOT +the settings file that individual TOM projects use. -For more information on this file, see -https://docs.djangoproject.com/en/2.0/topics/settings/ +Each TOM project gets its own standalone ``settings.py``, generated by +``tom_setup`` from the template ``tom_setup/templates/tom_setup/settings.tmpl``. +That project-level settings file is what a TOM runs in production. -For the full list of settings and their values, see -https://docs.djangoproject.com/en/2.0/ref/settings/ +This file exists only so that the tom_base repo has a working +Django configuration for development, testing, and CI. """ import logging.config import os @@ -27,6 +31,13 @@ # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True +# Encryption key for protecting sensitive user data (API keys, credentials) at rest. +# This is a Fernet key — a 44-character URL-safe base64 string encoding 32 random bytes. +# Treat this like SECRET_KEY. See the TOM Toolkit encryption documentation. +TOMTOOLKIT_DEK_ENCRYPTION_KEY = os.getenv( + 'TOMTOOLKIT_DEK_ENCRYPTION_KEY', + 'UlUYyKsGzQVwjpTbvhtgCihKaj07H1voc-V4pmb7NN4=') # 44-char URL-safe base64 string + ALLOWED_HOSTS = [''] # Application definition diff --git a/tom_common/apps.py b/tom_common/apps.py index 8eb4ddd90..52bf0aa73 100644 --- a/tom_common/apps.py +++ b/tom_common/apps.py @@ -1,5 +1,6 @@ from django.apps import AppConfig from django.conf import settings +from django.core.exceptions import ImproperlyConfigured import plotly.io as pio @@ -7,10 +8,14 @@ class TomCommonConfig(AppConfig): name = 'tom_common' def ready(self): - # Import signals for automatically saving profiles when updating User objects + # Import signals so their @receiver decorators are executed, which + # registers the signal handlers. Without this import, signal handlers + # in signals.py would never fire. 
# https://docs.djangoproject.com/en/5.1/topics/signals/#connecting-receiver-functions import tom_common.signals # noqa + self._check_dek_encryption_key() + # Set default plotly theme on startup valid_themes = ['plotly', 'plotly_white', 'plotly_dark', 'ggplot2', 'seaborn', 'simple_white', 'none'] @@ -21,6 +26,35 @@ def ready(self): pio.templates.default = plotly_theme + def _check_dek_encryption_key(self) -> None: + """Verify that the DEK encryption master key is configured. + + This key is required for encrypting sensitive user data (API keys, + observatory credentials) at rest in the database. Without it, the + TOM is prevented from starting. + """ + key = getattr(settings, 'TOMTOOLKIT_DEK_ENCRYPTION_KEY', '') + if not key: + raise ImproperlyConfigured( + "\n\n" + "TOMTOOLKIT_DEK_ENCRYPTION_KEY is not set.\n\n" + "This setting is required for encrypting sensitive user data at rest.\n" + "To fix this:\n\n" + " 1. Generate a key (requires the 'cryptography' package, which is\n" + " installed as a dependency of tom-base):\n\n" + " python -c \"from cryptography.fernet import Fernet; " + "print(Fernet.generate_key().decode())\"\n\n" + " 2. Set the key as an environment variable:\n\n" + " export TOMTOOLKIT_DEK_ENCRYPTION_KEY=''\n\n" + " Then reference it in your settings.py:\n\n" + " TOMTOOLKIT_DEK_ENCRYPTION_KEY = os.getenv(\n" + " 'TOMTOOLKIT_DEK_ENCRYPTION_KEY')\n\n" + " 3. Restart your TOM.\n\n" + "Treat this key like SECRET_KEY — keep it secret, do not commit it\n" + "to source control, and back it up. If this key is lost, users will\n" + "need to re-enter their saved external service credentials.\n" + ) + def profile_details(self): """ Integration point for adding items to the user profile page. 
diff --git a/tom_common/management/__init__.py b/tom_common/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tom_common/management/commands/__init__.py b/tom_common/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tom_common/management/commands/rotate_dek_encryption_key.py b/tom_common/management/commands/rotate_dek_encryption_key.py new file mode 100644 index 000000000..18fe1014d --- /dev/null +++ b/tom_common/management/commands/rotate_dek_encryption_key.py @@ -0,0 +1,73 @@ +"""Management command to rotate the TOMTOOLKIT_DEK_ENCRYPTION_KEY. + +This is a thin CLI wrapper around ``session_utils.rotate_master_key()``. +See that function for the actual rotation logic. + +Usage: + 1. Generate a new Fernet key: + python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" + 2. Run the rotation: + python manage.py rotate_dek_encryption_key --new-key <NEW_KEY> + 3. Update your environment / settings.py with the new key. + 4. Restart the server. +""" +from __future__ import annotations + +from django.core.management.base import BaseCommand, CommandError + +from tom_common.session_utils import rotate_master_key + + +class Command(BaseCommand): + help = ( + 'Re-encrypts all per-user Data Encryption Keys (DEKs) with a new master key. ' + 'Run this when rotating TOMTOOLKIT_DEK_ENCRYPTION_KEY.' + ) + + def add_arguments(self, parser) -> None: + parser.add_argument( + '--new-key', + required=True, + help='The new Fernet master key (URL-safe base64-encoded, 32 bytes). 
' + 'Generate with: python -c "from cryptography.fernet import Fernet; ' + 'print(Fernet.generate_key().decode())"', + ) + + def handle(self, *args, **options) -> None: + new_key: str = options['new_key'] + + try: + result = rotate_master_key(new_key) + except ValueError as e: + raise CommandError(str(e)) + except Exception as e: + raise CommandError(f"Cannot access current master key: {e}") + + if result.total == 0: + self.stdout.write(self.style.WARNING( + "No profiles with encryption keys found. Nothing to rotate." + )) + return + + self.stdout.write(f"Re-encrypting DEKs for {result.total} profile(s)...") + + if result.success_count: + self.stdout.write(self.style.SUCCESS( + f"Done. {result.success_count} re-encrypted successfully." + )) + + for error in result.errors: + self.stderr.write(self.style.ERROR( + f" FAILED: Profile pk={error.profile_pk} (user={error.username}) — {error.error}" + )) + + if result.error_count: + self.stdout.write(self.style.ERROR( + f"{result.error_count} failed — see errors above." + )) + + self.stdout.write("") + self.stdout.write(self.style.WARNING( + "IMPORTANT: Update TOMTOOLKIT_DEK_ENCRYPTION_KEY in your environment / " + "settings.py with the new key, then restart the server." 
+ )) diff --git a/tom_common/migrations/0003_profile_encrypted_dek.py b/tom_common/migrations/0003_profile_encrypted_dek.py new file mode 100644 index 000000000..ea5b9f893 --- /dev/null +++ b/tom_common/migrations/0003_profile_encrypted_dek.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.12 on 2026-03-27 22:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('tom_common', '0002_usersession'), + ] + + operations = [ + migrations.AddField( + model_name='profile', + name='encrypted_dek', + field=models.BinaryField(blank=True, null=True), + ), + ] diff --git a/tom_common/migrations/0004_delete_usersession.py b/tom_common/migrations/0004_delete_usersession.py new file mode 100644 index 000000000..c0fcc8f3e --- /dev/null +++ b/tom_common/migrations/0004_delete_usersession.py @@ -0,0 +1,16 @@ +# Generated by Django 5.2.12 on 2026-03-27 22:29 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('tom_common', '0003_profile_encrypted_dek'), + ] + + operations = [ + migrations.DeleteModel( + name='UserSession', + ), + ] diff --git a/tom_common/models.py b/tom_common/models.py index 8091ed2f4..953fb7370 100644 --- a/tom_common/models.py +++ b/tom_common/models.py @@ -1,63 +1,89 @@ +"""Models for TOM Toolkit's user profiles and encrypted field storage. + +Encryption Architecture +----------------------- +TOM Toolkit uses envelope encryption to protect sensitive user data (API keys, +observatory credentials) at rest in the database. The scheme has two layers: + +1. A server-side **master key** (``TOMTOOLKIT_DEK_ENCRYPTION_KEY``) is stored in the + environment, never in the database. It is a Fernet key used to encrypt + per-user keys. + +2. Each user has a random **Data Encryption Key (DEK)** that encrypts their + actual data. The DEK is stored on the user's ``Profile`` as ``encrypted_dek`` + — encrypted by the master key. 
To use it, we decrypt it with the master + key, create a Fernet cipher, and use that cipher to encrypt or decrypt + individual fields. + +This means database access alone cannot decrypt user data — an attacker also +needs the master key from the server environment. See +``docs/design/encryption_architecture_redesign.md`` for the full design. + +Plugin developers use ``EncryptedProperty`` descriptors and +``EncryptableModelMixin`` to add encrypted fields to their models, and the +helper functions in ``session_utils`` to read/write those fields. The +encryption plumbing is handled transparently. +""" + +from __future__ import annotations + import logging + +from cryptography.fernet import Fernet from django.conf import settings from django.db import models from django.contrib.auth.models import User -from django.contrib.sessions.models import Session -from cryptography.fernet import Fernet logger = logging.getLogger(__name__) class Profile(models.Model): - """Profile model for a TOMToolkit User""" + """Profile model for a TOMToolkit User. + """ user = models.OneToOneField(User, on_delete=models.CASCADE) affiliation = models.CharField(max_length=100, null=True, blank=True) - def __str__(self): - return f'{self.user.username} Profile' - - -class UserSession(models.Model): - """Mapping model to associate the User and their Sessions + # The user's Data Encryption Key (DEK), encrypted by the master key + # (TOMTOOLKIT_DEK_ENCRYPTION_KEY). Generated on first user save; null for + # users created before this feature who haven't logged in yet. + # BinaryField is excluded by model_to_dict(), so this intentionally does + # not appear on the user Profile card. + encrypted_dek = models.BinaryField(null=True, blank=True) - An instance of this model is created whenever we receive the user_logged_in - signal (see signals.py). Upon receiving user_logged_out, we delete all instances - of UserSession for the specific User logging out. 
+ def __str__(self) -> str: + return f'{self.user.username} Profile' - This allows us to manage the User's encrypted data in their app profiles, - should they change their password (see signals.py). - """ - # if either of the referenced objects are deleted, delete this object (CASCADE). - user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) - session = models.ForeignKey(Session, on_delete=models.CASCADE) - def __str__(self): - return f'UserSession for {self.user.username} with Session key {self.session.session_key}' +class EncryptedProperty: + """A Python descriptor that provides transparent encryption and decryption + for a model field. + This descriptor works with ``EncryptableModelMixin`` and the helper + functions in ``session_utils``. It expects a Fernet cipher to be + temporarily attached to the model instance as ``_cipher`` before the + property is read or written. The cipher is created from the user's + decrypted DEK by the helper functions and removed immediately after use. -class EncryptedProperty: - """ - A Python descriptor that provides transparent encryption and decryption for a - model field. + The ``_cipher`` attachment pattern exists because Python descriptors cannot + accept extra arguments — the cipher must be passed through the instance. + Direct access without a cipher raises ``AttributeError`` to prevent + accidental plaintext reads of encrypted data. - This descriptor is used in conjunction with the EncryptableModelMixin. It - requires a cipher to be temporarily attached to the model instance as `_cipher` - before accessing the property. 
+ Usage:: - Usage: class MyModel(EncryptableModelMixin, models.Model): _my_secret_encrypted = models.BinaryField(null=True) my_secret = EncryptedProperty('_my_secret_encrypted') """ def __init__(self, db_field_name: str): self.db_field_name = db_field_name - self.property_name = None # Set by __set_name__ + self.property_name: str | None = None # Set by __set_name__ - def __set_name__(self, owner, name): + def __set_name__(self, owner: type, name: str) -> None: self.property_name = name - def __get__(self, instance, owner): + def __get__(self, instance: models.Model | None, owner: type) -> str | EncryptedProperty: if instance is None: return self @@ -73,16 +99,15 @@ def __get__(self, instance, owner): if not encrypted_value: return '' - # Handle bytes (sqlite3) vs memoryview (postgresql) + # Handle bytes (sqlite3) vs memoryview (postgresql). + # PostgreSQL/psycopg returns memoryview for BinaryFields; + # SQLite returns bytes. Fernet.decrypt() needs bytes. if isinstance(encrypted_value, memoryview): - # postgresql/psycopg uses a memoryview object for BinaryFields. - # Sqlite3 uses bytes. When needed, convert to the encrypted_value - # to bytes before we decrypt and decode it. encrypted_value = encrypted_value.tobytes() return cipher.decrypt(encrypted_value).decode() - def __set__(self, instance, value: str): + def __set__(self, instance: models.Model, value: str) -> None: cipher = getattr(instance, '_cipher', None) if not isinstance(cipher, Fernet): raise AttributeError( @@ -99,90 +124,25 @@ def __set__(self, instance, value: str): class EncryptableModelMixin(models.Model): - """ - A mixin for models that use EncryptedProperty to handle sensitive data. + """Base mixin for models that store encrypted data via ``EncryptedProperty``. + + Plugin models that hold sensitive per-user data (API keys, observatory + credentials) should inherit from this mixin alongside ``models.Model``. 
+ It provides a standardized ``user`` OneToOneField that ties the encrypted + data to its owner. The helper functions ``get_encrypted_field()`` and + ``set_encrypted_field()`` in ``session_utils`` use this user reference + to look up the user's DEK (via their ``Profile.encrypted_dek``) and + build the Fernet cipher needed by the ``EncryptedProperty`` descriptors. - Provides a generic re-encryption mechanism for all encrypted properties - in the model. + Usage:: + + class MyAppModel(EncryptableModelMixin, models.Model): + _api_key_encrypted = models.BinaryField(null=True) + api_key = EncryptedProperty('_api_key_encrypted') + + Subclasses should not redefine the ``user`` field. """ - # By defining the user relationship here, we ensure that any model using this - # mixin has a standardized way to associate with a user. This removes - # ambiguity and the need for assumptions in utility functions that need to - # find the user associated with an encryptable model instance. - # Subclasses should not redefine this field. user = models.OneToOneField(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) - def reencrypt_model_fields(self, decoding_cipher: Fernet, encoding_cipher: Fernet) -> None: - """Re-encrypts all fields managed by an EncryptedProperty descriptor. - - Re-encryption means decypting to plaintext with the old cipher based on the old - password and re-encrypting the plaintext with the new cipher based on the new - password. - - The `EncryptableModelMixin` and the `EncyptedProperty` descriptor work together - to access the `Model`'s encytped `BinaryField`s (for setting, getting, and - re-encrypting, which involves both). - - The `EncryptedProperty` descriptor uses the `_cipher` attribute on the encyrpted - `BinaryField`-containing `Model` and this method sets and resets `_cipher` in the - process of re-encrypting: First, `Model._cipher` is the `decoding_cipher` to get the - plaintext value from the encrypted `BinaryField`. 
Second, `Model._cipher` is reset - to the `encoding_cipher` to encrypt the plaintext value and save it in the - `BinaryField`. Third, the `_cipher` attribute is removed from the `Model` until - the next time it's needed, when it's attached again. - - So, to re-encrpyt, for each of the Model's encrypted `BinaryField`s, we need to: - 1. Use the `decoding_cipher` to get the `plaintext` of the value stored in the - BinaryField. `self._cipher` is set to the `decoding_cipher` for this purpose - and the `EncyptedProperty` descriptor handles the getting. - 2. Reset `self._cipher` to be the `encoding_cipher` and have the `EncyptedProperty` - descriptor handle the encryption and setting. - 3. Remove the `_cipher` attribute from the Model. - """ - model_save_needed = False - for attr_name in dir(self.__class__): - attr = getattr(self.__class__, attr_name) - if isinstance(attr, EncryptedProperty): - try: - # Set decoding cipher and get plaintext - self._cipher = decoding_cipher - plaintext = getattr(self, attr_name) - - if plaintext: - # Set encoding cipher and set new value - self._cipher = encoding_cipher - setattr(self, attr_name, plaintext) - model_save_needed = True - except Exception as e: - logger.error(f"Error re-encrypting property {attr_name} for {self.__class__.__name__}" - f" instance {getattr(self, 'pk', 'UnknownPK')}: {e}") - finally: - # Clean up the temporary cipher - if hasattr(self, '_cipher'): - del self._cipher - if model_save_needed: - self.save() - - def clear_encrypted_fields(self) -> None: - """ - Clears all fields managed by an EncryptedProperty descriptor. - - This is a destructive operation used when re-encryption is not possible, - e.g., when a user's password is reset by an admin and the old - decryption key is unavailable. It sets the value of each encrypted - field to None. 
- """ - model_save_needed = False - for attr_name in dir(self.__class__): - attr = getattr(self.__class__, attr_name) - if isinstance(attr, EncryptedProperty): - # Directly set the underlying db field to None - setattr(self, attr.db_field_name, None) - model_save_needed = True - logger.info(f"Cleared encrypted property '{attr_name}' for {self.__class__.__name__} " - f"instance {getattr(self, 'pk', 'UnknownPK')}.") - if model_save_needed: - self.save() - class Meta: abstract = True diff --git a/tom_common/session_utils.py b/tom_common/session_utils.py index 7e87f41d2..ed82d5a5f 100644 --- a/tom_common/session_utils.py +++ b/tom_common/session_utils.py @@ -1,25 +1,49 @@ -import base64 +"""Utilities for encrypting and decrypting sensitive user data at rest. + +This module implements the "read/write time" portion of TOM Toolkit's envelope +encryption scheme. The full architecture is documented in +``docs/design/encryption_architecture_redesign.md``; here is a brief summary +of how the pieces fit together: + +**Master key** (``TOMTOOLKIT_DEK_ENCRYPTION_KEY`` in settings / environment): + A Fernet key that never touches the database. It encrypts each user's + Data Encryption Key so that database access alone cannot reveal user data. + +**Per-user DEK** (``Profile.encrypted_dek``): + A random Fernet key generated when the user is created. Stored in the + database encrypted by the master key. To use it, we decrypt it with the + master key, build a Fernet cipher, and attach it briefly to the model + instance that holds the encrypted field. + +**EncryptedProperty / EncryptableModelMixin** (in ``models.py``): + The descriptor and mixin that plugin models use to declare encrypted + fields. They expect a ``_cipher`` attribute on the model instance — + this module's helper functions manage that lifecycle. 
+ +Typical call from a view or API endpoint:: + + from tom_common.session_utils import get_encrypted_field, set_encrypted_field + + api_key = get_encrypted_field(user, eso_profile, 'api_key') + set_encrypted_field(user, eso_profile, 'api_key', new_value) + eso_profile.save() +""" + +from __future__ import annotations + import logging +from dataclasses import dataclass, field from typing import Optional, TypeVar -from cryptography.fernet import Fernet -from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC -from cryptography.hazmat.primitives import hashes -from cryptography.hazmat.backends import default_backend +from cryptography.fernet import Fernet, InvalidToken -from django.apps import AppConfig, apps +from django.conf import settings from django.db import models -from django.contrib.auth.models import User -from django.contrib.sessions.models import Session -from django.contrib.sessions.backends.db import SessionStore -from tom_common.models import EncryptableModelMixin, UserSession +from tom_common.models import Profile -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -# Constant for storing the cipher encryption key in the session -SESSION_KEY_FOR_CIPHER_ENCRYPTION_KEY = 'key' +logger = logging.getLogger(__name__) # A generic TypeVar for a Django models.Model subclass instance. # The `bound=models.Model` constraint ensures that any @@ -27,113 +51,107 @@ ModelType = TypeVar('ModelType', bound=models.Model) -def create_cipher_encryption_key(user: User, password: str) -> bytes: - """Creates a Fernet cipher encryption key derived from the user's password. - - This key is intended to be stored (e.g., in the session) and used to - instantiate Fernet ciphers for encrypting and decrypting sensitive data - associated with the user, such as API keys or external service credentials. +def _get_master_cipher() -> Fernet: + """Return a Fernet cipher built from the server-side master key. 
- The key derivation process uses PBKDF2HMAC with a salt generated from - the user's username, making the key unique per user and password. + The master key (``TOMTOOLKIT_DEK_ENCRYPTION_KEY``) lives in the server + environment, not in the database. It is used only to encrypt and decrypt + per-user DEKs — never to encrypt user data directly. - Args: - user: The Django User object. - password: The user's plaintext password. - - Returns: - A URL-safe base64-encoded Fernet encryption key as bytes. - - See Also: - https://cryptography.io/en/latest/fernet/#using-passwords-with-fernet + Raises: + django.core.exceptions.ImproperlyConfigured: If the setting is missing + or empty. """ + key = getattr(settings, 'TOMTOOLKIT_DEK_ENCRYPTION_KEY', '') + if not key: + from django.core.exceptions import ImproperlyConfigured + raise ImproperlyConfigured( + "TOMTOOLKIT_DEK_ENCRYPTION_KEY is not set. This setting is required for " + "encrypting sensitive user data at rest. Generate one with:\n" + " python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\"\n" + "Then add it to your environment or settings.py." + ) + # The key may be a string (from os.getenv) or bytes; Fernet accepts both. + return Fernet(key) - # Generate a salt from hash and username - salt = hashes.Hash(hashes.SHA256(), backend=default_backend()) - salt.update(user.username.encode()) - - # Derive encryption_key using PBKDF2-HMAC and the newly generated salt - kdf = PBKDF2HMAC( # key derivation function - algorithm=hashes.SHA256(), - length=32, - salt=salt.finalize()[:16], # only finalize once; returns bytes; use 16 bytes - iterations=1_000_000, # Django recommendation of jan-2025 - backend=default_backend(), - ) - encryption_key: bytes = base64.urlsafe_b64encode(kdf.derive(password.encode())) - return encryption_key +def create_encrypted_dek() -> bytes: + """Generate a new random DEK and return it encrypted by the master key. 
-def save_key_to_session_store(key: bytes, session_store: SessionStore) -> None: - """Saves the provided encryption key to the given Django session store. + This is called once per user, at user-creation time (see ``signals.py``). + The returned bytes are stored in ``Profile.encrypted_dek``. - The key is first base64 encoded and converted to a string before being - stored in the session under a predefined session key. + We use ``Fernet.generate_key()`` rather than ``os.urandom()`` because + Fernet keys have a specific format (URL-safe base64-encoded 32 bytes) + and ``generate_key()`` guarantees that format. - Args: - key: The encryption key (bytes) to be saved. - session_store: The Django SessionStore instance where the key will be saved. + Returns: + The DEK encrypted by the master key, as bytes suitable for a BinaryField. """ - try: - assert isinstance(session_store, SessionStore), \ - f"session_store is not a SessionStore; it's a {type(session_store)}" - except AssertionError as e: - logger.error(str(e)) - - # The key is bytes, but session values must be JSON-serializable. - # A Fernet key is already base64-encoded, so we just decode it to a string for storage. - session_store[SESSION_KEY_FOR_CIPHER_ENCRYPTION_KEY] = key.decode('utf-8') - session_store.save() # we might be accessing the session before it's saved (in the middleware?) + # Generate a fresh random Fernet key for this user + dek: bytes = Fernet.generate_key() + # Encrypt the DEK with the master key so it can be stored safely + # in the database. The master key is the only thing that can decrypt it. + master_cipher = _get_master_cipher() + encrypted_dek: bytes = master_cipher.encrypt(dek) + return encrypted_dek -def get_key_from_session_model(session: Session) -> bytes: - """Extracts and decodes the encryption key from a Django Session object. - - Retrieves the base64 encoded key string from the session, decodes it - from base64, and returns the raw bytes of the encryption key. 
+def _decrypt_dek(encrypted_dek: bytes) -> bytes: + """Decrypt a user's DEK using the master key. Args: - session: The Django Session object from which to extract the key. + encrypted_dek: The encrypted DEK from ``Profile.encrypted_dek``. Returns: - The encryption key as bytes. + The plaintext DEK (a valid Fernet key as bytes). """ + # Handle memoryview from PostgreSQL BinaryField + if isinstance(encrypted_dek, memoryview): + encrypted_dek = encrypted_dek.tobytes() + + master_cipher = _get_master_cipher() + return master_cipher.decrypt(encrypted_dek) - logger.debug(f"Extracting key from Session model: {type(session)} = {session} - {session.get_decoded()}") - key_as_str: str = session.get_decoded()[SESSION_KEY_FOR_CIPHER_ENCRYPTION_KEY] # type: ignore - # The key was stored as a string, so we encode it back to bytes. - return key_as_str.encode('utf-8') +def _get_cipher_for_user(user) -> Fernet: + """Build a Fernet cipher from a user's decrypted DEK. -def get_key_from_session_store(session_store: SessionStore) -> bytes: - """Extracts the encryption key from a Django SessionStore instance. + This fetches the user's ``Profile.encrypted_dek``, decrypts it with the + master key, and returns a Fernet cipher ready to encrypt or decrypt the + user's data fields. - Use the dictionary-like API that the SessionStore provides to retreive - the encryption key. + The decrypted DEK exists only in memory for the duration of this call and + the subsequent encrypt/decrypt operation. It is never persisted in + plaintext. Args: - session_store: The Django SessionStore instance. + user: A Django User instance. Returns: - The encryption key as bytes. + A Fernet cipher built from the user's DEK. + + Raises: + Profile.DoesNotExist: If the user has no Profile. + ValueError: If the user's Profile has no encrypted DEK. 
""" - if not isinstance(session_store, SessionStore): - # manual type checking - raise TypeError(f"Expected a SessionStore object, but got {type(session_store)}") + profile = Profile.objects.get(user=user) + if not profile.encrypted_dek: + raise ValueError(f"User {user.username} has no encryption key (encrypted_dek is empty). " + f"This may indicate the user was created before encryption was configured.") - key_as_str: str = session_store[SESSION_KEY_FOR_CIPHER_ENCRYPTION_KEY] - return key_as_str.encode('utf-8') + dek: bytes = _decrypt_dek(profile.encrypted_dek) + return Fernet(dek) -def get_encrypted_field(user: User, - model_instance: ModelType, # type: ignore +def get_encrypted_field(user, + model_instance: ModelType, field_name: str) -> Optional[str]: - """ - Helper function to safely get the decrypted value of an EncryptedProperty. + """Safely get the decrypted value of an EncryptedProperty. - This function encapsulates the logic of fetching the user's session key, - creating a cipher, attaching it to the model instance, reading the - decrypted value, and cleaning up. + Fetches the user's DEK from their Profile, decrypts it with the master key, + creates a Fernet cipher, and uses the ``EncryptedProperty`` descriptor to + decrypt the field value. Args: user: The User object associated with the encrypted data. @@ -142,26 +160,18 @@ def get_encrypted_field(user: User, Returns: The decrypted string value, or None if decryption fails for any reason - (e.g., no active session, key not found). + (e.g., no Profile, no DEK, corrupted data). """ try: - # Get the current Session from the UserSession - # A user can be logged in from multiple browsers, resulting in multiple - # UserSession objects. Since the encryption key is derived from the - # password and is the same for all sessions, we can safely take the first one. 
- user_session = UserSession.objects.filter(user=user).first() - if not user_session: - raise UserSession.DoesNotExist(f"No active session found for user {user.username}") - - session: Session = user_session.session - cipher_key: bytes = get_key_from_session_model(session) - cipher: Fernet = Fernet(cipher_key) - - # Attach the cipher, get the value, and then clean up - model_instance._cipher = cipher # type: ignore + cipher = _get_cipher_for_user(user) + # Attach the cipher so the EncryptedProperty descriptor can use it, + # read the decrypted value, then clean up. The cipher is attached to + # the model instance (not the user) because the descriptor's __get__ + # method receives the instance it's defined on. + model_instance._cipher = cipher # type: ignore[attr-defined] decrypted_value = getattr(model_instance, field_name) return decrypted_value - except (UserSession.DoesNotExist, KeyError) as e: + except (Profile.DoesNotExist, ValueError) as e: logger.warning(f"Could not get encryption key for user {user.username} to access " f"'{field_name}': {e}") return None @@ -170,24 +180,23 @@ def get_encrypted_field(user: User, f"for user {user.username}: {e}") return None finally: - # Ensure the temporary cipher is always removed from the instance + # Always remove the temporary cipher from the instance to avoid + # accidental reuse or leaking the key in memory longer than needed. if hasattr(model_instance, '_cipher'): - del model_instance._cipher # type: ignore + del model_instance._cipher # type: ignore[attr-defined] -def set_encrypted_field(user: User, - model_instance: ModelType, # type: ignore +def set_encrypted_field(user, + model_instance: ModelType, field_name: str, value: str) -> bool: - """ - Helper function to safely set the value of an EncryptedProperty. + """Safely set the value of an EncryptedProperty. 
- This function encapsulates the logic of fetching the user's session key, - creating a cipher, attaching it to the model instance, setting the new - encrypted value, and cleaning up. + Fetches the user's DEK, creates a cipher, and uses the + ``EncryptedProperty`` descriptor to encrypt and store the value. Note: This function does NOT save the instance. The caller is responsible - for calling `instance.save()` after the field has been set. + for calling ``instance.save()`` after the field has been set. Args: user: The User object associated with the encrypted data. @@ -199,20 +208,11 @@ def set_encrypted_field(user: User, True if the field was set successfully, False otherwise. """ try: - # Get the current Session from the UserSession - user_session = UserSession.objects.filter(user=user).first() # see comment above - if not user_session: - raise UserSession.DoesNotExist(f"No active session found for user {user.username}") - - session: Session = user_session.session - cipher_key: bytes = get_key_from_session_model(session) - cipher = Fernet(cipher_key) - - # Attach the cipher, set the value, and then clean up - model_instance._cipher = cipher # type: ignore + cipher = _get_cipher_for_user(user) + model_instance._cipher = cipher # type: ignore[attr-defined] setattr(model_instance, field_name, value) return True - except (UserSession.DoesNotExist, KeyError) as e: + except (Profile.DoesNotExist, ValueError) as e: logger.error(f"Could not get encryption key for user {user.username} to set " f"'{field_name}': {e}") return False @@ -221,131 +221,105 @@ def set_encrypted_field(user: User, f"for user {user.username}: {e}") return False finally: - # Ensure the temporary cipher is always removed from the instance if hasattr(model_instance, '_cipher'): - del model_instance._cipher # type: ignore + del model_instance._cipher # type: ignore[attr-defined] -def reencrypt_data(user) -> None: - """Re-encrypts sensitive data for a user after a password change. 
+# --------------------------------------------------------------------------- +# Master key rotation +# --------------------------------------------------------------------------- - If an Administrator is changing another user's password, and - the `user: User` is not logged-in, then they have no SessionStore, - and, thus, no encryption key is available. In that case, the User's - encrypted fields are cleared out because they are stale, having - been ecrypted with an encryption key derived from a password that - is no longer in use. +@dataclass +class RotationError: + """Details about a single Profile that failed during key rotation.""" + profile_pk: int + username: str + error: str - Args: - user: The Django User object whose password has changed. - """ - logger.debug("Re-encrypting sensitive data...") - - # Get the current Session from the UserSession - user_session = UserSession.objects.filter(user=user.id).first() # see comment above - - if not user_session: - logger.warning(f"User {user.username} is not logged in. Cannot re-encrypt sensitive data. 
" - f"Clearing all encrypted fields instead.") - # Loop through all the installed apps and ask them to clear their encrypted profile fields - for app_config in apps.get_app_configs(): - clear_encrypted_fields_for_user(app_config, user) # type: ignore - return - - session: Session = user_session.session - # Get the current encryption_key from the Session - current_encryption_key: bytes = get_key_from_session_model(session) - # Generate a decoding Fernet cipher with the current encryption key - decoding_cipher = Fernet(current_encryption_key) - - # Get the new raw password from the User instance - new_raw_password = user._password # CAUTION: this is implemenation dependent (using _) - # Generate a new encryption_key with the new raw password - new_encryption_key: bytes = create_cipher_encryption_key(user, new_raw_password) - # Generate a new encoding Fernet cipher with the new encryption key - encoding_cipher = Fernet(new_encryption_key) - - # Save the new encryption key in the User's Session - session_store: SessionStore = SessionStore(session_key=session.session_key) - save_key_to_session_store(new_encryption_key, session_store) - # also, attach the new encryption key to the User instance so it can be inserted - # into the Session before we call update_session_auth_hash in - # tom_common.views.UserUpdateView.form_valid() - user._temp_new_fernet_key = new_encryption_key - - # Loop through all the installed apps and ask them to reencrypt their encrypted profile fields - for app_config in apps.get_app_configs(): - try: - reencrypt_encypted_fields_for_user(app_config, user, decoding_cipher, encoding_cipher) # type: ignore - except AttributeError: - logger.debug(f'App: {app_config.name} does not have a reencrypt_app_fields method.') - continue +@dataclass +class RotationResult: + """Result of a master key rotation operation. 
-def reencrypt_encypted_fields_for_user(app_config: AppConfig, user: 'User', - decoding_cipher: Fernet, encoding_cipher: Fernet): + Attributes: + success_count: Number of Profiles whose DEKs were successfully re-encrypted. + errors: Per-profile details for any that failed. """ - Automatically finds models in the app_config that inherit from EncryptableModelMixin - and attempts to re-encrypt their fields for the given user. + success_count: int = 0 + errors: list[RotationError] = field(default_factory=list) - :param app_config: The AppConfig instance of the plugin app. - :param user: The User whose data needs re-encryption. - :param decoding_cipher: Fernet cipher to decrypt existing data. - :param encoding_cipher: Fernet cipher to encrypt new data. - """ - for model_class in app_config.get_models(): - if issubclass(model_class, EncryptableModelMixin): - logger.debug(f"Found EncryptableModelMixin subclass: {model_class.__name__} in app {app_config.name}") - # The EncryptableModelMixin guarantees a 'user' field, which is a OneToOneField. - try: - encryptable_model_instance = model_class.objects.get(user=user) - # instance of the Model which is a subclass of EncryptableModelMixin - encryptable_model_instance.reencrypt_model_fields(decoding_cipher, encoding_cipher) # re-entrpt here - except model_class.DoesNotExist: - logger.info(f"No {model_class.__name__} instance found for user {user.username}.") - except model_class.MultipleObjectsReturned: - # This should not be reached if the mixin correctly enforces a OneToOneField. - # It's kept here as a safeguard against unexpected configurations. - logger.error(f"Multiple {model_class.__name__} instances found for user {user.username}. " - f"This is unexpected for an EncryptableModelMixin. 
Re-encrypting all found.") - instances = model_class.objects.filter(user=user) - for encryptable_model_instance in instances: - encryptable_model_instance.reencrypt_model_fields(decoding_cipher, encoding_cipher) - except Exception as e: - logger.error(f"Error processing model {model_class.__name__} for re-encryption for " - f"user {user.username}: {e}") - - -def clear_encrypted_fields_for_user(app_config: AppConfig, user: 'User',) -> None: - """ - Finds models in an app that are Encryptable and clears their encrypted fields for the given user. + @property + def error_count(self) -> int: + return len(self.errors) + + @property + def total(self) -> int: + return self.success_count + self.error_count + + +def rotate_master_key(new_key: str) -> RotationResult: + """Re-encrypt all per-user DEKs with a new master key. - This is a destructive operation used when a user's password is reset without - them being logged in, making the old decryption key unavailable. This happens, - for example, when an adminitrator resets their password. + Each Profile's ``encrypted_dek`` is decrypted with the current master key + (from ``TOMTOOLKIT_DEK_ENCRYPTION_KEY``) and re-encrypted with + ``new_key``. The user Profile's plaintext DEK is unchanged — only its + encryption layer (i.e. `encrypted_dek`) is replaced. The actual encrypted + data is not touched. - :param app_config: The AppConfig instance of the plugin app. - :param user: The User whose data needs to be cleared. + After this function completes successfully, the server's + ``TOMTOOLKIT_DEK_ENCRYPTION_KEY`` must be updated to ``new_key`` and the + server restarted. Until that happens, the re-encrypted DEKs cannot be + decrypted. + + Args: + new_key: The new Fernet master key as a string (URL-safe base64, 44 chars). + + Returns: + A ``RotationResult`` with per-profile success/error details. + + Raises: + ValueError: If ``new_key`` is not a valid Fernet key. 
+ django.core.exceptions.ImproperlyConfigured: If the current master key + is missing or empty. """ - for model_class in app_config.get_models(): - if issubclass(model_class, EncryptableModelMixin): - logger.debug(f"Found EncryptableModelMixin subclass: {model_class.__name__} in " - f"app {app_config.name} for clearing.") - # The EncryptableModelMixin now guarantees a 'user' field, which is a OneToOneField. - try: - encryptable_model_instance = model_class.objects.get(user=user) - # instance of the Model which is a subclass of EncryptableModelMixin - encryptable_model_instance.clear_encrypted_fields() # do the clearing of the fields here - except model_class.DoesNotExist: - logger.info(f"No {model_class.__name__} instance found for user {user.username} to clear.") - except model_class.MultipleObjectsReturned: - # This should not be reached if the mixin correctly enforces a OneToOneField. - # It's kept here as a safeguard against unexpected configurations. - logger.error(f"Multiple {model_class.__name__} instances found for user {user.username}. " - f"This is unexpected for an EncryptableModelMixin. Clearing all found.") - instances = model_class.objects.filter(user=user) - for encryptable_model_instance in instances: - encryptable_model_instance.clear_encrypted_fields() - except Exception as e: - logger.error(f"Error clearing encrypted fields for model {model_class.__name__} for " - f"user {user.username}: {e}") + # Validate the new key before touching any data. + try: + new_master_cipher = Fernet(new_key.encode()) + except Exception as e: + raise ValueError(f"Invalid new key: {e}") from e + + # Build the old master cipher from current settings. + # Raises ImproperlyConfigured if missing — intentionally not caught here. 
+ old_master_cipher = _get_master_cipher() + + profiles = Profile.objects.exclude(encrypted_dek=None) + result = RotationResult() + + for profile in profiles.iterator(): + try: + encrypted_dek = profile.encrypted_dek + # Handle memoryview from PostgreSQL + if isinstance(encrypted_dek, memoryview): + encrypted_dek = encrypted_dek.tobytes() + + # Decrypt with old key, re-encrypt with new key + plaintext_dek: bytes = old_master_cipher.decrypt(encrypted_dek) + new_encrypted_dek: bytes = new_master_cipher.encrypt(plaintext_dek) + + profile.encrypted_dek = new_encrypted_dek + profile.save(update_fields=['encrypted_dek']) + result.success_count += 1 + except InvalidToken: + result.errors.append(RotationError( + profile_pk=profile.pk, + username=profile.user.username, + error="could not decrypt with current master key", + )) + except Exception as e: + result.errors.append(RotationError( + profile_pk=profile.pk, + username=profile.user.username, + error=str(e), + )) + + return result diff --git a/tom_common/signals.py b/tom_common/signals.py index 108720412..cd9e9c745 100644 --- a/tom_common/signals.py +++ b/tom_common/signals.py @@ -1,16 +1,13 @@ import logging from django.conf import settings -from django.contrib.auth import get_user_model from django.contrib.auth.models import User -from django.contrib.auth.signals import user_logged_in, user_logged_out -from django.contrib.sessions.models import Session -from django.db.models.signals import post_save, pre_save +from django.db.models.signals import post_save from django.dispatch import receiver from rest_framework.authtoken.models import Token -from tom_common.models import Profile, UserSession +from tom_common.models import Profile from tom_common import session_utils logger = logging.getLogger(__name__) @@ -23,17 +20,32 @@ # while get_user_model() is valid after INSTALLED_APPS are loaded. 
-# Signal: Create a Profile for the User when the User instance is created +# Signal: Create a Profile (with an encrypted DEK) for the User when the User instance is created @receiver(post_save, sender=User) -def save_profile_on_user_post_save(sender, instance, **kwargs): - """When a user is saved, save their profile.""" - # Take advantage of the fact that logging in updates a user's last_login field - # to create a profile for users that don't have one. +def save_profile_on_user_post_save(sender, instance, created, **kwargs) -> None: + """When a user is saved, ensure their Profile exists and has an encrypted DEK. + + On first save (user creation), creates a new Profile and generates an + encrypted Data Encryption Key (DEK) for the user. The DEK is a random + Fernet key encrypted by the server-side master key — see + ``session_utils.create_encrypted_dek()`` for details. + + On subsequent saves, just saves the existing Profile (e.g., to propagate + any changes from inline formsets). + """ try: - instance.profile.save() - except User.profile.RelatedObjectDoesNotExist: # type: ignore - logger.info(f'No Profile found for {instance}. Creating Profile.') - Profile.objects.create(user=instance) + profile = instance.profile + # If the Profile exists but has no DEK (e.g., it was created before + # the encryption system was added), generate one now. + if not profile.encrypted_dek: + profile.encrypted_dek = session_utils.create_encrypted_dek() + profile.save() + except User.profile.RelatedObjectDoesNotExist: # type: ignore[attr-defined] + logger.info(f'No Profile found for {instance}. 
Creating Profile with encryption key.') + Profile.objects.create( + user=instance, + encrypted_dek=session_utils.create_encrypted_dek(), + ) # Signal: Create a DRF token for the User when the User instance is created @@ -49,120 +61,3 @@ def create_auth_token_on_user_post_save(sender, instance=None, created=False, ** """ if created: Token.objects.create(user=instance) - - -# Signal: Create UserSession on login -@receiver(user_logged_in) -def create_user_session_on_user_logged_in(sender, request, user, **kwargs) -> None: - """Whenever a user logs in, create a UserSession instance to associate - the User with the new Session. - """ - logger.debug(f"User {user.username} has logged in. request: {request}") - logger.debug(f"Request session: {type(request.session)} = {request.session}") - - # the request.session is a SessionStore object, we need the Session - # and we can get it using the session_key - try: - session: Session = Session.objects.get(pk=request.session.session_key) - except Session.DoesNotExist: - # this request should have a sesssion: SessionStore object, and if it - # doesn't, it could be because the user was logged in as part of a test, but - # TODO: sort out whether the test code should be updated or ??? - logger.error(f"Session {request.session.session_key} does not exist.") - return - - logger.debug(f"Session: {type(session)} = {session}") - - user_session, created = UserSession.objects.get_or_create(user=user, session=session) - if created: - logger.debug(f"UserSession created: {user_session}") - else: - logger.debug(f"UserSession already exists: {user_session}") - - -# Signal: Delete UserSession on logout -@receiver(user_logged_out) -def delete_user_session_on_user_logged_out(sender, request, user, **kwargs) -> None: - """Whenever a user logs out, delete all their UserSession instances. 
- """ - user_sessions = UserSession.objects.filter(user=user) - for user_session in user_sessions: - user_session.session.delete() - # TODO: consider if the User has logged in from multiple browsers/devices - # (i.e. we want to delete all their sessions or just the one they logged out from) - # this could probably be done by filtering on the session_key of the request in - # addition to the user above. - - -# Signal: Set cipher on login -@receiver(user_logged_in) -def set_cipher_on_user_logged_in(sender, request, user, **kwargs) -> None: - """When the user logs in, capture their password and use it to - generate a cipher encryption key and save it in the User's Session. - """ - logger.debug(f"User {user.username} has logged in. request: {request}") - - password = request.POST.get("password") # Capture password from login - if password: - encryption_key: bytes = session_utils.create_cipher_encryption_key(user, password) - session_utils.save_key_to_session_store(encryption_key, request.session) - else: - logger.error(f'User {user.username} logged in without a password. Cannot create encryption key.') - - -# Signal: Clear cipher encryption key on logout -@receiver(user_logged_out) -def clear_encryption_key_on_user_logged_out(sender, request, user, **kwargs) -> None: - """Clear the cipher encryption key when a user logs out. - """ - if user: - logger.debug(f'User {user.username} has logged out. Deleting key from Session.' - f'sender: {sender}; request: {request}') - request.session.pop(session_utils.SESSION_KEY_FOR_CIPHER_ENCRYPTION_KEY, None) - - -# Signal: Update the User's sensitive data when the password changes -@receiver(pre_save, sender=get_user_model()) -def user_updated_on_user_pre_save(sender, **kwargs): - """When the User model is saved, detect if the password has changed. 
- - kwargs: - * signal: - * instance: - * raw: Boolean - * using: str - * update_fields: frozenset | NoneType - - If the User's password has changed, take the following actions: - - Current list of actions to be taken upon User password change: - * re-encrypt the user's sensitive data (see session_utils.reencrypt_data() function) - * - """ - logger.debug(f"kwargs: {kwargs}") - user = kwargs.get("instance", None) - - if user and not user.username == 'AnonymousUser' and not user.is_anonymous: - # user.password vs. user._password: - # the user.password field is used for authentication (via comparison to the (hashed) password - # being tested for validity). The _password field is the raw password and is what we need to - # create a new cipher for the User's sensitive data. - - # This Signal is called for ANY change to the User model, not just password changes. - # So, determine if the password has changed by comparing new and old (hashed) passwords. - # NOTE: the update_fields kwarg is a frozenset of changed updated fields, but it does not contain - # 'password' when the User is changing their password. 
So, compare new and old: - - new_hashed_password = user.password # from the not-yet-saved User instance - try: - old_hashed_password = User.objects.get(id=user.id).password # from the previously-saved User instance - except User.DoesNotExist: - old_hashed_password = None - - if new_hashed_password != old_hashed_password: - # New password detected - logger.debug(f'User {user.username} is changing their password.') - session_utils.reencrypt_data(user) # need new RAW password to re-create cipher and re-encrypt - else: - # No new password detected - logger.debug(f'User {user.username} is updating their profile without a password change.') diff --git a/tom_common/templates/tom_common/create_user.html b/tom_common/templates/tom_common/create_user.html index 5ceffb565..f1a019db1 100644 --- a/tom_common/templates/tom_common/create_user.html +++ b/tom_common/templates/tom_common/create_user.html @@ -10,10 +10,6 @@ {% csrf_token %} {% bootstrap_form form %} {% bootstrap_formset form.user_profile_formset %} - {% if object.pk != current_user.pk %} -

WARNING: Changing the password for user {{ object.username }} will clear out all of - their saved external service API keys and passwords (if any).

- {% endif %} {% buttons %}