From 730b84d69330f13487244a88c0bfd0e4a201f741 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 4 May 2026 17:05:04 -0500 Subject: [PATCH 01/20] Cache catalognumber uniqueness preferences during bulk operations --- specifyweb/backend/businessrules/utils.py | 28 ++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/specifyweb/backend/businessrules/utils.py b/specifyweb/backend/businessrules/utils.py index 2834b765932..478b23e9936 100644 --- a/specifyweb/backend/businessrules/utils.py +++ b/specifyweb/backend/businessrules/utils.py @@ -1,13 +1,36 @@ import json import logging +from contextlib import contextmanager +from contextvars import ContextVar logger = logging.getLogger(__name__) +_unique_catnum_pref_cache: ContextVar[dict[tuple[int | None, int | None], bool] | None] = ContextVar( + "unique_catnum_pref_cache", + default=None, +) + +@contextmanager +def cache_unique_catnum_preferences(): + token = _unique_catnum_pref_cache.set({}) + try: + yield + finally: + _unique_catnum_pref_cache.reset(token) + def get_unique_catnum_across_comp_co_coll_pref(collection, user) -> bool: import specifyweb.backend.context.app_resource as app_resource - unique_catnum_enabled: bool = False + cache = _unique_catnum_pref_cache.get() + cache_key = ( + getattr(collection, "id", None), + getattr(user, "id", None), + ) + if cache is not None and cache_key in cache: + return cache[cache_key] + + unique_catnum_enabled: bool = False try: collection_prefs_json, _, __ = app_resource.get_app_resource(collection, user, 'CollectionPreferences') @@ -30,4 +53,7 @@ def get_unique_catnum_across_comp_co_coll_pref(collection, user) -> bool: except Exception as e: logger.warning(f"An unexpected error occurred: {e}") + if cache is not None: + cache[cache_key] = unique_catnum_enabled + return unique_catnum_enabled \ No newline at end of file From fb7880cd5b30a2968713ec83fb87d01bcc799775 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 4 May 2026 17:05:54 -0500 Subject: [PATCH 02/20] Skip component duplicate checks when CO catalog fields are unchanged --- .../rules/collectionobject_rules.py | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/specifyweb/backend/businessrules/rules/collectionobject_rules.py b/specifyweb/backend/businessrules/rules/collectionobject_rules.py index 40ffec39a0c..f34a1e84b40 100644 --- a/specifyweb/backend/businessrules/rules/collectionobject_rules.py +++ b/specifyweb/backend/businessrules/rules/collectionobject_rules.py @@ -4,25 +4,41 @@ from specifyweb.backend.businessrules.utils import get_unique_catnum_across_comp_co_coll_pref from specifyweb.specify.models import Component +def _collection_object_catalog_check_needed(co) -> bool: + if co.catalognumber is None: + return False + if co.pk is None: + return True + + return not type(co).objects.filter( + pk=co.pk, + catalognumber=co.catalognumber, + collection_id=co.collection_id, + ).exists() @orm_signal_handler('pre_save', 'Collectionobject') def collectionobject_pre_save(co): if co.collectionmemberid is None: co.collectionmemberid = co.collection_id - if co.collectionobjecttype is None: + if co.collectionobjecttype is None: co.collectionobjecttype = co.collection.collectionobjecttype agent = co.createdbyagent - if agent is not None and agent.specifyuser is not None: + if ( + agent is not None + and agent.specifyuser is not None + and _collection_object_catalog_check_needed(co) + ): unique_catnum_across_comp_co_coll_pref = get_unique_catnum_across_comp_co_coll_pref(co.collection, co.createdbyagent.specifyuser) - if unique_catnum_across_comp_co_coll_pref: - if co.catalognumber is not None: - contains_component_duplicates = Component.objects.filter( - catalognumber=co.catalognumber).exclude(pk=co.pk).exists() + if unique_catnum_across_comp_co_coll_pref: + contains_component_duplicates = Component.objects.filter( + catalognumber=co.catalognumber, + collectionobject__collection_id=co.collection_id, + ).exists() - if contains_component_duplicates: - raise BusinessRuleException( - 'Catalog Number is already in use for another Component in this collection.') \ No newline at end of file + if contains_component_duplicates: + raise BusinessRuleException( + 'Catalog Number is already in use for another Component in this collection.') From fa7a6ba801c770b9b22ce8ad6d0704a3e8bca959 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 4 May 2026 17:06:13 -0500 Subject: [PATCH 03/20] Reuse uniqueness preference lookup across bulk copy requests --- specifyweb/backend/bulk_copy/bulk_copy.py | 45 ++++++++++++----------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/specifyweb/backend/bulk_copy/bulk_copy.py b/specifyweb/backend/bulk_copy/bulk_copy.py index 3620e52af46..4dd47c6c768 100644 --- a/specifyweb/backend/bulk_copy/bulk_copy.py +++ b/specifyweb/backend/bulk_copy/bulk_copy.py @@ -6,6 +6,7 @@ from specifyweb.specify.api.crud import post_resource from specifyweb.specify.api.dispatch import HttpResponseCreated from specifyweb.specify.api.serializers import _obj_to_data, toJson +from specifyweb.backend.businessrules.utils import cache_unique_catnum_preferences def collection_dispatch_bulk_copy(request, model, copies) -> HttpResponse: @@ -17,15 +18,16 @@ def collection_dispatch_bulk_copy(request, model, copies) -> HttpResponse: data = json.loads(request.body) data = dict(filter(lambda item: item[0] != 'id', data.items())) # Remove ID field before making copies resp_objs = [] - for _ in range(int(copies)): - obj = post_resource( - request.specify_collection, - request.specify_user_agent, - model, - data, - request.GET.get("recordsetid", None), - ) - resp_objs.append(_obj_to_data(obj, checker)) + with cache_unique_catnum_preferences(): + for _ in range(int(copies)): + obj = post_resource( + request.specify_collection, + request.specify_user_agent, + model, + data, + request.GET.get("recordsetid", None), + ) + resp_objs.append(_obj_to_data(obj, checker)) return HttpResponseCreated(toJson(resp_objs), content_type='application/json') @@ -39,17 +41,18 @@ def collection_dispatch_bulk(request, model) -> HttpResponse: if request.method != 'POST': return HttpResponseNotAllowed(['POST']) - + data = json.loads(request.body) resp_objs = [] - for obj_data in data: - obj = post_resource( - request.specify_collection, - request.specify_user_agent, - model, - obj_data, - request.GET.get("recordsetid", None), - ) - resp_objs.append(_obj_to_data(obj, checker)) - - return HttpResponseCreated(toJson(resp_objs), content_type='application/json') \ No newline at end of file + with cache_unique_catnum_preferences(): + for obj_data in data: + obj = post_resource( + request.specify_collection, + request.specify_user_agent, + model, + obj_data, + request.GET.get("recordsetid", None), + ) + resp_objs.append(_obj_to_data(obj, checker)) + + return HttpResponseCreated(toJson(resp_objs), content_type='application/json') From c12d76edd3e79fa933e1893215fcbfda5422a939 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 4 May 2026 17:06:57 -0500 Subject: [PATCH 04/20] Cache uniqueness preference lookup for bulk create endpoints --- specifyweb/specify/api/dispatch.py | 41 ++++++++++++++++-------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/specifyweb/specify/api/dispatch.py b/specifyweb/specify/api/dispatch.py index 07f0e77e52c..6b9f648ee76 100644 --- a/specifyweb/specify/api/dispatch.py +++ b/specifyweb/specify/api/dispatch.py @@ -4,6 +4,7 @@ from django.core.exceptions import FieldError from specifyweb.backend.businessrules.exceptions import BusinessRuleException +from specifyweb.backend.businessrules.utils import cache_unique_catnum_preferences from specifyweb.backend.permissions.permissions import enforce, table_permissions_checker from specifyweb.specify.api.crud import apply_filters, delete_resource, get_collection, get_resource, post_resource, put_resource from specifyweb.specify.api.exceptions import FilterError, OrderByError @@ -124,18 +125,19 @@ def collection_dispatch_bulk(request, model) -> HttpResponse: if request.method != 'POST': return HttpResponseNotAllowed(['POST']) - + data = json.loads(request.body) resp_objs = [] - for obj_data in data: - obj = post_resource( - request.specify_collection, - request.specify_user_agent, - model, - obj_data, - request.GET.get("recordsetid", None), - ) - resp_objs.append(_obj_to_data(obj, checker)) + with cache_unique_catnum_preferences(): + for obj_data in data: + obj = post_resource( + request.specify_collection, + request.specify_user_agent, + model, + obj_data, + request.GET.get("recordsetid", None), + ) + resp_objs.append(_obj_to_data(obj, checker)) return HttpResponseCreated(toJson(resp_objs), content_type='application/json') @@ -148,15 +150,16 @@ def collection_dispatch_bulk_copy(request, model, copies) -> HttpResponse: data = json.loads(request.body) data = dict(filter(lambda item: item[0] != 'id', data.items())) # Remove ID field before making copies resp_objs = [] - for _ in range(int(copies)): - obj = post_resource( - request.specify_collection, - request.specify_user_agent, - model, - data, - request.GET.get("recordsetid", None), - ) - resp_objs.append(_obj_to_data(obj, checker)) + with cache_unique_catnum_preferences(): + for _ in range(int(copies)): + obj = post_resource( + request.specify_collection, + request.specify_user_agent, + model, + data, + request.GET.get("recordsetid", None), + ) + resp_objs.append(_obj_to_data(obj, checker)) return HttpResponseCreated(toJson(resp_objs), content_type='application/json') From b3b0b2cce2fad1a1683b34be9e7d56e9cb9fb87c Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 4 May 2026 17:07:32 -0500 Subject: [PATCH 05/20] Cache catalognumber uniqueness preference during data set upload --- specifyweb/backend/workbench/upload/upload.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/specifyweb/backend/workbench/upload/upload.py b/specifyweb/backend/workbench/upload/upload.py index 6a502c1121f..440a79bec9d 100644 --- a/specifyweb/backend/workbench/upload/upload.py +++ b/specifyweb/backend/workbench/upload/upload.py @@ -16,6 +16,7 @@ from jsonschema import validate # type: ignore from typing import Any, Optional, cast +from specifyweb.backend.businessrules.utils import cache_unique_catnum_preferences from specifyweb.backend.permissions.permissions import has_target_permission from specifyweb.specify import models from specifyweb.backend.workbench.upload.auditlog import auditlog @@ -341,7 +342,7 @@ def do_upload( scope_context = ScopeContext() - with savepoint("main upload"): + with savepoint("main upload"), cache_unique_catnum_preferences(): tic = time.perf_counter() results: list[UploadResult] = [] for i, row in enumerate(rows): From 3313343bf5a1b50847522da8ee17faa5d5a6cac6 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 6 May 2026 16:13:39 -0500 Subject: [PATCH 06/20] Cache Component catalog lookups during bulk CO saves --- .../rules/collectionobject_rules.py | 14 +++--- .../businessrules/rules/component_rules.py | 18 +++++++- specifyweb/backend/businessrules/utils.py | 43 ++++++++++++++++++- 3 files changed, 65 insertions(+), 10 deletions(-) diff --git a/specifyweb/backend/businessrules/rules/collectionobject_rules.py b/specifyweb/backend/businessrules/rules/collectionobject_rules.py index f34a1e84b40..2ae9555d4d3 100644 --- a/specifyweb/backend/businessrules/rules/collectionobject_rules.py +++ b/specifyweb/backend/businessrules/rules/collectionobject_rules.py @@ -1,8 +1,10 @@ from specifyweb.backend.businessrules.orm_signal_handler import orm_signal_handler from specifyweb.backend.businessrules.exceptions import BusinessRuleException -from specifyweb.backend.businessrules.utils import get_unique_catnum_across_comp_co_coll_pref -from specifyweb.specify.models import Component +from specifyweb.backend.businessrules.utils import ( + collection_has_component_catalog_number, + get_unique_catnum_across_comp_co_coll_pref, +) def _collection_object_catalog_check_needed(co) -> bool: if co.catalognumber is None: @@ -34,10 +36,10 @@ def collectionobject_pre_save(co): unique_catnum_across_comp_co_coll_pref = get_unique_catnum_across_comp_co_coll_pref(co.collection, co.createdbyagent.specifyuser) if unique_catnum_across_comp_co_coll_pref: - contains_component_duplicates = Component.objects.filter( - catalognumber=co.catalognumber, - collectionobject__collection_id=co.collection_id, - ).exists() + contains_component_duplicates = collection_has_component_catalog_number( + co.collection_id, + co.catalognumber, + ) if contains_component_duplicates: raise BusinessRuleException( diff --git a/specifyweb/backend/businessrules/rules/component_rules.py b/specifyweb/backend/businessrules/rules/component_rules.py index 9e5b6963627..70bc733f518 100644 --- a/specifyweb/backend/businessrules/rules/component_rules.py +++ b/specifyweb/backend/businessrules/rules/component_rules.py @@ -1,10 +1,20 @@ from specifyweb.backend.businessrules.orm_signal_handler import orm_signal_handler from specifyweb.backend.businessrules.exceptions import BusinessRuleException -from specifyweb.backend.businessrules.utils import get_unique_catnum_across_comp_co_coll_pref +from specifyweb.backend.businessrules.utils import ( + clear_component_catalog_number_cache, + component_catalog_number_cache_is_active, + get_unique_catnum_across_comp_co_coll_pref, +) from specifyweb.specify.models import Collectionobject, Component +def _clear_component_catalog_number_cache_if_needed(comp): + if component_catalog_number_cache_is_active(): + clear_component_catalog_number_cache(comp.collectionobject.collection_id) + @orm_signal_handler('pre_save', 'Component') def component_pre_save(comp): + _clear_component_catalog_number_cache_if_needed(comp) + agent = comp.createdbyagent if agent is not None and agent.specifyuser is not None: unique_catnum_across_comp_co_coll_pref = get_unique_catnum_across_comp_co_coll_pref(comp.collectionobject.collection, comp.createdbyagent.specifyuser) @@ -19,4 +29,8 @@ def component_pre_save(comp): if contains_co_duplicates or contains_component_duplicates: raise BusinessRuleException( - 'Catalog Number is already in use for another Collection Object or Component in this collection.') \ No newline at end of file + 'Catalog Number is already in use for another Collection Object or Component in this collection.') + +@orm_signal_handler('pre_delete', 'Component') +def component_pre_delete(comp): + _clear_component_catalog_number_cache_if_needed(comp) diff --git a/specifyweb/backend/businessrules/utils.py b/specifyweb/backend/businessrules/utils.py index 478b23e9936..85072ef69ae 100644 --- a/specifyweb/backend/businessrules/utils.py +++ b/specifyweb/backend/businessrules/utils.py @@ -11,13 +11,52 @@ default=None, ) +_component_catnum_cache: ContextVar[dict[int, set[str]] | None] = ContextVar( + "component_catnum_cache", + default=None, +) + @contextmanager def cache_unique_catnum_preferences(): - token = _unique_catnum_pref_cache.set({}) + pref_token = _unique_catnum_pref_cache.set({}) + component_token = _component_catnum_cache.set({}) try: yield finally: - _unique_catnum_pref_cache.reset(token) + _component_catnum_cache.reset(component_token) + _unique_catnum_pref_cache.reset(pref_token) + +def component_catalog_number_cache_is_active() -> bool: + return _component_catnum_cache.get() is not None + +def clear_component_catalog_number_cache(collection_id: int) -> None: + cache = _component_catnum_cache.get() + if cache is not None: + cache.pop(collection_id, None) + +def collection_has_component_catalog_number(collection_id: int | None, catalog_number: str | None) -> bool: + from specifyweb.specify.models import Component + + if collection_id is None or catalog_number is None: + return False + + cache = _component_catnum_cache.get() + if cache is None: + return Component.objects.filter( + catalognumber=catalog_number, + collectionobject__collection_id=collection_id, + ).exists() + + if collection_id not in cache: + cache[collection_id] = set( + Component.objects.filter( + collectionobject__collection_id=collection_id, + ) + .exclude(catalognumber=None) + .values_list("catalognumber", flat=True) + ) + + return catalog_number in cache[collection_id] def get_unique_catnum_across_comp_co_coll_pref(collection, user) -> bool: import specifyweb.backend.context.app_resource as app_resource From f23437cf350d0ab9fddf007fa3f4533eefa1347d Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 12:55:42 -0500 Subject: [PATCH 07/20] Cache CO business rule lookups during bulk saves --- specifyweb/backend/businessrules/utils.py | 105 +++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/specifyweb/backend/businessrules/utils.py b/specifyweb/backend/businessrules/utils.py index 85072ef69ae..c37baf68f2e 100644 --- a/specifyweb/backend/businessrules/utils.py +++ b/specifyweb/backend/businessrules/utils.py @@ -3,6 +3,7 @@ import logging from contextlib import contextmanager from contextvars import ContextVar +from typing import Any logger = logging.getLogger(__name__) @@ -16,16 +17,102 @@ default=None, ) +_collection_default_type_cache: ContextVar[dict[int, int | None] | None] = ContextVar( + "collection_default_type_cache", + default=None, +) + +_collection_cache: ContextVar[dict[int, Any] | None] = ContextVar( + "businessrules_collection_cache", + default=None, +) + +_agent_specifyuser_cache: ContextVar[dict[int, Any | None] | None] = ContextVar( + "businessrules_agent_specifyuser_cache", + default=None, +) + @contextmanager def cache_unique_catnum_preferences(): pref_token = _unique_catnum_pref_cache.set({}) component_token = _component_catnum_cache.set({}) + default_type_token = _collection_default_type_cache.set({}) + collection_token = _collection_cache.set({}) + agent_user_token = _agent_specifyuser_cache.set({}) try: yield finally: + _agent_specifyuser_cache.reset(agent_user_token) + _collection_cache.reset(collection_token) + _collection_default_type_cache.reset(default_type_token) _component_catnum_cache.reset(component_token) _unique_catnum_pref_cache.reset(pref_token) +def _get_collection(collection_id: int): + from specifyweb.specify.models import Collection + + cache = _collection_cache.get() + if cache is None: + return Collection.objects.get(id=collection_id) + + if collection_id not in cache: + cache[collection_id] = Collection.objects.get(id=collection_id) + + return cache[collection_id] + +def _get_agent_specifyuser(agent_id: int): + from specifyweb.specify.models import Agent + + cache = _agent_specifyuser_cache.get() + if cache is None: + return Agent.objects.select_related("specifyuser").get(id=agent_id).specifyuser + + if agent_id not in cache: + cache[agent_id] = ( + Agent.objects + .select_related("specifyuser") + .get(id=agent_id) + .specifyuser + ) + + return cache[agent_id] + +def get_default_collectionobjecttype_id(collection_or_id) -> int | None: + from specifyweb.specify.models import Collection + + collection = None if isinstance(collection_or_id, int) else collection_or_id + collection_id = ( + collection_or_id + if isinstance(collection_or_id, int) + else getattr(collection_or_id, "id", None) + ) + if collection_id is None: + return getattr(collection, "collectionobjecttype_id", None) + + cache = _collection_default_type_cache.get() + if cache is None: + if collection is not None: + return getattr(collection, "collectionobjecttype_id", None) + return ( + Collection.objects + .filter(id=collection_id) + .values_list("collectionobjecttype_id", flat=True) + .first() + ) + + if collection_id not in cache: + if collection is not None: + cache[collection_id] = getattr(collection, "collectionobjecttype_id", None) + else: + cache[collection_id] = ( + Collection.objects + .filter(id=collection_id) + .values_list("collectionobjecttype_id", flat=True) + .first() + ) + + return cache[collection_id] + def component_catalog_number_cache_is_active() -> bool: return _component_catnum_cache.get() is not None @@ -95,4 +182,20 @@ def get_unique_catnum_across_comp_co_coll_pref(collection, user) -> bool: if cache is not None: cache[cache_key] = unique_catnum_enabled - return unique_catnum_enabled \ No newline at end of file + return unique_catnum_enabled + +def get_unique_catnum_across_comp_co_coll_pref_by_ids( + collection_id: int | None, + agent_id: int | None, +) -> bool: + if collection_id is None or agent_id is None: + return False + + user = _get_agent_specifyuser(agent_id) + if user is None: + return False + + return get_unique_catnum_across_comp_co_coll_pref( + _get_collection(collection_id), + user, + ) From 60ea3119d4cf67bee11d1115fb77db7b45c72abb Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 12:56:26 -0500 Subject: [PATCH 08/20] Avoid repeated FK object loads in CO pre-save checks --- .../rules/collectionobject_rules.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/specifyweb/backend/businessrules/rules/collectionobject_rules.py b/specifyweb/backend/businessrules/rules/collectionobject_rules.py index 2ae9555d4d3..3332929e6d4 100644 --- a/specifyweb/backend/businessrules/rules/collectionobject_rules.py +++ b/specifyweb/backend/businessrules/rules/collectionobject_rules.py @@ -3,7 +3,8 @@ from specifyweb.backend.businessrules.exceptions import BusinessRuleException from specifyweb.backend.businessrules.utils import ( collection_has_component_catalog_number, - get_unique_catnum_across_comp_co_coll_pref, + get_default_collectionobjecttype_id, + get_unique_catnum_across_comp_co_coll_pref_by_ids, ) def _collection_object_catalog_check_needed(co) -> bool: @@ -23,17 +24,22 @@ def collectionobject_pre_save(co): if co.collectionmemberid is None: co.collectionmemberid = co.collection_id - if co.collectionobjecttype is None: - co.collectionobjecttype = co.collection.collectionobjecttype + if co.collectionobjecttype_id is None: + co.collectionobjecttype_id = get_default_collectionobjecttype_id( + co.collection_id + ) - agent = co.createdbyagent if ( - agent is not None - and agent.specifyuser is not None + co.createdbyagent_id is not None and _collection_object_catalog_check_needed(co) ): - unique_catnum_across_comp_co_coll_pref = get_unique_catnum_across_comp_co_coll_pref(co.collection, co.createdbyagent.specifyuser) + unique_catnum_across_comp_co_coll_pref = ( + get_unique_catnum_across_comp_co_coll_pref_by_ids( + co.collection_id, + co.createdbyagent_id, + ) + ) if unique_catnum_across_comp_co_coll_pref: contains_component_duplicates = collection_has_component_catalog_number( From 0633bbe266a580f20fed10db38397b555d406095 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 12:58:11 -0500 Subject: [PATCH 09/20] Cache uniqueness rule metadata during bulk operations --- .../backend/businessrules/uniqueness_rules.py | 206 ++++++++++++++---- 1 file changed, 165 insertions(+), 41 deletions(-) diff --git a/specifyweb/backend/businessrules/uniqueness_rules.py b/specifyweb/backend/businessrules/uniqueness_rules.py index 52e5b8e360b..dd730fd863c 100644 --- a/specifyweb/backend/businessrules/uniqueness_rules.py +++ b/specifyweb/backend/businessrules/uniqueness_rules.py @@ -2,6 +2,9 @@ from functools import reduce import logging import json +from contextlib import contextmanager +from contextvars import ContextVar +from dataclasses import dataclass from typing import Any, TypedDict from collections.abc import Iterable @@ -35,6 +38,40 @@ class JSONUniquenessRule(TypedDict): logger = logging.getLogger(__name__) +@dataclass(frozen=True) +class CachedUniquenessRule: + rule: Any + field_names: tuple[str, ...] + scope_fields: tuple[str, ...] + all_fields: tuple[str, ...] + scope: str | None + is_database_constraint: bool + is_global: bool + discipline_id: int | None + +_uniqueness_rule_cache: ContextVar[ + dict[tuple[int, str], list[CachedUniquenessRule]] | None +] = ContextVar("uniqueness_rule_cache", default=None) +_uniqueness_migration_cache: ContextVar[dict[str, bool] | None] = ContextVar( + "uniqueness_migration_cache", + default=None, +) +_collection_discipline_cache: ContextVar[dict[int, int | None] | None] = ContextVar( + "uniqueness_collection_discipline_cache", + default=None, +) + +@contextmanager +def cache_uniqueness_rules(): + rule_token = _uniqueness_rule_cache.set({}) + migration_token = _uniqueness_migration_cache.set({}) + collection_token = _collection_discipline_cache.set({}) + try: + yield + finally: + _collection_discipline_cache.reset(collection_token) + _uniqueness_migration_cache.reset(migration_token) + _uniqueness_rule_cache.reset(rule_token) def resolve_model_field(model, field_path: str): current_model = model @@ -83,6 +120,119 @@ def apply_case_sensitive_filters(queryset, model, matchable, filter_kwargs): return queryset, transformed_filters +def _businessrules_initial_migration_applied() -> bool: + cache_key = "default" + cache = _uniqueness_migration_cache.get() + if cache is not None and cache_key in cache: + return cache[cache_key] + + applied = any( + app == "businessrules" and migration_name == "0001_initial" + for app, migration_name in MigrationRecorder( + connections["default"] + ).applied_migrations() + ) + + if cache is not None: + cache[cache_key] = applied + + return applied + +def _get_uniqueness_rule_configs(registry, model_name: str) -> list[CachedUniquenessRule]: + cache_key = (id(registry), model_name) + cache = _uniqueness_rule_cache.get() + if cache is not None and cache_key in cache: + return cache[cache_key] + + UniquenessRule = registry.get_model("businessrules", "UniquenessRule") + + configs: list[CachedUniquenessRule] = [] + rules = ( + UniquenessRule.objects + .filter(modelName=model_name) + .select_related("discipline") + .prefetch_related("uniquenessrulefield_set") + ) + + for rule in rules: + rule_fields = tuple(rule.uniquenessrulefield_set.all()) + scope_fields = tuple( + field.fieldPath for field in rule_fields if field.isScope + ) + field_names = tuple( + field.fieldPath.lower() for field in rule_fields if not field.isScope + ) + scope = scope_fields[0] if scope_fields else None + all_fields = ( + (*field_names, scope.lower()) + if scope is not None + else field_names + ) + configs.append( + CachedUniquenessRule( + rule=rule, + field_names=field_names, + scope_fields=scope_fields, + all_fields=all_fields, + scope=scope, + is_database_constraint=rule.isDatabaseConstraint, + is_global=rule_is_global(scope_fields), + discipline_id=rule.discipline_id, + ) + ) + + if cache is not None: + cache[cache_key] = configs + + return configs + +def _get_collection_discipline_id(collection_id: int) -> int | None: + from specifyweb.specify.models import Collection + + cache = _collection_discipline_cache.get() + if cache is not None and collection_id in cache: + return cache[collection_id] + + discipline_id = ( + Collection.objects + .filter(id=collection_id) + .values_list("discipline_id", flat=True) + .first() + ) + + if cache is not None: + cache[collection_id] = discipline_id + + return discipline_id + +def _instance_discipline_id(instance) -> int | None: + discipline_id = getattr(instance, "discipline_id", None) + if discipline_id is not None: + return discipline_id + + collection_id = getattr(instance, "collection_id", None) + if collection_id is None: + collection_id = getattr(instance, "collectionmemberid", None) + if collection_id is not None: + return _get_collection_discipline_id(collection_id) + + cached_collection = getattr(instance._state, "fields_cache", {}).get("collection") + if cached_collection is not None: + return getattr(cached_collection, "discipline_id", None) + + return None + +def _rule_applies_to_instance(rule: CachedUniquenessRule, instance) -> bool: + if rule.is_global: + return True + + if rule.discipline_id is not None: + discipline_id = _instance_discipline_id(instance) + if discipline_id is not None: + return discipline_id == rule.discipline_id + + return in_same_scope(rule.rule, instance) + @orm_signal_handler('pre_save', None, dispatch_uid=UNIQUENESS_DISPATCH_UID) def validate_unique(model, instance): """ @@ -102,41 +252,16 @@ def validate_unique(model, instance): f"Skipping uniqueness rule check on non-Specify model: '{model_name}'") return - applied_migrations = MigrationRecorder( - connections['default']).applied_migrations() - - for migration in applied_migrations: - app, migration_name = migration - if app == 'businessrules' and migration_name == '0001_initial': - break - else: + if not _businessrules_initial_migration_applied(): return # We can't directly use the main app registry in the context of migrations, which uses fake models registry = model._meta.apps - UniquenessRule = registry.get_model('businessrules', 'UniquenessRule') - UniquenessRuleField = registry.get_model( - 'businessrules', 'UniquenessRuleField') - - rules = UniquenessRule.objects.filter(modelName=model_name) - for rule in rules: - rule_fields = UniquenessRuleField.objects.filter(uniquenessrule=rule) - if not rule_is_global(tuple(field.fieldPath for field in rule_fields.filter(isScope=True))) \ - and not in_same_scope(rule, instance): + for rule in _get_uniqueness_rule_configs(registry, model_name): + if not _rule_applies_to_instance(rule, instance): continue - field_names = [ - field.fieldPath.lower() for field in rule_fields.filter(isScope=False)] - - _scope = rule_fields.filter(isScope=True) - scope = None if len(_scope) == 0 else _scope[0] - - all_fields = [*field_names] - - if scope is not None: - all_fields.append(scope.fieldPath.lower()) - def get_matchable(instance): def best_match_or_none(field_name: str): try: @@ -146,7 +271,7 @@ def best_match_or_none(field_name: str): matchable = {} field_mapping = {} - for field in all_fields: + for field in rule.all_fields: matched_or_none = best_match_or_none(field) if matched_or_none is not None: field_mapping[field] = matched_or_none[0] @@ -155,22 +280,21 @@ def best_match_or_none(field_name: str): return field_mapping, matchable def get_exception(conflicts, matchable, field_map): - error_message = '{} must have unique {}'.format(model_name, - join_with_and(field_names)) + error_message = '{} must have unique {}'.format(model_name, join_with_and(rule.field_names)) response = {"table": model_name, "localizationKey": "fieldNotUnique" - if scope is None + if rule.scope is None else "childFieldNotUnique", - "fieldName": ','.join(field_names), - "fieldData": serialize_multiple_django(matchable, field_map, field_names), + "fieldName": ','.join(rule.field_names), + "fieldData": serialize_multiple_django(matchable, field_map, rule.field_names), } - if scope is not None: - error_message += f' in {scope.fieldPath.lower()}' + if rule.scope is not None: + error_message += f' in {rule.scope.lower()}' response.update({ - "parentField": scope.fieldPath, - "parentData": serialize_multiple_django(matchable, field_map, [scope.fieldPath.lower()]) + "parentField": rule.scope, + "parentData": serialize_multiple_django(matchable, field_map, [rule.scope.lower()]) }) response['conflicting'] = list( conflicts.values_list('id', flat=True)[:100]) @@ -181,7 +305,7 @@ def get_exception(conflicts, matchable, field_map): continue field_map, matchable = match_result - if len(matchable.keys()) == 0 or set(all_fields) != set(field_map.keys()): + if len(matchable.keys()) == 0 or set(rule.all_fields) != set(field_map.keys()): continue conflicts_query = model.objects.only('id') @@ -190,7 +314,7 @@ def get_exception(conflicts, matchable, field_map): filter_kwargs = dict(matchable) - apply_case_sensitive = connection.vendor == 'mysql' and not rule.isDatabaseConstraint + apply_case_sensitive = connection.vendor == 'mysql' and not rule.is_database_constraint if apply_case_sensitive: conflicts_query, transformed_filters = apply_case_sensitive_filters( @@ -205,7 +329,7 @@ def get_exception(conflicts, matchable, field_map): conflicts = conflicts_query.filter(**filter_kwargs) if instance.id is not None: conflicts = conflicts.exclude(id=instance.id) - if conflicts: + if conflicts.exists(): raise get_exception(conflicts, matchable, field_map) From 8dbcfd4a434afd956787ca6672a972b0d7ced338 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 12:59:16 -0500 Subject: [PATCH 10/20] Cache repeated permission queries within bulk operations --- specifyweb/backend/permissions/permissions.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/specifyweb/backend/permissions/permissions.py b/specifyweb/backend/permissions/permissions.py index 1cd24a02754..b9dcd86eae0 100644 --- a/specifyweb/backend/permissions/permissions.py +++ b/specifyweb/backend/permissions/permissions.py @@ -5,6 +5,8 @@ ) from collections.abc import Callable from collections.abc import Iterable +from contextlib import contextmanager +from contextvars import ContextVar import logging @@ -167,6 +169,18 @@ class QueryResult(NamedTuple): matching_user_policies: list matching_role_policies: list +_permission_query_cache: ContextVar[dict[PermRequest, QueryResult] | None] = ContextVar( + "permission_query_cache", + default=None, +) + +@contextmanager +def cache_permission_queries(): + token = _permission_query_cache.set({}) + try: + yield + finally: + _permission_query_cache.reset(token) def query_pt( collectionid: int | None, userid: int, target: PermissionTargetAction @@ -177,6 +191,11 @@ def query_pt( def query( collectionid: int | None, userid: int, resource: str, action: str ) -> QueryResult: + request = PermRequest(collectionid, userid, resource, action) + cache = _permission_query_cache.get() + if cache is not None and request in cache: + return cache[request] + cursor = connection.cursor() cursor.execute( @@ -225,12 +244,17 @@ def query( for r in cursor.fetchall() ] - return QueryResult( + result = QueryResult( allowed=bool(ups) or bool(rps), matching_user_policies=ups, matching_role_policies=rps, ) + if cache is not None: + cache[request] = result + + return result + TABLE_ACTION = Literal["read", "create", "update", "delete"] def check_table_permissions(collection, actor, obj, action: TABLE_ACTION) -> None: From 1b53e6dbbe87869ddc1a11986a0129830afa35fa Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 13:00:22 -0500 Subject: [PATCH 11/20] Use bulk validation caches during dataset commit --- specifyweb/backend/workbench/upload/upload.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/specifyweb/backend/workbench/upload/upload.py b/specifyweb/backend/workbench/upload/upload.py index 440a79bec9d..c7651109f9d 100644 --- a/specifyweb/backend/workbench/upload/upload.py +++ b/specifyweb/backend/workbench/upload/upload.py @@ -17,7 +17,8 @@ from typing import Any, Optional, cast from specifyweb.backend.businessrules.utils import cache_unique_catnum_preferences -from specifyweb.backend.permissions.permissions import has_target_permission +from specifyweb.backend.businessrules.uniqueness_rules import cache_uniqueness_rules +from specifyweb.backend.permissions.permissions import cache_permission_queries, has_target_permission from specifyweb.specify import models from specifyweb.backend.workbench.upload.auditlog import auditlog from specifyweb.specify.datamodel import Table @@ -342,7 +343,12 @@ def do_upload( scope_context = ScopeContext() - with savepoint("main upload"), cache_unique_catnum_preferences(): + with ( + savepoint("main upload"), + cache_unique_catnum_preferences(), + cache_uniqueness_rules(), + cache_permission_queries(), + ): tic = time.perf_counter() results: list[UploadResult] = [] for i, row in enumerate(rows): From 1480b872073b7a932dec876532332e85f9d0c769 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 13:01:46 -0500 Subject: [PATCH 12/20] Apply validation caches to bulk copy endpoints --- specifyweb/backend/bulk_copy/bulk_copy.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/specifyweb/backend/bulk_copy/bulk_copy.py b/specifyweb/backend/bulk_copy/bulk_copy.py index 4dd47c6c768..97c1d1618d0 100644 --- a/specifyweb/backend/bulk_copy/bulk_copy.py +++ b/specifyweb/backend/bulk_copy/bulk_copy.py @@ -1,12 +1,13 @@ import json -from specifyweb.backend.permissions.permissions import table_permissions_checker +from specifyweb.backend.permissions.permissions import cache_permission_queries, table_permissions_checker from django.http import (HttpResponse, HttpResponseNotAllowed) from specifyweb.specify.api.crud import post_resource from specifyweb.specify.api.dispatch import HttpResponseCreated from specifyweb.specify.api.serializers import _obj_to_data, toJson from specifyweb.backend.businessrules.utils import cache_unique_catnum_preferences +from specifyweb.backend.businessrules.uniqueness_rules import cache_uniqueness_rules def collection_dispatch_bulk_copy(request, model, copies) -> HttpResponse: @@ -18,7 +19,11 @@ def collection_dispatch_bulk_copy(request, model, copies) -> HttpResponse: data = json.loads(request.body) data = dict(filter(lambda item: item[0] != 'id', data.items())) # Remove ID field before making copies resp_objs = [] - with cache_unique_catnum_preferences(): + with ( + cache_unique_catnum_preferences(), + cache_uniqueness_rules(), + cache_permission_queries(), + ): for _ in range(int(copies)): obj = post_resource( request.specify_collection, @@ -44,7 +49,11 @@ def collection_dispatch_bulk(request, model) -> HttpResponse: data = json.loads(request.body) resp_objs = [] - with cache_unique_catnum_preferences(): + with ( + cache_unique_catnum_preferences(), + cache_uniqueness_rules(), + cache_permission_queries(), + ): for obj_data in data: obj = post_resource( request.specify_collection, From 26b4b77126486569b50c090c28d9265035d8bf43 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 13:02:46 -0500 Subject: [PATCH 13/20] Apply validation caches to API bulk endpoints --- specifyweb/specify/api/dispatch.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/specifyweb/specify/api/dispatch.py b/specifyweb/specify/api/dispatch.py index 6b9f648ee76..2bd5342340d 100644 --- a/specifyweb/specify/api/dispatch.py +++ b/specifyweb/specify/api/dispatch.py @@ -5,7 +5,8 @@ from specifyweb.backend.businessrules.exceptions import BusinessRuleException from specifyweb.backend.businessrules.utils import cache_unique_catnum_preferences -from specifyweb.backend.permissions.permissions import enforce, table_permissions_checker +from specifyweb.backend.businessrules.uniqueness_rules import cache_uniqueness_rules +from specifyweb.backend.permissions.permissions import cache_permission_queries, enforce, table_permissions_checker from specifyweb.specify.api.crud import apply_filters, delete_resource, get_collection, get_resource, post_resource, put_resource from specifyweb.specify.api.exceptions import FilterError, OrderByError from specifyweb.specify.api.filter_by_col import filter_by_collection @@ -128,7 +129,11 @@ def collection_dispatch_bulk(request, model) -> HttpResponse: data = json.loads(request.body) resp_objs = [] - with cache_unique_catnum_preferences(): + with ( + cache_unique_catnum_preferences(), + cache_uniqueness_rules(), + cache_permission_queries(), + ): for obj_data in data: obj = post_resource( request.specify_collection, @@ -150,7 +155,11 @@ def collection_dispatch_bulk_copy(request, model, copies) -> HttpResponse: data = json.loads(request.body) data = dict(filter(lambda item: item[0] != 'id', data.items())) # Remove ID field before making copies resp_objs = [] - with cache_unique_catnum_preferences(): + with ( + cache_unique_catnum_preferences(), + cache_uniqueness_rules(), + cache_permission_queries(), + ): for _ in range(int(copies)): obj = post_resource( request.specify_collection, @@ -195,4 +204,4 @@ def rows(request, model_name: str) -> HttpResponse: query = query[offset:offset + limit] data = list(query) - return HttpResponse(toJson(data), content_type='application/json') \ No newline at end of file + return HttpResponse(toJson(data), content_type='application/json') From 71b3ef77be1f066e52923104a823775bec0ad923 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 16:47:51 -0500 Subject: [PATCH 14/20] Track dirty model fields during scoped saves --- specifyweb/backend/businessrules/utils.py | 46 +++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/specifyweb/backend/businessrules/utils.py b/specifyweb/backend/businessrules/utils.py index c37baf68f2e..455d734003a 100644 --- a/specifyweb/backend/businessrules/utils.py +++ b/specifyweb/backend/businessrules/utils.py @@ -32,6 +32,52 @@ default=None, ) +_changed_field_names_attr = "_specify_changed_field_names" +_missing = object() + +def _normalize_changed_field_name(field_name: str) -> str: + field_name = field_name.lower() + return field_name[:-3] if field_name.endswith("_id") else field_name + +def _field_path_match_names(field_name: str) -> set[str]: + field_name = field_name.lower() + first_part = field_name.split("__", 1)[0] + return { + _normalize_changed_field_name(field_name), + _normalize_changed_field_name(first_part), + } + +@contextmanager +def track_changed_fields(instance, dirty_fields): + previous = getattr(instance, _changed_field_names_attr, _missing) + setattr( + instance, + _changed_field_names_attr, + { + _normalize_changed_field_name(field["field_name"]) + for field in dirty_fields + }, + ) + try: + yield + finally: + if previous is _missing: + delattr(instance, _changed_field_names_attr) + else: + setattr(instance, _changed_field_names_attr, previous) + +def changed_fields_include(instance, field_names) -> bool: + changed_field_names = getattr(instance, _changed_field_names_attr, None) + if changed_field_names is None: + return True + + match_names = { + match_name + for field_name in field_names + for match_name in _field_path_match_names(field_name) + } + return bool(changed_field_names & match_names) + @contextmanager def cache_unique_catnum_preferences(): pref_token = _unique_catnum_pref_cache.set({}) From 59e8b3be7317d981cf810b7d7a965460be8e0b27 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 16:48:35 -0500 Subject: [PATCH 15/20] Save only dirty fields during Batch Edit updates --- specifyweb/backend/workbench/upload/upload_table.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/specifyweb/backend/workbench/upload/upload_table.py b/specifyweb/backend/workbench/upload/upload_table.py index 782249c7505..c750c7204ef 100644 --- a/specifyweb/backend/workbench/upload/upload_table.py +++ b/specifyweb/backend/workbench/upload/upload_table.py @@ -5,6 +5,7 @@ from django.db import transaction, IntegrityError from specifyweb.backend.businessrules.exceptions import BusinessRuleException +from specifyweb.backend.businessrules.utils import track_changed_fields from specifyweb.specify import models from specifyweb.specify.utils.func import Func from specifyweb.specify.utils.field_change_info import FieldChangeInfo @@ -1100,11 +1101,18 @@ def _do_update(self, reference_obj, dirty_fields, **attrs): self.auditor.update(reference_obj, None, dirty_fields) for key, value in attrs.items(): setattr(reference_obj, key, value) + update_fields = {field["field_name"] for field in dirty_fields} + if "modifiedbyagent_id" in attrs: + update_fields.add("modifiedbyagent_id") + if hasattr(reference_obj, "timestampmodified"): + update_fields.add("timestampmodified") if hasattr(reference_obj, "version"): # Consider using bump_version here. # I'm not doing it for performance reasons -- we already checked our version at this point, and have a lock, so can just increment the version. setattr(reference_obj, "version", getattr(reference_obj, "version") + 1) - reference_obj.save() + update_fields.add("version") + with track_changed_fields(reference_obj, dirty_fields): + reference_obj.save(update_fields=update_fields) return reference_obj def _do_insert(self): From b54f7b03254947b05b9e841e54657144899bb207 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 16:48:51 -0500 Subject: [PATCH 16/20] Skip CO catalog checks for unrelated field updates --- .../backend/businessrules/rules/collectionobject_rules.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/specifyweb/backend/businessrules/rules/collectionobject_rules.py b/specifyweb/backend/businessrules/rules/collectionobject_rules.py index 3332929e6d4..cf54c6f6fea 100644 --- a/specifyweb/backend/businessrules/rules/collectionobject_rules.py +++ b/specifyweb/backend/businessrules/rules/collectionobject_rules.py @@ -2,6 +2,7 @@ from specifyweb.backend.businessrules.exceptions import BusinessRuleException from specifyweb.backend.businessrules.utils import ( + changed_fields_include, collection_has_component_catalog_number, get_default_collectionobjecttype_id, get_unique_catnum_across_comp_co_coll_pref_by_ids, @@ -12,6 +13,8 @@ def _collection_object_catalog_check_needed(co) -> bool: return False if co.pk is None: return True + if not changed_fields_include(co, ("catalognumber", "collection")): + return False return not type(co).objects.filter( pk=co.pk, @@ -48,5 +51,4 @@ def collectionobject_pre_save(co): ) if contains_component_duplicates: - raise BusinessRuleException( - 'Catalog Number is already in use for another Component in this collection.') + raise BusinessRuleException("Catalog Number is already in use for another Component in this collection.") From cba9e121ef3c29b5f8a9bbc31ccf6fc163e6f712 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 7 May 2026 16:49:05 -0500 Subject: [PATCH 17/20] Skip unchanged uniqueness rules during updates --- specifyweb/backend/businessrules/uniqueness_rules.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/specifyweb/backend/businessrules/uniqueness_rules.py b/specifyweb/backend/businessrules/uniqueness_rules.py index dd730fd863c..34788c19763 100644 --- a/specifyweb/backend/businessrules/uniqueness_rules.py +++ b/specifyweb/backend/businessrules/uniqueness_rules.py @@ -22,6 +22,7 @@ from .orm_signal_handler import orm_signal_handler from .exceptions import BusinessRuleException from . import models +from .utils import changed_fields_include class JSONUniquenessRule(TypedDict): rule: tuple[list[str], list[str]] @@ -259,6 +260,12 @@ def validate_unique(model, instance): registry = model._meta.apps for rule in _get_uniqueness_rule_configs(registry, model_name): + if ( + instance.pk is not None + and not changed_fields_include(instance, rule.all_fields) + ): + continue + if not _rule_applies_to_instance(rule, instance): continue From ecf327751965862004f513f4679a6bae7bf7efcd Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 8 May 2026 14:59:13 -0500 Subject: [PATCH 18/20] Add bulk Batch Edit update and audit flush path --- .../backend/workbench/upload/upload_table.py | 380 +++++++++++++++++- 1 file changed, 372 insertions(+), 8 deletions(-) diff --git a/specifyweb/backend/workbench/upload/upload_table.py b/specifyweb/backend/workbench/upload/upload_table.py index c750c7204ef..a01e15940a7 100644 --- a/specifyweb/backend/workbench/upload/upload_table.py +++ b/specifyweb/backend/workbench/upload/upload_table.py @@ -1,10 +1,14 @@ from decimal import Decimal import logging +from collections import defaultdict +from dataclasses import dataclass from typing import Any, NamedTuple, Literal, Union, Callable -from django.db import transaction, IntegrityError +from django.db import connection, transaction, IntegrityError +from django.db.models.signals import post_save, pre_save +from django.utils import timezone -from specifyweb.backend.businessrules.exceptions import BusinessRuleException +from specifyweb.backend.businessrules.exceptions import AbortSave, BusinessRuleException from specifyweb.backend.businessrules.utils import track_changed_fields from specifyweb.specify import models from specifyweb.specify.utils.func import Func @@ -21,8 +25,15 @@ ) from specifyweb.specify.models_utils.lock_tables import LockDispatcher from specifyweb.backend.workbench.upload.scope_context import ScopeContext +from specifyweb.backend.workbench.upload.auditlog import ( + Collection as AUDIT_COLLECTION, + Discipline as AUDIT_DISCIPLINE, + Division as AUDIT_DIVISION, + truncate_str_to_bytes, +) from .column_options import ColumnOptions, ExtendedColumnOptions from .parsing import parse_many, ParseResult, WorkBenchParseFailure +from . import auditcodes from .upload_result import ( Deleted, @@ -61,6 +72,333 @@ REFERENCE_KEY = object() +class BulkBatchEditFallback(Exception): + pass + + +@dataclass +class BulkUpdateIntent: + model: type + obj: Any + dirty_fields: list[FieldChangeInfo] + update_fields: frozenset[str] + agent_id: int | None + parent_record_id: int | None + parent_table_num: int | None + record_version: int + table_num: int + row_order: int + + +class BulkBatchEditContext: + batch_size = 500 + + def __init__(self, audit_log, agent): + self.audit_log = audit_log + self.agent = agent + self.intents: list[BulkUpdateIntent] = [] + self._order = 0 + self._audit_enabled: bool | None = None + self._field_audit_enabled: bool | None = None + + def validate_audit_backend(self): + audit_enabled, field_audit_enabled = self._audit_state() + if field_audit_enabled and not connection.features.can_return_rows_from_bulk_insert: + raise BulkBatchEditFallback( + "Database does not return IDs from bulk audit inserts." + ) + + def preload_references(self, scoped_upload_plan, batch_edit_packs, cache: dict): + refs: dict[str, dict[int, int | None]] = defaultdict(dict) + self._collect_references(scoped_upload_plan, batch_edit_packs, refs) + + for model_name, versions_by_id in refs.items(): + model = getattr(models, model_name.capitalize()) + records = { + record.id: record + for record in ( + model.objects + .select_for_update() + .filter(id__in=versions_by_id.keys()) + ) + } + if len(records) != len(versions_by_id): + raise BulkBatchEditFallback( + f"Unable to preload all {model_name} batch edit records." + ) + + for record_id, version in versions_by_id.items(): + record = records[record_id] + current_version = getattr(record, "version", None) + if ( + current_version is not None + and version is not None + and current_version != version + ): + raise BulkBatchEditFallback( + f"{model_name} record {record_id} is out of date." + ) + cache[(REFERENCE_KEY, model_name, record_id)] = record + + def _collect_references(self, uploadable, packs, refs): + if not all(hasattr(uploadable, attr) for attr in ("name", "toOne", "toMany")): + raise BulkBatchEditFallback("Only upload tables are eligible.") + + for pack in packs: + if pack is None: + raise BulkBatchEditFallback("Batch edit pack is required.") + + self_pack = pack.get("self", {}) + record_id = self_pack.get("id") + if not isinstance(record_id, int): + raise BulkBatchEditFallback("Only existing records are eligible.") + version = self_pack.get("version", None) + existing_version = refs[uploadable.name].get(record_id) + if ( + record_id in refs[uploadable.name] + and existing_version is not None + and version is not None + and existing_version != version + ): + raise BulkBatchEditFallback("Conflicting batch edit versions.") + refs[uploadable.name][record_id] = version + + to_one_pack = pack.get("to_one", {}) or {} + for field_name, to_one_uploadable in uploadable.toOne.items(): + if field_name in to_one_pack: + self._collect_references( + to_one_uploadable, + [to_one_pack[field_name]], + refs, + ) + + to_many_pack = pack.get("to_many", {}) or {} + for field_name, to_many_uploadables in uploadable.toMany.items(): + if field_name not in to_many_pack: + continue + field_packs = to_many_pack[field_name] + if len(field_packs) != len(to_many_uploadables): + raise BulkBatchEditFallback( + "Batch edit to-many pack shape changed." + ) + for record_uploadable, record_pack in zip( + to_many_uploadables, + field_packs, + ): + self._collect_references(record_uploadable, [record_pack], refs) + + def queue_update(self, auditor, reference_obj, dirty_fields, attrs): + if post_save.has_listeners(type(reference_obj)): + raise BulkBatchEditFallback( + f"{type(reference_obj).__name__} has post-save handlers." + ) + if self._dirty_fields_touch_uniqueness(type(reference_obj), dirty_fields): + raise BulkBatchEditFallback("Unique-field updates use the existing path.") + + auditor.pre_log(reference_obj, "update") + + update_fields = {field["field_name"] for field in dirty_fields} + if "modifiedbyagent_id" in attrs: + update_fields.add("modifiedbyagent_id") + if hasattr(reference_obj, "timestampmodified"): + reference_obj.timestampmodified = timezone.now() + update_fields.add("timestampmodified") + + old_version = getattr(reference_obj, "version", 0) + if hasattr(reference_obj, "version"): + reference_obj.version = old_version + 1 + update_fields.add("version") + + parent_record_id, parent_table_num = self._audit_parent(reference_obj) + table_num = ( + reference_obj.specify_model.tableId + if hasattr(reference_obj, "specify_model") + else 0 + ) + + for key, value in attrs.items(): + setattr(reference_obj, key, value) + + try: + with track_changed_fields(reference_obj, dirty_fields): + pre_save.send( + sender=type(reference_obj), + instance=reference_obj, + raw=False, + using=reference_obj._state.db, + update_fields=frozenset(update_fields), + ) + except AbortSave as e: + raise BulkBatchEditFallback("AbortSave uses the existing path.") from e + + self.intents.append( + BulkUpdateIntent( + model=type(reference_obj), + obj=reference_obj, + dirty_fields=[dict(field) for field in dirty_fields], + update_fields=frozenset(update_fields), + agent_id=self.agent.id if self.agent is not None else None, + parent_record_id=parent_record_id, + parent_table_num=parent_table_num, + record_version=old_version, + table_num=table_num, + row_order=self._order, + ) + ) + self._order += 1 + return reference_obj + + def flush(self): + if not self.intents: + return + + audit_enabled, field_audit_enabled = self._audit_state() + self.validate_audit_backend() + + try: + for (model, update_fields), intents in self._grouped_update_intents(): + model.objects.bulk_update( + [intent.obj for intent in intents], + list(update_fields), + batch_size=self.batch_size, + ) + + if not audit_enabled: + return + + sorted_intents = sorted(self.intents, key=lambda intent: intent.row_order) + audit_logs = models.Spauditlog.objects.bulk_create( + [ + models.Spauditlog( + action=auditcodes.UPDATE, + parentrecordid=intent.parent_record_id, + parenttablenum=intent.parent_table_num, + recordid=intent.obj.id, + recordversion=intent.record_version, + tablenum=intent.table_num, + createdbyagent_id=intent.agent_id, + modifiedbyagent_id=intent.agent_id, + ) + for intent in sorted_intents + ], + batch_size=self.batch_size, + ) + + if not field_audit_enabled: + return + if any(audit_log.id is None for audit_log in audit_logs): + raise BulkBatchEditFallback( + "Database did not populate bulk audit IDs." + ) + + audit_fields = [] + for intent, audit_log in zip(sorted_intents, audit_logs): + for field in intent.dirty_fields: + audit_fields.append( + models.Spauditlogfield( + fieldname=field["field_name"], + newvalue=self._audit_value(field["new_value"]), + oldvalue=self._audit_value(field["old_value"]), + spauditlog=audit_log, + createdbyagent_id=intent.agent_id, + modifiedbyagent_id=intent.agent_id, + ) + ) + models.Spauditlogfield.objects.bulk_create( + audit_fields, + batch_size=self.batch_size, + ) + except (IntegrityError, TypeError, ValueError) as e: + raise BulkBatchEditFallback("Bulk Batch Edit flush failed.") from e + + def _grouped_update_intents(self): + grouped = defaultdict(list) + for intent in self.intents: + grouped[(intent.model, intent.update_fields)].append(intent) + return grouped.items() + + def _audit_state(self): + if self._audit_enabled is None: + self._audit_enabled = ( + self.audit_log is not None and self.audit_log.isAuditing() + ) + self._field_audit_enabled = ( + self._audit_enabled + and self.audit_log is not None + and self.audit_log.isAuditingFlds() + ) + return self._audit_enabled, self._field_audit_enabled + + def _audit_value(self, value): + if value is None: + return None + return truncate_str_to_bytes(str(value), 2**16 - 1) + + def _audit_parent(self, obj): + for field_name, table in ( + ("collectionmemberid", AUDIT_COLLECTION), + ("collection_id", AUDIT_COLLECTION), + ("discipline_id", AUDIT_DISCIPLINE), + ("division_id", AUDIT_DIVISION), + ): + if not hasattr(obj, field_name): + continue + scope_id = getattr(obj, field_name) + if scope_id is not None: + return scope_id, table.tableId + return None, None + + def _dirty_fields_touch_uniqueness(self, model, dirty_fields) -> bool: + changed_names = { + self._normalize_field_name(field["field_name"]) + for field in dirty_fields + } + + for field in model._meta.fields: + if field.unique and self._field_matches(changed_names, field.name): + return True + + for unique_together in model._meta.unique_together: + if any(self._field_matches(changed_names, field) for field in unique_together): + return True + + for constraint in model._meta.constraints: + constraint_fields = getattr(constraint, "fields", ()) + if any(self._field_matches(changed_names, field) for field in constraint_fields): + return True + + try: + from specifyweb.backend.businessrules.uniqueness_rules import ( + _get_uniqueness_rule_configs, + ) + + for rule in _get_uniqueness_rule_configs( + model._meta.apps, + model.__name__, + ): + if any( + self._field_matches(changed_names, field_name) + for field_name in rule.all_fields + ): + return True + except Exception: + raise BulkBatchEditFallback("Unable to inspect uniqueness rules.") + + return False + + def _field_matches(self, changed_names, field_name: str) -> bool: + field_name = field_name.lower() + first_part = field_name.split("__", 1)[0] + return ( + self._normalize_field_name(field_name) in changed_names + or self._normalize_field_name(first_part) in changed_names + ) + + def _normalize_field_name(self, field_name: str) -> str: + field_name = field_name.lower() + return field_name[:-3] if field_name.endswith("_id") else field_name + + class UploadTable(NamedTuple): name: str wbcols: dict[str, ColumnOptions] @@ -701,6 +1039,8 @@ def _do_upload( to_one_results: dict[str, UploadResult], info: ReportInfo, ) -> UploadResult: + if self.auditor.bulk_context is not None: + raise BulkBatchEditFallback("Inserts and clones use the existing path.") missing_required = self._check_missing_required() @@ -808,6 +1148,12 @@ def _inserter(model, attrs): return _inserter def _do_picklist_additions(self) -> list[PicklistAddition]: + if ( + self.auditor.bulk_context is not None + and any(parsedField.add_to_picklist is not None for parsedField in self.parsedFields) + ): + raise BulkBatchEditFallback("Picklist additions use the existing path.") + added_picklist_items = [] for parsedField in self.parsedFields: if parsedField.add_to_picklist is not None: @@ -826,6 +1172,8 @@ def _do_picklist_additions(self) -> list[PicklistAddition]: return added_picklist_items def delete_row(self, parent_obj=None) -> UploadResult: + if self.auditor.bulk_context is not None: + raise BulkBatchEditFallback("Deletes use the existing path.") info = ReportInfo( tableName=self.name, @@ -1056,18 +1404,26 @@ def _do_upload( ), } - with transaction.atomic(): - try: + try: + if self.auditor.bulk_context is None: + with transaction.atomic(): + updated = self._do_update( + reference_record, + [*to_one_changes.values(), *concrete_field_changes.values()], + **attrs, + ) + picklist_additions = self._do_picklist_additions() + else: updated = self._do_update( reference_record, [*to_one_changes.values(), *concrete_field_changes.values()], **attrs, ) picklist_additions = self._do_picklist_additions() - except (BusinessRuleException, IntegrityError) as e: - return UploadResult( - FailedBusinessRule(str(e), {}, info), to_one_results, {} - ) + except (BusinessRuleException, IntegrityError) as e: + return UploadResult( + FailedBusinessRule(str(e), {}, info), to_one_results, {} + ) record: Updated | NoChange = ( Updated(updated.pk, info, picklist_additions) @@ -1098,6 +1454,14 @@ def _is_scope_change_allowed(self, concrete_field_changes): def _do_update(self, reference_obj, dirty_fields, **attrs): # TODO: Try handling parent_obj. Quite complicated and ugly. + if self.auditor.bulk_context is not None: + return self.auditor.bulk_context.queue_update( + self.auditor, + reference_obj, + dirty_fields, + attrs, + ) + self.auditor.update(reference_obj, None, dirty_fields) for key, value in attrs.items(): setattr(reference_obj, key, value) From 969a568932df07171abe44aa275ca41b59966f1b Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 8 May 2026 14:59:28 -0500 Subject: [PATCH 19/20] Try bulk Batch Edit commits with row-by-row fallback --- specifyweb/backend/workbench/upload/upload.py | 92 ++++++++++++++++++- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/specifyweb/backend/workbench/upload/upload.py b/specifyweb/backend/workbench/upload/upload.py index c7651109f9d..85e5c0c8a0a 100644 --- a/specifyweb/backend/workbench/upload/upload.py +++ b/specifyweb/backend/workbench/upload/upload.py @@ -55,7 +55,7 @@ Uploadable, BatchEditJson, ) -from .upload_table import UploadTable +from .upload_table import BulkBatchEditContext, BulkBatchEditFallback, UploadTable from .scope_context import ScopeContext from ..models import Spdataset @@ -327,8 +327,67 @@ def do_upload( progress: Progress | None = None, batch_edit_packs: list[BatchEditJson | None] | None = None, auditor_props: AuditorProps | None = None, +) -> list[UploadResult]: + should_try_bulk_batch_edit = ( + batch_edit_packs is not None + and not no_commit + and not allow_partial + and isinstance(rows, Sized) + ) + if should_try_bulk_batch_edit: + try: + return _do_upload_impl( + collection, + rows, + upload_plan, + uploading_agent_id, + disambiguations, + no_commit, + allow_partial, + progress, + batch_edit_packs, + auditor_props, + use_bulk_batch_edit=True, + ) + except BulkBatchEditFallback as e: + logger.info("falling back to row-by-row Batch Edit upload: %s", e) + + return _do_upload_impl( + collection, + rows, + upload_plan, + uploading_agent_id, + disambiguations, + no_commit, + allow_partial, + progress, + batch_edit_packs, + auditor_props, + ) + + +def _do_upload_impl( + collection, + rows: Rows, + upload_plan: Uploadable, + uploading_agent_id: int, + disambiguations: list[Disambiguation] | None = None, + no_commit: bool = False, + allow_partial: bool = True, + progress: Progress | None = None, + batch_edit_packs: list[BatchEditJson | None] | None = None, + auditor_props: AuditorProps | None = None, + use_bulk_batch_edit: bool = False, ) -> list[UploadResult]: cache: dict = {} + agent = models.Agent.objects.get(id=uploading_agent_id) + bulk_context = ( + BulkBatchEditContext(None if no_commit else auditlog, agent) + if use_bulk_batch_edit + else None + ) + if bulk_context is not None: + bulk_context.validate_audit_backend() _auditor = Auditor( collection=collection, props=auditor_props or DEFAULT_AUDITOR_PROPS, @@ -336,10 +395,12 @@ def do_upload( # Done to allow checking skipping write permission check # during validations skip_create_permission_check=no_commit, - agent=models.Agent.objects.get(id=uploading_agent_id), + agent=agent, + bulk_context=bulk_context, ) total = len(rows) if isinstance(rows, Sized) else None cached_scope_table = None + bulk_context_preloaded = False scope_context = ScopeContext() @@ -352,6 +413,9 @@ def do_upload( tic = time.perf_counter() results: list[UploadResult] = [] for i, row in enumerate(rows): + if bulk_context is not None and has_attachments(row): + raise BulkBatchEditFallback("Attachments use the existing path.") + _cache = cache.copy() if cache is not None and allow_partial else cache da = disambiguations[i] if disambiguations else None batch_edit_pack = batch_edit_packs[i] if batch_edit_packs else None @@ -393,9 +457,29 @@ def do_upload( if not scope_context.is_variable: # This forces every row to rescope when not variable cached_scope_table = scoped_table + if bulk_context is not None: + if scope_context.is_variable: + raise BulkBatchEditFallback( + "Variable scoping uses the existing path." + ) + if batch_edit_packs is None: + raise BulkBatchEditFallback( + "Batch edit packs are required." + ) + bulk_context.preload_references( + scoped_table, + batch_edit_packs, + cache, + ) + bulk_context_preloaded = True else: scoped_table = cached_scope_table + if bulk_context is not None and not bulk_context_preloaded: + raise BulkBatchEditFallback( + "Bulk Batch Edit references were not preloaded." + ) + bind_result = ( scoped_table.disambiguate(da) .apply_batch_edit_pack(batch_edit_pack) @@ -432,6 +516,8 @@ def do_upload( if no_commit: raise Rollback("no_commit option") else: + if bulk_context is not None: + bulk_context.flush() fixup_trees(scoped_table, results) return results @@ -652,4 +738,4 @@ def _commit_uploader(result): # parent.save(update_fields=['rowresults']) parent.rowresults = None - parent.save(update_fields=["rowresults"]) \ No newline at end of file + parent.save(update_fields=["rowresults"]) From f4b2d996161270c8c2bf4547cdbb589783d268bf Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 8 May 2026 14:59:49 -0500 Subject: [PATCH 20/20] Carry bulk update context through WB auditing --- specifyweb/backend/workbench/upload/auditor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/specifyweb/backend/workbench/upload/auditor.py b/specifyweb/backend/workbench/upload/auditor.py index 47bf662e3c9..d545320ee88 100644 --- a/specifyweb/backend/workbench/upload/auditor.py +++ b/specifyweb/backend/workbench/upload/auditor.py @@ -42,6 +42,7 @@ class Auditor(NamedTuple): audit_log: AuditLog | None skip_create_permission_check: bool = False agent: Agent | None = None + bulk_context: Any | None = None def pre_log(self, obj: Any, action_name: TABLE_ACTION): if self.skip_create_permission_check: