diff --git a/specifyweb/backend/export/cache.py b/specifyweb/backend/export/cache.py new file mode 100644 index 00000000000..250aad111d5 --- /dev/null +++ b/specifyweb/backend/export/cache.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Any + + +def _field_to_cache_entry(field) -> dict[str, Any]: + return { + 'id': field.id, + 'exported_field_name': field.exportedfieldname, + 'extension_item': field.extensionitem, + 'remarks': field.remarks, + 'row_type': field.rowtype, + 'export_schema_item_id': field.exportschemaitem_id, + 'query_field_id': field.queryfield_id, + } + + +def _build_single_cache(extension, fields=None) -> dict[str, Any]: + if fields is None: + fields = extension.mappings + + return { + 'id': extension.id, + 'mapping_name': extension.mappingname, + 'description': extension.description, + 'collection_member_id': extension.collectionmemberid, + 'timestamp_exported': extension.timestampexported, + 'fields': [ + _field_to_cache_entry(field) + for field in fields.all().iterator(chunk_size=2000) + ], + } + + +def build_cache_tables(extensions) -> list[dict[str, Any]]: + return [ + _build_single_cache(extension) + for extension in extensions.all().iterator(chunk_size=2000) + ] diff --git a/specifyweb/backend/export/extract_query.py b/specifyweb/backend/export/extract_query.py index 3ff381efc51..19c1f8ce523 100644 --- a/specifyweb/backend/export/extract_query.py +++ b/specifyweb/backend/export/extract_query.py @@ -2,12 +2,13 @@ from .dwca import prettify + def extract_query(query): query_node = ElementTree.Element('query') query_node.set('name', query.name) query_node.set('contextTableId', str(query.contexttableid)) - for field in query.fields.all(): + for field in query.fields.all().iterator(chunk_size=2000): field_node = ElementTree.SubElement(query_node, 'field') field_node.set('stringId', field.stringid) field_node.set('oper', str(field.operstart)) diff --git a/specifyweb/backend/interactions/cog_preps.py b/specifyweb/backend/interactions/cog_preps.py index 63089be38da..bd871539b22 100644 --- a/specifyweb/backend/interactions/cog_preps.py +++ b/specifyweb/backend/interactions/cog_preps.py @@ -45,7 +45,7 @@ def get_cog_consolidated_preps(cog: Collectionobjectgroup) -> list[Preparation]: # For each child cog, recursively get the consolidated preparations child_cogs = Collectionobjectgroupjoin.objects.filter( parentcog=cog, childcog__isnull=False - ).values_list("childcog", flat=True) + ).values_list("childcog", flat=True).iterator(chunk_size=2000) consolidated_preps = [] for child_cog_id in child_cogs: child_cog = Collectionobjectgroup.objects.filter( @@ -56,7 +56,7 @@ def get_cog_consolidated_preps(cog: Collectionobjectgroup) -> list[Preparation]: # Get the child CollectionObjects collection_objects = Collectionobjectgroupjoin.objects.filter( parentcog=cog, childco__isnull=False - ).values_list("childco", flat=True) + ).values_list("childco", flat=True).iterator(chunk_size=2000) # For each CollectionObject, get the preparations for co in collection_objects: diff --git a/specifyweb/backend/permissions/views.py b/specifyweb/backend/permissions/views.py index 27e3e35cb14..f421d22d832 100644 --- a/specifyweb/backend/permissions/views.py +++ b/specifyweb/backend/permissions/views.py @@ -533,7 +533,7 @@ def put(self, request, collectionid: int, userid: int) -> http.HttpResponse: def serialize_role(role: models.Role | models.LibraryRole) -> dict: policies = defaultdict(list) - for p in role.policies.all(): + for p in role.policies.all().iterator(chunk_size=2000): policies[p.resource].append(p.action) return { diff --git a/specifyweb/backend/stored_queries/batch_edit.py b/specifyweb/backend/stored_queries/batch_edit.py index 56f3fedfaed..969389757ce 100644 --- a/specifyweb/backend/stored_queries/batch_edit.py +++ b/specifyweb/backend/stored_queries/batch_edit.py @@ -720,17 +720,22 @@ def _lookup_in_fields(_id: int | None, readonly_fields: list[str]): ] # Need to go off by 1, bc we added 1 to account for id fields # It could happen that the field we saw doesn't exist. # Plus, the default options get chosen in the cases of - table_name, field_name = _get_table_and_field(field) + table_name, _ = _get_table_and_field(field) + # Use date-part-aware field name for localization lookup so that + # different date components (Full Date, Day, Month, Year) are + # matched to their correct captions rather than consuming labels + # in insertion order. + date_part_field_name = _get_date_part_field_name(field) field_caption = query_field_caption_lookup.get(field, None) table_field_labels = batch_edit_meta_tables.get_table_field_labels(table_name) if ( table_field_labels is None - or not table_field_labels.has_field_label(field_name) + or not table_field_labels.has_field_label(date_part_field_name) or field.fieldspec.contains_tree_rank() ): localized_label = naive_field_format(field.fieldspec) else: - field_label = table_field_labels.use_field_label(field_name, field_caption) + field_label = table_field_labels.use_field_label(date_part_field_name, field_caption) localized_label = ( field_label.caption if field_label is not None else naive_field_format(field.fieldspec) ) @@ -857,7 +862,10 @@ def naive_field_format(fieldspec: QueryFieldSpec): return f"{prefix}{fieldspec.table.name} (formatted)" if field.is_relationship: return f"{prefix}{fieldspec.table.name} ({'formatted' if field.type.endswith('to-one') else 'aggregatd'})" - return f"{prefix}{fieldspec.table.name} {field.name}" + date_suffix = "" + if fieldspec.is_temporal() and fieldspec.date_part is not None and fieldspec.date_part != "Full Date": + date_suffix = f" ({fieldspec.date_part})" + return f"{prefix}{fieldspec.table.name} {field.name}{date_suffix}" # @transaction.atomic <--- we DONT do this because the query logic could take up possibly multiple minutes @@ -896,6 +904,21 @@ def _get_table_and_field(field: QueryField): field_name = None if field.fieldspec.get_field() is None else field.fieldspec.get_field().name return (table_name, field_name) +def _get_date_part_field_name(field: QueryField) -> str | None: + """Return a field name that includes the date part suffix for temporal fields. + + This ensures that different date components (e.g., catalogedDate Full Date, + catalogedDate Day, catalogedDate Month, catalogedDate Year) are treated as + distinct fields in the localization lookup, preventing mislabeled headers. + """ + base_name = None if field.fieldspec.get_field() is None else field.fieldspec.get_field().name + if base_name is None: + return None + date_part = field.fieldspec.date_part + if date_part is not None: + return f"{base_name}__{date_part}" + return base_name + def rewrite_coordinate_fields(row, _mapped_rows: dict[tuple[tuple[str, ...], ...], Any], join_paths: tuple[tuple[str, ...], ...]) -> tuple: """ In the QueryResults we want to replace any instances of the decimal @@ -982,7 +1005,8 @@ def run_batch_edit_query(props: BatchEditProps): localization_dump: dict[str, list[tuple[str, str, bool]]] = {} for field, caption in field_caption_pairs: - table_name, field_name = _get_table_and_field(field) + table_name, _ = _get_table_and_field(field) + field_name = _get_date_part_field_name(field) field_labels = localization_dump.get(table_name, []) new_field_label = (field_name, caption, False) field_labels.append(new_field_label) diff --git a/specifyweb/backend/trees/views.py b/specifyweb/backend/trees/views.py index bd04ec2522f..e6a55963d79 100644 --- a/specifyweb/backend/trees/views.py +++ b/specifyweb/backend/trees/views.py @@ -547,7 +547,7 @@ def has_tree_read_permission(tree: TREE_TABLE) -> bool: treedef_model = getattr(spmodels, f'{tree.lower().capitalize()}treedef') tree_defs = treedef_model.objects.filter(get_search_filters(collection, tree)).distinct() for definition in tree_defs: - ranks = definition.treedefitems.order_by('rankid') + ranks = definition.treedefitems.order_by('rankid').iterator(chunk_size=2000) result[tree].append({ 'definition': obj_to_data(definition), 'ranks': [obj_to_data(rank) for rank in ranks] diff --git a/specifyweb/specify/api/calculated_fields.py b/specifyweb/specify/api/calculated_fields.py index e0693107325..a249967a88f 100644 --- a/specifyweb/specify/api/calculated_fields.py +++ b/specifyweb/specify/api/calculated_fields.py @@ -27,7 +27,7 @@ def calculate_totals_deaccession(obj, Model, related_field_name): total_preps = 0 total_items = 0 - for prep in Model.objects.filter(deaccession=obj): + for prep in Model.objects.filter(deaccession=obj).iterator(chunk_size=2000): counts = calc_prep_item_count(prep, related_field_name, {}) total_preps += counts["totalPreps"] total_items += counts["totalItems"] diff --git a/specifyweb/specify/api/serializers.py b/specifyweb/specify/api/serializers.py index 949726e5103..c058a5952bf 100644 --- a/specifyweb/specify/api/serializers.py +++ b/specifyweb/specify/api/serializers.py @@ -88,7 +88,7 @@ def to_many_to_data(obj, rel, checker: ReadPermChecker) -> str | list[dict[str, field = parent_model.get_field(field_name) if field is not None and field.dependent: objs = getattr(obj, field_name) - return [_obj_to_data(o, checker) for o in objs.all()] + return [_obj_to_data(o, checker) for o in objs.all().iterator(chunk_size=2000)] collection_uri = uri_for_model(rel.related_model) return collection_uri + '?' + urlencode([(rel.field.name.lower(), str(obj.id))]) diff --git a/specifyweb/specify/management/commands/print_tree.py b/specifyweb/specify/management/commands/print_tree.py index 2381d451d7f..bde2a18c330 100644 --- a/specifyweb/specify/management/commands/print_tree.py +++ b/specifyweb/specify/management/commands/print_tree.py @@ -15,8 +15,7 @@ def handle(self, **options): for r in tdis: rank_hhn[r.rankid] = None - - for t in Taxon.objects.all().order_by('nodenumber'): + for t in Taxon.objects.all().order_by('nodenumber').iterator(chunk_size=2000): rank_hhn[t.rankid] = t.highestchildnodenumber nn = t.nodenumber line = ['*-' if t.rankid == r else diff --git a/specifyweb/specify/tests/test_queryset_iterators.py b/specifyweb/specify/tests/test_queryset_iterators.py new file mode 100644 index 00000000000..37154a32fba --- /dev/null +++ b/specifyweb/specify/tests/test_queryset_iterators.py @@ -0,0 +1,112 @@ +"""Tests for queryset .iterator() usage in high-impact paths. + +Verifies that key callsites that iterate over potentially large querysets +use .iterator(chunk_size=2000) to avoid caching all results in memory. +""" +import inspect +import textwrap +from django.test import TestCase + + +def _get_source(func): + """Return dedented source code for a function.""" + return textwrap.dedent(inspect.getsource(func)) + + +class TestIteratorUsageInSource(TestCase): + """Verify that high-impact callsites use .iterator() in their source code. + + These are source-level checks — they inspect the actual Python source of + functions that iterate over potentially large querysets, and verify that + .iterator(chunk_size=2000) is present. + """ + + def test_serializers_to_many_uses_iterator(self): + """to_many_to_data should use .iterator() when serializing dependent collections.""" + from specifyweb.specify.api.serializers import to_many_to_data + source = _get_source(to_many_to_data) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "to_many_to_data should use .iterator(chunk_size=2000) on objs.all()" + ) + + def test_calculated_fields_deaccession_uses_iterator(self): + """calculate_totals_deaccession should use .iterator() on the filter queryset.""" + from specifyweb.specify.api.calculated_fields import calculate_totals_deaccession + source = _get_source(calculate_totals_deaccession) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "calculate_totals_deaccession should use .iterator(chunk_size=2000)" + ) + + def test_print_tree_taxon_uses_iterator(self): + """print_tree management command should use .iterator() on Taxon.objects.all().""" + from specifyweb.specify.management.commands.print_tree import Command + source = _get_source(Command.handle) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "print_tree should use .iterator(chunk_size=2000) on Taxon.objects.all()" + ) + + def test_export_extract_query_uses_iterator(self): + """extract_query should use .iterator() on query.fields.all().""" + from specifyweb.backend.export.extract_query import extract_query + source = _get_source(extract_query) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "extract_query should use .iterator(chunk_size=2000) on query.fields.all()" + ) + + def test_export_cache_build_uses_iterator(self): + """build_cache_tables should use .iterator() on extensions.all().""" + from specifyweb.backend.export.cache import build_cache_tables + source = _get_source(build_cache_tables) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "build_cache_tables should use .iterator(chunk_size=2000) on extensions.all()" + ) + + def test_export_cache_fields_uses_iterator(self): + """_build_single_cache should use .iterator() on fields.all().""" + from specifyweb.backend.export.cache import _build_single_cache + source = _get_source(_build_single_cache) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "_build_single_cache should use .iterator(chunk_size=2000) on fields.all()" + ) + + def test_cog_preps_child_cogs_uses_iterator(self): + """get_cog_consolidated_preps should use .iterator() on child COG queries.""" + from specifyweb.backend.interactions.cog_preps import get_cog_consolidated_preps + source = _get_source(get_cog_consolidated_preps) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "get_cog_consolidated_preps should use .iterator(chunk_size=2000)" + ) + + def test_permissions_serialize_role_uses_iterator(self): + """serialize_role should use .iterator() on role.policies.all().""" + from specifyweb.backend.permissions.views import serialize_role + source = _get_source(serialize_role) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "serialize_role should use .iterator(chunk_size=2000) on role.policies.all()" + ) + + def test_tree_views_ranks_uses_iterator(self): + """get_all_tree_information should use .iterator() on treedefitems.""" + from specifyweb.backend.trees.views import get_all_tree_information + source = _get_source(get_all_tree_information) + self.assertIn( + '.iterator(chunk_size=2000)', + source, + "get_all_tree_information should use .iterator(chunk_size=2000) on ranks" + )