diff --git a/nginx.conf b/nginx.conf index c83f380bf56..8073f00ecee 100644 --- a/nginx.conf +++ b/nginx.conf @@ -29,7 +29,11 @@ server { location / { client_max_body_size 400M; client_body_buffer_size 400M; - client_body_timeout 120; + client_body_timeout 600s; + proxy_connect_timeout 600s; + proxy_send_timeout 600s; + proxy_read_timeout 600s; + send_timeout 600s; resolver 127.0.0.11 valid=30s; set $backend "http://specify7:8000"; proxy_pass $backend; diff --git a/specifyweb/backend/trees/extras.py b/specifyweb/backend/trees/extras.py index 3b1a6d29656..99cccbd7a9e 100644 --- a/specifyweb/backend/trees/extras.py +++ b/specifyweb/backend/trees/extras.py @@ -355,15 +355,18 @@ def merge(node, into, agent): "children": list(into.children.values('id', 'fullname')) }}) target_children = target.children.select_for_update() + children_to_reparent = [] for child in node.children.select_for_update(): matched = [target_child for target_child in target_children if child.name == target_child.name and child.rankid == target_child.rankid] if len(matched) > 0: merge(child, matched[0], agent) else: - child.parent = target - child.save() - + children_to_reparent.append(child) + + if children_to_reparent: + _batch_reparent_children(children_to_reparent, target, model) + for retry in range(100): try: id = node.id @@ -387,6 +390,54 @@ def merge(node, into, agent): assert False, "failed to move all referrences to merged tree node" + +def _batch_reparent_children(children, target, model): + """Batch-reparent multiple children to a new parent, avoiding O(N) + per-child overhead. + + This replaces the per-child pattern of: + child.parent = target + child.save() + which triggers moving_node() -> 5 interval UPDATEs + set_fullnames + validate + for each child individually. + + Instead, we: + 1. Update parent_id for all children in a single UPDATE + 2. Renumber the entire tree (fixes all node numbers at once) + 3. Run set_fullnames once for the entire tree + 4. Run validate_tree_numbering once + + The renumber_tree step is O(N) but runs in seconds even for large trees, + and is far faster than the O(N²) per-child approach. + """ + if not children: + logger.info('batch reparenting 0 children to %s — skipping', target) + return + logger.info('batch reparenting %d children to %s', len(children), target) + + child_ids = [child.id for child in children] + + # Batch-update parent_id for all children in a single query + model.objects.filter(id__in=child_ids).update(parent=target) + + # Renumber the entire tree to fix all node numbers at once. + # This is O(N) but runs in seconds even for large trees. + renumber_tree(model._meta.db_table) + + # Run set_fullnames once for the entire tree + definition = target.definition + set_fullnames(definition) + + # Validate tree numbering once + validate_tree_numbering(model._meta.db_table) + + # Update the in-memory nodenumber/highestchildnodenumber for each child + # so they reflect the new positions (needed for subsequent operations) + for child in children: + refreshed = model.objects.get(id=child.id) + child.nodenumber = refreshed.nodenumber + child.highestchildnodenumber = refreshed.highestchildnodenumber + def bulk_move(node, into, agent): from specifyweb.specify import models logger.info('Bulk move preparations from %s to %s', node, into) @@ -817,13 +868,15 @@ def tree_path_expr(tbl: str, d: int) -> str: # replace path_expr if ordering iss return f"CONCAT_WS(',', {parts})" # Preorder numbering using ROW_NUMBER() + # Use existing nodenumber as a secondary tie-breaker so siblings + # keep their prior interval order unless explicitly moved. sql_preorder = ( f"UPDATE {table} t\n" f"JOIN (\n" f" SELECT id, rn FROM (\n" f" SELECT\n" f" t0.{table}id AS id,\n" - f" ROW_NUMBER() OVER (ORDER BY {path_expr(table, depth)}, t0.{table}id) AS rn\n" + f" ROW_NUMBER() OVER (ORDER BY {path_expr(table, depth)}, t0.nodenumber, t0.{table}id) AS rn\n" f" FROM {table} t0\n" f"{parent_joins(table, depth)}\n" f" ) ordered\n" diff --git a/specifyweb/backend/trees/tests/test_tree_extras/test_merge.py b/specifyweb/backend/trees/tests/test_tree_extras/test_merge.py index af66437a97e..bd5fbee75dc 100644 --- a/specifyweb/backend/trees/tests/test_tree_extras/test_merge.py +++ b/specifyweb/backend/trees/tests/test_tree_extras/test_merge.py @@ -1,7 +1,7 @@ from specifyweb.backend.businessrules.exceptions import TreeBusinessRuleException -from specifyweb.specify.models import Geography, Locality, Taxon, Taxontreedef +from specifyweb.specify.models import Geography, Locality, Taxon, Taxontreedef, Taxontreedefitem from specifyweb.backend.trees.tests.test_trees import GeographyTree -from specifyweb.backend.trees.extras import merge +from specifyweb.backend.trees.extras import merge, _batch_reparent_children, validate_tree_numbering class TestMerge(GeographyTree): @@ -47,12 +47,6 @@ def test_merge_into_synonymized(self): self.assertEqual(context.exception.args[1]['localizationKey'], "nodeOperationToSynonymizedParent") - def _make_locality(self, geo): - return Locality.objects.create( - discipline=self.discipline, - geography=geo - ) - def test_simple_merge(self): locality_1 = self._make_locality(self.springmo) @@ -156,4 +150,243 @@ def not_exists(obj): self.assertEqual(locality_gc_2.geography_id, generic_city_oh_2.id) self.assertEqual(self.sangomon.accepted_id, self.greeneoh.id) - self.assertEqual(self.ill.accepted_id, self.ohio.id) \ No newline at end of file + self.assertEqual(self.ill.accepted_id, self.ohio.id) + + +class TestBatchReparent(GeographyTree): + """Tests for the _batch_reparent_children function used during merge.""" + + def test_batch_reparent_single_child(self): + """A single county can be reparented from Missouri to Ohio.""" + # Greene County (MO) has Springfield as a city child + # Reparent Greene County from Missouri to Ohio + _batch_reparent_children([self.greene], self.ohio, Geography) + + # Verify parent changed from Missouri to Ohio + self.greene.refresh_from_db() + self.assertEqual(self.greene.parent_id, self.ohio.id) + + # Verify Springfield still exists and has correct parent + self.springmo.refresh_from_db() + self.assertEqual(self.springmo.parent_id, self.greene.id) + + # Verify tree numbering is valid + validate_tree_numbering('geography') + + def test_batch_reparent_multiple_counties(self): + """Multiple counties can be reparented from one state to another in a single batch.""" + # Create additional counties under Missouri + boone = self.make_geotree("Boone", "County", parent=self.mo) + jasper = self.make_geotree("Jasper", "County", parent=self.mo) + platte = self.make_geotree("Platte", "County", parent=self.mo) + + _batch_reparent_children([boone, jasper, platte], self.ohio, Geography) + + for county in [boone, jasper, platte]: + county.refresh_from_db() + self.assertEqual(county.parent_id, self.ohio.id) + + validate_tree_numbering('geography') + + def test_batch_reparent_county_with_cities(self): + """A county with its own cities (subtree) is correctly reparented.""" + # Greene County (MO) has Springfield as a city child + # Reparent Greene County from Missouri to Ohio + _batch_reparent_children([self.greene], self.ohio, Geography) + + self.greene.refresh_from_db() + self.assertEqual(self.greene.parent_id, self.ohio.id) + + # Verify Springfield is still a child of Greene County + self.springmo.refresh_from_db() + self.assertEqual(self.springmo.parent_id, self.greene.id) + + validate_tree_numbering('geography') + + def test_batch_reparent_updates_fullnames(self): + """Full names reflect the new parent path after batch reparenting.""" + # Create a taxonomy tree with ranks that include ancestry in fullname. + # In the real taxon tree, Genus and Species are the ranks with + # isinfullname=True, so Species includes Genus in its fullname. + root = Taxon.objects.create( + definition=self.taxontreedef, + definitionitem=self.taxon_root, + name="Life", + fullname="Life" + ) + + # Create two Kingdom-level nodes + animalia = self.make_taxontree("Animalia", "Kingdom", parent=root) + plantae = self.make_taxontree("Plantae", "Kingdom", parent=root) + + # Enable fullname ancestry for Genus and Species ranks + genus_rank = Taxontreedefitem.objects.get(name="Genus") + species_rank = Taxontreedefitem.objects.get(name="Species") + self._update(genus_rank, dict(isinfullname=True)) + self._update(species_rank, dict(isinfullname=True)) + + # Create a Genus (Canis) under Animalia with two Species + canis = self.make_taxontree("Canis", "Genus", parent=animalia) + canis_lupus = self.make_taxontree("lupus", "Species", parent=canis) + canis_latrans = self.make_taxontree("latrans", "Species", parent=canis) + + # Refresh species to get computed fullnames + canis_lupus.refresh_from_db() + canis_latrans.refresh_from_db() + + # Before reparenting, Species fullnames should include Canis + self.assertEqual("Canislupus", canis_lupus.fullname) + self.assertEqual("Canislatrans", canis_latrans.fullname) + + # Reparent the Genus Canis from Animalia to Plantae + _batch_reparent_children([canis], plantae, Taxon) + + # Refresh and check Species fullnames still include Canis + canis_lupus.refresh_from_db() + canis_latrans.refresh_from_db() + self.assertEqual("Canislupus", canis_lupus.fullname) + self.assertEqual("Canislatrans", canis_latrans.fullname) + + def test_batch_reparent_preserves_node_numbers(self): + """After batch reparenting, all node numbers are valid and unique.""" + # Create additional counties under Missouri with cities + boone = self.make_geotree("Boone", "County", parent=self.mo) + jasper = self.make_geotree("Jasper", "County", parent=self.mo) + columbia = self.make_geotree("Columbia", "City", parent=boone) + joplin = self.make_geotree("Joplin", "City", parent=jasper) + + _batch_reparent_children([boone, jasper], self.ohio, Geography) + + # Verify node numbers are valid + validate_tree_numbering('geography') + + # Verify all nodes have unique nodenumbers + nodenumbers = Geography.objects.values_list('nodenumber', flat=True) + self.assertEqual(len(nodenumbers), len(set(nodenumbers))) + + def test_batch_reparent_empty_list(self): + """Reparenting an empty list should not raise an error.""" + # This should be a no-op + _batch_reparent_children([], self.ohio, Geography) + + # Tree should still be valid + validate_tree_numbering('geography') + + def test_batch_reparent_county_and_city_together(self): + """A county and a city from different parents can be reparented together.""" + # Reparent Greene County (MO) and Sangamon County (IL) both to Ohio + _batch_reparent_children([self.greene, self.sangomon], self.ohio, Geography) + + self.greene.refresh_from_db() + self.sangomon.refresh_from_db() + + self.assertEqual(self.greene.parent_id, self.ohio.id) + self.assertEqual(self.sangomon.parent_id, self.ohio.id) + + validate_tree_numbering('geography') + + def test_batch_reparent_within_merge(self): + """A merge that triggers batch reparenting works correctly.""" + # Create additional counties under Missouri + boone = self.make_geotree("Boone", "County", parent=self.mo) + jasper = self.make_geotree("Jasper", "County", parent=self.mo) + + # Create a matching county under Ohio (same name) - this will be recursively merged + greene_oh = self.make_geotree("Greene", "County", parent=self.ohio) + + # Create localities attached to the batch-reparented counties + loc_boone = self._make_locality(boone) + loc_jasper = self._make_locality(jasper) + + # Merge Missouri into Ohio + merge(self.mo, self.ohio, self.agent) + + # Verify Missouri is gone + self.assertFalse(Geography.objects.filter(id=self.mo.id).exists()) + + # Verify Ohio still exists + self.assertTrue(Geography.objects.filter(id=self.ohio.id).exists()) + + # Verify the matching Greene County was recursively merged (MO -> OH) + self.assertFalse(Geography.objects.filter(id=self.greene.id).exists()) + greene_oh.refresh_from_db() + self.assertEqual(greene_oh.parent_id, self.ohio.id) + + # Verify non-matching counties were batch reparented + boone.refresh_from_db() + self.assertEqual(boone.parent_id, self.ohio.id) + jasper.refresh_from_db() + self.assertEqual(jasper.parent_id, self.ohio.id) + + # Verify localities for batch-reparented counties were moved + loc_boone.refresh_from_db() + self.assertEqual(loc_boone.geography_id, boone.id) + loc_jasper.refresh_from_db() + self.assertEqual(loc_jasper.geography_id, jasper.id) + + # Verify tree numbering is valid + validate_tree_numbering('geography') + + def test_batch_reparent_preserves_ordering(self): + """Children maintain their relative ordering after batch reparenting.""" + # Create a taxonomy tree. + root = Taxon.objects.create( + definition=self.taxontreedef, + definitionitem=self.taxon_root, + name="Life", + fullname="Life" + ) + + # Create two Kingdom-level nodes + animalia = self.make_taxontree("Animalia", "Kingdom", parent=root) + plantae = self.make_taxontree("Plantae", "Kingdom", parent=root) + + # Create three real-world Genus-level children under Animalia + canis = self.make_taxontree("Canis", "Genus", parent=animalia) + felis = self.make_taxontree("Felis", "Genus", parent=animalia) + ursus = self.make_taxontree("Ursus", "Genus", parent=animalia) + + # Reparent the three genera from Animalia to Plantae + _batch_reparent_children([canis, felis, ursus], plantae, Taxon) + + # Refresh all + for c in [canis, felis, ursus]: + c.refresh_from_db() + + # All should be children of Plantae + for c in [canis, felis, ursus]: + self.assertEqual(c.parent_id, plantae.id) + + # Verify node numbers are nested under Plantae + plantae.refresh_from_db() + for c in [canis, felis, ursus]: + self.assertGreaterEqual(c.nodenumber, plantae.nodenumber) + self.assertLessEqual(c.nodenumber, plantae.highestchildnodenumber) + + # Fetch Plantae's children ordered by nodenumber and verify that + # canis, felis, ursus appear in that exact relative order. + plantae_children = Taxon.objects.filter( + parent=plantae + ).order_by('nodenumber').values_list('id', flat=True) + + # Build a list of the child ids in the order they appear + child_ids = list(plantae_children) + + # Find the positions of canis, felis, ursus in the ordered list + canis_idx = child_ids.index(canis.id) + felis_idx = child_ids.index(felis.id) + ursus_idx = child_ids.index(ursus.id) + + # Assert that canis, felis, ursus appear in that exact order + self.assertLess(canis_idx, felis_idx, + "canis should appear before felis among Plantae's children") + self.assertLess(felis_idx, ursus_idx, + "felis should appear before ursus among Plantae's children") + + # Optionally assert they are contiguous (no other children interleaved) + self.assertEqual(felis_idx, canis_idx + 1, + "canis and felis should be adjacent among Plantae's children") + self.assertEqual(ursus_idx, felis_idx + 1, + "felis and ursus should be adjacent among Plantae's children") + + validate_tree_numbering('taxon') diff --git a/specifyweb/backend/trees/tests/test_trees.py b/specifyweb/backend/trees/tests/test_trees.py index a3acd477518..628de51e6a8 100644 --- a/specifyweb/backend/trees/tests/test_trees.py +++ b/specifyweb/backend/trees/tests/test_trees.py @@ -175,7 +175,12 @@ def make_storagetree(self, name, rank_name, **extra_kwargs): class GeographyTree(TestTree, TestTreeSetup): - pass + + def _make_locality(self, geo): + return models.Locality.objects.create( + discipline=self.discipline, + geography=geo + ) class SqlTreeSetup(SQLAlchemySetup, GeographyTree):