Skip to content
6 changes: 5 additions & 1 deletion nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@ server {
location / {
client_max_body_size 400M;
client_body_buffer_size 400M;
client_body_timeout 120;
client_body_timeout 600s;
proxy_connect_timeout 600s;
proxy_send_timeout 600s;
proxy_read_timeout 600s;
send_timeout 600s;
resolver 127.0.0.11 valid=30s;
set $backend "http://specify7:8000";
proxy_pass $backend;
Expand Down
61 changes: 57 additions & 4 deletions specifyweb/backend/trees/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,15 +355,18 @@ def merge(node, into, agent):
"children": list(into.children.values('id', 'fullname'))
}})
target_children = target.children.select_for_update()
children_to_reparent = []
for child in node.children.select_for_update():
matched = [target_child for target_child in target_children
if child.name == target_child.name and child.rankid == target_child.rankid]
if len(matched) > 0:
merge(child, matched[0], agent)
else:
child.parent = target
child.save()

children_to_reparent.append(child)

if children_to_reparent:
_batch_reparent_children(children_to_reparent, target, model)

for retry in range(100):
try:
id = node.id
Expand All @@ -387,6 +390,54 @@ def merge(node, into, agent):

assert False, "failed to move all referrences to merged tree node"


def _batch_reparent_children(children, target, model):
"""Batch-reparent multiple children to a new parent, avoiding O(N)
per-child overhead.

This replaces the per-child pattern of:
child.parent = target
child.save()
which triggers moving_node() -> 5 interval UPDATEs + set_fullnames + validate
for each child individually.

Instead, we:
1. Update parent_id for all children in a single UPDATE
2. Renumber the entire tree (fixes all node numbers at once)
3. Run set_fullnames once for the entire tree
4. Run validate_tree_numbering once

The renumber_tree step is O(N) but runs in seconds even for large trees,
and is far faster than the O(N²) per-child approach.
"""
if not children:
logger.info('batch reparenting 0 children to %s — skipping', target)
return
logger.info('batch reparenting %d children to %s', len(children), target)

child_ids = [child.id for child in children]

# Batch-update parent_id for all children in a single query
model.objects.filter(id__in=child_ids).update(parent=target)

# Renumber the entire tree to fix all node numbers at once.
# This is O(N) but runs in seconds even for large trees.
renumber_tree(model._meta.db_table)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# Run set_fullnames once for the entire tree
definition = target.definition
set_fullnames(definition)

# Validate tree numbering once
validate_tree_numbering(model._meta.db_table)

# Update the in-memory nodenumber/highestchildnodenumber for each child
# so they reflect the new positions (needed for subsequent operations)
for child in children:
refreshed = model.objects.get(id=child.id)
child.nodenumber = refreshed.nodenumber
child.highestchildnodenumber = refreshed.highestchildnodenumber

def bulk_move(node, into, agent):
from specifyweb.specify import models
logger.info('Bulk move preparations from %s to %s', node, into)
Expand Down Expand Up @@ -817,13 +868,15 @@ def tree_path_expr(tbl: str, d: int) -> str: # replace path_expr if ordering iss
return f"CONCAT_WS(',', {parts})"

# Preorder numbering using ROW_NUMBER()
# Use existing nodenumber as a secondary tie-breaker so siblings
# keep their prior interval order unless explicitly moved.
sql_preorder = (
f"UPDATE {table} t\n"
f"JOIN (\n"
f" SELECT id, rn FROM (\n"
f" SELECT\n"
f" t0.{table}id AS id,\n"
f" ROW_NUMBER() OVER (ORDER BY {path_expr(table, depth)}, t0.{table}id) AS rn\n"
f" ROW_NUMBER() OVER (ORDER BY {path_expr(table, depth)}, t0.nodenumber, t0.{table}id) AS rn\n"
f" FROM {table} t0\n"
f"{parent_joins(table, depth)}\n"
f" ) ordered\n"
Expand Down
251 changes: 242 additions & 9 deletions specifyweb/backend/trees/tests/test_tree_extras/test_merge.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from specifyweb.backend.businessrules.exceptions import TreeBusinessRuleException
from specifyweb.specify.models import Geography, Locality, Taxon, Taxontreedef
from specifyweb.specify.models import Geography, Locality, Taxon, Taxontreedef, Taxontreedefitem
from specifyweb.backend.trees.tests.test_trees import GeographyTree
from specifyweb.backend.trees.extras import merge
from specifyweb.backend.trees.extras import merge, _batch_reparent_children, validate_tree_numbering

class TestMerge(GeographyTree):

Expand Down Expand Up @@ -47,12 +47,6 @@ def test_merge_into_synonymized(self):

self.assertEqual(context.exception.args[1]['localizationKey'], "nodeOperationToSynonymizedParent")

def _make_locality(self, geo):
return Locality.objects.create(
discipline=self.discipline,
geography=geo
)

def test_simple_merge(self):

locality_1 = self._make_locality(self.springmo)
Expand Down Expand Up @@ -156,4 +150,243 @@ def not_exists(obj):
self.assertEqual(locality_gc_2.geography_id, generic_city_oh_2.id)

self.assertEqual(self.sangomon.accepted_id, self.greeneoh.id)
self.assertEqual(self.ill.accepted_id, self.ohio.id)
self.assertEqual(self.ill.accepted_id, self.ohio.id)


class TestBatchReparent(GeographyTree):
"""Tests for the _batch_reparent_children function used during merge."""

def test_batch_reparent_single_child(self):
"""A single county can be reparented from Missouri to Ohio."""
# Greene County (MO) has Springfield as a city child
# Reparent Greene County from Missouri to Ohio
_batch_reparent_children([self.greene], self.ohio, Geography)

# Verify parent changed from Missouri to Ohio
self.greene.refresh_from_db()
self.assertEqual(self.greene.parent_id, self.ohio.id)

# Verify Springfield still exists and has correct parent
self.springmo.refresh_from_db()
self.assertEqual(self.springmo.parent_id, self.greene.id)

# Verify tree numbering is valid
validate_tree_numbering('geography')

def test_batch_reparent_multiple_counties(self):
"""Multiple counties can be reparented from one state to another in a single batch."""
# Create additional counties under Missouri
boone = self.make_geotree("Boone", "County", parent=self.mo)
jasper = self.make_geotree("Jasper", "County", parent=self.mo)
platte = self.make_geotree("Platte", "County", parent=self.mo)

_batch_reparent_children([boone, jasper, platte], self.ohio, Geography)

for county in [boone, jasper, platte]:
county.refresh_from_db()
self.assertEqual(county.parent_id, self.ohio.id)

validate_tree_numbering('geography')

def test_batch_reparent_county_with_cities(self):
"""A county with its own cities (subtree) is correctly reparented."""
# Greene County (MO) has Springfield as a city child
# Reparent Greene County from Missouri to Ohio
_batch_reparent_children([self.greene], self.ohio, Geography)

self.greene.refresh_from_db()
self.assertEqual(self.greene.parent_id, self.ohio.id)

# Verify Springfield is still a child of Greene County
self.springmo.refresh_from_db()
self.assertEqual(self.springmo.parent_id, self.greene.id)

validate_tree_numbering('geography')

def test_batch_reparent_updates_fullnames(self):
"""Full names reflect the new parent path after batch reparenting."""
# Create a taxonomy tree with ranks that include ancestry in fullname.
# In the real taxon tree, Genus and Species are the ranks with
# isinfullname=True, so Species includes Genus in its fullname.
root = Taxon.objects.create(
definition=self.taxontreedef,
definitionitem=self.taxon_root,
name="Life",
fullname="Life"
)

# Create two Kingdom-level nodes
animalia = self.make_taxontree("Animalia", "Kingdom", parent=root)
plantae = self.make_taxontree("Plantae", "Kingdom", parent=root)

# Enable fullname ancestry for Genus and Species ranks
genus_rank = Taxontreedefitem.objects.get(name="Genus")
species_rank = Taxontreedefitem.objects.get(name="Species")
self._update(genus_rank, dict(isinfullname=True))
self._update(species_rank, dict(isinfullname=True))

# Create a Genus (Canis) under Animalia with two Species
canis = self.make_taxontree("Canis", "Genus", parent=animalia)
canis_lupus = self.make_taxontree("lupus", "Species", parent=canis)
canis_latrans = self.make_taxontree("latrans", "Species", parent=canis)

# Refresh species to get computed fullnames
canis_lupus.refresh_from_db()
canis_latrans.refresh_from_db()

# Before reparenting, Species fullnames should include Canis
self.assertEqual("Canislupus", canis_lupus.fullname)
self.assertEqual("Canislatrans", canis_latrans.fullname)

# Reparent the Genus Canis from Animalia to Plantae
_batch_reparent_children([canis], plantae, Taxon)

# Refresh and check Species fullnames still include Canis
canis_lupus.refresh_from_db()
canis_latrans.refresh_from_db()
self.assertEqual("Canislupus", canis_lupus.fullname)
self.assertEqual("Canislatrans", canis_latrans.fullname)

def test_batch_reparent_preserves_node_numbers(self):
"""After batch reparenting, all node numbers are valid and unique."""
# Create additional counties under Missouri with cities
boone = self.make_geotree("Boone", "County", parent=self.mo)
jasper = self.make_geotree("Jasper", "County", parent=self.mo)
columbia = self.make_geotree("Columbia", "City", parent=boone)
joplin = self.make_geotree("Joplin", "City", parent=jasper)

_batch_reparent_children([boone, jasper], self.ohio, Geography)

# Verify node numbers are valid
validate_tree_numbering('geography')

# Verify all nodes have unique nodenumbers
nodenumbers = Geography.objects.values_list('nodenumber', flat=True)
self.assertEqual(len(nodenumbers), len(set(nodenumbers)))

def test_batch_reparent_empty_list(self):
"""Reparenting an empty list should not raise an error."""
# This should be a no-op
_batch_reparent_children([], self.ohio, Geography)

# Tree should still be valid
validate_tree_numbering('geography')

def test_batch_reparent_county_and_city_together(self):
"""A county and a city from different parents can be reparented together."""
# Reparent Greene County (MO) and Sangamon County (IL) both to Ohio
_batch_reparent_children([self.greene, self.sangomon], self.ohio, Geography)

self.greene.refresh_from_db()
self.sangomon.refresh_from_db()

self.assertEqual(self.greene.parent_id, self.ohio.id)
self.assertEqual(self.sangomon.parent_id, self.ohio.id)

validate_tree_numbering('geography')

def test_batch_reparent_within_merge(self):
"""A merge that triggers batch reparenting works correctly."""
# Create additional counties under Missouri
boone = self.make_geotree("Boone", "County", parent=self.mo)
jasper = self.make_geotree("Jasper", "County", parent=self.mo)

# Create a matching county under Ohio (same name) - this will be recursively merged
greene_oh = self.make_geotree("Greene", "County", parent=self.ohio)

# Create localities attached to the batch-reparented counties
loc_boone = self._make_locality(boone)
loc_jasper = self._make_locality(jasper)

# Merge Missouri into Ohio
merge(self.mo, self.ohio, self.agent)

# Verify Missouri is gone
self.assertFalse(Geography.objects.filter(id=self.mo.id).exists())

# Verify Ohio still exists
self.assertTrue(Geography.objects.filter(id=self.ohio.id).exists())

# Verify the matching Greene County was recursively merged (MO -> OH)
self.assertFalse(Geography.objects.filter(id=self.greene.id).exists())
greene_oh.refresh_from_db()
self.assertEqual(greene_oh.parent_id, self.ohio.id)

# Verify non-matching counties were batch reparented
boone.refresh_from_db()
self.assertEqual(boone.parent_id, self.ohio.id)
jasper.refresh_from_db()
self.assertEqual(jasper.parent_id, self.ohio.id)

# Verify localities for batch-reparented counties were moved
loc_boone.refresh_from_db()
self.assertEqual(loc_boone.geography_id, boone.id)
loc_jasper.refresh_from_db()
self.assertEqual(loc_jasper.geography_id, jasper.id)

# Verify tree numbering is valid
validate_tree_numbering('geography')

def test_batch_reparent_preserves_ordering(self):
"""Children maintain their relative ordering after batch reparenting."""
# Create a taxonomy tree.
root = Taxon.objects.create(
definition=self.taxontreedef,
definitionitem=self.taxon_root,
name="Life",
fullname="Life"
)

# Create two Kingdom-level nodes
animalia = self.make_taxontree("Animalia", "Kingdom", parent=root)
plantae = self.make_taxontree("Plantae", "Kingdom", parent=root)

# Create three real-world Genus-level children under Animalia
canis = self.make_taxontree("Canis", "Genus", parent=animalia)
felis = self.make_taxontree("Felis", "Genus", parent=animalia)
ursus = self.make_taxontree("Ursus", "Genus", parent=animalia)

# Reparent the three genera from Animalia to Plantae
_batch_reparent_children([canis, felis, ursus], plantae, Taxon)

# Refresh all
for c in [canis, felis, ursus]:
c.refresh_from_db()

# All should be children of Plantae
for c in [canis, felis, ursus]:
self.assertEqual(c.parent_id, plantae.id)

# Verify node numbers are nested under Plantae
plantae.refresh_from_db()
for c in [canis, felis, ursus]:
self.assertGreaterEqual(c.nodenumber, plantae.nodenumber)
self.assertLessEqual(c.nodenumber, plantae.highestchildnodenumber)

# Fetch Plantae's children ordered by nodenumber and verify that
# canis, felis, ursus appear in that exact relative order.
plantae_children = Taxon.objects.filter(
parent=plantae
).order_by('nodenumber').values_list('id', flat=True)

# Build a list of the child ids in the order they appear
child_ids = list(plantae_children)

# Find the positions of canis, felis, ursus in the ordered list
canis_idx = child_ids.index(canis.id)
felis_idx = child_ids.index(felis.id)
ursus_idx = child_ids.index(ursus.id)

# Assert that canis, felis, ursus appear in that exact order
self.assertLess(canis_idx, felis_idx,
"canis should appear before felis among Plantae's children")
self.assertLess(felis_idx, ursus_idx,
"felis should appear before ursus among Plantae's children")

# Optionally assert they are contiguous (no other children interleaved)
self.assertEqual(felis_idx, canis_idx + 1,
"canis and felis should be adjacent among Plantae's children")
self.assertEqual(ursus_idx, felis_idx + 1,
"felis and ursus should be adjacent among Plantae's children")

validate_tree_numbering('taxon')
7 changes: 6 additions & 1 deletion specifyweb/backend/trees/tests/test_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,12 @@ def make_storagetree(self, name, rank_name, **extra_kwargs):


class GeographyTree(TestTree, TestTreeSetup):
pass

def _make_locality(self, geo):
return models.Locality.objects.create(
discipline=self.discipline,
geography=geo
)


class SqlTreeSetup(SQLAlchemySetup, GeographyTree):
Expand Down
Loading