From 8be6e0930b0ee5255a7945d5f0cc79924da44b57 Mon Sep 17 00:00:00 2001 From: Joe Russack Date: Mon, 27 Apr 2026 13:16:09 -0700 Subject: [PATCH] feat(dwc): add CacheTableMeta model and cache table infrastructure Adds the export-app cache infrastructure: - CacheTableMeta model + migration tracking build state per (mapping, collection) - export.models shim: re-exports Caroline's Schemamapping/Exportdataset/ Exportdatasetextension under PascalCase aliases for use throughout the package - cache.py: get_cache_table_name, create_cache_table, drop_cache_table, _build_single_cache, _execute_and_populate, _infer_column_type, build_cache_tables - dwca_utils.py: shared sanitize/build helpers used by cache and archive code - Tests for SchemaMapping, ExportDataSet, ExportDataSetExtension, CacheTableMeta, and cache table operations Fixes #7737. Closes overlap with the cache mechanism part of c3819078c1 on dwc/foundation; remaining cache features (orphan cleanup, signal handlers, build API, progress callbacks) ship in later atomic PRs. 
def get_cache_table_name(mapping_id, collection_id, prefix='dwc_cache'):
    """Return the cache table name for a (mapping, collection) pair.

    The result is ``{prefix}_{mapping_id}_{collection_id}``.  Nothing is
    sanitized here; ``create_cache_table`` and ``drop_cache_table`` strip
    unsafe characters before the name is placed into SQL.
    """
    return f'{prefix}_{mapping_id}_{collection_id}'


def _safe_identifier(name):
    """Drop every character that is not an ASCII letter, digit or underscore."""
    return re.sub(r'[^a-zA-Z0-9_]', '', name)


def _display_fields(query):
    """Return the query's fields that carry a term, ordered by position.

    Shared by table creation and population so the column list of the
    CREATE TABLE and of the INSERT statement cannot drift apart.
    """
    return [
        f for f in query.fields.order_by('position')
        if getattr(f, 'term', None)
    ]


def create_cache_table(table_name, columns):
    """(Re)create a cache table with the given columns.

    Any existing table with the same (sanitized) name is dropped first.
    An auto-increment ``id`` primary key is always added ahead of the
    caller's columns.

    table_name: table name; unsafe characters are stripped.
    columns: iterable of (column_name, column_type) tuples.  Column names
        have unsafe characters stripped.  Column types are interpolated
        into the DDL verbatim, so they must come from trusted code.  May
        be empty, in which case the table only has the ``id`` column.
    """
    safe_name = _safe_identifier(table_name)
    definitions = ['`id` INT AUTO_INCREMENT PRIMARY KEY']
    definitions.extend(
        f'`{_safe_identifier(name)}` {col_type}'
        for name, col_type in columns
    )
    # Joining a list avoids the dangling comma (and the resulting SQL
    # syntax error) that a fixed "id ..., {cols}" template produces when
    # no columns are supplied.
    column_sql = ', '.join(definitions)
    with connection.cursor() as cursor:
        cursor.execute(f'DROP TABLE IF EXISTS `{safe_name}`')
        cursor.execute(
            f'CREATE TABLE `{safe_name}` ({column_sql})'
            f' ENGINE=InnoDB DEFAULT CHARSET=utf8mb4'
        )
    logger.info('Created cache table %s', safe_name)


def drop_cache_table(table_name):
    """Drop a cache table if it exists; unsafe characters are stripped."""
    safe_name = _safe_identifier(table_name)
    with connection.cursor() as cursor:
        cursor.execute(f'DROP TABLE IF EXISTS `{safe_name}`')
    logger.info('Dropped cache table %s', safe_name)


def build_cache_tables(export_dataset, user=None, progress_callback=None):
    """Build cache tables for an ExportDataSet's core mapping and extensions.

    The core mapping is built first with the default prefix; each
    extension is then built, in ``sortorder``, with the prefix
    ``dwc_cache_ext{sortorder}``.  ``user`` and ``progress_callback`` are
    forwarded unchanged to every build.  The first failing build raises
    and stops the remaining ones.
    """
    core_mapping = export_dataset.coremapping
    collection = export_dataset.collection

    _build_single_cache(core_mapping, collection, user=user,
                        progress_callback=progress_callback)

    extensions = export_dataset.extensions.all().order_by('sortorder')
    for ext in extensions.iterator(chunk_size=2000):
        _build_single_cache(ext.schemamapping, collection,
                            prefix=f'dwc_cache_ext{ext.sortorder}',
                            user=user, progress_callback=progress_callback)


def _build_single_cache(mapping, collection, prefix='dwc_cache', user=None,
                        progress_callback=None):
    """Build one cache table for one SchemaMapping and track its state.

    A CacheTableMeta row for (mapping, collection) is created or reused
    and marked ``building``.  On success it is marked ``idle`` with the
    build time and row count; on failure it is marked ``error`` with the
    exception text stored in ``builderror`` and the exception re-raised.
    """
    from django.utils import timezone
    from .models import CacheTableMeta

    table_name = get_cache_table_name(mapping.id, collection.id, prefix)

    # ``collection`` is a NOT NULL foreign key on CacheTableMeta, so it has
    # to be part of the lookup: without it a first build cannot insert the
    # row, and one mapping built for two collections would share one row.
    meta, _ = CacheTableMeta.objects.update_or_create(
        schemamapping=mapping,
        collection=collection,
        defaults={
            'tablename': table_name,
            'buildstatus': 'building',
            'builderror': None,  # clear the message of an earlier failure
        },
    )

    try:
        columns = [
            (sanitize_column_name(f.term), _infer_column_type(f))
            for f in _display_fields(mapping.query)
        ]

        create_cache_table(table_name, columns)

        rowcount = _execute_and_populate(
            table_name, mapping, collection, user, progress_callback
        )

        now = timezone.now()
        meta.buildstatus = 'idle'
        meta.lastbuilt = now
        meta.timestampmodified = now
        meta.rowcount = rowcount
        meta.save()

        logger.info('Cache table %s built with %d rows', table_name, rowcount)

    except Exception as exc:
        meta.buildstatus = 'error'
        # Persist the reason so it is visible without access to the logs.
        meta.builderror = f'{type(exc).__name__}: {exc}'
        meta.timestampmodified = timezone.now()
        meta.save()
        logger.exception('Failed to build cache table %s', table_name)
        raise


def _insert_rows(insert_sql, rows):
    """Insert *rows* with one ``executemany`` call and return their count."""
    with connection.cursor() as cursor:
        cursor.executemany(insert_sql, rows)
    return len(rows)


def _execute_and_populate(table_name, mapping, collection, user, progress_callback=None):
    """Execute a mapping's query and INSERT the results into the cache table.

    The query is built with the stored-queries ``build_query`` (nulls
    replaced, dates formatted as ``%Y-%m-%d``) and the rows are inserted
    in batches of 2000.

    progress_callback, when given, is called as ``(rows_so_far, None)``
    after every full batch and as ``(total, total)`` at the end.

    Returns the number of rows inserted.
    """
    from specifyweb.backend.stored_queries.execution import (
        build_query, BuildQueryProps, set_group_concat_max_len,
        apply_special_post_query_processing,
    )
    from specifyweb.backend.stored_queries.queryfield import QueryField
    from specifyweb.backend.stored_queries.models import session_context
    # NOTE(review): field_adapter is not part of this change set; confirm
    # the module exists on the target branch.
    from .field_adapter import EphemeralFieldAdapter

    query_obj = mapping.query
    display_fields = _display_fields(query_obj)
    field_specs = [
        QueryField.from_spqueryfield(EphemeralFieldAdapter(f, force_display=True))
        for f in display_fields
    ]

    safe_name = _safe_identifier(table_name)
    placeholders = ', '.join(['%s'] * len(display_fields))
    col_names = ', '.join(
        f'`{sanitize_column_name(f.term)}`'
        for f in display_fields
    )
    insert_sql = f'INSERT INTO `{safe_name}` ({col_names}) VALUES ({placeholders})'

    total = 0
    BATCH_SIZE = 2000

    with session_context() as session:
        set_group_concat_max_len(session.connection())
        sa_query, _ = build_query(
            session, collection, user,
            query_obj.contexttableid,
            field_specs,
            BuildQueryProps(
                replace_nulls=True,
                date_format_override='%Y-%m-%d',
            ),
        )
        sa_query = apply_special_post_query_processing(
            sa_query, query_obj.contexttableid, field_specs, collection, user,
            should_list_query=False,
        )

        # Post-processing may hand back a plain list instead of a query.
        if isinstance(sa_query, list):
            rows = iter(sa_query)
        else:
            rows = sa_query.yield_per(BATCH_SIZE)

        batch = []
        for row in rows:
            # The first column is skipped -- presumably the record id that
            # build_query prepends; verify against build_query.
            # NOTE(review): every value is stored as text and NULL becomes
            # ''; confirm that '' is accepted by the INT/DECIMAL columns
            # chosen by _infer_column_type under the server's SQL mode.
            batch.append(tuple(
                str(v) if v is not None else '' for v in row[1:]
            ))

            if len(batch) >= BATCH_SIZE:
                total += _insert_rows(insert_sql, batch)
                batch = []
                if progress_callback:
                    progress_callback(total, None)

        if batch:
            total += _insert_rows(insert_sql, batch)

    if progress_callback:
        progress_callback(total, total)

    return total


# Boolean-style field names: "is"/"yes" followed by a word boundary in
# camelCase or snake_case (isCurrent, is_primary, yesNo1).  A plain
# prefix test also matches ordinary text fields such as "island" or
# "isbn" and would squeeze them into VARCHAR(8).
_BOOLEAN_NAME_RE = re.compile(r'(?:[iI]s|[yY]es)[A-Z0-9_]')


def _infer_column_type(spqueryfield):
    """Infer a MySQL column type from a query field's ``fieldname``.

    The decision is purely name-based.  Anything not recognised falls
    back to ``TEXT``, which never truncates, so a missed match is safe.
    A missing/empty ``fieldname`` yields ``TEXT``.
    """
    name = spqueryfield.fieldname or ''
    fname = name.lower()

    if 'guid' in fname or 'uuid' in fname:
        return 'VARCHAR(256)'
    if fname in ('id', 'rankid', 'number1', 'number2', 'countamt',
                 'sortorder', 'position', 'version'):
        return 'INT'
    if 'numericyear' in fname or 'numericmonth' in fname or 'numericday' in fname:
        return 'INT'
    if fname in ('latitude1', 'latitude2', 'longitude1', 'longitude2',
                 'latlongaccuracy', 'maxelevation', 'minelevation'):
        return 'DECIMAL(12,6)'
    if fname in ('startdate', 'enddate', 'determineddate', 'catalogeddate',
                 'timestampcreated', 'timestampmodified'):
        return 'VARCHAR(32)'
    if _BOOLEAN_NAME_RE.match(name):
        return 'VARCHAR(8)'
    if fname in ('catalognumber', 'altcatalognumber', 'barcode', 'fieldnumber',
                 'code', 'abbreviation', 'datum'):
        return 'VARCHAR(256)'
    return 'TEXT'
def sanitize_term_name(term_iri):
    """Extract the short name from a DwC term IRI.

    The short name is whatever follows the last '/' or '#':

    'http://rs.tdwg.org/dwc/terms/catalogNumber' -> 'catalogNumber'
    'http://purl.org/dc/terms/type' -> 'type'
    'http://purl.org/dc/terms#modified' -> 'modified'

    A value without either separator is returned unchanged.
    """
    return re.split(r'[/#]', term_iri)[-1]


def sanitize_column_name(name):
    """Sanitize a term IRI into a MySQL column name.

    Takes the IRI's short name, replaces every character outside
    ``[a-zA-Z0-9_]`` with '_' and truncates the result to 64 characters.
    """
    short_name = sanitize_term_name(name)
    return re.sub(r'[^a-zA-Z0-9_]', '_', short_name)[:64]


# Known extension rowType URIs
# NOTE(review): 'MeasurementOrFact' maps to the OBIS *Extended* MoF row
# type here, while build_meta_xml defaults to the TDWG MeasurementOrFact
# row type -- confirm which one is intended.
EXTENSION_ROW_TYPES = {
    'MeasurementOrFact': 'http://rs.iobis.org/obis/terms/ExtendedMeasurementOrFact',
    'ResourceRelationship': 'http://rs.tdwg.org/dwc/terms/ResourceRelationship',
    'Identification': 'http://rs.tdwg.org/dwc/terms/Identification',
    'Multimedia': 'http://rs.gbif.org/terms/1.0/Multimedia',
}


# Written explicitly instead of ET.tostring(..., xml_declaration=True):
# with encoding='unicode' the encoding named in the generated declaration
# is not fixed (older Python versions take it from the process locale),
# which can contradict the UTF-8 the archive advertises.
_XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>\n'

# CSV dialect attributes shared by <core> and every <extension>.
# linesTerminatedBy is the two-character sequence backslash + n, not a
# real newline.
_CSV_DIALECT = (
    ('encoding', 'UTF-8'),
    ('fieldsTerminatedBy', ','),
    ('linesTerminatedBy', '\\n'),
    ('fieldsEnclosedBy', '"'),
    ('ignoreHeaderLines', '1'),
)


def _serialize_xml(root):
    """Serialize *root* to text prefixed with a fixed UTF-8 declaration."""
    return _XML_DECLARATION + ET.tostring(root, encoding='unicode')


def _append_data_file(archive, tag, row_type, filename, id_tag, terms, add_id):
    """Append one <core>/<extension> element describing a CSV file.

    Children are emitted in the order files/location, the id element
    (only when *add_id* is true), then one <field> per term with its
    zero-based index.
    """
    table = ET.SubElement(archive, tag)
    for key, value in _CSV_DIALECT:
        table.set(key, value)
    table.set('rowType', row_type)

    files = ET.SubElement(table, 'files')
    ET.SubElement(files, 'location').text = filename

    if add_id:
        ET.SubElement(table, id_tag).set('index', '0')

    for idx, term_iri in enumerate(terms):
        field = ET.SubElement(table, 'field')
        field.set('index', str(idx))
        field.set('term', term_iri)
    return table


def build_meta_xml(core_terms, ext_info_list):
    """Build meta.xml describing the DwC archive structure.

    core_terms: list of full term IRIs for the core file.  The core is
        always ``occurrence.csv`` with the Occurrence row type; its
        ``<id index="0">`` is only emitted when there is at least one term.
    ext_info_list: list of dicts with 'filename' and 'terms' (full IRIs)
        and an optional 'rowType' (defaults to the TDWG MeasurementOrFact
        row type).  Every extension gets ``<coreid index="0">``.

    Returns the document as a string starting with an XML declaration.
    Raises KeyError if an extension dict lacks 'filename' or 'terms'.
    """
    archive = ET.Element('archive')
    archive.set('xmlns', 'http://rs.tdwg.org/dwc/text/')
    archive.set('metadata', 'eml.xml')

    # Core
    _append_data_file(
        archive, 'core', 'http://rs.tdwg.org/dwc/terms/Occurrence',
        'occurrence.csv', 'id', core_terms, add_id=bool(core_terms),
    )

    # Extensions
    for ext in ext_info_list:
        row_type = ext.get('rowType', 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact')
        _append_data_file(
            archive, 'extension', row_type,
            ext['filename'], 'coreid', ext['terms'], add_id=True,
        )

    return _serialize_xml(archive)


def _load_custom_eml(export_dataset):
    """Return the uploaded EML document for the dataset, or None.

    None is returned when no app resource data with content is found or
    when loading it fails; a failure is logged rather than silently
    dropped so a broken upload can be diagnosed.
    """
    try:
        from specifyweb.specify.models import Spappresourcedata
        data = Spappresourcedata.objects.filter(
            spappresource=export_dataset.metadata
        ).first()
        if data and data.data:
            content = data.data
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            return content
    except Exception:
        # Best effort: fall back to generated EML, but leave a trace.
        import logging
        logging.getLogger(__name__).warning(
            'Could not load custom EML metadata; generating minimal EML',
            exc_info=True,
        )
    return None


def build_eml_xml(export_dataset):
    """Build EML metadata for an export dataset.

    Returns the custom EML if one was uploaded (``export_dataset.metadata``
    is set and has content); otherwise generates a minimal EML 2.1.1
    document with a random packageId, the export name as title, today's
    date as pubDate and a one-line abstract.
    """
    if export_dataset.metadata:
        custom = _load_custom_eml(export_dataset)
        if custom is not None:
            return custom

    eml = ET.Element('eml:eml')
    eml.set('xmlns:eml', 'eml://ecoinformatics.org/eml-2.1.1')
    eml.set('packageId', str(uuid4()))
    eml.set('system', 'http://specify.org')

    dataset = ET.SubElement(eml, 'dataset')
    ET.SubElement(dataset, 'title').text = export_dataset.exportname

    creator = ET.SubElement(dataset, 'creator')
    ET.SubElement(creator, 'organizationName').text = 'Specify Collection'

    ET.SubElement(dataset, 'pubDate').text = date.today().strftime('%Y-%m-%d')

    abstract = ET.SubElement(dataset, 'abstract')
    ET.SubElement(abstract, 'para').text = (
        f'Darwin Core Archive export: {export_dataset.exportname}'
    )

    return _serialize_xml(eml)
models.DateTimeField(blank=True, db_column='LastBuilt', null=True)), + ('rowcount', models.IntegerField(blank=True, db_column='RowCount', null=True)), + ('buildstatus', models.CharField( + choices=[('idle', 'idle'), ('building', 'building'), ('error', 'error')], + db_column='BuildStatus', default='idle', max_length=16, + )), + ('builderror', models.TextField(blank=True, db_column='BuildError', null=True)), + ('timestampcreated', models.DateTimeField(db_column='TimestampCreated', default=django.utils.timezone.now)), + ('timestampmodified', models.DateTimeField(db_column='TimestampModified', default=django.utils.timezone.now)), + ('collection', models.ForeignKey( + db_column='CollectionID', + on_delete=django.db.models.deletion.CASCADE, + related_name='+', to='specify.collection', + )), + ('schemamapping', models.ForeignKey( + db_column='SchemaMappingID', + on_delete=django.db.models.deletion.CASCADE, + related_name='cachetablemetas', to='specify.schemamapping', + )), + ], + options={ + 'db_table': 'cachetablemeta', + 'indexes': [models.Index(fields=['schemamapping', 'collection'], name='CacheMetaMappingColIDX')], + }, + ), + ] diff --git a/specifyweb/backend/export/migrations/__init__.py b/specifyweb/backend/export/migrations/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/specifyweb/backend/export/models.py b/specifyweb/backend/export/models.py index e69de29bb2d..26ebe2f1d70 100644 --- a/specifyweb/backend/export/models.py +++ b/specifyweb/backend/export/models.py @@ -0,0 +1,48 @@ +"""Models for the export app. + +The core DwC mapping/dataset/extension tables live on the main `specify` +app (added in upstream PRs #7873/#7874/#7877 and #7746). This module +re-exports them under PascalCase aliases for use throughout this package +and adds the cache-tracking model that's specific to the cache engine. 
class CacheTableMeta(models.Model):
    """Tracks build state and metadata for cache tables backing DwC exports.

    Each row names one physical cache table (``tablename`` is unique) and
    links it to the schema mapping and collection it was built for.
    """

    id = models.AutoField(primary_key=True, db_column='CacheTableMetaID')

    # Deleting the mapping or the collection deletes the metadata row
    # (CASCADE).  NOTE(review): nothing visible here drops the physical
    # cache table when that happens -- confirm cleanup is handled elsewhere.
    schemamapping = models.ForeignKey(
        'specify.Schemamapping', db_column='SchemaMappingID',
        related_name='cachetablemetas', null=False, on_delete=models.CASCADE,
    )
    # related_name='+' means no reverse accessor is added to Collection.
    collection = models.ForeignKey(
        'specify.Collection', db_column='CollectionID',
        related_name='+', null=False, on_delete=models.CASCADE,
    )
    # Name of the physical cache table; the only unique column.
    tablename = models.CharField(max_length=128, unique=True, db_column='TableName')
    # Both stay NULL until a build has recorded them.
    lastbuilt = models.DateTimeField(blank=True, null=True, db_column='LastBuilt')
    rowcount = models.IntegerField(blank=True, null=True, db_column='RowCount')
    buildstatus = models.CharField(
        max_length=16, default='idle', db_column='BuildStatus',
        choices=[('idle', 'idle'), ('building', 'building'), ('error', 'error')],
    )
    builderror = models.TextField(blank=True, null=True, db_column='BuildError')
    # Both timestamps only default to "now" at creation; timestampmodified
    # is not refreshed automatically on save (no auto_now).
    timestampcreated = models.DateTimeField(default=timezone.now, db_column='TimestampCreated')
    timestampmodified = models.DateTimeField(default=timezone.now, db_column='TimestampModified')

    class Meta:
        db_table = 'cachetablemeta'
        # Plain (non-unique) index: the database does not itself prevent
        # two rows for the same (schemamapping, collection) pair.
        indexes = [
            models.Index(fields=['schemamapping', 'collection'], name='CacheMetaMappingColIDX'),
        ]
class CacheTableOperationsTests(TransactionTestCase):
    """Exercise create_cache_table / drop_cache_table against the database."""

    def _table_exists(self, name):
        """Return True if a table called *name* exists in the current schema."""
        with connection.cursor() as cursor:
            # Restrict to the connected schema; without the filter a table
            # of the same name in any other schema on the server would
            # count as a match.
            cursor.execute(
                "SELECT COUNT(*) FROM information_schema.tables "
                "WHERE table_name = %s AND table_schema = DATABASE()", [name]
            )
            return cursor.fetchone()[0] > 0

    def test_create_and_drop_cache_table(self):
        table_name = 'dwc_cache_test_99'
        # create_cache_table auto-prepends an `id` PK; only pass user columns.
        columns = [('val', 'VARCHAR(128)')]
        try:
            create_cache_table(table_name, columns)
            self.assertTrue(self._table_exists(table_name))

            drop_cache_table(table_name)
            self.assertFalse(self._table_exists(table_name))
        finally:
            # Table DDL is not undone by the test framework; make sure a
            # failed assertion does not leave the table behind.
            drop_cache_table(table_name)

    def test_cache_table_name_sanitization_in_create(self):
        # Semicolons and other special chars are stripped from table name.
        dirty_name = 'test;drop--table'
        safe_name = 'testdroptable'
        columns = [('val', 'INT')]
        try:
            create_cache_table(dirty_name, columns)
            self.assertTrue(self._table_exists(safe_name))
        finally:
            drop_cache_table(safe_name)
'dwc_cache_build_test' + columns = [ + ('occurrence_id', 'VARCHAR(256)'), + ('catalogNumber', 'TEXT'), + ('locality', 'TEXT'), + ] + try: + create_cache_table(table_name, columns) + self.assertTrue(self._table_exists(table_name)) + + db_columns = self._get_columns(table_name) + self.assertIn('occurrence_id', db_columns) + self.assertIn('catalogNumber', db_columns) + self.assertIn('locality', db_columns) + # 3 user columns + auto-prepended `id` primary key + self.assertEqual(len(db_columns), 4) + self.assertIn('id', db_columns) + finally: + drop_cache_table(table_name) + diff --git a/specifyweb/backend/export/tests/test_models.py b/specifyweb/backend/export/tests/test_models.py new file mode 100644 index 00000000000..4c3d7169877 --- /dev/null +++ b/specifyweb/backend/export/tests/test_models.py @@ -0,0 +1,187 @@ +from django.db import IntegrityError +from django.test import TestCase + +from specifyweb.specify.tests.test_api import MainSetupTearDown +from specifyweb.specify.models import Spquery, Spqueryfield +from specifyweb.backend.export.models import ( + SchemaMapping, ExportDataSet, ExportDataSetExtension, CacheTableMeta, +) + + +class SchemaMappingTests(MainSetupTearDown, TestCase): + + def _make_query(self, name='test query'): + return Spquery.objects.create( + name=name, + contextname='CollectionObject', + contexttableid=1, + createdbyagent=self.agent, + specifyuser=self.specifyuser, + ) + + def test_create_schema_mapping(self): + query = self._make_query() + mapping = SchemaMapping.objects.create( + query=query, + mapping_type='Core', + name='DwC Core Mapping', + createdbyagent=self.agent, + specifyuser=self.specifyuser, + ) + mapping.refresh_from_db() + self.assertEqual(mapping.query_id, query.pk) + self.assertEqual(mapping.mapping_type, 'Core') + self.assertEqual(mapping.name, 'DwC Core Mapping') + self.assertFalse(mapping.is_default) + + def test_schema_mapping_query_onetoone(self): + query = self._make_query() + SchemaMapping.objects.create( + 
query=query, + mapping_type='Core', + name='First', + specifyuser=self.specifyuser, + ) + with self.assertRaises(IntegrityError): + SchemaMapping.objects.create( + query=query, + mapping_type='Extension', + name='Second', + specifyuser=self.specifyuser, + ) + + def test_schema_mapping_cascade_delete(self): + query = self._make_query() + SchemaMapping.objects.create( + query=query, + mapping_type='Core', + name='Cascade Test', + specifyuser=self.specifyuser, + ) + self.assertEqual(SchemaMapping.objects.count(), 1) + query.delete() + self.assertEqual(SchemaMapping.objects.count(), 0) + + def test_spqueryfield_term_nullable(self): + query = self._make_query() + + # Field without DwC term — backward compatible + field_no_term = Spqueryfield.objects.create( + query=query, + fieldname='catalogNumber', + operstart=0, + sorttype=0, + position=0, + startvalue='', + stringid='1.collectionobject.catalogNumber', + tablelist='1', + ) + field_no_term.refresh_from_db() + self.assertIsNone(field_no_term.term) + self.assertFalse(field_no_term.isstatic) + self.assertIsNone(field_no_term.staticvalue) + + # Field with DwC term + field_with_term = Spqueryfield.objects.create( + query=query, + fieldname='catalogNumber', + operstart=0, + sorttype=0, + position=1, + startvalue='', + stringid='1.collectionobject.catalogNumber', + tablelist='1', + term='http://rs.tdwg.org/dwc/terms/catalogNumber', + isstatic=False, + ) + field_with_term.refresh_from_db() + self.assertEqual( + field_with_term.term, + 'http://rs.tdwg.org/dwc/terms/catalogNumber', + ) + + # Static field + field_static = Spqueryfield.objects.create( + query=query, + fieldname='catalogNumber', + operstart=0, + sorttype=0, + position=2, + startvalue='', + stringid='1.collectionobject.catalogNumber', + tablelist='1', + term='http://rs.tdwg.org/dwc/terms/basisOfRecord', + isstatic=True, + staticvalue='PreservedSpecimen', + ) + field_static.refresh_from_db() + self.assertTrue(field_static.isstatic) + 
class ExportDataSetTests(MainSetupTearDown, TestCase):
    """Model-level tests for ExportDataSet, its extensions and CacheTableMeta."""

    def _make_mapping(self, name='test mapping'):
        """Create a 'Core' SchemaMapping named *name* on a fresh query."""
        # Every call creates its own Spquery, so mappings never share one.
        query = Spquery.objects.create(
            name='q',
            contextname='CollectionObject',
            contexttableid=1,
            createdbyagent=self.agent,
            specifyuser=self.specifyuser,
        )
        return SchemaMapping.objects.create(
            query=query, mapping_type='Core', name=name,
            specifyuser=self.specifyuser,
        )

    def test_create_export_dataset(self):
        """A dataset round-trips its values and has the expected defaults."""
        mapping = self._make_mapping()
        ds = ExportDataSet.objects.create(
            exportname='My Export',
            filename='export.zip',
            coremapping=mapping,
            collection=self.collection,
        )
        ds.refresh_from_db()
        self.assertEqual(ds.exportname, 'My Export')
        self.assertEqual(ds.filename, 'export.zip')
        # Fields not passed to create() fall back to their defaults.
        self.assertFalse(ds.rss)
        self.assertIsNone(ds.frequency)
        self.assertIsNone(ds.lastexported)
        self.assertEqual(ds.coremapping_id, mapping.pk)
        self.assertEqual(ds.collection_id, self.collection.pk)

    def test_export_dataset_extension(self):
        """An extension links dataset and mapping; the same pair cannot repeat."""
        core = self._make_mapping('core')
        ext_mapping = self._make_mapping('ext')
        ds = ExportDataSet.objects.create(
            exportname='DS', filename='ds.zip',
            coremapping=core, collection=self.collection,
        )
        ext = ExportDataSetExtension.objects.create(
            exportdataset=ds, schemamapping=ext_mapping, sortorder=1,
        )
        ext.refresh_from_db()
        self.assertEqual(ext.exportdataset_id, ds.pk)
        self.assertEqual(ext.schemamapping_id, ext_mapping.pk)
        self.assertEqual(ext.sortorder, 1)

        # unique_together enforced
        # Same dataset and mapping with a different sortorder must still
        # fail.  This is the last statement of the test, so no query runs
        # after the IntegrityError.
        with self.assertRaises(IntegrityError):
            ExportDataSetExtension.objects.create(
                exportdataset=ds, schemamapping=ext_mapping, sortorder=2,
            )

    def test_cache_table_meta(self):
        """A new CacheTableMeta row starts 'idle' with no build data."""
        mapping = self._make_mapping()
        meta = CacheTableMeta.objects.create(
            schemamapping=mapping,
            collection=self.collection,
            tablename='dwc_cache_1_4',
        )
        meta.refresh_from_db()
        self.assertEqual(meta.schemamapping_id, mapping.pk)
        self.assertEqual(meta.tablename, 'dwc_cache_1_4')
        self.assertIsNone(meta.lastbuilt)
        self.assertIsNone(meta.rowcount)
        self.assertEqual(meta.buildstatus, 'idle')