From 25612c27ac230e3f09ac539be23953b933f290ba Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 11:03:52 -0500
Subject: [PATCH 01/24] feat(webportal): add export tool
---
.../backend/stored_queries/execution.py | 330 +++++++++++++++++-
.../backend/stored_queries/queryfieldspec.py | 3 +-
specifyweb/backend/stored_queries/urls.py | 1 +
specifyweb/backend/stored_queries/views.py | 34 ++
specifyweb/backend/trees/utils.py | 4 +
.../Notifications/NotificationRenderers.tsx | 16 +
.../lib/components/Permissions/definitions.ts | 1 +
.../lib/components/QueryBuilder/Export.tsx | 17 +
.../js_src/lib/localization/notifications.ts | 10 +
.../frontend/js_src/lib/localization/query.ts | 10 +
10 files changed, 423 insertions(+), 3 deletions(-)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index e20f4f8b61d..f661ee2fa5c 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -3,8 +3,12 @@
import logging
import os
import re
+import uuid
+from io import StringIO
+from xml.sax.saxutils import escape
+from zipfile import ZIP_DEFLATED, ZipFile
-from typing import Literal, NamedTuple
+from typing import Any, Literal, NamedTuple
import xml.dom.minidom
from collections import namedtuple, defaultdict
from functools import reduce
@@ -14,8 +18,10 @@
from django.utils import timezone
from specifyweb.backend.inheritance.api import cog_inheritance_post_query_processing, parent_inheritance_post_query_processing
from specifyweb.backend.inheritance.utils import get_cat_num_inheritance_setting, get_parent_cat_num_inheritance_setting
+from specifyweb.backend.context.schema_localization import get_schema_localization
from specifyweb.backend.stored_queries.utils import log_sqlalchemy_query
from specifyweb.specify.utils.field_change_info import FieldChangeInfo
+from specifyweb.specify.utils.uiformatters import CNNField, get_catalognumber_format, get_uiformatter
from sqlalchemy import sql, orm, func, text
from sqlalchemy.sql.expression import asc, desc, insert, literal
@@ -33,8 +39,9 @@
from specifyweb.specify.models import Loan, Loanpreparation, Loanreturnpreparation, Taxontreedef
from specifyweb.backend.workbench.upload.auditlog import auditlog
from specifyweb.backend.stored_queries.group_concat import group_by_displayed_fields
-from specifyweb.backend.stored_queries.queryfield import fields_from_json, QUREYFIELD_SORT_T
+from specifyweb.backend.stored_queries.queryfield import QueryField, fields_from_json, QUREYFIELD_SORT_T
from specifyweb.backend.stored_queries.synonomy import synonymize_tree_query
+
from specifyweb.specify.datamodel import datamodel, is_tree_table
logger = logging.getLogger(__name__)
@@ -268,12 +275,331 @@ def do_export(spquery, collection, user, filename, exporttype, host):
query_to_kml(session, collection, user, tableid, field_specs, path, spquery['captions'], host,
recordsetid=recordsetid, strip_id=False, selected_rows=spquery.get('selectedrows', None))
message_type = 'query-export-to-kml-complete'
+ elif exporttype == 'webportal':
+ query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ spquery['captions'],
+ recordsetid=recordsetid,
+ distinct=spquery['selectdistinct'],
+ )
+ message_type = 'query-export-to-web-portal-complete'
Message.objects.create(user=user, content=json.dumps({
'type': message_type,
'file': filename,
}))
+
+def _build_portal_collection_name(collection) -> str:
+ return settings.WEB_ATTACHMENT_COLLECTION or collection.collectionname
+
+
+def _build_image_base_url() -> str:
+ return os.getenv('ASSET_SERVER_URL') or settings.WEB_ATTACHMENT_URL or ''
+
+
+def _schema_localization_or_empty(collection) -> dict[str, Any]:
+ try:
+ return get_schema_localization(collection, 0, 'en-us')
+ except Exception:
+ logger.exception('Failed loading schema localization for web portal export')
+ return {}
+
+
+def _clean_cell(value: Any) -> str:
+ return re.sub("\r|\n", " ", str(value if value is not None else ''))
+
+
+def _dedupe_name(name: str, used_names: set[str]) -> str:
+ candidate = name
+ suffix = 2
+ while candidate in used_names:
+ candidate = f"{name}_{suffix}"
+ suffix += 1
+ used_names.add(candidate)
+ return candidate
+
+
+def _portal_solr_type(query_field: QueryField, collection, user) -> str:
+ fieldspec = query_field.fieldspec
+ field = fieldspec.get_field()
+
+ if field is None or field.is_relationship:
+ return 'string'
+
+ if fieldspec.table.name == 'CollectionObject' and field.name == 'catalogNumber':
+ formatter = get_catalognumber_format(collection, query_field.format_name, user)
+ if (
+ formatter is not None
+ and len(formatter.fields) == 1
+ and isinstance(formatter.fields[0], CNNField)
+ ):
+ return 'pint'
+ return 'string'
+
+ if field.type in ('java.lang.String', 'text'):
+ return 'string'
+ if field.type in ('java.util.Date', 'java.sql.Timestamp'):
+ return 'string'
+ if field.type == 'java.util.Calendar':
+ return 'pint' if fieldspec.date_part in {'Day', 'Month', 'Year'} else 'string'
+ if field.type in ('java.lang.Integer', 'java.lang.Byte', 'java.lang.Short'):
+ return 'pint'
+ if field.type == 'java.lang.Long':
+ return 'plong'
+ if field.type == 'java.lang.Float':
+ return 'pfloat'
+ if field.type in ('java.lang.Double', 'java.math.BigDecimal'):
+ return 'pdouble'
+ if field.type == 'java.lang.Boolean':
+ return 'string'
+ return 'string'
+
+
+def _portal_field_metadata(
+ query_field: QueryField,
+ caption: str,
+ colname: str,
+ index: int,
+ schema_localization: dict[str, Any],
+ collection,
+ user,
+) -> dict[str, Any]:
+ fieldspec = query_field.fieldspec
+ table = fieldspec.table
+ field = fieldspec.get_field()
+
+ table_key = table.name.lower()
+ table_localization = schema_localization.get(table_key, {})
+ item_localization = (
+ table_localization.get('items', {}).get(field.name.lower(), {})
+ if field is not None
+ else {}
+ )
+
+ spfld = field.name if field is not None else table.idFieldName
+ field_type = field.type if field is not None else 'java.lang.String'
+ field_length = field.length if field is not None and field.length is not None else 255
+
+ return {
+ 'colname': colname,
+ 'solrname': spfld,
+ 'solrtype': _portal_solr_type(query_field, collection, user),
+ 'title': caption,
+ 'type': field_type,
+ 'width': field_length,
+ 'concept': colname,
+ 'concepturl': 'http://rs.tdwg.org/dwc/terms/',
+ 'sptable': table_key,
+ 'sptabletitle': table_localization.get('name', table.name),
+ 'spfld': spfld,
+ 'spfldtitle': item_localization.get('name', spfld),
+ 'spdescription': item_localization.get('desc', spfld),
+ 'colidx': index,
+ 'linkify': 'true',
+ 'advancedsearch': 'true',
+ 'displaycolidx': index,
+ }
+
+
+def _simplify_portal_field_metadata(field_meta: dict[str, Any]) -> dict[str, Any]:
+ simplified = {
+ 'colname': field_meta['colname'],
+ 'solrname': field_meta['solrname'],
+ 'solrtype': field_meta['solrtype'],
+ }
+
+ for key in (
+ 'title',
+ 'type',
+ 'width',
+ 'concept',
+ 'sptable',
+ 'sptabletitle',
+ 'spfld',
+ 'spfldtitle',
+ 'colidx',
+ 'linkify',
+ 'advancedsearch',
+ 'displaycolidx',
+ 'treeid',
+ 'treerank',
+ ):
+ if key in field_meta:
+ simplified[key] = field_meta[key]
+
+ return simplified
+
+
+def _make_solr_schema_xml(fields: list[dict[str, Any]]) -> str:
+    lines = [
+        '<?xml version="1.0" encoding="UTF-8" ?>',
+        '<!-- Solr field definitions for the Specify web portal -->',
+    ]
+
+    lines.append(
+        '<field name="contents" type="text_general" indexed="true" stored="true" required="false" multiValued="true"/>'
+    )
+    lines.append(
+        '<field name="geoc" type="location" indexed="true" stored="true" required="false"/>'
+    )
+    lines.append(
+        '<field name="img" type="string" indexed="true" stored="true" required="false"/>'
+    )
+
+    emitted: set[str] = {'contents', 'geoc', 'img'}
+    for field in fields:
+        name = str(field['solrname'])
+        if name in emitted:
+            continue
+        emitted.add(name)
+
+        escaped_name = escape(name)
+        solr_type = escape(str(field['solrtype']))
+        required = 'true' if name == 'spid' else 'false'
+        lines.append(
+            f'<field name="{escaped_name}" type="{solr_type}" indexed="true" stored="true" required="{required}"/>'
+        )
+    return "\n".join(lines) + "\n"
+
+
+def _serialize_portal_data(
+ rows: list[list[str]],
+ header: list[str],
+) -> str:
+ output = StringIO()
+ writer = csv.writer(output)
+ writer.writerow(header)
+ writer.writerows(rows)
+ return output.getvalue()
+
+
+def query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ captions,
+ recordsetid=None,
+ distinct=False,
+):
+ set_group_concat_max_len(session.connection())
+ query, __ = build_query(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ BuildQueryProps(recordsetid=recordsetid, replace_nulls=True, distinct=distinct),
+ )
+ query = apply_special_post_query_processing(
+ query,
+ tableid,
+ field_specs,
+ collection,
+ user,
+ should_list_query=False,
+ )
+
+ display_fields = [field_spec for field_spec in field_specs if field_spec.display]
+ effective_captions = captions if captions else [
+ (
+ field_spec.fieldspec.get_field().name
+ if field_spec.fieldspec.get_field() is not None
+ else field_spec.fieldspec.table.name
+ )
+ for field_spec in display_fields
+ ]
+
+ schema_localization = _schema_localization_or_empty(collection)
+
+ used_colnames: set[str] = {'spid'}
+ used_solrnames: set[str] = {'spid'}
+ column_defs: list[tuple[str, str, str, dict[str, Any]]] = []
+ for index, (field_spec, caption) in enumerate(
+ zip(display_fields, effective_captions, strict=False),
+ start=0,
+ ):
+ trimmed_caption = str(caption).strip()
+ base_name = trimmed_caption if trimmed_caption else f'column_{index + 1}'
+ colname = _dedupe_name(base_name, used_colnames)
+
+ field = field_spec.fieldspec.get_field()
+ if field is not None:
+ base_solrname = field.name
+ table_prefix = field_spec.fieldspec.table.name.lower()
+ else:
+ base_solrname = field_spec.fieldspec.table.idFieldName
+ table_prefix = field_spec.fieldspec.table.name.lower()
+
+ if base_solrname in used_solrnames:
+ solrname = _dedupe_name(f'{table_prefix}_{base_solrname}', used_solrnames)
+ else:
+ solrname = _dedupe_name(base_solrname, used_solrnames)
+
+ metadata = _portal_field_metadata(
+ field_spec,
+ trimmed_caption if trimmed_caption else colname,
+ colname,
+ index,
+ schema_localization,
+ collection,
+ user,
+ )
+ metadata['solrname'] = solrname
+ column_defs.append((colname, solrname, metadata['title'], metadata))
+
+ metadata_rows: list[dict[str, Any]] = [
+ {'colname': 'spid', 'solrname': 'spid', 'solrtype': 'string'},
+ *[
+ _simplify_portal_field_metadata(column_def[3])
+ for column_def in column_defs
+ ],
+ {'colname': 'img', 'solrname': 'img', 'solrtype': 'string', 'title': 'image'},
+ ]
+
+ output_rows: list[list[str]] = []
+ data_rows = query if isinstance(query, list) else query.yield_per(1)
+ for row in data_rows:
+ raw_id = row[0] if len(row) > 0 else ''
+ spid = str(uuid.uuid5(uuid.NAMESPACE_URL, f'{tableid}:{raw_id}'))
+ display_values = row[1:] if len(row) > 1 else []
+ cleaned_values = [_clean_cell(value) for value in display_values]
+ contents = '\t'.join(cleaned_values)
+ output_rows.append([spid, contents, '', '', *cleaned_values])
+
+ header = ['spid', 'contents', 'img', 'geoc', *[column_def[1] for column_def in column_defs]]
+ portal_data = _serialize_portal_data(output_rows, header)
+ flds_json = json.dumps(metadata_rows, indent=2)
+ solr_schema = _make_solr_schema_xml(metadata_rows)
+
+ image_info_fields = [column_def[1] for column_def in column_defs[:2]]
+ portal_instance_settings = json.dumps(
+ {
+ 'portalInstance': str(uuid.uuid4()),
+ 'collectionName': _build_portal_collection_name(collection),
+ 'imageBaseUrl': _build_image_base_url(),
+ 'imageInfoFlds': ' '.join(image_info_fields),
+ },
+ indent=2,
+ )
+
+ with ZipFile(path, 'w', compression=ZIP_DEFLATED) as archive:
+ archive.writestr('PortalFiles/PortalData.csv', portal_data)
+ archive.writestr('PortalFiles/flds.json', flds_json)
+ archive.writestr(
+ 'PortalFiles/PortalInstanceSetting.json',
+ portal_instance_settings,
+ )
+ archive.writestr('PortalFiles/SolrFldSchema.xml', solr_schema)
+
# def stored_query_to_csv(query_id, collection, user, path):
# """Executes a query from the Spquery table with the given id and send
# the results to a CSV file at path.
diff --git a/specifyweb/backend/stored_queries/queryfieldspec.py b/specifyweb/backend/stored_queries/queryfieldspec.py
index ceec7f6c8bf..4d10dc1f67b 100644
--- a/specifyweb/backend/stored_queries/queryfieldspec.py
+++ b/specifyweb/backend/stored_queries/queryfieldspec.py
@@ -18,6 +18,7 @@
from . import models
from .query_ops import QueryOps
from specifyweb.specify.models_utils.load_datamodel import Table, Field, Relationship
+from specifyweb.specify.datamodel import is_tree_table
logger = logging.getLogger(__name__)
@@ -259,7 +260,7 @@ def from_stringid(cls, stringid: str, is_relation: bool):
field = node.get_field(extracted_fieldname, strict=False)
tree_rank_name = None
- if field is None: # try finding tree
+ if field is None and is_tree_table(node): # try finding tree only on tree tables
tree_rank_name, field = find_tree_and_field(node, extracted_fieldname)
if tree_rank_name:
tree_rank = TreeRankQuery.create(
diff --git a/specifyweb/backend/stored_queries/urls.py b/specifyweb/backend/stored_queries/urls.py
index ff5b0aee3fb..43948a5fcc3 100644
--- a/specifyweb/backend/stored_queries/urls.py
+++ b/specifyweb/backend/stored_queries/urls.py
@@ -7,6 +7,7 @@
path('ephemeral/', views.ephemeral),
path('exportcsv/', views.export_csv),
path('exportkml/', views.export_kml),
+ path('exportwebportal/', views.export_web_portal),
path('make_recordset/', views.make_recordset),
path('merge_recordsets/', views.merge_recordsets),
path('return_loan_preps/', views.return_loan_preps),
diff --git a/specifyweb/backend/stored_queries/views.py b/specifyweb/backend/stored_queries/views.py
index 7efb9c596c3..4a1312febd1 100644
--- a/specifyweb/backend/stored_queries/views.py
+++ b/specifyweb/backend/stored_queries/views.py
@@ -34,6 +34,7 @@ class QueryBuilderPt(PermissionTarget):
execute = PermissionTargetAction()
export_csv = PermissionTargetAction()
export_kml = PermissionTargetAction()
+ export_webportal = PermissionTargetAction()
create_recordset = PermissionTargetAction()
def value_from_request(field, get):
@@ -202,6 +203,39 @@ def export_kml(request):
thread.start()
return HttpResponse('OK', content_type='text/plain')
+
+@require_POST
+@login_maybe_required
+@never_cache
+def export_web_portal(request):
+ """Executes and returns as ZIP the web portal export package for the query provided as JSON in the POST body."""
+ check_permission_targets(request.specify_collection.id, request.specify_user.id, [
+ QueryBuilderPt.execute,
+ QueryBuilderPt.export_webportal,
+ ])
+ try:
+ spquery = json.load(request)
+ except ValueError as e:
+ return HttpResponseBadRequest(e)
+
+ logger.info('export web portal query: %s', spquery)
+
+ if 'collectionid' in spquery:
+ collection = Collection.objects.get(pk=spquery['collectionid'])
+ logger.debug('forcing collection to %s', collection.collectionname)
+ else:
+ collection = request.specify_collection
+
+ file_name = format_export_file_name(spquery, 'zip')
+
+ thread = Thread(
+ target=do_export,
+ args=(spquery, collection, request.specify_user, file_name, 'webportal', None),
+ )
+ thread.daemon = True
+ thread.start()
+ return HttpResponse('OK', content_type='text/plain')
+
@require_POST
@login_maybe_required
@never_cache
diff --git a/specifyweb/backend/trees/utils.py b/specifyweb/backend/trees/utils.py
index 58863726dac..a8603cc6942 100644
--- a/specifyweb/backend/trees/utils.py
+++ b/specifyweb/backend/trees/utils.py
@@ -38,6 +38,8 @@
def get_search_filters(collection: spmodels.Collection, tree: str):
tree_name = tree.lower()
+ if tree_name not in SPECIFY_TREES:
+ raise ValueError(f"unexpected tree type: {tree}")
if tree_name == 'storage':
return Q(institution=collection.discipline.division.institution)
discipline_query = Q(discipline=collection.discipline)
@@ -53,6 +55,8 @@ def get_search_filters(collection: spmodels.Collection, tree: str):
def get_treedefs(collection: spmodels.Collection, tree_name: str) -> list[tuple[int, int]]:
# Get the appropriate TreeDef based on the Collection and tree_name
+ if tree_name.lower() not in SPECIFY_TREES:
+ raise ValueError(f"unexpected tree type: {tree_name}")
# Mimic the old behavior of limiting the query to the first item for trees other than taxon.
# Even though the queryconstruct can handle trees with multiple types.
diff --git a/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx b/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
index 26f4696c407..5d4d5a3b97e 100644
--- a/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
+++ b/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
@@ -131,6 +131,22 @@ export const notificationRenderers: IR<
>
);
},
+  'query-export-to-web-portal-complete'(notification) {
+    return (
+      <>
+        {notificationsText.queryExportToWebPortalCompleted()}
+        <Link.Success download href={`/static/depository/${notification.payload.file}`}>
+          {notificationsText.download()}
+        </Link.Success>
+      </>
+    );
+  },
'dataset-ownership-transferred'(notification) {
return (
)}
+ {containsResults &&
+ hasPermission('/querybuilder/query', 'export_webportal') && (
+
+ doQueryExport(
+ '/stored_query/exportwebportal/',
+ undefined,
+ undefined,
+ undefined
+ )
+ }
+ >
+ {queryText.createWebPortalExport()}
+
+ )}
>
);
}
diff --git a/specifyweb/frontend/js_src/lib/localization/notifications.ts b/specifyweb/frontend/js_src/lib/localization/notifications.ts
index a9764aef182..07d052bee9c 100644
--- a/specifyweb/frontend/js_src/lib/localization/notifications.ts
+++ b/specifyweb/frontend/js_src/lib/localization/notifications.ts
@@ -134,6 +134,16 @@ export const notificationsText = createDictionary({
'pt-br': 'Exportação da consulta para KML concluída.',
'hr-hr': 'Izvoz upita u KML je završen.',
},
+ queryExportToWebPortalCompleted: {
+ 'en-us': 'Query export to Web Portal completed.',
+ 'ru-ru': 'Экспорт запроса в веб-портал завершен.',
+ 'es-es': 'La exportación de la consulta al Portal Web se completó.',
+ 'fr-fr': 'Exportation de la requête vers le portail Web terminée.',
+ 'uk-ua': 'Експорт запиту до веб-порталу завершено.',
+ 'de-ch': 'Der Abfrageexport zum Webportal wurde abgeschlossen.',
+ 'pt-br': 'A exportação da consulta para o portal web foi concluída.',
+ 'hr-hr': 'Izvoz upita na web portal je dovršen.',
+ },
dataSetOwnershipTransferred: {
'en-us':
' transferred the ownership of the dataset to you.',
diff --git a/specifyweb/frontend/js_src/lib/localization/query.ts b/specifyweb/frontend/js_src/lib/localization/query.ts
index 4336e5e40a2..485156f42ec 100644
--- a/specifyweb/frontend/js_src/lib/localization/query.ts
+++ b/specifyweb/frontend/js_src/lib/localization/query.ts
@@ -363,6 +363,16 @@ export const queryText = createDictionary({
'pt-br': 'Criar KML',
'hr-hr': 'Izradi KML',
},
+ createWebPortalExport: {
+ 'en-us': 'Create Web Portal Export',
+ 'ru-ru': 'Создать экспорт веб-портала',
+ 'es-es': 'Crear exportación de portal web',
+ 'fr-fr': 'Créer une exportation pour le portail Web',
+ 'uk-ua': 'Створити експорт для веб-порталу',
+ 'de-ch': 'Webportal-Export erstellen',
+ 'pt-br': 'Criar exportação para portal web',
+ 'hr-hr': 'Izradi izvoz za web portal',
+ },
createRecordSet: {
'en-us': 'Create {recordSetTable:string}',
'ru-ru': 'Создать {recordSetTable:string}',
From ee80b320226315ed56b267c7214f9065207ee725 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 11:04:03 -0500
Subject: [PATCH 02/24] feat(webportal): add tests
---
.../test_field_specs_from_json.py | 10 +++++++
.../test_views/test_export_web_portal.py | 26 +++++++++++++++++++
2 files changed, 36 insertions(+)
create mode 100644 specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
diff --git a/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py b/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
index c317fa53cf1..84c548d049e 100644
--- a/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
+++ b/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
@@ -108,3 +108,13 @@ def test_static_field_specs(self): # pragma: no cover
# generate_fields_test_str(query_fields, "static_simple_field_spec")
self.assertEqual(static_simple_field_spec, query_fields)
+
+ def test_non_tree_table_does_not_parse_tree_rank(self):
+ table = datamodel.get_table_strict("CollectionObject")
+ stringid = f"{table.tableId}.collectionobject.NotARealField"
+
+ fieldspec = QueryFieldSpec.from_stringid(stringid, False)
+
+ self.assertFalse(fieldspec.contains_tree_rank())
+ self.assertIsNone(fieldspec.tree_rank)
+ self.assertIsNone(fieldspec.get_field())
diff --git a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
new file mode 100644
index 00000000000..87d61360a9a
--- /dev/null
+++ b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
@@ -0,0 +1,26 @@
+from unittest.mock import Mock, patch
+
+from django.test import Client
+
+from specifyweb.backend.stored_queries.tests.tests import SQLAlchemySetup
+
+from .raw_query import get_simple_query
+
+
+class TestExportWebPortal(SQLAlchemySetup):
+ @patch("specifyweb.backend.stored_queries.views.Thread")
+ def test_export(self, thread: Mock):
+ c = Client()
+ c.force_login(self.specifyuser)
+
+ response = c.post(
+ "/stored_query/exportwebportal/",
+ get_simple_query(self.specifyuser),
+ content_type="application/json",
+ )
+
+ self._assertStatusCodeEqual(response, 200)
+ thread.assert_called_once()
+ self.assertTrue(thread.return_value.daemon)
+ thread.return_value.start.assert_called_once()
+ self._assertContentEqual(response, "OK")
From 93bacdbdbe39aa2312ac331fe3d3c76bebf00b67 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 13:04:39 -0500
Subject: [PATCH 03/24] feat(webportal): split into separate file
also adds support for attachments and removes the `web_asset_store.xml` component of the asset server URL
---
.../backend/stored_queries/execution.py | 300 +------------
.../test_views/test_export_web_portal.py | 45 +-
.../stored_queries/web_portal_export.py | 406 ++++++++++++++++++
3 files changed, 462 insertions(+), 289 deletions(-)
create mode 100644 specifyweb/backend/stored_queries/web_portal_export.py
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index f661ee2fa5c..12c700277f3 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -14,6 +14,7 @@
from functools import reduce
from django.conf import settings
+from django.apps import apps
from django.db import transaction
from django.utils import timezone
from specifyweb.backend.inheritance.api import cog_inheritance_post_query_processing, parent_inheritance_post_query_processing
@@ -25,7 +26,7 @@
from sqlalchemy import sql, orm, func, text
from sqlalchemy.sql.expression import asc, desc, insert, literal
-from specifyweb.specify.models_utils.models_by_table_id import get_table_id_by_model_name
+from specifyweb.specify.models_utils.models_by_table_id import get_model_by_table_id, get_table_id_by_model_name
from specifyweb.backend.stored_queries.group_concat import group_by_displayed_fields
from specifyweb.backend.trees.utils import get_search_filters
@@ -34,6 +35,7 @@
from .query_construct import QueryConstruct
from .relative_date_utils import apply_absolute_date
from .field_spec_maps import apply_specify_user_name
+from .web_portal_export import query_to_web_portal_zip as _query_to_web_portal_zip
from specifyweb.backend.notifications.models import Message
from specifyweb.backend.permissions.permissions import check_table_permissions
from specifyweb.specify.models import Loan, Loanpreparation, Loanreturnpreparation, Taxontreedef
@@ -295,190 +297,6 @@ def do_export(spquery, collection, user, filename, exporttype, host):
}))
-def _build_portal_collection_name(collection) -> str:
- return settings.WEB_ATTACHMENT_COLLECTION or collection.collectionname
-
-
-def _build_image_base_url() -> str:
- return os.getenv('ASSET_SERVER_URL') or settings.WEB_ATTACHMENT_URL or ''
-
-
-def _schema_localization_or_empty(collection) -> dict[str, Any]:
- try:
- return get_schema_localization(collection, 0, 'en-us')
- except Exception:
- logger.exception('Failed loading schema localization for web portal export')
- return {}
-
-
-def _clean_cell(value: Any) -> str:
- return re.sub("\r|\n", " ", str(value if value is not None else ''))
-
-
-def _dedupe_name(name: str, used_names: set[str]) -> str:
- candidate = name
- suffix = 2
- while candidate in used_names:
- candidate = f"{name}_{suffix}"
- suffix += 1
- used_names.add(candidate)
- return candidate
-
-
-def _portal_solr_type(query_field: QueryField, collection, user) -> str:
- fieldspec = query_field.fieldspec
- field = fieldspec.get_field()
-
- if field is None or field.is_relationship:
- return 'string'
-
- if fieldspec.table.name == 'CollectionObject' and field.name == 'catalogNumber':
- formatter = get_catalognumber_format(collection, query_field.format_name, user)
- if (
- formatter is not None
- and len(formatter.fields) == 1
- and isinstance(formatter.fields[0], CNNField)
- ):
- return 'pint'
- return 'string'
-
- if field.type in ('java.lang.String', 'text'):
- return 'string'
- if field.type in ('java.util.Date', 'java.sql.Timestamp'):
- return 'string'
- if field.type == 'java.util.Calendar':
- return 'pint' if fieldspec.date_part in {'Day', 'Month', 'Year'} else 'string'
- if field.type in ('java.lang.Integer', 'java.lang.Byte', 'java.lang.Short'):
- return 'pint'
- if field.type == 'java.lang.Long':
- return 'plong'
- if field.type == 'java.lang.Float':
- return 'pfloat'
- if field.type in ('java.lang.Double', 'java.math.BigDecimal'):
- return 'pdouble'
- if field.type == 'java.lang.Boolean':
- return 'string'
- return 'string'
-
-
-def _portal_field_metadata(
- query_field: QueryField,
- caption: str,
- colname: str,
- index: int,
- schema_localization: dict[str, Any],
- collection,
- user,
-) -> dict[str, Any]:
- fieldspec = query_field.fieldspec
- table = fieldspec.table
- field = fieldspec.get_field()
-
- table_key = table.name.lower()
- table_localization = schema_localization.get(table_key, {})
- item_localization = (
- table_localization.get('items', {}).get(field.name.lower(), {})
- if field is not None
- else {}
- )
-
- spfld = field.name if field is not None else table.idFieldName
- field_type = field.type if field is not None else 'java.lang.String'
- field_length = field.length if field is not None and field.length is not None else 255
-
- return {
- 'colname': colname,
- 'solrname': spfld,
- 'solrtype': _portal_solr_type(query_field, collection, user),
- 'title': caption,
- 'type': field_type,
- 'width': field_length,
- 'concept': colname,
- 'concepturl': 'http://rs.tdwg.org/dwc/terms/',
- 'sptable': table_key,
- 'sptabletitle': table_localization.get('name', table.name),
- 'spfld': spfld,
- 'spfldtitle': item_localization.get('name', spfld),
- 'spdescription': item_localization.get('desc', spfld),
- 'colidx': index,
- 'linkify': 'true',
- 'advancedsearch': 'true',
- 'displaycolidx': index,
- }
-
-
-def _simplify_portal_field_metadata(field_meta: dict[str, Any]) -> dict[str, Any]:
- simplified = {
- 'colname': field_meta['colname'],
- 'solrname': field_meta['solrname'],
- 'solrtype': field_meta['solrtype'],
- }
-
- for key in (
- 'title',
- 'type',
- 'width',
- 'concept',
- 'sptable',
- 'sptabletitle',
- 'spfld',
- 'spfldtitle',
- 'colidx',
- 'linkify',
- 'advancedsearch',
- 'displaycolidx',
- 'treeid',
- 'treerank',
- ):
- if key in field_meta:
- simplified[key] = field_meta[key]
-
- return simplified
-
-
-def _make_solr_schema_xml(fields: list[dict[str, Any]]) -> str:
-    lines = [
-        '<?xml version="1.0" encoding="UTF-8" ?>',
-        '<!-- Solr field definitions for the Specify web portal -->',
-    ]
-
-    lines.append(
-        '<field name="contents" type="text_general" indexed="true" stored="true" required="false" multiValued="true"/>'
-    )
-    lines.append(
-        '<field name="geoc" type="location" indexed="true" stored="true" required="false"/>'
-    )
-    lines.append(
-        '<field name="img" type="string" indexed="true" stored="true" required="false"/>'
-    )
-
-    emitted: set[str] = {'contents', 'geoc', 'img'}
-    for field in fields:
-        name = str(field['solrname'])
-        if name in emitted:
-            continue
-        emitted.add(name)
-
-        escaped_name = escape(name)
-        solr_type = escape(str(field['solrtype']))
-        required = 'true' if name == 'spid' else 'false'
-        lines.append(
-            f'<field name="{escaped_name}" type="{solr_type}" indexed="true" stored="true" required="{required}"/>'
-        )
-    return "\n".join(lines) + "\n"
-
-
-def _serialize_portal_data(
- rows: list[list[str]],
- header: list[str],
-) -> str:
- output = StringIO()
- writer = csv.writer(output)
- writer.writerow(header)
- writer.writerows(rows)
- return output.getvalue()
-
-
def query_to_web_portal_zip(
session,
collection,
@@ -490,116 +308,22 @@ def query_to_web_portal_zip(
recordsetid=None,
distinct=False,
):
- set_group_concat_max_len(session.connection())
- query, __ = build_query(
+ return _query_to_web_portal_zip(
session,
collection,
user,
tableid,
field_specs,
- BuildQueryProps(recordsetid=recordsetid, replace_nulls=True, distinct=distinct),
- )
- query = apply_special_post_query_processing(
- query,
- tableid,
- field_specs,
- collection,
- user,
- should_list_query=False,
- )
-
- display_fields = [field_spec for field_spec in field_specs if field_spec.display]
- effective_captions = captions if captions else [
- (
- field_spec.fieldspec.get_field().name
- if field_spec.fieldspec.get_field() is not None
- else field_spec.fieldspec.table.name
- )
- for field_spec in display_fields
- ]
-
- schema_localization = _schema_localization_or_empty(collection)
-
- used_colnames: set[str] = {'spid'}
- used_solrnames: set[str] = {'spid'}
- column_defs: list[tuple[str, str, str, dict[str, Any]]] = []
- for index, (field_spec, caption) in enumerate(
- zip(display_fields, effective_captions, strict=False),
- start=0,
- ):
- trimmed_caption = str(caption).strip()
- base_name = trimmed_caption if trimmed_caption else f'column_{index + 1}'
- colname = _dedupe_name(base_name, used_colnames)
-
- field = field_spec.fieldspec.get_field()
- if field is not None:
- base_solrname = field.name
- table_prefix = field_spec.fieldspec.table.name.lower()
- else:
- base_solrname = field_spec.fieldspec.table.idFieldName
- table_prefix = field_spec.fieldspec.table.name.lower()
-
- if base_solrname in used_solrnames:
- solrname = _dedupe_name(f'{table_prefix}_{base_solrname}', used_solrnames)
- else:
- solrname = _dedupe_name(base_solrname, used_solrnames)
-
- metadata = _portal_field_metadata(
- field_spec,
- trimmed_caption if trimmed_caption else colname,
- colname,
- index,
- schema_localization,
- collection,
- user,
- )
- metadata['solrname'] = solrname
- column_defs.append((colname, solrname, metadata['title'], metadata))
-
- metadata_rows: list[dict[str, Any]] = [
- {'colname': 'spid', 'solrname': 'spid', 'solrtype': 'string'},
- *[
- _simplify_portal_field_metadata(column_def[3])
- for column_def in column_defs
- ],
- {'colname': 'img', 'solrname': 'img', 'solrtype': 'string', 'title': 'image'},
- ]
-
- output_rows: list[list[str]] = []
- data_rows = query if isinstance(query, list) else query.yield_per(1)
- for row in data_rows:
- raw_id = row[0] if len(row) > 0 else ''
- spid = str(uuid.uuid5(uuid.NAMESPACE_URL, f'{tableid}:{raw_id}'))
- display_values = row[1:] if len(row) > 1 else []
- cleaned_values = [_clean_cell(value) for value in display_values]
- contents = '\t'.join(cleaned_values)
- output_rows.append([spid, contents, '', '', *cleaned_values])
-
- header = ['spid', 'contents', 'img', 'geoc', *[column_def[1] for column_def in column_defs]]
- portal_data = _serialize_portal_data(output_rows, header)
- flds_json = json.dumps(metadata_rows, indent=2)
- solr_schema = _make_solr_schema_xml(metadata_rows)
-
- image_info_fields = [column_def[1] for column_def in column_defs[:2]]
- portal_instance_settings = json.dumps(
- {
- 'portalInstance': str(uuid.uuid4()),
- 'collectionName': _build_portal_collection_name(collection),
- 'imageBaseUrl': _build_image_base_url(),
- 'imageInfoFlds': ' '.join(image_info_fields),
- },
- indent=2,
+ path,
+ captions,
+ build_query_fn=build_query,
+ build_query_props_cls=BuildQueryProps,
+ apply_special_post_query_processing_fn=apply_special_post_query_processing,
+ set_group_concat_max_len_fn=set_group_concat_max_len,
+ recordsetid=recordsetid,
+ distinct=distinct,
)
- with ZipFile(path, 'w', compression=ZIP_DEFLATED) as archive:
- archive.writestr('PortalFiles/PortalData.csv', portal_data)
- archive.writestr('PortalFiles/flds.json', flds_json)
- archive.writestr(
- 'PortalFiles/PortalInstanceSetting.json',
- portal_instance_settings,
- )
- archive.writestr('PortalFiles/SolrFldSchema.xml', solr_schema)
-
# def stored_query_to_csv(query_id, collection, user, path):
# """Executes a query from the Spquery table with the given id and send
# the results to a CSV file at path.
diff --git a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
index 87d61360a9a..9430d0cc43b 100644
--- a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
+++ b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
@@ -1,4 +1,4 @@
-from unittest.mock import Mock, patch
+from unittest.mock import MagicMock, Mock, patch
from django.test import Client
@@ -24,3 +24,46 @@ def test_export(self, thread: Mock):
self.assertTrue(thread.return_value.daemon)
thread.return_value.start.assert_called_once()
self._assertContentEqual(response, "OK")
+
+ def test_portal_attachment_map(self):
+ from specifyweb.backend.stored_queries import execution
+
+ class FakeAttachment:
+ id = 5291
+ attachmentlocation = "sp6896513492722436219.att.JPG"
+ origfilename = "29432.JPG"
+
+ class FakeJoinRecord:
+ collectionobject_id = 123
+ attachment = FakeAttachment()
+
+ class FakeJoinQuery:
+ def select_related(self, *_args, **_kwargs):
+ return [FakeJoinRecord()]
+
+ class FakeJoinManager:
+ def __init__(self):
+ self.filter_kwargs = None
+
+ def filter(self, **kwargs):
+ self.filter_kwargs = kwargs
+ return FakeJoinQuery()
+
+ fake_join_manager = FakeJoinManager()
+ fake_base_model = type("Collectionobject", (), {"_meta": MagicMock(app_label="specifyweb")})
+ fake_table = MagicMock()
+ fake_table.attachments_field = MagicMock()
+
+ with patch.object(execution.datamodel, "get_table_by_id", return_value=fake_table), patch.object(
+ execution, "get_model_by_table_id", return_value=fake_base_model
+ ), patch.object(execution.apps, "get_model", return_value=type("Collectionobjectattachment", (), {"objects": fake_join_manager})):
+ result = execution._portal_attachment_map(1, [123])
+
+ self.assertEqual(
+ fake_join_manager.filter_kwargs,
+ {"collectionobject_id__in": [123]},
+ )
+ self.assertEqual(
+ result["123"],
+ '[{AttachmentID:5291,AttachmentLocation:"sp6896513492722436219.att.JPG",Title:"29432.JPG"}]',
+ )
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
new file mode 100644
index 00000000000..b3577b22464
--- /dev/null
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -0,0 +1,406 @@
+import csv
+import json
+import logging
+import os
+import re
+import uuid
+from collections import defaultdict
+from io import StringIO
+from typing import Any, Callable
+from urllib.parse import urlsplit, urlunsplit
+from xml.sax.saxutils import escape
+from zipfile import ZIP_DEFLATED, ZipFile
+
+from django.apps import apps
+from django.conf import settings
+
+from specifyweb.backend.context.schema_localization import get_schema_localization
+from specifyweb.specify.datamodel import datamodel
+from specifyweb.specify.models_utils.models_by_table_id import get_model_by_table_id
+from specifyweb.specify.utils.uiformatters import CNNField, get_catalognumber_format
+
+
+logger = logging.getLogger(__name__)
+_ASSET_STORE_FILENAME = 'web_asset_store.xml'
+
+
+def _build_portal_collection_name(collection) -> str:
+ return (
+ os.getenv('WEB_ATTACHMENT_COLLECTION')
+ or settings.WEB_ATTACHMENT_COLLECTION
+ or collection.collectionname
+ )
+
+
+def _strip_asset_store_xml(url: str) -> str:
+ if not url:
+ return ''
+
+ parsed = urlsplit(url)
+ path = parsed.path or ''
+ trimmed_path = path.rstrip('/')
+ path_parts = trimmed_path.split('/') if trimmed_path else []
+
+ if not path_parts or path_parts[-1].lower() != _ASSET_STORE_FILENAME:
+ return url
+
+ base_path = '/'.join(path_parts[:-1]).rstrip('/')
+ return urlunsplit((parsed.scheme, parsed.netloc, base_path, parsed.query, parsed.fragment))
+
+
+def _build_portal_image_base_url() -> str:
+ raw_url = (os.getenv('ASSET_SERVER_URL') or settings.WEB_ATTACHMENT_URL or '').strip()
+ return _strip_asset_store_xml(raw_url)
+
+
+def _schema_localization_or_empty(collection) -> dict[str, Any]:
+ try:
+ return get_schema_localization(collection, 0, 'en-us')
+ except Exception:
+ logger.exception('Failed loading schema localization for web portal export')
+ return {}
+
+
+def _clean_cell(value: Any) -> str:
+ return re.sub("\r|\n", " ", str(value if value is not None else ''))
+
+
+def _clean_portal_attachment_text(value: Any) -> str:
+ return re.sub(r'\r|\n|"', ' ', str(value if value is not None else '')).strip()
+
+
+def _portal_attachment_entry(attachment) -> str:
+ attachment_location = _clean_portal_attachment_text(attachment.attachmentlocation)
+ title = _clean_portal_attachment_text(
+ os.path.basename(attachment.origfilename or attachment.attachmentlocation or '')
+ )
+ return (
+ '{'
+ f'AttachmentID:{attachment.id},'
+ f'AttachmentLocation:"{attachment_location}",'
+ f'Title:"{title}"'
+ '}'
+ )
+
+
+def _portal_attachment_map(tableid: int, record_ids: list[Any]) -> dict[Any, str]:
+ if not record_ids:
+ return {}
+
+ table = datamodel.get_table_by_id(tableid, strict=True)
+ if table.attachments_field is None:
+ return {}
+
+ base_model = get_model_by_table_id(tableid)
+ join_model_name = base_model.__name__ + 'attachment'
+ join_model = apps.get_model(base_model._meta.app_label, join_model_name)
+ record_id_field = f'{base_model.__name__.lower()}_id'
+
+ join_records = join_model.objects.filter(**{f'{record_id_field}__in': record_ids}).select_related('attachment')
+ attachment_entries_by_record_id: dict[str, list[str]] = defaultdict(list)
+
+ for join_record in join_records:
+ attachment = join_record.attachment
+ if attachment.attachmentlocation is None:
+ continue
+ record_key = str(getattr(join_record, record_id_field))
+ attachment_entries_by_record_id[record_key].append(_portal_attachment_entry(attachment))
+
+ return {
+ record_id: '[' + ', '.join(entries) + ']'
+ for record_id, entries in attachment_entries_by_record_id.items()
+ }
+
+
+def _dedupe_name(name: str, used_names: set[str]) -> str:
+ candidate = name
+ suffix = 2
+ while candidate in used_names:
+ candidate = f"{name}_{suffix}"
+ suffix += 1
+ used_names.add(candidate)
+ return candidate
+
+
+def _portal_solr_type(query_field, collection, user) -> str:
+ fieldspec = query_field.fieldspec
+ field = fieldspec.get_field()
+
+ if field is None or field.is_relationship:
+ return 'string'
+
+ if fieldspec.table.name == 'CollectionObject' and field.name == 'catalogNumber':
+ formatter = get_catalognumber_format(collection, query_field.format_name, user)
+ if (
+ formatter is not None
+ and len(formatter.fields) == 1
+ and isinstance(formatter.fields[0], CNNField)
+ ):
+ return 'pint'
+ return 'string'
+
+ if field.type in ('java.lang.String', 'text'):
+ return 'string'
+ if field.type in ('java.util.Date', 'java.sql.Timestamp'):
+ return 'string'
+ if field.type == 'java.util.Calendar':
+ return 'pint' if fieldspec.date_part in {'Day', 'Month', 'Year'} else 'string'
+ if field.type in ('java.lang.Integer', 'java.lang.Byte', 'java.lang.Short'):
+ return 'pint'
+ if field.type == 'java.lang.Long':
+ return 'plong'
+ if field.type == 'java.lang.Float':
+ return 'pfloat'
+ if field.type in ('java.lang.Double', 'java.math.BigDecimal'):
+ return 'pdouble'
+ if field.type == 'java.lang.Boolean':
+ return 'string'
+ return 'string'
+
+
+def _portal_field_metadata(
+ query_field,
+ caption: str,
+ colname: str,
+ index: int,
+ schema_localization: dict[str, Any],
+ collection,
+ user,
+) -> dict[str, Any]:
+ fieldspec = query_field.fieldspec
+ table = fieldspec.table
+ field = fieldspec.get_field()
+
+ table_key = table.name.lower()
+ table_localization = schema_localization.get(table_key, {})
+ item_localization = (
+ table_localization.get('items', {}).get(field.name.lower(), {})
+ if field is not None
+ else {}
+ )
+
+ spfld = field.name if field is not None else table.idFieldName
+ field_type = field.type if field is not None else 'java.lang.String'
+ field_length = field.length if field is not None and field.length is not None else 255
+ solr_type = _portal_solr_type(query_field, collection, user)
+ is_linkified = solr_type == 'string' and field_type in ('java.lang.String', 'text')
+
+ return {
+ 'colname': colname,
+ 'solrname': spfld,
+ 'solrtype': solr_type,
+ 'title': caption,
+ 'type': field_type,
+ 'width': field_length,
+ 'concept': colname,
+ 'concepturl': 'http://rs.tdwg.org/dwc/terms/',
+ 'sptable': table_key,
+ 'sptabletitle': table_localization.get('name', table.name),
+ 'spfld': spfld,
+ 'spfldtitle': item_localization.get('name', spfld),
+ 'spdescription': item_localization.get('desc', spfld),
+ 'colidx': index,
+ 'linkify': is_linkified,
+ 'advancedsearch': True,
+ 'displaycolidx': index,
+ }
+
+
+def _simplify_portal_field_metadata(field_meta: dict[str, Any]) -> dict[str, Any]:
+ simplified = {
+ 'colname': field_meta['colname'],
+ 'solrname': field_meta['solrname'],
+ 'solrtype': field_meta['solrtype'],
+ }
+
+ for key in (
+ 'title',
+ 'type',
+ 'width',
+ 'concept',
+ 'sptable',
+ 'sptabletitle',
+ 'spfld',
+ 'spfldtitle',
+ 'colidx',
+ 'linkify',
+ 'advancedsearch',
+ 'displaycolidx',
+ 'treeid',
+ 'treerank',
+ ):
+ if key in field_meta:
+ simplified[key] = field_meta[key]
+
+ return simplified
+
+
+def _make_solr_schema_xml(fields: list[dict[str, Any]]) -> str:
+    lines = [
+        '<?xml version="1.0" encoding="UTF-8" ?>',
+        '<!-- Solr field definitions for the Specify web portal index. -->',
+    ]
+
+    lines.append(
+        '<field name="contents" type="text_general" indexed="true" stored="true" required="false"/>'
+    )
+    lines.append(
+        '<field name="geoc" type="location" indexed="true" stored="true" required="false"/>'
+    )
+    lines.append(
+        '<field name="img" type="string" indexed="false" stored="true" required="false"/>'
+    )
+
+    emitted: set[str] = {'contents', 'geoc', 'img'}
+    for field in fields:
+        name = str(field['solrname'])
+        if name in emitted:
+            continue
+        emitted.add(name)
+
+        escaped_name = escape(name)
+        solr_type = 'string' if name == 'spid' else escape(str(field['solrtype']))
+        required = 'true' if name == 'spid' else 'false'
+        lines.append(
+            f'<field name="{escaped_name}" type="{solr_type}" indexed="true" stored="true" required="{required}"/>'
+        )
+    return "\n".join(lines) + "\n"
+
+
+def _serialize_portal_data(
+ rows: list[list[str]],
+ header: list[str],
+) -> str:
+ output = StringIO()
+ writer = csv.writer(output)
+ writer.writerow(header)
+ writer.writerows(rows)
+ return output.getvalue()
+
+
+def query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ captions,
+ build_query_fn: Callable[..., tuple[Any, Any]],
+ build_query_props_cls,
+ apply_special_post_query_processing_fn: Callable[..., Any],
+ set_group_concat_max_len_fn: Callable[[Any], None],
+ recordsetid=None,
+ distinct=False,
+):
+ set_group_concat_max_len_fn(session.connection())
+ query, __ = build_query_fn(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ build_query_props_cls(recordsetid=recordsetid, replace_nulls=True, distinct=distinct),
+ )
+ query = apply_special_post_query_processing_fn(
+ query,
+ tableid,
+ field_specs,
+ collection,
+ user,
+ should_list_query=False,
+ )
+
+ display_fields = [field_spec for field_spec in field_specs if field_spec.display]
+ effective_captions = captions if captions else [
+ (
+ field_spec.fieldspec.get_field().name
+ if field_spec.fieldspec.get_field() is not None
+ else field_spec.fieldspec.table.name
+ )
+ for field_spec in display_fields
+ ]
+
+ schema_localization = _schema_localization_or_empty(collection)
+
+ used_colnames: set[str] = {'spid'}
+ used_solrnames: set[str] = {'spid'}
+ column_defs: list[tuple[str, str, str, dict[str, Any]]] = []
+ for index, (field_spec, caption) in enumerate(
+ zip(display_fields, effective_captions, strict=False),
+ start=0,
+ ):
+ trimmed_caption = str(caption).strip()
+ base_name = trimmed_caption if trimmed_caption else f'column_{index + 1}'
+ colname = _dedupe_name(base_name, used_colnames)
+
+ field = field_spec.fieldspec.get_field()
+ if field is not None:
+ base_solrname = field.name
+ table_prefix = field_spec.fieldspec.table.name.lower()
+ else:
+ base_solrname = field_spec.fieldspec.table.idFieldName
+ table_prefix = field_spec.fieldspec.table.name.lower()
+
+ if base_solrname in used_solrnames:
+ solrname = _dedupe_name(f'{table_prefix}_{base_solrname}', used_solrnames)
+ else:
+ solrname = _dedupe_name(base_solrname, used_solrnames)
+
+ metadata = _portal_field_metadata(
+ field_spec,
+ trimmed_caption if trimmed_caption else colname,
+ colname,
+ index,
+ schema_localization,
+ collection,
+ user,
+ )
+ metadata['solrname'] = solrname
+ column_defs.append((colname, solrname, metadata['title'], metadata))
+
+ metadata_rows: list[dict[str, Any]] = [
+ {'colname': 'spid', 'solrname': 'spid', 'solrtype': 'int'},
+ *[
+ _simplify_portal_field_metadata(column_def[3])
+ for column_def in column_defs
+ ],
+ {'colname': 'img', 'solrname': 'img', 'solrtype': 'string', 'title': 'image'},
+ ]
+
+ output_rows: list[list[str]] = []
+ data_rows = query if isinstance(query, list) else list(query.yield_per(1))
+ portal_attachments = _portal_attachment_map(tableid, [row[0] for row in data_rows])
+ for row in data_rows:
+ raw_id = row[0] if len(row) > 0 else ''
+ spid = str(uuid.uuid5(uuid.NAMESPACE_URL, f'{tableid}:{raw_id}'))
+ display_values = row[1:] if len(row) > 1 else []
+ cleaned_values = [_clean_cell(value) for value in display_values]
+ contents = '\t'.join(cleaned_values)
+ img = portal_attachments.get(str(raw_id), '')
+ output_rows.append([spid, contents, img, '', *cleaned_values])
+
+ header = ['spid', 'contents', 'img', 'geoc', *[column_def[1] for column_def in column_defs]]
+ portal_data = _serialize_portal_data(output_rows, header)
+ flds_json = json.dumps(metadata_rows, indent=2)
+ solr_schema = _make_solr_schema_xml(metadata_rows)
+
+ image_info_fields = [column_def[1] for column_def in column_defs[:2]]
+ portal_instance_settings = json.dumps(
+ {
+ 'portalInstance': str(uuid.uuid4()),
+ 'collectionName': _build_portal_collection_name(collection),
+ 'imageBaseUrl': _build_portal_image_base_url(),
+ 'imageInfoFlds': ' '.join(image_info_fields),
+ },
+ indent=2,
+ )
+
+ with ZipFile(path, 'w', compression=ZIP_DEFLATED) as archive:
+ archive.writestr('PortalFiles/PortalData.csv', portal_data)
+ archive.writestr('PortalFiles/flds.json', flds_json)
+ archive.writestr(
+ 'PortalFiles/PortalInstanceSetting.json',
+ portal_instance_settings,
+ )
+ archive.writestr('PortalFiles/SolrFldSchema.xml', solr_schema)
From 7969550868cd2ba314ca79603a8b03274c181104 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 19:32:57 -0500
Subject: [PATCH 04/24] feat(webportal): build geoc values
---
.../stored_queries/web_portal_export.py | 60 ++++++++++++++++++-
1 file changed, 59 insertions(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index b3577b22464..e2e956940c8 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -278,6 +278,56 @@ def _serialize_portal_data(
return output.getvalue()
+def _find_geoc_field_indexes(
+ column_defs: list[tuple[str, str, str, dict[str, Any]]],
+) -> tuple[int | None, int | None, int | None, int | None]:
+ lat1_idx = None
+ lon1_idx = None
+ lat2_idx = None
+ lon2_idx = None
+
+ for index, (_, __, ___, metadata) in enumerate(column_defs):
+ if str(metadata.get('sptable', '')).lower() != 'locality':
+ continue
+
+ spfld = str(metadata.get('spfld', '')).lower()
+ if spfld == 'latitude1' and lat1_idx is None:
+ lat1_idx = index
+ elif spfld == 'longitude1' and lon1_idx is None:
+ lon1_idx = index
+ elif spfld == 'latitude2' and lat2_idx is None:
+ lat2_idx = index
+ elif spfld == 'longitude2' and lon2_idx is None:
+ lon2_idx = index
+
+ return lat1_idx, lon1_idx, lat2_idx, lon2_idx
+
+
+def _build_geoc_value(
+ cleaned_values: list[str],
+ lat1_idx: int | None,
+ lon1_idx: int | None,
+ lat2_idx: int | None,
+ lon2_idx: int | None,
+) -> str:
+ def _pair_value(lat_idx: int | None, lon_idx: int | None) -> str:
+ if lat_idx is None or lon_idx is None:
+ return ''
+ if lat_idx >= len(cleaned_values) or lon_idx >= len(cleaned_values):
+ return ''
+
+ latitude = cleaned_values[lat_idx].strip()
+ longitude = cleaned_values[lon_idx].strip()
+ if not latitude or not longitude:
+ return ''
+ return f'{latitude} {longitude}'
+
+ primary = _pair_value(lat1_idx, lon1_idx)
+ if primary:
+ return primary
+ return _pair_value(lat2_idx, lon2_idx)
+
+
def query_to_web_portal_zip(
session,
collection,
@@ -369,6 +419,7 @@ def query_to_web_portal_zip(
]
output_rows: list[list[str]] = []
+ geoc_lat1_idx, geoc_lon1_idx, geoc_lat2_idx, geoc_lon2_idx = _find_geoc_field_indexes(column_defs)
data_rows = query if isinstance(query, list) else list(query.yield_per(1))
portal_attachments = _portal_attachment_map(tableid, [row[0] for row in data_rows])
for row in data_rows:
@@ -378,7 +429,14 @@ def query_to_web_portal_zip(
cleaned_values = [_clean_cell(value) for value in display_values]
contents = '\t'.join(cleaned_values)
img = portal_attachments.get(str(raw_id), '')
- output_rows.append([spid, contents, img, '', *cleaned_values])
+ geoc = _build_geoc_value(
+ cleaned_values,
+ geoc_lat1_idx,
+ geoc_lon1_idx,
+ geoc_lat2_idx,
+ geoc_lon2_idx,
+ )
+ output_rows.append([spid, contents, img, geoc, *cleaned_values])
header = ['spid', 'contents', 'img', 'geoc', *[column_def[1] for column_def in column_defs]]
portal_data = _serialize_portal_data(output_rows, header)
From f60e0d1ce8f4134926593cb7b7220a475562631a Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 23:02:49 -0500
Subject: [PATCH 05/24] fix(webportal): build proper metadata
adds some small comments here as well
---
.../stored_queries/web_portal_export.py | 123 ++++++++++++++++--
1 file changed, 110 insertions(+), 13 deletions(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index e2e956940c8..3490bc110f9 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -6,6 +6,7 @@
import uuid
from collections import defaultdict
from io import StringIO
+from itertools import zip_longest
from typing import Any, Callable
from urllib.parse import urlsplit, urlunsplit
from xml.sax.saxutils import escape
@@ -21,10 +22,17 @@
logger = logging.getLogger(__name__)
+# Filename used by asset server URLs that should be stripped from portal asset paths.
_ASSET_STORE_FILENAME = 'web_asset_store.xml'
def _build_portal_collection_name(collection) -> str:
+ """Return the portal collection name override if configured.
+
+ The export uses either the env var, settings value, or the actual
+ collection name.
+ """
+
return (
os.getenv('WEB_ATTACHMENT_COLLECTION')
or settings.WEB_ATTACHMENT_COLLECTION
@@ -33,6 +41,11 @@ def _build_portal_collection_name(collection) -> str:
def _strip_asset_store_xml(url: str) -> str:
+ """Remove the asset store file path from attachment URLs.
+
+ The portal export should expose a clean base asset path, not the
+ internal web asset store XML filename itself.
+ """
if not url:
return ''
@@ -49,6 +62,8 @@ def _strip_asset_store_xml(url: str) -> str:
def _build_portal_image_base_url() -> str:
+ """Build the base URL that portal image attachments should use."""
+
raw_url = (os.getenv('ASSET_SERVER_URL') or settings.WEB_ATTACHMENT_URL or '').strip()
return _strip_asset_store_xml(raw_url)
@@ -84,6 +99,11 @@ def _portal_attachment_entry(attachment) -> str:
def _portal_attachment_map(tableid: int, record_ids: list[Any]) -> dict[Any, str]:
+ """Collect attachment metadata for portal rows by record id.
+
+ The portal CSV stores image attachments as JSON strings in the "img"
+    column, so we prebuild a mapping from record IDs to attachments.
+ """
if not record_ids:
return {}
@@ -123,9 +143,11 @@ def _dedupe_name(name: str, used_names: set[str]) -> str:
def _portal_solr_type(query_field, collection, user) -> str:
+ """Map a query field to the Solr field type used in portal metadata."""
fieldspec = query_field.fieldspec
field = fieldspec.get_field()
+ # Relationship fields are always emitted as strings for portal search.
if field is None or field.is_relationship:
return 'string'
@@ -167,6 +189,11 @@ def _portal_field_metadata(
collection,
user,
) -> dict[str, Any]:
+ """Build the metadata row for one exported portal field.
+
+ This metadata is written to flds.json and is consumed by the portal
+ frontend to build field definitions, sorting, display labels, and linkification.
+ """
fieldspec = query_field.fieldspec
table = fieldspec.table
field = fieldspec.get_field()
@@ -180,7 +207,10 @@ def _portal_field_metadata(
)
spfld = field.name if field is not None else table.idFieldName
- field_type = field.type if field is not None else 'java.lang.String'
+ if field is not None and field.is_relationship:
+ field_type = 'java.lang.String'
+ else:
+ field_type = field.type if field is not None else 'java.lang.String'
field_length = field.length if field is not None and field.length is not None else 255
solr_type = _portal_solr_type(query_field, collection, user)
is_linkified = solr_type == 'string' and field_type in ('java.lang.String', 'text')
@@ -207,6 +237,7 @@ def _portal_field_metadata(
def _simplify_portal_field_metadata(field_meta: dict[str, Any]) -> dict[str, Any]:
+ """Create the metadata for portal fields."""
simplified = {
'colname': field_meta['colname'],
'solrname': field_meta['solrname'],
@@ -236,6 +267,10 @@ def _simplify_portal_field_metadata(field_meta: dict[str, Any]) -> dict[str, Any
def _make_solr_schema_xml(fields: list[dict[str, Any]]) -> str:
+ """Create a minimal Solr schema for exported portal fields.
+
+ It defines the fields that the portal will index and search.
+ """
lines = [
         '<?xml version="1.0" encoding="UTF-8" ?>',
         '<!-- Solr field definitions for the Specify web portal index. -->',
@@ -271,6 +306,11 @@ def _serialize_portal_data(
rows: list[list[str]],
header: list[str],
) -> str:
+ """Output rows to CSV.
+
+ The web portal expects a standard CSV file with a header row followed by
+ one row per portal record.
+ """
output = StringIO()
writer = csv.writer(output)
writer.writerow(header)
@@ -281,12 +321,14 @@ def _serialize_portal_data(
def _find_geoc_field_indexes(
column_defs: list[tuple[str, str, str, dict[str, Any]]],
) -> tuple[int | None, int | None, int | None, int | None]:
+ """Locate latitude/longitude columns for geocoding the portal row."""
lat1_idx = None
lon1_idx = None
lat2_idx = None
lon2_idx = None
for index, (_, __, ___, metadata) in enumerate(column_defs):
+ # Only locality fields are relevant for geocoding.
if str(metadata.get('sptable', '')).lower() != 'locality':
continue
@@ -310,6 +352,10 @@ def _build_geoc_value(
lat2_idx: int | None,
lon2_idx: int | None,
) -> str:
+ """Build the 'geoc' column value for portal mapping.
+
+ The portal uses the first valid latitude/longitude pair it finds.
+ """
def _pair_value(lat_idx: int | None, lon_idx: int | None) -> str:
if lat_idx is None or lon_idx is None:
return ''
@@ -343,6 +389,11 @@ def query_to_web_portal_zip(
recordsetid=None,
distinct=False,
):
+ """Export a stored query as a web portal ZIP package.
+
+ This writes PortalData.csv, flds.json, PortalInstanceSetting.json, and
+ SolrFldSchema.xml into the destination ZIP file.
+ """
set_group_concat_max_len_fn(session.connection())
query, __ = build_query_fn(
session,
@@ -362,14 +413,32 @@ def query_to_web_portal_zip(
)
display_fields = [field_spec for field_spec in field_specs if field_spec.display]
- effective_captions = captions if captions else [
- (
- field_spec.fieldspec.get_field().name
- if field_spec.fieldspec.get_field() is not None
- else field_spec.fieldspec.table.name
- )
- for field_spec in display_fields
- ]
+
+ # Match the exported captions to the actual displayed fields. Captions may
+ # be provided for all query fields, but only display fields are exported.
+ if captions and isinstance(captions, list):
+ if len(captions) == len(display_fields):
+ effective_captions = captions
+ elif len(captions) == len(field_specs):
+ effective_captions = [
+ caption
+ for field_spec, caption in zip(field_specs, captions)
+ if field_spec.display
+ ]
+ else:
+ effective_captions = captions[: len(display_fields)]
+ else:
+ effective_captions = []
+
+ if len(effective_captions) != len(display_fields):
+ effective_captions = [
+ (
+ field_spec.fieldspec.get_field().name
+ if field_spec.fieldspec.get_field() is not None
+ else field_spec.fieldspec.table.name
+ )
+ for field_spec in display_fields
+ ]
schema_localization = _schema_localization_or_empty(collection)
@@ -377,7 +446,7 @@ def query_to_web_portal_zip(
used_solrnames: set[str] = {'spid'}
column_defs: list[tuple[str, str, str, dict[str, Any]]] = []
for index, (field_spec, caption) in enumerate(
- zip(display_fields, effective_captions, strict=False),
+ zip_longest(display_fields, effective_captions, fillvalue=''),
start=0,
):
trimmed_caption = str(caption).strip()
@@ -409,23 +478,49 @@ def query_to_web_portal_zip(
metadata['solrname'] = solrname
column_defs.append((colname, solrname, metadata['title'], metadata))
+ # Build the JSON metadata rows used by the portal's field definition store.
+ # The fixed fields spid and img are always included, plus one row per display field.
metadata_rows: list[dict[str, Any]] = [
- {'colname': 'spid', 'solrname': 'spid', 'solrtype': 'int'},
+ {
+ 'colname': 'spid',
+ 'solrname': 'spid',
+ 'solrtype': 'int',
+ 'title': 'spid',
+ 'linkify': False,
+ 'colidx': 0,
+ 'displaycolidx': 0,
+ },
*[
_simplify_portal_field_metadata(column_def[3])
for column_def in column_defs
],
- {'colname': 'img', 'solrname': 'img', 'solrtype': 'string', 'title': 'image'},
+ {
+ 'colname': 'img',
+ 'solrname': 'img',
+ 'solrtype': 'string',
+ 'title': 'image',
+ 'linkify': False,
+ 'colidx': len(column_defs) + 1,
+ 'displaycolidx': len(column_defs) + 1,
+ },
]
output_rows: list[list[str]] = []
geoc_lat1_idx, geoc_lon1_idx, geoc_lat2_idx, geoc_lon2_idx = _find_geoc_field_indexes(column_defs)
data_rows = query if isinstance(query, list) else list(query.yield_per(1))
portal_attachments = _portal_attachment_map(tableid, [row[0] for row in data_rows])
+ # The portal frontend expects each row to have the same number of values as the field metadata.
+ # If the query returns too few values, pad with empty strings; if it returns too many,
+ # truncate extras so the CSV header and row data remain aligned.
+ expected_values = len(column_defs)
for row in data_rows:
raw_id = row[0] if len(row) > 0 else ''
spid = str(uuid.uuid5(uuid.NAMESPACE_URL, f'{tableid}:{raw_id}'))
- display_values = row[1:] if len(row) > 1 else []
+ display_values = list(row[1:] if len(row) > 1 else [])
+ if len(display_values) < expected_values:
+ display_values.extend([''] * (expected_values - len(display_values)))
+ elif len(display_values) > expected_values:
+ display_values = display_values[:expected_values]
cleaned_values = [_clean_cell(value) for value in display_values]
contents = '\t'.join(cleaned_values)
img = portal_attachments.get(str(raw_id), '')
@@ -440,7 +535,9 @@ def query_to_web_portal_zip(
header = ['spid', 'contents', 'img', 'geoc', *[column_def[1] for column_def in column_defs]]
portal_data = _serialize_portal_data(output_rows, header)
+ # flds.json drives the portal's field definitions and display metadata.
flds_json = json.dumps(metadata_rows, indent=2)
+ # SolrFldSchema.xml is a minimal schema fragment for the portal's Solr index.
solr_schema = _make_solr_schema_xml(metadata_rows)
image_info_fields = [column_def[1] for column_def in column_defs[:2]]
From f3e181ad824cf9fb170bab24c9a6821f9ef1ec70 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 23:21:37 -0500
Subject: [PATCH 06/24] fix(webportal): remove unnecessary +1
---
specifyweb/backend/stored_queries/web_portal_export.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index 3490bc110f9..05ddbbbfe4c 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -500,8 +500,8 @@ def query_to_web_portal_zip(
'solrtype': 'string',
'title': 'image',
'linkify': False,
- 'colidx': len(column_defs) + 1,
- 'displaycolidx': len(column_defs) + 1,
+ 'colidx': len(column_defs),
+ 'displaycolidx': len(column_defs),
},
]
From 0a83119c7796657145bf120d3646576a15dfd33a Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 23:32:26 -0500
Subject: [PATCH 07/24] chore(webportal): improve naming
---
specifyweb/backend/stored_queries/urls.py | 2 +-
specifyweb/backend/stored_queries/views.py | 6 +++---
.../js_src/lib/components/Permissions/definitions.ts | 2 +-
.../js_src/lib/components/QueryBuilder/Export.tsx | 4 ++--
specifyweb/frontend/js_src/lib/localization/query.ts | 11 ++---------
5 files changed, 9 insertions(+), 16 deletions(-)
diff --git a/specifyweb/backend/stored_queries/urls.py b/specifyweb/backend/stored_queries/urls.py
index 43948a5fcc3..38dfc774607 100644
--- a/specifyweb/backend/stored_queries/urls.py
+++ b/specifyweb/backend/stored_queries/urls.py
@@ -7,7 +7,7 @@
path('ephemeral/', views.ephemeral),
path('exportcsv/', views.export_csv),
path('exportkml/', views.export_kml),
- path('exportwebportal/', views.export_web_portal),
+ path('exportwebportal/', views.export_to_web_portal),
path('make_recordset/', views.make_recordset),
path('merge_recordsets/', views.merge_recordsets),
path('return_loan_preps/', views.return_loan_preps),
diff --git a/specifyweb/backend/stored_queries/views.py b/specifyweb/backend/stored_queries/views.py
index 4a1312febd1..8fee235a908 100644
--- a/specifyweb/backend/stored_queries/views.py
+++ b/specifyweb/backend/stored_queries/views.py
@@ -34,7 +34,7 @@ class QueryBuilderPt(PermissionTarget):
execute = PermissionTargetAction()
export_csv = PermissionTargetAction()
export_kml = PermissionTargetAction()
- export_webportal = PermissionTargetAction()
+ export_to_web_portal = PermissionTargetAction()
create_recordset = PermissionTargetAction()
def value_from_request(field, get):
@@ -207,11 +207,11 @@ def export_kml(request):
@require_POST
@login_maybe_required
@never_cache
-def export_web_portal(request):
+def export_to_web_portal(request):
"""Executes and returns as ZIP the web portal export package for the query provided as JSON in the POST body."""
check_permission_targets(request.specify_collection.id, request.specify_user.id, [
QueryBuilderPt.execute,
- QueryBuilderPt.export_webportal,
+ QueryBuilderPt.export_to_web_portal,
])
try:
spquery = json.load(request)
diff --git a/specifyweb/frontend/js_src/lib/components/Permissions/definitions.ts b/specifyweb/frontend/js_src/lib/components/Permissions/definitions.ts
index f6c09ba396d..3dbb7a13275 100644
--- a/specifyweb/frontend/js_src/lib/components/Permissions/definitions.ts
+++ b/specifyweb/frontend/js_src/lib/components/Permissions/definitions.ts
@@ -65,7 +65,7 @@ export const operationPolicies = {
'execute',
'export_csv',
'export_kml',
- 'export_webportal',
+ 'export_to_web_portal',
'create_recordset',
],
'/workbench/dataset': [
diff --git a/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx b/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
index 0e500d1ffb8..6928aa5cf2e 100644
--- a/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
+++ b/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
@@ -194,7 +194,7 @@ export function QueryExportButtons({
)}
{containsResults &&
- hasPermission('/querybuilder/query', 'export_webportal') && (
+ hasPermission('/querybuilder/query', 'export_to_web_portal') && (
- {queryText.createWebPortalExport()}
+ {queryText.exportToWebPortal()}
)}
>
diff --git a/specifyweb/frontend/js_src/lib/localization/query.ts b/specifyweb/frontend/js_src/lib/localization/query.ts
index 485156f42ec..94f4aad470c 100644
--- a/specifyweb/frontend/js_src/lib/localization/query.ts
+++ b/specifyweb/frontend/js_src/lib/localization/query.ts
@@ -363,15 +363,8 @@ export const queryText = createDictionary({
'pt-br': 'Criar KML',
'hr-hr': 'Izradi KML',
},
- createWebPortalExport: {
- 'en-us': 'Create Web Portal Export',
- 'ru-ru': 'Создать экспорт веб-портала',
- 'es-es': 'Crear exportación de portal web',
- 'fr-fr': 'Créer une exportation pour le portail Web',
- 'uk-ua': 'Створити експорт для веб-порталу',
- 'de-ch': 'Webportal-Export erstellen',
- 'pt-br': 'Criar exportação para portal web',
- 'hr-hr': 'Izradi izvoz za web portal',
+ exportToWebPortal: {
+ 'en-us': 'Export to Web Portal',
},
createRecordSet: {
'en-us': 'Create {recordSetTable:string}',
From a2bbdf84b728c610c909ab20f016212ace276d9c Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 23:46:03 -0500
Subject: [PATCH 08/24] fix(webportal): hide image column
---
specifyweb/backend/stored_queries/web_portal_export.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index 05ddbbbfe4c..d8714d3bcba 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -499,9 +499,6 @@ def query_to_web_portal_zip(
'solrname': 'img',
'solrtype': 'string',
'title': 'image',
- 'linkify': False,
- 'colidx': len(column_defs),
- 'displaycolidx': len(column_defs),
},
]
From c204bd04bcb52449e3bd142d08f6f114bb090a53 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Fri, 17 Apr 2026 23:48:26 -0500
Subject: [PATCH 09/24] fix(webportal): use title instead of filename
---
.../stored_queries/tests/test_views/test_export_web_portal.py | 3 ++-
specifyweb/backend/stored_queries/web_portal_export.py | 4 +++-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
index 9430d0cc43b..3715bd05d5a 100644
--- a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
+++ b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
@@ -32,6 +32,7 @@ class FakeAttachment:
id = 5291
attachmentlocation = "sp6896513492722436219.att.JPG"
origfilename = "29432.JPG"
+ title = "Figure 1"
class FakeJoinRecord:
collectionobject_id = 123
@@ -65,5 +66,5 @@ def filter(self, **kwargs):
)
self.assertEqual(
result["123"],
- '[{AttachmentID:5291,AttachmentLocation:"sp6896513492722436219.att.JPG",Title:"29432.JPG"}]',
+ '[{AttachmentID:5291,AttachmentLocation:"sp6896513492722436219.att.JPG",Title:"Figure 1"}]',
)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index d8714d3bcba..7f694dee9fb 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -87,7 +87,9 @@ def _clean_portal_attachment_text(value: Any) -> str:
def _portal_attachment_entry(attachment) -> str:
attachment_location = _clean_portal_attachment_text(attachment.attachmentlocation)
title = _clean_portal_attachment_text(
- os.path.basename(attachment.origfilename or attachment.attachmentlocation or '')
+ os.path.basename(
+ attachment.title or attachment.origfilename or attachment.attachmentlocation or ''
+ )
)
return (
'{'
From 8fad6e4764ae294ea7b4d9c3ae8b5005d2b3187a Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sat, 18 Apr 2026 00:02:11 -0500
Subject: [PATCH 10/24] fix(webportal): fix missing import
---
specifyweb/backend/stored_queries/execution.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index 12c700277f3..2f666659b1d 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -35,7 +35,7 @@
from .query_construct import QueryConstruct
from .relative_date_utils import apply_absolute_date
from .field_spec_maps import apply_specify_user_name
-from .web_portal_export import query_to_web_portal_zip as _query_to_web_portal_zip
+from .web_portal_export import query_to_web_portal_zip as _query_to_web_portal_zip, _portal_attachment_map
from specifyweb.backend.notifications.models import Message
from specifyweb.backend.permissions.permissions import check_table_permissions
from specifyweb.specify.models import Loan, Loanpreparation, Loanreturnpreparation, Taxontreedef
From f8ea2d9d912d6ed9695c7a363949e343c1efda58 Mon Sep 17 00:00:00 2001
From: alec_dev
Date: Wed, 22 Apr 2026 12:00:46 -0500
Subject: [PATCH 11/24] Fix stored query parsing for nested formatted relations
---
.../backend/stored_queries/queryfieldspec.py | 14 +++++++++++++-
.../test_execution/test_field_specs_from_json.py | 11 +++++++++++
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/queryfieldspec.py b/specifyweb/backend/stored_queries/queryfieldspec.py
index 4d10dc1f67b..e56d12e1f05 100644
--- a/specifyweb/backend/stored_queries/queryfieldspec.py
+++ b/specifyweb/backend/stored_queries/queryfieldspec.py
@@ -260,7 +260,19 @@ def from_stringid(cls, stringid: str, is_relation: bool):
field = node.get_field(extracted_fieldname, strict=False)
tree_rank_name = None
- if field is None and is_tree_table(node): # try finding tree only on tree tables
+ if (
+ field is None
+ and is_relation
+ and not is_tree_table(node)
+ and extracted_fieldname.lower() == table_name.lower() == node.name.lower()
+ ):
+ # Legacy relation stringids like "locality.locality" serialize the current related table as a formatted
+ # step, not as an actual field on that table.
+ # Preserve that sentinel so nested formatted relations keep the same row plan shape, without treating
+ # arbitrary unknown fields on non-tree tables as tree ranks.
+ tree_rank_name = extracted_fieldname
+ join_path.append(TreeRankQuery.create(tree_rank_name, node.name))
+ elif field is None and is_tree_table(node): # try finding tree only on tree tables
tree_rank_name, field = find_tree_and_field(node, extracted_fieldname)
if tree_rank_name:
tree_rank = TreeRankQuery.create(
diff --git a/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py b/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
index 84c548d049e..52932c5fff9 100644
--- a/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
+++ b/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
@@ -118,3 +118,14 @@ def test_non_tree_table_does_not_parse_tree_rank(self):
self.assertFalse(fieldspec.contains_tree_rank())
self.assertIsNone(fieldspec.tree_rank)
self.assertIsNone(fieldspec.get_field())
+
+ def test_nested_formatted_relation_keeps_legacy_sentinel(self):
+ fieldspec = QueryFieldSpec.from_stringid("1,10,2.locality.locality", True)
+
+ self.assertTrue(fieldspec.contains_tree_rank())
+ self.assertEqual(fieldspec.tree_rank, "locality")
+ self.assertEqual(
+ [node.name for node in fieldspec.join_path],
+ ["collectingEvent", "locality", "locality"],
+ )
+ self.assertIsInstance(fieldspec.get_field(), TreeRankQuery)
From 82e26b1766ad042e577f6f984ab50ee96b5bc8d3 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Thu, 23 Apr 2026 11:20:04 -0500
Subject: [PATCH 12/24] feat(webportal): use target image info fields
---
.../stored_queries/web_portal_export.py | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index 7f694dee9fb..0c6583c168f 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -304,6 +304,22 @@ def _make_solr_schema_xml(fields: list[dict[str, Any]]) -> str:
return "\n".join(lines) + "\n"
+def _image_info_fields_from_column_defs(
+ column_defs: list[tuple[str, str, str, dict[str, Any]]],
+) -> list[str]:
+ """Grab the best image info fields."""
+ image_info_fields: list[str] = []
+ target_spflds = {
+ 'catalognumber',
+ 'fieldnumber',
+ 'stationfieldnumber',
+ }
+ for _, solrname, _, metadata in column_defs:
+ if str(metadata.get('spfld', '')).lower() in target_spflds:
+ image_info_fields.append(solrname)
+ return image_info_fields
+
+
def _serialize_portal_data(
rows: list[list[str]],
header: list[str],
@@ -539,7 +555,7 @@ def query_to_web_portal_zip(
# SolrFldSchema.xml is a minimal schema fragment for the portal's Solr index.
solr_schema = _make_solr_schema_xml(metadata_rows)
- image_info_fields = [column_def[1] for column_def in column_defs[:2]]
+ image_info_fields = _image_info_fields_from_column_defs(column_defs)
portal_instance_settings = json.dumps(
{
'portalInstance': str(uuid.uuid4()),
From e05f02d5fff58be9091d2d86738805b5118d4904 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Thu, 23 Apr 2026 12:07:42 -0500
Subject: [PATCH 13/24] fix(webportal): remove name field from tree ranks
---
.../stored_queries/web_portal_export.py | 24 ++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index 0c6583c168f..6a20339bd31 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -144,6 +144,28 @@ def _dedupe_name(name: str, used_names: set[str]) -> str:
return candidate
+def _normalize_portal_column_name(name: Any, query_field=None) -> str:
+ """Normalize portal column captions for export.
+
+    Strips the trailing " - <label>" suffix from tree-rank captions whose
+    underlying field is name/fullName; the rank caption alone reads better in the portal.
+ """
+ normalized = str(name if name is not None else '').strip()
+
+ if query_field is not None:
+ fieldspec = query_field.fieldspec
+ field = fieldspec.get_field()
+ if (
+ fieldspec.tree_rank is not None
+ and field is not None
+ and field.name in {'name', 'fullName'}
+ and ' - ' in normalized
+ ):
+ return normalized.rsplit(' - ', 1)[0].rstrip()
+
+ return normalized
+
+
def _portal_solr_type(query_field, collection, user) -> str:
"""Map a query field to the Solr field type used in portal metadata."""
fieldspec = query_field.fieldspec
@@ -467,7 +489,7 @@ def query_to_web_portal_zip(
zip_longest(display_fields, effective_captions, fillvalue=''),
start=0,
):
- trimmed_caption = str(caption).strip()
+ trimmed_caption = _normalize_portal_column_name(caption, field_spec)
base_name = trimmed_caption if trimmed_caption else f'column_{index + 1}'
colname = _dedupe_name(base_name, used_colnames)
From 356cd9ba1bedf416901a92587bd11382b65829dd Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Thu, 23 Apr 2026 12:24:07 -0500
Subject: [PATCH 14/24] fix(webportal): show only public attachments
---
specifyweb/backend/stored_queries/web_portal_export.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index 6a20339bd31..43234003d3d 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -118,7 +118,12 @@ def _portal_attachment_map(tableid: int, record_ids: list[Any]) -> dict[Any, str
join_model = apps.get_model(base_model._meta.app_label, join_model_name)
record_id_field = f'{base_model.__name__.lower()}_id'
- join_records = join_model.objects.filter(**{f'{record_id_field}__in': record_ids}).select_related('attachment')
+ join_records = join_model.objects.filter(
+ **{
+ f'{record_id_field}__in': record_ids,
+ 'attachment__ispublic': True,
+ }
+ ).select_related('attachment')
attachment_entries_by_record_id: dict[str, list[str]] = defaultdict(list)
for join_record in join_records:
From 22e4cc010d575ba52d7275f17932fa142f4753c4 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Thu, 23 Apr 2026 12:35:01 -0500
Subject: [PATCH 15/24] feat(webportal): prevent duplicate records in export
---
.../test_views/test_export_web_portal.py | 2 +-
.../lib/components/QueryBuilder/Export.tsx | 43 ++++++++++++++++---
.../frontend/js_src/lib/localization/query.ts | 10 +++++
3 files changed, 48 insertions(+), 7 deletions(-)
diff --git a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
index 3715bd05d5a..d4d5b2a1bf7 100644
--- a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
+++ b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
@@ -62,7 +62,7 @@ def filter(self, **kwargs):
self.assertEqual(
fake_join_manager.filter_kwargs,
- {"collectionobject_id__in": [123]},
+ {"collectionobject_id__in": [123], "attachment__ispublic": True},
)
self.assertEqual(
result["123"],
diff --git a/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx b/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
index 6928aa5cf2e..5c8adee8f89 100644
--- a/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
+++ b/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
@@ -22,6 +22,7 @@ import { QueryButton } from './Components';
import type { QueryField } from './helpers';
import { hasLocalityColumns } from './helpers';
import type { QueryResultRow } from './Results';
+import { dialogIcons } from '../Atoms/Icons';
export function QueryExportButtons({
baseTableName,
@@ -47,9 +48,23 @@ export function QueryExportButtons({
const showConfirmation = (): boolean =>
fields.some(({ mappingPath }) => !mappingPathIsComplete(mappingPath));
- const [state, setState] = React.useState<'creating' | 'warning' | undefined>(
- undefined
- );
+ const [state, setState] = React.useState<
+ 'creating' | 'warning' | 'duplicateWarning' | undefined
+ >(undefined);
+
+ const hasDuplicateRecordIds = (): boolean => {
+ const seenIds = new Set();
+ return (
+ results.current?.some((row) => {
+ if (row === undefined) return false;
+ const id = row[0];
+ if (id === undefined || id === null) return false;
+ if (seenIds.has(id)) return true;
+ seenIds.add(id);
+ return false;
+ }) ?? false
+ );
+ };
function doQueryExport(
url: string,
@@ -155,6 +170,17 @@ export function QueryExportButtons({
>
{queryText.missingCoordinatesForKmlDescription()}
+ ) : state === 'duplicateWarning' ? (
+
) : undefined}
{containsResults &&
hasPermission('/querybuilder/query', 'export_csv') && (
@@ -198,14 +224,19 @@ export function QueryExportButtons({
+ onClick={(): void => {
+ if (hasDuplicateRecordIds()) {
+ setState('duplicateWarning');
+ return;
+ }
+
doQueryExport(
'/stored_query/exportwebportal/',
undefined,
undefined,
undefined
- )
- }
+ );
+ }}
>
{queryText.exportToWebPortal()}
diff --git a/specifyweb/frontend/js_src/lib/localization/query.ts b/specifyweb/frontend/js_src/lib/localization/query.ts
index 94f4aad470c..0bf0b78956e 100644
--- a/specifyweb/frontend/js_src/lib/localization/query.ts
+++ b/specifyweb/frontend/js_src/lib/localization/query.ts
@@ -366,6 +366,16 @@ export const queryText = createDictionary({
exportToWebPortal: {
'en-us': 'Export to Web Portal',
},
+ webPortalExportDuplicateRecordIds: {
+ 'en-us': 'Query contains duplicate record IDs',
+ },
+ webPortalExportDuplicateRecordIdsDescription: {
+ 'en-us':
+ 'Please modify the query so that it returns unique records for each returned row.',
+ },
+ webPortalExportDuplicateHint: {
+ 'en-us': '(Hint: You may need to add a condition for current determination or use an aggregator for preparations or collectors.)',
+ },
createRecordSet: {
'en-us': 'Create {recordSetTable:string}',
'ru-ru': 'Создать {recordSetTable:string}',
From 540a55da3fb0b9924c0725c2c3b468acdd222344 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Thu, 23 Apr 2026 17:51:07 +0000
Subject: [PATCH 16/24] Lint code with ESLint and Prettier
Triggered by 22e4cc010d575ba52d7275f17932fa142f4753c4 on branch refs/heads/issue-7606
---
specifyweb/frontend/js_src/lib/localization/query.ts | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/specifyweb/frontend/js_src/lib/localization/query.ts b/specifyweb/frontend/js_src/lib/localization/query.ts
index 0bf0b78956e..51c7947d981 100644
--- a/specifyweb/frontend/js_src/lib/localization/query.ts
+++ b/specifyweb/frontend/js_src/lib/localization/query.ts
@@ -374,7 +374,8 @@ export const queryText = createDictionary({
'Please modify the query so that it returns unique records for each returned row.',
},
webPortalExportDuplicateHint: {
- 'en-us': '(Hint: You may need to add a condition for current determination or use an aggregator for preparations or collectors.)',
+ 'en-us':
+ '(Hint: You may need to add a condition for current determination or use an aggregator for preparations or collectors.)',
},
createRecordSet: {
'en-us': 'Create {recordSetTable:string}',
From 4c29e59dd1d89a41ae1ad3ff13e2badef02b78be Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sun, 26 Apr 2026 21:37:48 -0500
Subject: [PATCH 17/24] feat: handle export failures with notifications
---
.../backend/stored_queries/execution.py | 59 ++++++++++-------
.../Notifications/NotificationRenderers.tsx | 65 ++++++++++++++++++-
.../js_src/lib/localization/notifications.ts | 30 +++++++++
3 files changed, 131 insertions(+), 23 deletions(-)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index 2f666659b1d..c1e68358db7 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -3,6 +3,7 @@
import logging
import os
import re
+import traceback
import uuid
from io import StringIO
from xml.sax.saxutils import escape
@@ -268,28 +269,42 @@ def do_export(spquery, collection, user, filename, exporttype, host):
with models.session_context() as session:
field_specs = fields_from_json(spquery['fields'])
- if exporttype == 'csv':
- query_to_csv(session, collection, user, tableid, field_specs, path,
- recordsetid=recordsetid,
- captions=spquery['captions'], strip_id=True,
- distinct=spquery['selectdistinct'], delimiter=spquery['delimiter'], bom=spquery['bom'])
- elif exporttype == 'kml':
- query_to_kml(session, collection, user, tableid, field_specs, path, spquery['captions'], host,
- recordsetid=recordsetid, strip_id=False, selected_rows=spquery.get('selectedrows', None))
- message_type = 'query-export-to-kml-complete'
- elif exporttype == 'webportal':
- query_to_web_portal_zip(
- session,
- collection,
- user,
- tableid,
- field_specs,
- path,
- spquery['captions'],
- recordsetid=recordsetid,
- distinct=spquery['selectdistinct'],
- )
- message_type = 'query-export-to-web-portal-complete'
+ try:
+ if exporttype == 'csv':
+ query_to_csv(session, collection, user, tableid, field_specs, path,
+ recordsetid=recordsetid,
+ captions=spquery['captions'], strip_id=True,
+ distinct=spquery['selectdistinct'], delimiter=spquery['delimiter'], bom=spquery['bom'])
+ message_type = 'query-export-to-csv-complete'
+ elif exporttype == 'kml':
+ query_to_kml(session, collection, user, tableid, field_specs, path, spquery['captions'], host,
+ recordsetid=recordsetid, strip_id=False, selected_rows=spquery.get('selectedrows', None))
+ message_type = 'query-export-to-kml-complete'
+ elif exporttype == 'webportal':
+ query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ spquery['captions'],
+ recordsetid=recordsetid,
+ distinct=spquery['selectdistinct'],
+ )
+ message_type = 'query-export-to-webportal-complete'
+ except Exception as e:
+ error_details = {
+ 'error': str(e),
+ 'traceback': traceback.format_exc(),
+ }
+ message_type = f'query-export-to-{exporttype}-failed'
+ Message.objects.create(user=user, content=json.dumps({
+ 'type': message_type,
+ 'file': filename,
+ 'error': error_details,
+ }))
+ return
Message.objects.create(user=user, content=json.dumps({
'type': message_type,
diff --git a/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx b/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
index 5d4d5a3b97e..6e9587331b3 100644
--- a/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
+++ b/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
@@ -115,6 +115,48 @@ export const notificationRenderers: IR<
>
);
},
+ 'query-export-to-csv-failed'(notification) {
+ const errorPayload = notification.payload.error as unknown as
+ | { readonly error: string; readonly traceback: string }
+ | undefined;
+ return (
+ <>
+ {notificationsText.queryExportToCsvFailed()}
+ {errorPayload !== undefined && (
+
+ {notificationsText.exception()}
+
+ )}
+ >
+ );
+ },
+ 'query-export-to-kml-failed'(notification) {
+ const errorPayload = notification.payload.error as unknown as
+ | { readonly error: string; readonly traceback: string }
+ | undefined;
+ return (
+ <>
+ {notificationsText.queryExportToKmlFailed()}
+ {errorPayload !== undefined && (
+
+ {notificationsText.exception()}
+
+ )}
+ >
+ );
+ },
'query-export-to-kml-complete'(notification) {
return (
<>
@@ -131,7 +173,7 @@ export const notificationRenderers: IR<
>
);
},
- 'query-export-to-web-portal-complete'(notification) {
+ 'query-export-to-webportal-complete'(notification) {
return (
<>
{notificationsText.queryExportToWebPortalCompleted()}
@@ -147,6 +189,27 @@ export const notificationRenderers: IR<
>
);
},
+ 'query-export-to-webportal-failed'(notification) {
+ const errorPayload = notification.payload.error as unknown as
+ | { readonly error: string; readonly traceback: string }
+ | undefined;
+ return (
+ <>
+ {notificationsText.queryExportToWebPortalFailed()}
+ {errorPayload !== undefined && (
+
+ {notificationsText.exception()}
+
+ )}
+ >
+ );
+ },
'dataset-ownership-transferred'(notification) {
return (
transferred the ownership of the dataset to you.',
From 558cf4bc4fe75731b2c1b23bd7447fbe75f9db64 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sun, 26 Apr 2026 21:45:10 -0500
Subject: [PATCH 18/24] fix(queries): move fields parsing into try block
Makes sure any exceptions raised while parsing field specs are caught by the export's exception handling
---
specifyweb/backend/stored_queries/execution.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index c1e68358db7..66a108fddea 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -268,8 +268,8 @@ def do_export(spquery, collection, user, filename, exporttype, host):
message_type = "query-export-to-csv-complete"
with models.session_context() as session:
- field_specs = fields_from_json(spquery['fields'])
try:
+ field_specs = fields_from_json(spquery['fields'])
if exporttype == 'csv':
query_to_csv(session, collection, user, tableid, field_specs, path,
recordsetid=recordsetid,
From 714145810641f88d70aced349f74c077c3362b65 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sun, 26 Apr 2026 21:48:01 -0500
Subject: [PATCH 19/24] fix(queries): add error for unexpected export type
---
specifyweb/backend/stored_queries/execution.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index 66a108fddea..5963471e571 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -293,6 +293,9 @@ def do_export(spquery, collection, user, filename, exporttype, host):
distinct=spquery['selectdistinct'],
)
message_type = 'query-export-to-webportal-complete'
+ else:
+ # This should never happen because the export type is controlled by the backend, but just in case.
+ raise ValueError(f"Unsupported export type: {exporttype}")
except Exception as e:
error_details = {
'error': str(e),
From 14d4711425aeff9ea37f635da3d548ccd72af1f8 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:00:59 -0500
Subject: [PATCH 20/24] fix(webportal): hide traceback in prod
---
specifyweb/backend/stored_queries/execution.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index 5963471e571..ad208b3a936 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -299,7 +299,7 @@ def do_export(spquery, collection, user, filename, exporttype, host):
except Exception as e:
error_details = {
'error': str(e),
- 'traceback': traceback.format_exc(),
+ 'traceback': traceback.format_exc() if settings.DEBUG else None,
}
message_type = f'query-export-to-{exporttype}-failed'
Message.objects.create(user=user, content=json.dumps({
From 5c6459f688eeb1ba21e8c13f09dda0e372c7c95c Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:31:59 -0500
Subject: [PATCH 21/24] fix(queries): include exception context in prod
---
specifyweb/backend/stored_queries/execution.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index ad208b3a936..ed7157fbb2b 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -297,10 +297,14 @@ def do_export(spquery, collection, user, filename, exporttype, host):
# This should never happen because the export type is controlled by the backend, but just in case.
raise ValueError(f"Unsupported export type: {exporttype}")
except Exception as e:
- error_details = {
- 'error': str(e),
- 'traceback': traceback.format_exc() if settings.DEBUG else None,
- }
+ logger.exception(
+ "Export failed for %s: collection %s, file %s, type %s",
+ user, collection, filename, exporttype,
+ )
+ tb = traceback.format_exc()
+ error_details = {'error': str(e)}
+ if tb:
+ error_details['traceback'] = tb
message_type = f'query-export-to-{exporttype}-failed'
Message.objects.create(user=user, content=json.dumps({
'type': message_type,
From 7956b26c6e7ea35ec8f70f4a03057636cf874018 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:37:46 -0500
Subject: [PATCH 22/24] fix: check permissions after parsing query
This was a privilege escalation vulnerability. The `get_query` helper (used by `ephemeral` and `batch_edit`) already correctly parses JSON first, resolves the collection, then checks permissions. The three export functions were inconsistent and vulnerable.
---
specifyweb/backend/stored_queries/views.py | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/specifyweb/backend/stored_queries/views.py b/specifyweb/backend/stored_queries/views.py
index 8fee235a908..5d4616c27bf 100644
--- a/specifyweb/backend/stored_queries/views.py
+++ b/specifyweb/backend/stored_queries/views.py
@@ -151,8 +151,6 @@ def batch_edit(request):
@never_cache
def export_csv(request):
"""Executes and return as CSV the results of the query provided as JSON in the POST body."""
- check_permission_targets(request.specify_collection.id, request.specify_user.id, [
- QueryBuilderPt.execute, QueryBuilderPt.export_csv])
try:
spquery = json.load(request)
except ValueError as e:
@@ -165,6 +163,9 @@ def export_csv(request):
logger.debug('forcing collection to %s', collection.collectionname)
else:
collection = request.specify_collection
+
+ check_permission_targets(collection.id, request.specify_user.id, [
+ QueryBuilderPt.execute, QueryBuilderPt.export_csv])
file_name = format_export_file_name(spquery, "csv")
@@ -178,8 +179,6 @@ def export_csv(request):
@never_cache
def export_kml(request):
"""Executes and return as KML the results of the query provided as JSON in the POST body."""
- check_permission_targets(request.specify_collection.id, request.specify_user.id, [
- QueryBuilderPt.execute, QueryBuilderPt.export_kml])
try:
spquery = json.load(request)
except ValueError as e:
@@ -196,6 +195,9 @@ def export_kml(request):
else:
collection = request.specify_collection
+ check_permission_targets(collection.id, request.specify_user.id, [
+ QueryBuilderPt.execute, QueryBuilderPt.export_kml])
+
file_name = format_export_file_name(spquery, "kml")
thread = Thread(target=do_export, args=(spquery, collection, request.specify_user, file_name, 'kml', the_host))
@@ -209,10 +211,6 @@ def export_kml(request):
@never_cache
def export_to_web_portal(request):
"""Executes and returns as ZIP the web portal export package for the query provided as JSON in the POST body."""
- check_permission_targets(request.specify_collection.id, request.specify_user.id, [
- QueryBuilderPt.execute,
- QueryBuilderPt.export_to_web_portal,
- ])
try:
spquery = json.load(request)
except ValueError as e:
@@ -226,6 +224,11 @@ def export_to_web_portal(request):
else:
collection = request.specify_collection
+ check_permission_targets(collection.id, request.specify_user.id, [
+ QueryBuilderPt.execute,
+ QueryBuilderPt.export_to_web_portal,
+ ])
+
file_name = format_export_file_name(spquery, 'zip')
thread = Thread(
From 87c70a5b0e5dd3ee43d51fbfc78c6671d2326af3 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:42:10 -0500
Subject: [PATCH 23/24] fix(webportal): remove unneeded fields
---
specifyweb/backend/stored_queries/web_portal_export.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
index 43234003d3d..36eb114260e 100644
--- a/specifyweb/backend/stored_queries/web_portal_export.py
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -252,12 +252,10 @@ def _portal_field_metadata(
'type': field_type,
'width': field_length,
'concept': colname,
- 'concepturl': 'http://rs.tdwg.org/dwc/terms/',
'sptable': table_key,
'sptabletitle': table_localization.get('name', table.name),
'spfld': spfld,
'spfldtitle': item_localization.get('name', spfld),
- 'spdescription': item_localization.get('desc', spfld),
'colidx': index,
'linkify': is_linkified,
'advancedsearch': True,
From 51730a447c06c86440a5f617af69cca4a6c7e474 Mon Sep 17 00:00:00 2001
From: Grant Fitzsimmons <37256050+grantfitzsimmons@users.noreply.github.com>
Date: Mon, 27 Apr 2026 09:20:33 -0500
Subject: [PATCH 24/24] fix(queries): re-raise error
---
specifyweb/backend/stored_queries/execution.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index ed7157fbb2b..49b993fc6ca 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -311,7 +311,7 @@ def do_export(spquery, collection, user, filename, exporttype, host):
'file': filename,
'error': error_details,
}))
- return
+ raise
Message.objects.create(user=user, content=json.dumps({
'type': message_type,