Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
25612c2
feat(webportal): add export tool
grantfitzsimmons Apr 17, 2026
ee80b32
feat(webportal): add tests
grantfitzsimmons Apr 17, 2026
93bacdb
feat(webportal): split into separate file
grantfitzsimmons Apr 17, 2026
7969550
feat(webportal): build geoc values
grantfitzsimmons Apr 18, 2026
f60e0d1
fix(webportal): build proper metadata
grantfitzsimmons Apr 18, 2026
f3e181a
fix(webportal): remove unnecessary +1
grantfitzsimmons Apr 18, 2026
0a83119
chore(webportal): improve naming
grantfitzsimmons Apr 18, 2026
a2bbdf8
fix(webportal): hide image column
grantfitzsimmons Apr 18, 2026
c204bd0
fix(webportal): use title instead of filename
grantfitzsimmons Apr 18, 2026
8fad6e4
fix(webportal): fix missing import
grantfitzsimmons Apr 18, 2026
5642fe4
Merge branch 'main' into issue-7606
grantfitzsimmons Apr 21, 2026
158b0bc
Merge branch 'main' into issue-7606
grantfitzsimmons Apr 22, 2026
8d16a3b
Merge branch 'main' into issue-7606
acwhite211 Apr 22, 2026
f8ea2d9
Fix stored query parsing for nested formatted relations
acwhite211 Apr 22, 2026
82e26b1
feat(webportal): use target image info fields
grantfitzsimmons Apr 23, 2026
e05f02d
fix(webportal): remove name field from tree ranks
grantfitzsimmons Apr 23, 2026
356cd9b
fix(webportal): show only public attachments
grantfitzsimmons Apr 23, 2026
22e4cc0
feat(webportal): prevent duplicate records in export
grantfitzsimmons Apr 23, 2026
540a55d
Lint code with ESLint and Prettier
grantfitzsimmons Apr 23, 2026
4c29e59
feat: handle export failures with notifications
grantfitzsimmons Apr 27, 2026
558cf4b
fix(queries): move fields parsing into try block
grantfitzsimmons Apr 27, 2026
7141458
fix(queries): add error for unexpected export type
grantfitzsimmons Apr 27, 2026
14d4711
fix(webportal): hide traceback in prod
grantfitzsimmons Apr 27, 2026
5c6459f
fix(queries): include exception context in prod
grantfitzsimmons Apr 27, 2026
7956b26
fix: check permissions after parsing query
grantfitzsimmons Apr 27, 2026
87c70a5
fix(webportal): remove unneeded fields
grantfitzsimmons Apr 27, 2026
f742c10
Merge branch 'main' into issue-7606
grantfitzsimmons Apr 27, 2026
51730a4
fix(queries): re-raise error
grantfitzsimmons Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 85 additions & 13 deletions specifyweb/backend/stored_queries/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,31 @@
import logging
import os
import re
import traceback
import uuid
from io import StringIO
from xml.sax.saxutils import escape
from zipfile import ZIP_DEFLATED, ZipFile

from typing import Literal, NamedTuple
from typing import Any, Literal, NamedTuple
import xml.dom.minidom
from collections import namedtuple, defaultdict
from functools import reduce

from django.conf import settings
from django.apps import apps
from django.db import transaction
from django.utils import timezone
from specifyweb.backend.inheritance.api import cog_inheritance_post_query_processing, parent_inheritance_post_query_processing
from specifyweb.backend.inheritance.utils import get_cat_num_inheritance_setting, get_parent_cat_num_inheritance_setting
from specifyweb.backend.context.schema_localization import get_schema_localization
from specifyweb.backend.stored_queries.utils import log_sqlalchemy_query
from specifyweb.specify.utils.field_change_info import FieldChangeInfo
from specifyweb.specify.utils.uiformatters import CNNField, get_catalognumber_format, get_uiformatter
from sqlalchemy import sql, orm, func, text
from sqlalchemy.sql.expression import asc, desc, insert, literal

from specifyweb.specify.models_utils.models_by_table_id import get_table_id_by_model_name
from specifyweb.specify.models_utils.models_by_table_id import get_model_by_table_id, get_table_id_by_model_name
from specifyweb.backend.stored_queries.group_concat import group_by_displayed_fields
from specifyweb.backend.trees.utils import get_search_filters

Expand All @@ -28,13 +36,15 @@
from .query_construct import QueryConstruct
from .relative_date_utils import apply_absolute_date
from .field_spec_maps import apply_specify_user_name
from .web_portal_export import query_to_web_portal_zip as _query_to_web_portal_zip, _portal_attachment_map
from specifyweb.backend.notifications.models import Message
from specifyweb.backend.permissions.permissions import check_table_permissions
from specifyweb.specify.models import Loan, Loanpreparation, Loanreturnpreparation, Taxontreedef
from specifyweb.backend.workbench.upload.auditlog import auditlog
from specifyweb.backend.stored_queries.group_concat import group_by_displayed_fields
from specifyweb.backend.stored_queries.queryfield import fields_from_json, QUREYFIELD_SORT_T
from specifyweb.backend.stored_queries.queryfield import QueryField, fields_from_json, QUREYFIELD_SORT_T
from specifyweb.backend.stored_queries.synonomy import synonymize_tree_query

from specifyweb.specify.datamodel import datamodel, is_tree_table

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -258,22 +268,84 @@ def do_export(spquery, collection, user, filename, exporttype, host):
message_type = "query-export-to-csv-complete"

with models.session_context() as session:
field_specs = fields_from_json(spquery['fields'])
if exporttype == 'csv':
query_to_csv(session, collection, user, tableid, field_specs, path,
recordsetid=recordsetid,
captions=spquery['captions'], strip_id=True,
distinct=spquery['selectdistinct'], delimiter=spquery['delimiter'], bom=spquery['bom'])
elif exporttype == 'kml':
query_to_kml(session, collection, user, tableid, field_specs, path, spquery['captions'], host,
recordsetid=recordsetid, strip_id=False, selected_rows=spquery.get('selectedrows', None))
message_type = 'query-export-to-kml-complete'
try:
field_specs = fields_from_json(spquery['fields'])
if exporttype == 'csv':
query_to_csv(session, collection, user, tableid, field_specs, path,
recordsetid=recordsetid,
captions=spquery['captions'], strip_id=True,
distinct=spquery['selectdistinct'], delimiter=spquery['delimiter'], bom=spquery['bom'])
message_type = 'query-export-to-csv-complete'
elif exporttype == 'kml':
query_to_kml(session, collection, user, tableid, field_specs, path, spquery['captions'], host,
recordsetid=recordsetid, strip_id=False, selected_rows=spquery.get('selectedrows', None))
message_type = 'query-export-to-kml-complete'
elif exporttype == 'webportal':
query_to_web_portal_zip(
session,
collection,
user,
tableid,
field_specs,
path,
spquery['captions'],
recordsetid=recordsetid,
distinct=spquery['selectdistinct'],
)
message_type = 'query-export-to-webportal-complete'
Comment thread
coderabbitai[bot] marked this conversation as resolved.
else:
# This should never happen because the export type is controlled by the backend, but just in case.
raise ValueError(f"Unsupported export type: {exporttype}")
except Exception as e:
logger.exception(
"Export failed for %s: collection %s, file %s, type %s",
user, collection, filename, exporttype,
)
tb = traceback.format_exc()
error_details = {'error': str(e)}
if tb:
error_details['traceback'] = tb
message_type = f'query-export-to-{exporttype}-failed'
Message.objects.create(user=user, content=json.dumps({
'type': message_type,
'file': filename,
'error': error_details,
}))
Comment thread
coderabbitai[bot] marked this conversation as resolved.
raise

Message.objects.create(user=user, content=json.dumps({
'type': message_type,
'file': filename,
}))


def query_to_web_portal_zip(
    session,
    collection,
    user,
    tableid,
    field_specs,
    path,
    captions,
    recordsetid=None,
    distinct=False,
):
    """Delegate the web-portal ZIP export to the web_portal_export module.

    This thin wrapper injects this module's query-construction helpers
    (build_query, BuildQueryProps, post-query processing, and the
    group_concat length setter) so the exporter builds and post-processes
    the query exactly like the other export paths do.
    """
    # Helpers from this module that the exporter needs; passed explicitly
    # to avoid a circular import between execution and web_portal_export.
    helper_kwargs = {
        'build_query_fn': build_query,
        'build_query_props_cls': BuildQueryProps,
        'apply_special_post_query_processing_fn': apply_special_post_query_processing,
        'set_group_concat_max_len_fn': set_group_concat_max_len,
    }
    return _query_to_web_portal_zip(
        session,
        collection,
        user,
        tableid,
        field_specs,
        path,
        captions,
        recordsetid=recordsetid,
        distinct=distinct,
        **helper_kwargs,
    )

# def stored_query_to_csv(query_id, collection, user, path):
# """Executes a query from the Spquery table with the given id and send
# the results to a CSV file at path.
Expand Down
15 changes: 14 additions & 1 deletion specifyweb/backend/stored_queries/queryfieldspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from . import models
from .query_ops import QueryOps
from specifyweb.specify.models_utils.load_datamodel import Table, Field, Relationship
from specifyweb.specify.datamodel import is_tree_table

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -259,7 +260,19 @@ def from_stringid(cls, stringid: str, is_relation: bool):
field = node.get_field(extracted_fieldname, strict=False)

tree_rank_name = None
if field is None: # try finding tree
if (
field is None
and is_relation
and not is_tree_table(node)
and extracted_fieldname.lower() == table_name.lower() == node.name.lower()
):
# Legacy relation stringids like "locality.locality" serialize the current related table as a formatted
# step, not as an actual field on that table.
# Preserve that sentinel so nested formatted relations keep the same row plan shape, without treating
# arbitrary unknown fields on non-tree tables as tree ranks.
tree_rank_name = extracted_fieldname
join_path.append(TreeRankQuery.create(tree_rank_name, node.name))
elif field is None and is_tree_table(node): # try finding tree only on tree tables
Comment thread
grantfitzsimmons marked this conversation as resolved.
tree_rank_name, field = find_tree_and_field(node, extracted_fieldname)
if tree_rank_name:
tree_rank = TreeRankQuery.create(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,24 @@ def test_static_field_specs(self): # pragma: no cover
# generate_fields_test_str(query_fields, "static_simple_field_spec")

self.assertEqual(static_simple_field_spec, query_fields)

def test_non_tree_table_does_not_parse_tree_rank(self):
    """An unknown field on a non-tree table must not be misread as a tree rank."""
    co_table = datamodel.get_table_strict("CollectionObject")
    fieldspec = QueryFieldSpec.from_stringid(
        f"{co_table.tableId}.collectionobject.NotARealField", False
    )

    # No rank sentinel and no real field should be resolved.
    self.assertFalse(fieldspec.contains_tree_rank())
    self.assertIsNone(fieldspec.tree_rank)
    self.assertIsNone(fieldspec.get_field())

def test_nested_formatted_relation_keeps_legacy_sentinel(self):
    """Legacy relation stringids (e.g. '…locality.locality') must keep the
    formatted-relation sentinel in the join path as a TreeRankQuery."""
    fieldspec = QueryFieldSpec.from_stringid("1,10,2.locality.locality", True)

    self.assertTrue(fieldspec.contains_tree_rank())
    self.assertEqual("locality", fieldspec.tree_rank)
    path_names = [step.name for step in fieldspec.join_path]
    self.assertEqual(path_names, ["collectingEvent", "locality", "locality"])
    self.assertIsInstance(fieldspec.get_field(), TreeRankQuery)
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from unittest.mock import MagicMock, Mock, patch

from django.test import Client

from specifyweb.backend.stored_queries.tests.tests import SQLAlchemySetup

from .raw_query import get_simple_query


class TestExportWebPortal(SQLAlchemySetup):
    """Tests for the web-portal export endpoint and the attachment-map helper."""

    @patch("specifyweb.backend.stored_queries.views.Thread")
    def test_export(self, thread: Mock):
        """POSTing a query to the endpoint starts a daemon export thread
        and immediately responds with a plain 'OK'."""
        client = Client()
        client.force_login(self.specifyuser)

        response = client.post(
            "/stored_query/exportwebportal/",
            get_simple_query(self.specifyuser),
            content_type="application/json",
        )

        self._assertStatusCodeEqual(response, 200)
        # Exactly one background thread, daemonized, and started.
        thread.assert_called_once()
        self.assertTrue(thread.return_value.daemon)
        thread.return_value.start.assert_called_once()
        self._assertContentEqual(response, "OK")

    def test_portal_attachment_map(self):
        """_portal_attachment_map should filter to public attachments of the
        given record ids and format the web-portal attachment string keyed
        by the record id."""
        from specifyweb.backend.stored_queries import execution

        # Minimal stand-ins for the ORM objects the helper touches.
        class FakeAttachment:
            id = 5291
            attachmentlocation = "sp6896513492722436219.att.JPG"
            origfilename = "29432.JPG"
            title = "Figure 1"

        class FakeJoinRecord:
            collectionobject_id = 123
            attachment = FakeAttachment()

        class FakeJoinQuery:
            def select_related(self, *_args, **_kwargs):
                return [FakeJoinRecord()]

        class FakeJoinManager:
            def __init__(self):
                self.filter_kwargs = None

            def filter(self, **kwargs):
                # Capture the filter arguments so the test can assert on them.
                self.filter_kwargs = kwargs
                return FakeJoinQuery()

        fake_join_manager = FakeJoinManager()
        fake_base_model = type("Collectionobject", (), {"_meta": MagicMock(app_label="specifyweb")})
        fake_table = MagicMock()
        fake_table.attachments_field = MagicMock()
        fake_join_model = type("Collectionobjectattachment", (), {"objects": fake_join_manager})

        with patch.object(execution.datamodel, "get_table_by_id", return_value=fake_table), patch.object(
            execution, "get_model_by_table_id", return_value=fake_base_model
        ), patch.object(execution.apps, "get_model", return_value=fake_join_model):
            result = execution._portal_attachment_map(1, [123])

        # Only public attachments of the requested records are queried...
        self.assertEqual(
            fake_join_manager.filter_kwargs,
            {"collectionobject_id__in": [123], "attachment__ispublic": True},
        )
        # ...and the result is the web-portal attachment string keyed by id.
        self.assertEqual(
            result["123"],
            '[{AttachmentID:5291,AttachmentLocation:"sp6896513492722436219.att.JPG",Title:"Figure 1"}]',
        )
1 change: 1 addition & 0 deletions specifyweb/backend/stored_queries/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
path('ephemeral/', views.ephemeral),
path('exportcsv/', views.export_csv),
path('exportkml/', views.export_kml),
path('exportwebportal/', views.export_to_web_portal),
path('make_recordset/', views.make_recordset),
path('merge_recordsets/', views.merge_recordsets),
path('return_loan_preps/', views.return_loan_preps),
Expand Down
45 changes: 41 additions & 4 deletions specifyweb/backend/stored_queries/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class QueryBuilderPt(PermissionTarget):
execute = PermissionTargetAction()
export_csv = PermissionTargetAction()
export_kml = PermissionTargetAction()
export_to_web_portal = PermissionTargetAction()
create_recordset = PermissionTargetAction()

def value_from_request(field, get):
Expand Down Expand Up @@ -150,8 +151,6 @@ def batch_edit(request):
@never_cache
def export_csv(request):
"""Executes and return as CSV the results of the query provided as JSON in the POST body."""
check_permission_targets(request.specify_collection.id, request.specify_user.id, [
QueryBuilderPt.execute, QueryBuilderPt.export_csv])
try:
spquery = json.load(request)
except ValueError as e:
Expand All @@ -164,6 +163,9 @@ def export_csv(request):
logger.debug('forcing collection to %s', collection.collectionname)
else:
collection = request.specify_collection

check_permission_targets(collection.id, request.specify_user.id, [
QueryBuilderPt.execute, QueryBuilderPt.export_csv])
Comment on lines +167 to +168
Copy link
Copy Markdown
Contributor

@melton-jason melton-jason Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conceptually, aren't we exporting the CSV in the collection the user is signed into, and running the query in the Collection specified by the query?

Say the user was signed into Collection A and exported a Query that was run in Collection B.
It's clear we should at least check that the user has the correct Query permissions in Collection B, but which collection should we check Export CSV permissions against?

This is definitely a question regarding the scope of the permission. I can see arguments for all three sides:

  • The query is being executed in Collection B, regardless of what collection the user is signed in to, so we only need to check Export CSV permission in Collection B
  • The action was initiated and is being performed in Collection A by the user, so we should check whether they have Export permissions in Collection A
  • Both of the above apply, so we should check Export permissions in both collections

From a strictly data-origin standpoint, I think it probably makes the most sense to keep this as-is: that is, scope the permission check to the Collection the query is scoped to.
Do we think that approach would be the most intuitive interpretation of the permission? (this is not a rhetorical question, down to hear what other people think! 😅)


file_name = format_export_file_name(spquery, "csv")

Expand All @@ -177,8 +179,6 @@ def export_csv(request):
@never_cache
def export_kml(request):
"""Executes and return as KML the results of the query provided as JSON in the POST body."""
check_permission_targets(request.specify_collection.id, request.specify_user.id, [
QueryBuilderPt.execute, QueryBuilderPt.export_kml])
try:
spquery = json.load(request)
except ValueError as e:
Expand All @@ -195,13 +195,50 @@ def export_kml(request):
else:
collection = request.specify_collection

check_permission_targets(collection.id, request.specify_user.id, [
QueryBuilderPt.execute, QueryBuilderPt.export_kml])

file_name = format_export_file_name(spquery, "kml")

thread = Thread(target=do_export, args=(spquery, collection, request.specify_user, file_name, 'kml', the_host))
thread.daemon = True
thread.start()
return HttpResponse('OK', content_type='text/plain')


@require_POST
@login_maybe_required
@never_cache
def export_to_web_portal(request):
    """Executes and returns as ZIP the web portal export package for the query provided as JSON in the POST body.

    The export itself runs on a background daemon thread; this view responds
    immediately and the user is notified (via Message) when the export
    completes or fails.
    """
    try:
        spquery = json.load(request)
    except ValueError as e:
        return HttpResponseBadRequest(e)

    logger.info('export web portal query: %s', spquery)

    # The query may target a collection other than the one the user is
    # signed into; prefer the query's own collection when provided.
    collection = request.specify_collection
    if 'collectionid' in spquery:
        collection = Collection.objects.get(pk=spquery['collectionid'])
        logger.debug('forcing collection to %s', collection.collectionname)

    # Permissions are checked against the collection the query is scoped to.
    check_permission_targets(collection.id, request.specify_user.id, [
        QueryBuilderPt.execute,
        QueryBuilderPt.export_to_web_portal,
    ])

    file_name = format_export_file_name(spquery, 'zip')

    export_thread = Thread(
        target=do_export,
        args=(spquery, collection, request.specify_user, file_name, 'webportal', None),
    )
    export_thread.daemon = True
    export_thread.start()
    return HttpResponse('OK', content_type='text/plain')

@require_POST
@login_maybe_required
@never_cache
Expand Down
Loading
Loading