Skip to content
Open
5 changes: 4 additions & 1 deletion specifyweb/backend/workbench/upload/column_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from specifyweb.specify.utils.uiformatters import ScopedFormatter

# Allowed values for a column's matchBehavior option.
MatchBehavior = Literal["ignoreWhenBlank", "ignoreAlways", "ignoreNever"]
# Allowed values for a column's disambiguationBehavior option.
# NOTE(review): presumably "ask" leaves multiple matches for the user to resolve and
# "pickFirst" selects one automatically -- confirm against the matching code.
DisambiguationBehavior = Literal["ask", "pickFirst"]

# A single row in the workbench. Maps column names to values in the row
Row = dict[str, str]
Expand All @@ -14,9 +15,10 @@ class ColumnOptions(NamedTuple):
matchBehavior: MatchBehavior
nullAllowed: bool
default: str | None
disambiguationBehavior: DisambiguationBehavior

def to_json(self) -> dict | str:
if self.matchBehavior == "ignoreNever" and self.nullAllowed and self.default is None:
if self.matchBehavior == "ignoreNever" and self.nullAllowed and self.default is None and self.disambiguationBehavior == "ask":
return self.column

return dict(self._asdict())
Expand All @@ -26,6 +28,7 @@ class ExtendedColumnOptions(NamedTuple):
matchBehavior: MatchBehavior
nullAllowed: bool
default: str | None
disambiguationBehavior: DisambiguationBehavior
uiformatter: ScopedFormatter | None
schemaitem: Any
picklist: Any
Expand Down
29 changes: 16 additions & 13 deletions specifyweb/backend/workbench/upload/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from specifyweb.specify.datamodel import datamodel
from specifyweb.backend.workbench.upload.predicates import filter_match_key
from .column_options import ExtendedColumnOptions
from .column_options import DisambiguationBehavior, ExtendedColumnOptions
from specifyweb.backend.workbench.upload.parse import parse_field, is_latlong, ParseSucess, ParseFailure

# Maps column names to string values (same alias as Row in column_options.py).
Row = dict[str, str]
Expand Down Expand Up @@ -38,17 +38,20 @@ class ParseResult(NamedTuple):
add_to_picklist: PicklistAddition | None
column: str
missing_required: str | None
disambiguation_behavior: DisambiguationBehavior

@classmethod
def from_parse_success(cls, ps: ParseSucess, filter_on: Filter, add_to_picklist: PicklistAddition | None, column: str,
                       missing_required: str | None, disambiguation_behavior: DisambiguationBehavior):
    """Alternate constructor: build a result from a successful field parse.

    Args:
        ps: The parse success; its ``payload`` becomes the ``upload`` field.
        filter_on: Stored as the ``filter_on`` field.
        add_to_picklist: Stored as the ``add_to_picklist`` field; may be ``None``.
        column: Stored as the ``column`` field.
        missing_required: Stored as the ``missing_required`` field; may be ``None``.
        disambiguation_behavior: Stored as the ``disambiguation_behavior`` field.

    Returns:
        A new instance of ``cls`` populated from the arguments above.
    """
    # Keyword arguments keep this independent of the tuple's field order.
    return cls(
        filter_on=filter_on,
        upload=ps.payload,
        add_to_picklist=add_to_picklist,
        column=column,
        missing_required=missing_required,
        disambiguation_behavior=disambiguation_behavior,
    )

def match_key(self) -> str:
    """Return the key that ``filter_match_key`` computes from ``self.filter_on``."""
    key = filter_match_key(self.filter_on)
    return key


def filter_and_upload(f: Filter, column: str, disambiguation_behavior: DisambiguationBehavior = 'ask') -> ParseResult:
    """Build a ParseResult that uses the same mapping for filtering and uploading.

    Args:
        f: Mapping used for both the ``filter_on`` and ``upload`` fields.
        column: Stored as the ``column`` field.
        disambiguation_behavior: Stored as the ``disambiguation_behavior``
            field. Defaults to ``'ask'``, so callers that predate the
            parameter keep working.

    Returns:
        A ParseResult with no picklist addition and no missing-required marker.
    """
    return ParseResult(
        filter_on=f,
        upload=f,
        add_to_picklist=None,
        column=column,
        missing_required=None,
        disambiguation_behavior=disambiguation_behavior,
    )


def parse_many(tablename: str, mapping: dict[str, ExtendedColumnOptions], row: Row) -> tuple[list[ParseResult], list[WorkBenchParseFailure]]:
Expand Down Expand Up @@ -76,7 +79,7 @@ def parse_value(tablename: str, fieldname: str, value_in: str, colopts: Extended
None
)
result = ParseResult({fieldname: None}, {fieldname: None},
None, colopts.column, missing_required)
None, colopts.column, missing_required, colopts.disambiguationBehavior)
else:
result = _parse(tablename, fieldname,
colopts, colopts.default)
Expand Down Expand Up @@ -105,7 +108,7 @@ def _parse(tablename: str, fieldname: str, colopts: ExtendedColumnOptions, value
field = table.get_field_strict(fieldname)

if colopts.picklist:
result = parse_with_picklist(colopts.picklist, fieldname, value, colopts.column)
result = parse_with_picklist(colopts.picklist, fieldname, value, colopts.column, colopts.disambiguationBehavior,)
if result is not None:
if isinstance(result, ParseResult) and hasattr(field, 'length') and field.length is not None and len(result.upload[fieldname]) > field.length:
return WorkBenchParseFailure(
Expand All @@ -123,19 +126,19 @@ def _parse(tablename: str, fieldname: str, colopts: ExtendedColumnOptions, value
if is_latlong(table, field) and isinstance(parsed, ParseSucess):
coord_text_field = field.name.replace('itude', '') + 'text' if field.name else ''
filter_on = {coord_text_field: parsed.payload[coord_text_field]}
return ParseResult.from_parse_success(parsed, filter_on, None, colopts.column, None)
return ParseResult.from_parse_success(parsed, filter_on, None, colopts.column, None, colopts.disambiguationBehavior)

if isinstance(parsed, ParseFailure):
return WorkBenchParseFailure.from_parse_failure(parsed, colopts.column)
else:
return ParseResult.from_parse_success(parsed, parsed.payload, None, colopts.column, None)
return ParseResult.from_parse_success(parsed, parsed.payload, None, colopts.column, None, colopts.disambiguationBehavior)


def parse_with_picklist(picklist, fieldname: str, value: str, column: str) -> ParseResult | WorkBenchParseFailure | None:
def parse_with_picklist(picklist, fieldname: str, value: str, column: str, disambiguation_behavior: DisambiguationBehavior) -> ParseResult | WorkBenchParseFailure | None:
if picklist.type == 0: # items from picklistitems table
try:
item = picklist.picklistitems.get(title=value)
return filter_and_upload({fieldname: item.value}, column)
return filter_and_upload({fieldname: item.value}, column, disambiguation_behavior)
except ObjectDoesNotExist:
if picklist.readonly:
return WorkBenchParseFailure(
Expand All @@ -144,11 +147,11 @@ def parse_with_picklist(picklist, fieldname: str, value: str, column: str) -> Pa
column
)
else:
return filter_and_upload({fieldname: value}, column)._replace(
return filter_and_upload({fieldname: value}, column, disambiguation_behavior)._replace(
add_to_picklist=PicklistAddition(
picklist=picklist, column=column, value=value)
)
return filter_and_upload({fieldname: value})
return filter_and_upload({fieldname: value}, column, disambiguation_behavior)

elif picklist.type == 1: # items from rows in some table
# we ignore this type of picklist because it is primarily used to choose many-to-one's on forms
Expand Down
1 change: 1 addition & 0 deletions specifyweb/backend/workbench/upload/scoping.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def extend_columnoptions(
matchBehavior=colopts.matchBehavior,
nullAllowed=colopts.nullAllowed,
default=colopts.default,
disambiguationBehavior=colopts.disambiguationBehavior,
schemaitem=schemaitem,
# Formatters are "scoped" here, that is, all they need is a value coming directly from the row.
uiformatter=(None if scoped_formatter is None else CustomRepr(scoped_formatter, friendly_repr)),
Expand Down
94 changes: 78 additions & 16 deletions specifyweb/backend/workbench/upload/tests/testparsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
WorkBenchParseFailure
from ..upload_results_schema import schema as upload_results_schema
from ..upload_table import UploadTable
from specifyweb.specify import models

from django.conf import settings

Expand Down Expand Up @@ -444,7 +445,7 @@ def test_tree_cols_with_ignoreWhenBlank(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -466,7 +467,7 @@ def test_higher_tree_cols_with_ignoreWhenBlank(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None)),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None, disambiguationBehavior="ask")),
Subspecies=dict(name=parse_column_options('Subspecies')),
)
)
Expand All @@ -488,7 +489,7 @@ def test_tree_cols_with_ignoreNever(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=True, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=True, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -508,7 +509,7 @@ def test_tree_cols_with_required(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=False, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=False, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -530,7 +531,7 @@ def test_tree_cols_with_ignoreAlways(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreAlways", nullAllowed=True, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreAlways", nullAllowed=True, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -551,7 +552,7 @@ def test_wbcols_with_ignoreWhenBlank(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -577,7 +578,7 @@ def test_wbcols_with_ignoreWhenBlank_and_default(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -609,7 +610,7 @@ def test_wbcols_with_ignoreNever(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -634,7 +635,7 @@ def test_wbcols_with_ignoreAlways(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=True, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=True, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -662,7 +663,7 @@ def test_wbcols_with_default(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -690,7 +691,7 @@ def test_wbcols_with_default_matching(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -721,7 +722,7 @@ def test_wbcols_with_default_and_null_disallowed(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -750,7 +751,7 @@ def test_wbcols_with_default_blank(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default=""),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default="", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -780,7 +781,7 @@ def test_wbcols_with_null_disallowed(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -805,7 +806,7 @@ def test_wbcols_with_null_disallowed_and_ignoreWhenBlank(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=False, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=False, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -834,7 +835,7 @@ def test_wbcols_with_null_disallowed_and_ignoreAlways(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=False, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=False, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -857,3 +858,64 @@ def test_wbcols_with_null_disallowed_and_ignoreAlways(self) -> None:
self.assertIsInstance(results[2].record_result, Uploaded)
self.assertIsInstance(results[3].record_result, Matched)
self.assertIsInstance(results[4].record_result, Matched)

class DisambiguationBehaviorTests(UploadTestsBase):
    """Upload tests for the per-column ``disambiguationBehavior`` option."""

    def test_pickFirst_disambiguation_behavior(self) -> None:
        """A cataloger matching two agents is expected to come back Matched under "pickFirst"."""
        # Step 1: upload two agents that share first and last name and differ
        # only by email, so a later lookup on the name has two candidates.
        seed_plan = UploadTable(
            name='Agent',
            wbcols={
                field: parse_column_options(field)
                for field in ('firstname', 'lastname', 'email')
            },
            overrideScope=None,
            static={},
            toOne={},
            toMany={}
        )
        seed_rows = [
            {'lastname': 'Doe', 'firstname': 'John', 'email': str(n)}
            for n in range(2)
        ]

        seed_results = do_upload(self.collection, seed_rows, seed_plan, self.agent.id)
        for seed_result in seed_results:
            validate([seed_result.to_json()], upload_results_schema, cls=Draft7Validator)

        for position in (0, 1):
            self.assertIsInstance(seed_results[position].record_result, Uploaded)

        # Step 2: upload collection objects whose cataloger is looked up by
        # name, with "pickFirst" set on the firstname column.
        catalog_columns = {
            'catalognumber': parse_column_options('Cat #'),
        }
        cataloger_plan = UploadTable(
            name='Agent',
            wbcols={
                'lastname': parse_column_options('lastname'),
                'firstname': ColumnOptions(
                    column='firstname',
                    matchBehavior="ignoreNever",
                    nullAllowed=True,
                    default=None,
                    disambiguationBehavior="pickFirst",
                ),
            },
            overrideScope=None,
            static={},
            toOne={},
            toMany={},
        )
        plan = UploadTable(
            name='Collectionobject',
            wbcols=catalog_columns,
            overrideScope=None,
            static={},
            toOne={'cataloger': cataloger_plan},
            toMany={}
        )
        rows = [
            {'Cat #': '124', 'lastname': 'Doe', 'firstname': 'John'},
            {'Cat #': '125', 'lastname': 'Doe', 'firstname': 'Jane'}
        ]

        outcomes = do_upload(self.collection, rows, plan, self.agent.id)
        for outcome in outcomes:
            validate([outcome.to_json()], upload_results_schema, cls=Draft7Validator)

        # Row 0 names the duplicated agent; row 1 names an agent not seeded above.
        john_cataloger = outcomes[0].toOne['cataloger'].record_result
        jane_cataloger = outcomes[1].toOne['cataloger'].record_result
        self.assertIsInstance(john_cataloger, Matched, "Record was not disambiguated automatically despite having disambiguationBehavior='pickFirst'.")
        self.assertIsInstance(jane_cataloger, Uploaded)
6 changes: 4 additions & 2 deletions specifyweb/backend/workbench/upload/upload_attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,17 @@ def add_attachments_to_plan(
column=f"_ATTACHMENT_ORDINAL_{index}",
matchBehavior="ignoreNever",
nullAllowed=True,
default="0"
default="0",
disambiguationBehavior="ask"
)
attackment_columns = {}
for field in attachment_fields_to_copy:
attackment_columns[field] = ColumnOptions(
column=f"_ATTACHMENT_{field.upper()}_{index}",
matchBehavior="ignoreNever",
nullAllowed=True,
default=attachment_field_default(field)
default=attachment_field_default(field),
disambiguationBehavior="ask"
)
attachment_uploadable = UploadTable(
name="Attachment",
Expand Down
Loading
Loading