diff --git a/resources/ResourceInterface.ts b/resources/ResourceInterface.ts index 448d7bc61..6316f1516 100644 --- a/resources/ResourceInterface.ts +++ b/resources/ResourceInterface.ts @@ -134,8 +134,13 @@ export type Comparator = | 'ends_with' | 'eq' | 'equals' + | 'gt' + | 'ge' + | 'lt' + | 'le' | 'greater_than' | 'greater_than_equal' + | 'in' | 'less_than' | 'less_than_equal' | 'ne' @@ -149,8 +154,13 @@ interface TypedDirectCondition | string | string[]; comparator?: Comparator; search_type?: Comparator; - value?: Record[Property]; - search_value?: Record[Property]; + value?: Record[Property] | Record[Property][]; + search_value?: Record[Property] | Record[Property][]; + /** + * If true, the condition is negated. Phase 1: filter-only — forces a + * full scan unless paired with another indexed condition. + */ + negated?: boolean; } interface ConditionGroup { diff --git a/resources/Table.ts b/resources/Table.ts index 795d4913c..e28408c44 100644 --- a/resources/Table.ts +++ b/resources/Table.ts @@ -38,6 +38,7 @@ import { flattenKey, COERCIBLE_OPERATORS, executeConditions, + resolveComparator, } from './search.ts'; import { logger } from '../utility/logging/logger.ts'; import { Addition, assignTrackedAccessors, updateAndFreeze, hasChanges, GenericTrackedObject } from './tracked.ts'; @@ -2056,15 +2057,37 @@ export function makeTable(options) { condition.conditions = prepareConditions(condition.conditions, condition.operator); continue; } + // Normalize `not_X` comparator forms passed in via structured queries. + // The REST parser already does this, but programmatic callers may + // pass `not_in`, `not_starts_with`, etc. directly. + if (condition.comparator) { + const resolved = resolveComparator(condition.comparator); + if (resolved.negated) { + condition.comparator = resolved.comparator; + condition.negated = true; + } + } const attribute_name = condition[0] ?? condition.attribute; - const attribute = attribute_name == null ? 
primaryKeyAttribute : findAttribute(attributes, attribute_name); + let attribute = attribute_name == null ? primaryKeyAttribute : findAttribute(attributes, attribute_name); + if (!attribute && Array.isArray(attribute_name) && attribute_name.length > 1) { + // Plain JSON nested path: the leaf may not be declared in the + // schema. Fall back to the root attribute so we can validate + // existence without requiring the inner structure to be typed. + attribute = findAttribute(attributes, attribute_name[0]); + } if (!attribute) { if (attribute_name != null && !target.allowConditionsOnDynamicAttributes) throw handleHDBError(new Error(), `${attribute_name} is not a defined attribute`, 404); } else if (attribute.type || COERCIBLE_OPERATORS[condition.comparator]) { - // Do auto-coercion or coercion as required by the attribute type - if (condition[1] === undefined) condition.value = coerceTypedValues(condition.value, attribute); - else condition[1] = coerceTypedValues(condition[1], attribute); + // Do auto-coercion or coercion as required by the attribute type. + // Skipped for nested paths into plain JSON — the root attribute's + // type is not the leaf type, so coercion would be wrong. 
+ const isNestedPathRoot = + Array.isArray(attribute_name) && attribute_name.length > 1 && !attribute.relationship; + if (!isNestedPathRoot) { + if (condition[1] === undefined) condition.value = coerceTypedValues(condition.value, attribute); + else condition[1] = coerceTypedValues(condition[1], attribute); + } } if (condition.chainedConditions) { if (condition.chainedConditions.length === 1 && (!condition.operator || condition.operator == 'and')) { @@ -2572,12 +2595,22 @@ export function makeTable(options) { } else { value = record[attribute_name]; if (value && typeof value === 'object' && attribute_name !== attribute) { - value = TableResource.transformEntryForSelect( + const subTransform = TableResource.transformEntryForSelect( attribute.select || attribute, context, readTxn, null - )({ value }); + ); + // Plain JSON nested values: arrays project per-element so that + // `select: [{ name: 'addresses', select: ['city'] }]` returns + // `addresses: [{ city }, { city }]` rather than a single object. + if (Array.isArray(value)) { + value = value.map((item) => + item && typeof item === 'object' ? subTransform({ value: item }) : item + ); + } else if (!(value instanceof Date)) { + value = subTransform({ value }); + } } } callback(value, attribute_name); diff --git a/resources/search.ts b/resources/search.ts index 4040b1cef..45dba5980 100644 --- a/resources/search.ts +++ b/resources/search.ts @@ -13,6 +13,16 @@ const OPEN_RANGE_ESTIMATE = 0.3; const BETWEEN_ESTIMATE = 0.1; const STARTS_WITH_ESTIMATE = 0.05; +// Synthetic Table-like object used to recurse through plain JSON nested-path +// segments. It has no attributes, indices, or property resolvers, so the +// recursive filterByType call dispatches purely on the comparator. 
+const NESTED_PATH_TABLE = Object.freeze({ + attributes: [], + indices: {}, + primaryKey: null, + propertyResolvers: null, +}); + const SYMBOL_OPERATORS = { // these are coercing operators '<': 'lt', @@ -135,11 +145,12 @@ export function searchByIndex( if (value === undefined && comparator !== 'sort') { throw new ClientError(`Search condition for ${attribute_name} must have a value`); } + let needFullScan; if (Array.isArray(attribute_name)) { const firstAttributeName = attribute_name[0]; // get the potential relationship attribute const attribute = findAttribute(Table.attributes, firstAttributeName); - if (attribute.relationship) { + if (attribute?.relationship) { // it is a join/relational query if (attribute_name.length < 2) throw new ClientError( @@ -153,6 +164,7 @@ export function searchByIndex( attribute: attribute_name.length > 2 ? attribute_name.slice(1) : attribute_name[1], value, comparator, + negated: searchCondition.negated, }, transaction, reverse, @@ -192,7 +204,10 @@ export function searchByIndex( } else if (attribute_name.length === 1) { attribute_name = attribute_name[0]; } else { - throw new ClientError('Unable to query by attribute ' + JSON.stringify(attribute_name)); + // Non-relationship nested path (plain JSON path). Phase 1: filter-only — + // we don't have an index for the nested path, so fall through to a full + // scan with `filterByType` walking the path on each record. 
+ needFullScan = true; } } const isPrimaryKey = attribute_name === Table.primaryKey || attribute_name == null; @@ -200,72 +215,85 @@ export function searchByIndex( let start; let end, inclusiveEnd, exclusiveStart; if (value instanceof Date) value = value.getTime(); - let needFullScan; - switch (ALTERNATE_COMPARATOR_NAMES[comparator] || comparator) { - case 'lt': - start = true; - end = value; - break; - case 'le': - start = true; - end = value; - inclusiveEnd = true; - break; - case 'gt': - start = value; - exclusiveStart = true; - break; - case 'ge': - start = value; - break; - case 'prefix': // this is form finding multi-part keys that start with the provided prefix - // this search needs to be of the form: - // start: [prefix, null], end: [prefix, MAXIMUM_KEY] - if (!Array.isArray(value)) value = [value, null]; - else if (value[value.length - 1] != null) value = value.concat(null); - start = value; - end = value.slice(0); - end[end.length - 1] = MAXIMUM_KEY; - break; - case 'starts_with': - start = value.toString(); - end = value + String.fromCharCode(0xffff); - break; - case 'between': - case 'gele': - case 'gelt': - case 'gtlt': - case 'gtle': - start = value[0]; - if (start instanceof Date) start = start.getTime(); - end = value[1]; - if (end instanceof Date) end = end.getTime(); - inclusiveEnd = comparator === 'gele' || comparator === 'gtle' || comparator === 'between'; - exclusiveStart = comparator === 'gtlt' || comparator === 'gtle'; - break; - case 'equals': - case undefined: - start = value; - end = value; - inclusiveEnd = true; - break; - case 'ne': - if (value === null) { - // since null is the lowest value in an index, we can treat anything higher as a non-null + if (searchCondition.negated) { + // Negated conditions are filter-only in Phase 1: scan the whole index/table + // and exclude matching rows in the filter. Without overriding the range, + // the bounded index iteration would only visit *included* rows. 
+ start = true; + needFullScan = true; + } else + switch (ALTERNATE_COMPARATOR_NAMES[comparator] || comparator) { + case 'lt': + start = true; + end = value; + break; + case 'le': + start = true; + end = value; + inclusiveEnd = true; + break; + case 'gt': start = value; exclusiveStart = true; break; - } - case 'sort': // this is a special case for when we want to get all records for sorting - case 'contains': - case 'ends_with': - // we have to revert to full table scan here - start = true; - needFullScan = true; - break; - default: - throw new ClientError(`Unknown query comparator "${comparator}"`); - } + case 'ge': + start = value; + break; + case 'prefix': // this is form finding multi-part keys that start with the provided prefix + // this search needs to be of the form: + // start: [prefix, null], end: [prefix, MAXIMUM_KEY] + if (!Array.isArray(value)) value = [value, null]; + else if (value[value.length - 1] != null) value = value.concat(null); + start = value; + end = value.slice(0); + end[end.length - 1] = MAXIMUM_KEY; + break; + case 'starts_with': + start = value.toString(); + end = value + String.fromCharCode(0xffff); + break; + case 'between': + case 'gele': + case 'gelt': + case 'gtlt': + case 'gtle': + start = value[0]; + if (start instanceof Date) start = start.getTime(); + end = value[1]; + if (end instanceof Date) end = end.getTime(); + inclusiveEnd = comparator === 'gele' || comparator === 'gtle' || comparator === 'between'; + exclusiveStart = comparator === 'gtlt' || comparator === 'gtle'; + break; + case 'equals': + case undefined: + start = value; + end = value; + inclusiveEnd = true; + break; + case 'in': + // Phase 1: route through filter — index-merge optimization is a Phase 2 follow-up. + // `value` is expected to be an array (empty array matches no rows). 
+ if (!Array.isArray(value)) throw new ClientError(`"in" comparator requires an array value`); + start = true; + needFullScan = true; + break; + case 'ne': + if (value === null) { + // since null is the lowest value in an index, we can treat anything higher as a non-null + start = value; + exclusiveStart = true; + break; + } + case 'sort': // this is a special case for when we want to get all records for sorting + case 'contains': + case 'ends_with': + // we have to revert to full table scan here + start = true; + needFullScan = true; + break; + default: + throw new ClientError(`Unknown query comparator "${comparator}"`); + } let filter; if (typeof start === 'string' && start.length > MAX_SEARCH_KEY_LENGTH) { // if the key is too long, we need to truncate it and filter the results @@ -287,7 +315,11 @@ export function searchByIndex( exclusiveStart = !inclusiveEnd; inclusiveEnd = newEnd; } - if (!index || index.isIndexing || needFullScan || (value === null && !index.indexNulls)) { + // For negated conditions we need to consider records whose attribute value is + // missing from the index (e.g. nulls when the index doesn't index nulls), so + // we bypass the secondary index and iterate over the primary store. 
+ const skipIndex = searchCondition.negated && index && !isPrimaryKey; + if (!index || index.isIndexing || needFullScan || (value === null && !index.indexNulls) || skipIndex) { // no indexed searching available, need a full scan if (allowFullScan === false && !index) throw new ClientError(`"${attribute_name}" is not indexed, can not search for this attribute`, 404); @@ -351,7 +383,7 @@ export function searchByIndex( ); results.hasEntries = true; return results; - } else if (index) { + } else if (index && !skipIndex) { if (index.customIndex) { return index.customIndex.search(searchCondition, context).map((entry) => { // if the custom index returns an entry with metadata, merge it with the loaded entry @@ -596,6 +628,7 @@ const ALTERNATE_COMPARATOR_NAMES = { 'ew': 'ends_with', 'endsWith': 'ends_with', 'ct': 'contains', + 'includes': 'in', '>': 'gt', '>=': 'ge', '<': 'lt', @@ -603,6 +636,55 @@ const ALTERNATE_COMPARATOR_NAMES = { '...': 'between', }; +// Comparators whose value is a list (array) of values. Used to recognize the +// REST `(v1,v2,...)` value syntax during parsing. +const LIST_VALUE_COMPARATORS = new Set(['in', 'between']); + +// Base comparators that accept the `not_` prefix to produce a negated form. +// `not_equal` is an existing alias for `ne` and keeps its existing semantics. +const NEGATABLE_BASE_COMPARATORS = new Set(['in', 'between', 'starts_with', 'ends_with', 'contains', 'equals']); + +/** + * Resolve a comparator name to a (possibly stripped) base comparator and a + * `negated` flag. Existing aliases are preserved as-is — the execution layer + * resolves them via `ALTERNATE_COMPARATOR_NAMES`. Only the `not_` prefix is + * stripped here, and only when the base is a recognized negatable comparator + * and the full name is not itself an existing alias (so `not_equal` keeps its + * historical mapping to `ne`). 
+ */ +export function resolveComparator(comparator: string | undefined): { + comparator: string | undefined; + negated: boolean; +} { + if (comparator == null) return { comparator, negated: false }; + // Preserve existing aliases (e.g., `not_equal` -> `ne`) — let execution-time + // alias resolution handle them so we don't change comparator strings stored + // on the condition object. + if (ALTERNATE_COMPARATOR_NAMES[comparator]) return { comparator, negated: false }; + if (typeof comparator === 'string' && comparator.startsWith('not_')) { + const base = comparator.slice(4); + const baseResolved = ALTERNATE_COMPARATOR_NAMES[base] || base; + if (NEGATABLE_BASE_COMPARATORS.has(baseResolved)) { + return { comparator: base, negated: true }; + } + } + return { comparator, negated: false }; +} + +/** + * Walk a nested-property path on a record. Returns the value at the path, + * or undefined if any intermediate property is missing. + */ +export function getNestedValue(record: any, path: string | string[]): any { + if (typeof path === 'string') return record?.[path]; + let current = record; + for (let i = 0; i < path.length; i++) { + if (current == null) return undefined; + current = current[path[i]]; + } + return current; +} + /** * Create a filter based on the search condition that can be used to test each supplied record. * @param {SearchObject} searchCondition @@ -610,6 +692,7 @@ const ALTERNATE_COMPARATOR_NAMES = { */ export function filterByType(searchCondition, Table, context, filtered, isPrimaryKey?, estimatedIncomingCount?) { const comparator = searchCondition.comparator; + const negated = searchCondition.negated; let attribute = searchCondition[0] ?? searchCondition.attribute; let value = searchCondition[1] ?? 
searchCondition.value; if (Array.isArray(attribute)) { @@ -617,9 +700,29 @@ export function filterByType(searchCondition, Table, context, filtered, isPrimar if (attribute.length === 1) attribute = attribute[0]; else if (attribute.length > 1) { const firstAttributeName = attribute[0]; - // get the relationship attribute - const firstAttribute = findAttribute(Table.attributes, firstAttributeName); - const relatedTable = firstAttribute.definition?.tableClass || firstAttribute.elements.definition?.tableClass; + // get the relationship attribute (may be undefined for plain JSON paths) + const firstAttribute = findAttribute(Table?.attributes, firstAttributeName); + const relatedTable = firstAttribute?.definition?.tableClass || firstAttribute?.elements?.definition?.tableClass; + if (!relatedTable) { + // Plain JSON nested path — walk the path on each record. Array + // intermediates use `some` semantics (match if any element matches). + const restAttribute = attribute.length > 2 ? attribute.slice(1) : attribute[1]; + const leafFilter = filterByType( + { attribute: restAttribute, value, comparator }, + NESTED_PATH_TABLE, + context, + null, + false, + estimatedIncomingCount + ); + if (!leafFilter) return; + return function nestedRecordFilter(record, entry) { + const subObject = record?.[firstAttributeName] ?? undefined; + const matched = Array.isArray(subObject) ? subObject.some((item) => leafFilter(item, entry)) : leafFilter(subObject, entry); + // Negate the whole path match (not each element) so a negated condition is the exact complement of the positive one. + return negated ? !matched : matched; + }; + } // TODO: If this is a relationship, we can potentially make this more efficient by using the index // and retrieving the set of matching ids first const filterMap = filtered?.[firstAttributeName]; @@ -628,6 +731,7 @@ export function filterByType(searchCondition, Table, context, filtered, isPrimar attribute: attribute.length > 2 ?
attribute.slice(1) : attribute[1], value, comparator, + negated, }, relatedTable, context, @@ -642,7 +746,7 @@ export function filterByType(searchCondition, Table, context, filtered, isPrimar return; } const resolver = Table.propertyResolvers?.[firstAttributeName]; - if (resolver.to) nextFilter.to = resolver.to; + if (resolver?.to) nextFilter.to = resolver.to; let subIdFilter; const getSubObject = (record, entry) => { let subObject, subEntry; @@ -711,24 +815,29 @@ export function filterByType(searchCondition, Table, context, filtered, isPrimar } if (value instanceof Date) value = value.getTime(); + let baseFilter; switch (ALTERNATE_COMPARATOR_NAMES[comparator] || comparator) { case SEARCH_TYPES.EQUALS: case undefined: - return attributeComparator(attribute, (recordValue) => recordValue === value, true); + baseFilter = attributeComparator(attribute, (recordValue) => recordValue === value, true); + break; case 'contains': - return attributeComparator(attribute, (recordValue) => recordValue?.toString().includes(value)); + baseFilter = attributeComparator(attribute, (recordValue) => recordValue?.toString().includes(value)); + break; case 'ends_with': - return attributeComparator(attribute, (recordValue) => recordValue?.toString().endsWith(value)); + baseFilter = attributeComparator(attribute, (recordValue) => recordValue?.toString().endsWith(value)); + break; case 'starts_with': - return attributeComparator( + baseFilter = attributeComparator( attribute, (recordValue) => typeof recordValue === 'string' && recordValue.startsWith(value), true ); + break; case 'prefix': if (!Array.isArray(value)) value = [value]; else if (value[value.length - 1] == null) value = value.slice(0, -1); - return attributeComparator( + baseFilter = attributeComparator( attribute, (recordValue) => { if (!Array.isArray(recordValue)) return false; @@ -739,31 +848,68 @@ export function filterByType(searchCondition, Table, context, filtered, isPrimar }, true ); + break; case 'between': + case 
'gele': + case 'gtlt': + case 'gtle': + case 'gelt': { if (value[0] instanceof Date) value[0] = value[0].getTime(); if (value[1] instanceof Date) value[1] = value[1].getTime(); - return attributeComparator( + const resolvedComparator = ALTERNATE_COMPARATOR_NAMES[comparator] || comparator; + const startInclusive = + resolvedComparator === 'between' || resolvedComparator === 'gele' || resolvedComparator === 'gelt'; + const endInclusive = + resolvedComparator === 'between' || resolvedComparator === 'gele' || resolvedComparator === 'gtle'; + baseFilter = attributeComparator( attribute, (recordValue) => { - return compareKeys(recordValue, value[0]) >= 0 && compareKeys(recordValue, value[1]) <= 0; + const cmpStart = compareKeys(recordValue, value[0]); + const cmpEnd = compareKeys(recordValue, value[1]); + return (startInclusive ? cmpStart >= 0 : cmpStart > 0) && (endInclusive ? cmpEnd <= 0 : cmpEnd < 0); }, true ); + break; + } + case 'in': { + if (!Array.isArray(value)) throw new ClientError(`"in" comparator requires an array value`); + // Cache the Set on the condition so multi-row evaluation reuses it. + let valueSet: Set = (searchCondition as any).cachedSet; + if (!valueSet) { + valueSet = new Set(value.map((v) => (v instanceof Date ? 
v.getTime() : v))); + (searchCondition as any).cachedSet = valueSet; + } + baseFilter = attributeComparator(attribute, (recordValue) => valueSet.has(recordValue), true); + break; + } case 'gt': - return attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) > 0); + baseFilter = attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) > 0); + break; case 'ge': - return attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) >= 0); + baseFilter = attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) >= 0); + break; case 'lt': - return attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) < 0); + baseFilter = attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) < 0); + break; case 'le': - return attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) <= 0); + baseFilter = attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) <= 0); + break; case 'ne': - return attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) !== 0, false, true); + baseFilter = attributeComparator(attribute, (recordValue) => compareKeys(recordValue, value) !== 0, false, true); + break; case 'sort': return () => true; default: throw new ClientError(`Unknown query comparator "${comparator}"`); } + if (negated && baseFilter) { + // Wrap with negation. Internal index-optimization state on `baseFilter` + // (idFilter, to, etc.) is intentionally not propagated through the + // negation wrapper — negated conditions are filter-only in Phase 1. 
+ return (record, entry) => !baseFilter(record, entry); + } + return baseFilter; /** Create a comparison function that can take the record and check the attribute's value with the filter function */ function attributeComparator( attribute: string, @@ -787,7 +933,9 @@ export function filterByType(searchCondition, Table, context, filtered, isPrimar let misses = 0; let filteredSoFar = 3; // what we use to calculate miss rate; we give some buffer so we don't jump to indexed retrieval too quickly function recordFilter(record: any) { - const value = record[attribute]; + // `record` may be null/undefined when called via a nested-path filter + // where an intermediate property is missing. + const value = record == null ? undefined : record[attribute]; let matches: boolean; if (typeof value !== 'object' || !value || allowObjectMatching) matches = filter(value); else if (Array.isArray(value)) matches = value.some(filter); @@ -862,20 +1010,25 @@ export function estimateCondition(table) { if (attribute_name == null || attribute_name === table.primaryKey) condition.estimated_count = 1; else if (Array.isArray(attribute_name) && attribute_name.length > 1) { const attribute = findAttribute(table.attributes, attribute_name[0]); - const relatedTable = attribute.definition?.tableClass || attribute.elements.definition?.tableClass; - const estimate = estimateCondition(relatedTable)({ - value: condition.value, - attribute: attribute_name.length > 2 ? attribute_name.slice(1) : attribute_name[1], - comparator: 'equals', - }); - const fromIndex = table.indices[attribute.relationship.from]; - // the estimated count is sum of the estimate of the related table and the estimate of the index - condition.estimated_count = - estimate + - (fromIndex - ? 
(estimate * estimatedEntryCount(table.indices[attribute.relationship.from])) / - (estimatedEntryCount(relatedTable.primaryStore) || 1) - : estimate); + const relatedTable = attribute?.definition?.tableClass || attribute?.elements?.definition?.tableClass; + if (!relatedTable) { + // Plain JSON nested path — no index, can't estimate cheaply. + condition.estimated_count = Infinity; + } else { + const estimate = estimateCondition(relatedTable)({ + value: condition.value, + attribute: attribute_name.length > 2 ? attribute_name.slice(1) : attribute_name[1], + comparator: 'equals', + }); + const fromIndex = table.indices[attribute.relationship?.from]; + // the estimated count is sum of the estimate of the related table and the estimate of the index + condition.estimated_count = + estimate + + (fromIndex + ? (estimate * estimatedEntryCount(table.indices[attribute.relationship.from])) / + (estimatedEntryCount(relatedTable.primaryStore) || 1) + : estimate); + } } else { // we only attempt to estimate count on equals operator because that's really all that LMDB supports (some other key-value stores like libmdbx could be considered if we need to do estimated counts of ranges at some point) const index = table.indices[attribute_name]; @@ -888,6 +1041,19 @@ export function estimateCondition(table) { condition.estimated_count = estimatedEntryCount(table.primaryStore) - (index ? index.getValuesCount(null) : 0); } else condition.estimated_count = Infinity; + } else if (searchType === 'in') { + const attribute_name = condition[0] ?? 
condition.attribute; + const index = table.indices[attribute_name]; + if (Array.isArray(condition.value) && index) { + // Sum of per-value matches (over-counts duplicates but is a fine ceiling) + let estimate = 0; + for (const item of condition.value) { + estimate += index.getValuesCount(item); + } + condition.estimated_count = estimate; + } else if (Array.isArray(condition.value)) { + condition.estimated_count = Infinity; + } else condition.estimated_count = Infinity; // for range queries (betweens, startsWith, greater, etc.), just arbitrarily guess } else if (searchType === 'starts_with' || searchType === 'prefix') condition.estimated_count = STARTS_WITH_ESTIMATE * estimatedEntryCount(table.primaryStore) + 1; @@ -920,6 +1086,7 @@ class SyntaxViolation extends Violation {} const NEEDS_PARSER = /[()[\]|!<>.]|(=\w*=)/; const QUERY_PARSER = /([^?&|=<>!([{}\]),]*)([([{}\])|,&]|[=<>!]*)/g; const VALUE_PARSER = /([^&|=[\]{}]+)([[\]{}]|[&|=]*)/g; +const FIQL_OPERATOR_NAME = /^[a-zA-Z_][a-zA-Z_0-9]*$/; let lastIndex; let currentQuery; let queryString; @@ -981,8 +1148,10 @@ function parseBlock(query, expectedEnd) { switch (operator) { case '=': if (attribute != undefined) { - // a FIQL operator like =gt= (and don't allow just any string) - if (value.length <= 2) comparator = value; + // FIQL operator like =gt= or =starts_with= — accept any identifier + // (letters, digits, underscores). Unknown comparator names are + // caught at execution time with a clearer error. 
+ if (FIQL_OPERATOR_NAME.test(value)) comparator = value; else recordError(`invalid FIQL operator ${value}`); valueDecoder = typedDecoding; // use typed/auto-cast decoding for FIQL operators } else { @@ -1024,12 +1193,7 @@ function parseBlock(query, expectedEnd) { } } else { if (!query.conditions) recordError('conditions/comparisons are not allowed in a property list'); - const condition = { - comparator, - attribute: attribute || null, - value: valueDecoder(value), - }; - if (comparator === 'eq') wildcardDecoding(condition, value); + const condition = buildCondition(attribute, comparator, value, valueDecoder); if (attribute === '') { // this is a nested condition const lastCondition = query.conditions[query.conditions.length - 1]; @@ -1154,12 +1318,7 @@ function parseBlock(query, expectedEnd) { if (query.conditions) { // finish condition if (attribute) { - const condition = { - comparator: comparator || 'equals', - attribute, - value: valueDecoder(value), - }; - if (comparator === 'eq') wildcardDecoding(condition, value); + const condition = buildCondition(attribute, comparator || 'equals', value, valueDecoder); assignOperator(query, lastBinaryOperator); query.conditions.push(condition); } else if (value) { @@ -1228,6 +1387,44 @@ function wildcardDecoding(condition, value) { } } +/** + * Build a condition from a parsed attribute, raw comparator name, raw value + * string, and the value decoder for the operator. 
Centralizes: + * - alias resolution and `not_` prefix handling (for `negated`) + * - `(v1,v2,...)` list-value syntax for list-taking comparators (aliases such as `includes` are resolved first) + * - wildcard detection on `eq` (typed-equality) + */ +function buildCondition( + attribute: any, + rawComparator: string | undefined, + rawValue: string, + valueDecoder: (s: string) => any +) { + const { comparator: resolvedComparator, negated } = resolveComparator(rawComparator); + let value: any; + if ( + LIST_VALUE_COMPARATORS.has(ALTERNATE_COMPARATOR_NAMES[resolvedComparator as string] || (resolvedComparator as string)) && + rawValue.length >= 2 && + rawValue.charCodeAt(0) === 0x28 /* ( */ && + rawValue.charCodeAt(rawValue.length - 1) === 0x29 /* ) */ + ) { + // `(v1,v2,...)` list-value syntax. Each element is decoded individually (decoder receives only the element, not map's index/array). + const inner = rawValue.slice(1, -1); + value = inner.length === 0 ? [] : inner.split(',').map((item) => valueDecoder(item)); + } else { + value = valueDecoder(rawValue); + } + const condition: any = { + comparator: resolvedComparator, + attribute: attribute || null, + value, + }; + if (negated) condition.negated = true; + // preserve existing wildcard behavior on coercive equality + if (rawComparator === 'eq') wildcardDecoding(condition, rawValue); + return condition; +} + function toSortObject(sort) { const sortObject = toSortEntry(sort[0]); if (sort.length > 1) { diff --git a/unitTests/resources/query-tier1.test.js b/unitTests/resources/query-tier1.test.js new file mode 100644 index 000000000..7d6c9b097 --- /dev/null +++ b/unitTests/resources/query-tier1.test.js @@ -0,0 +1,506 @@ +require('../testUtils'); +const assert = require('assert'); +const { setupTestDBPath } = require('../testUtils'); +const { parseQuery, resolveComparator, getNestedValue } = require('#src/resources/search'); +const { table } = require('#src/resources/databases'); +const { setMainIsWorker } = require('#js/server/threads/manageThreads'); + +describe('Query Tier-1 additions', () => { + let Items, People; + + before(async function () { + setupTestDBPath(); + setMainIsWorker(true); + + Items
= table({ + table: 'Tier1Items', + database: 'test', + attributes: [ + { name: 'id', isPrimaryKey: true, type: 'Int' }, + { name: 'status', indexed: true, type: 'String' }, + { name: 'category', indexed: true, type: 'String' }, + { name: 'price', indexed: true, type: 'Float' }, + { name: 'description', type: 'String' }, // non-indexed + { name: 'metadata' }, // plain JSON object: { city, region: { state, country } } + { name: 'tags', elements: { type: 'String' } }, // array of strings + ], + }); + + People = table({ + table: 'Tier1People', + database: 'test', + attributes: [ + { name: 'id', isPrimaryKey: true, type: 'Int' }, + { name: 'name', indexed: true, type: 'String' }, + { name: 'children' }, // array of plain JSON objects: { name, age } + ], + }); + + const items = [ + { + id: 1, + status: 'active', + category: 'A', + price: 10, + description: 'apple pie', + metadata: { city: 'Denver', region: { state: 'CO', country: 'US' } }, + tags: ['fresh', 'sale'], + }, + { + id: 2, + status: 'pending', + category: 'B', + price: 20, + description: 'banana bread', + metadata: { city: 'Boulder', region: { state: 'CO', country: 'US' } }, + tags: ['sale'], + }, + { + id: 3, + status: 'inactive', + category: 'A', + price: 30, + description: 'cherry tart', + metadata: { city: 'Denver', region: { state: 'CO', country: 'US' } }, + tags: [], + }, + { + id: 4, + status: 'active', + category: 'C', + price: 40, + description: 'date scone', + metadata: { city: 'Austin', region: { state: 'TX', country: 'US' } }, + tags: ['new'], + }, + { + id: 5, + status: 'cancelled', + category: 'B', + price: 50, + description: 'elderberry', + metadata: { city: 'Toronto', region: { state: 'ON', country: 'CA' } }, + tags: ['fresh'], + }, + { id: 6, status: 'active', category: 'A', price: 60, description: 'fig roll', metadata: null, tags: [] }, + ]; + for (const item of items) await Items.put(item); + + const people = [ + { + id: 1, + name: 'Alice', + children: [ + { name: 'Tim', age: 5 }, + { name: 
'Sara', age: 12 }, + ], + }, + { id: 2, name: 'Bob', children: [{ name: 'Pat', age: 22 }] }, + { + id: 3, + name: 'Carol', + children: [ + { name: 'Lee', age: 13 }, + { name: 'Max', age: 30 }, + ], + }, + { id: 4, name: 'Dave', children: [{ name: 'Eve', age: 8 }] }, + { id: 5, name: 'Eve', children: [] }, + ]; + for (const p of people) await People.put(p); + }); + + /** Drains an async iterable of records and returns their `id` values sorted in ascending numeric order. */ + async function collectIds(iter) { + const ids = []; + for await (const record of iter) ids.push(record.id); + return ids.sort((a, b) => a - b); + } + + describe('resolveComparator helper', () => { + it('preserves existing aliases as-is', () => { + assert.deepEqual(resolveComparator('eq'), { comparator: 'eq', negated: false }); + assert.deepEqual(resolveComparator('not_equal'), { comparator: 'not_equal', negated: false }); + assert.deepEqual(resolveComparator('greater_than'), { comparator: 'greater_than', negated: false }); + }); + it('strips not_ prefix on negatable comparators', () => { + assert.deepEqual(resolveComparator('not_in'), { comparator: 'in', negated: true }); + assert.deepEqual(resolveComparator('not_starts_with'), { comparator: 'starts_with', negated: true }); + assert.deepEqual(resolveComparator('not_between'), { comparator: 'between', negated: true }); + assert.deepEqual(resolveComparator('not_contains'), { comparator: 'contains', negated: true }); + assert.deepEqual(resolveComparator('not_ends_with'), { comparator: 'ends_with', negated: true }); + }); + it('returns input unchanged for unknown comparators', () => { + assert.deepEqual(resolveComparator('unknown'), { comparator: 'unknown', negated: false }); + assert.deepEqual(resolveComparator(undefined), { comparator: undefined, negated: false }); + }); + }); + + describe('getNestedValue helper', () => { + it('walks a single segment string', () => { + assert.equal(getNestedValue({ a: 1 }, 'a'), 1); + assert.equal(getNestedValue(null, 'a'), undefined); + }); + it('walks a path array', () => { + assert.equal(getNestedValue({ a: { b: { c: 7 
} } }, ['a', 'b', 'c']), 7); + }); + it('returns undefined for missing intermediates', () => { + assert.equal(getNestedValue({ a: null }, ['a', 'b']), undefined); + assert.equal(getNestedValue({}, ['a', 'b', 'c']), undefined); + }); + }); + + describe('REST query parsing', () => { + it('parses (v1,v2,v3) list-value syntax with `in`', () => { + const q = parseQuery('status=in=(active,pending,inactive)'); + assert.equal(q.conditions[0].comparator, 'in'); + assert.deepEqual(q.conditions[0].value, ['active', 'pending', 'inactive']); + }); + it('parses single-element list', () => { + const q = parseQuery('status=in=(active)'); + assert.deepEqual(q.conditions[0].value, ['active']); + }); + it('parses empty list', () => { + const q = parseQuery('status=in=()'); + assert.deepEqual(q.conditions[0].value, []); + }); + it('parses not_in to negated in', () => { + const q = parseQuery('status=not_in=(active,pending)'); + assert.equal(q.conditions[0].comparator, 'in'); + assert.deepEqual(q.conditions[0].value, ['active', 'pending']); + assert.equal(q.conditions[0].negated, true); + }); + it('parses not_starts_with as negated starts_with', () => { + const q = parseQuery('name=not_starts_with=Joh'); + assert.equal(q.conditions[0].comparator, 'starts_with'); + assert.equal(q.conditions[0].value, 'Joh'); + assert.equal(q.conditions[0].negated, true); + }); + it('parses between with list value', () => { + const q = parseQuery('age=between=(18,65)'); + assert.equal(q.conditions[0].comparator, 'between'); + assert.deepEqual(q.conditions[0].value, ['18', '65']); + }); + it('parses typed values inside list', () => { + const q = parseQuery('id=in=(number:1,number:2,number:3)'); + assert.deepEqual(q.conditions[0].value, [1, 2, 3]); + }); + it('preserves backwards-compat for non-list (...) 
values on non-list comparators', () => { + // gt is not a list-value comparator, so (4) stays as a string + const q = parseQuery('value=gt=(4)'); + assert.equal(q.conditions[0].value, '(4)'); + }); + it('accepts multi-character FIQL operators', () => { + const q = parseQuery('a=between=(1,2)|b=in=(x,y)'); + assert.equal(q.conditions[0].comparator, 'between'); + assert.equal(q.conditions[1].comparator, 'in'); + }); + }); + + describe('`in` comparator execution', () => { + it('matches multiple values on indexed attribute (full scan in Phase 1)', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'status', comparator: 'in', value: ['active', 'pending'] }], + }) + ); + assert.deepEqual(results, [1, 2, 4, 6]); + }); + it('empty list matches nothing', async function () { + const results = await collectIds( + Items.search({ allowFullScan: true, conditions: [{ attribute: 'status', comparator: 'in', value: [] }] }) + ); + assert.deepEqual(results, []); + }); + it('single-value list is equivalent to equals', async function () { + const results = await collectIds( + Items.search({ allowFullScan: true, conditions: [{ attribute: 'category', comparator: 'in', value: ['A'] }] }) + ); + assert.deepEqual(results, [1, 3, 6]); + }); + it('combines with another indexed condition (no allowFullScan needed)', async function () { + const results = await collectIds( + Items.search({ + operator: 'and', + conditions: [ + { attribute: 'category', comparator: 'equals', value: 'A' }, // indexed + { attribute: 'status', comparator: 'in', value: ['active', 'inactive'] }, + ], + }) + ); + assert.deepEqual(results, [1, 3, 6]); + }); + it('throws when in is used without indexed sibling and full scan disallowed', async function () { + await assert.rejects(async () => { + for await (const _ of Items.search({ + allowFullScan: false, + conditions: [{ attribute: 'status', comparator: 'in', value: ['active'] }], + })); + }, /can not 
search for|index/i); + }); + it('matches arrays of values on multi-valued attribute (some-of semantics)', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'tags', comparator: 'in', value: ['fresh', 'new'] }], + }) + ); + // items 1 (fresh,sale), 4 (new), 5 (fresh) — items 2,3,6 don't have any of these + assert.deepEqual(results, [1, 4, 5]); + }); + }); + + describe('negated comparators (`not_in`, `not_starts_with`, etc.)', () => { + it('not_in matches records whose value is NOT in the list', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'status', comparator: 'in', value: ['active'], negated: true }], + }) + ); + assert.deepEqual(results, [2, 3, 5]); + }); + it('not_in via parser/REST shape', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'status', comparator: 'not_in', value: ['active', 'pending'] }], + }) + ); + assert.deepEqual(results, [3, 5]); + }); + it('not_starts_with', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'description', comparator: 'starts_with', value: 'a', negated: true }], + }) + ); + // only id 1 starts with 'a' (apple pie); rest do not + assert.deepEqual(results, [2, 3, 4, 5, 6]); + }); + it('not_contains', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'description', comparator: 'contains', value: 'e', negated: true }], + }) + ); + // Only a description without the letter 'e' survives the negated `contains`: + // id 1: 'apple pie' has 'e' + // id 2: 'banana bread' has 'e' + // id 3: 'cherry tart' has 'e' + // id 4: 'date scone' has 'e' + // id 5: 'elderberry' has 'e' + // id 6: 'fig roll' no 'e' + 
assert.deepEqual(results, [6]); + }); + it('not_between', async function () { + const results = await collectIds( + Items.search({ + operator: 'and', + conditions: [ + { attribute: 'category', comparator: 'in', value: ['A', 'B', 'C'] }, + { attribute: 'price', comparator: 'between', value: [20, 40], negated: true }, + ], + }) + ); + // prices not in [20, 40]: 10 (id 1), 50 (id 5), 60 (id 6) + assert.deepEqual(results, [1, 5, 6]); + }); + }); + + describe('Nested-path filtering (plain JSON paths)', () => { + it('filters on 2-level nested path', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: ['metadata', 'city'], comparator: 'equals', value: 'Denver' }], + }) + ); + assert.deepEqual(results, [1, 3]); + }); + it('filters on 3-level nested path', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: ['metadata', 'region', 'country'], comparator: 'equals', value: 'CA' }], + }) + ); + assert.deepEqual(results, [5]); + }); + it('handles missing intermediate (null metadata)', async function () { + // id 6 has metadata: null — should NOT match any nested-path equality + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: ['metadata', 'city'], comparator: 'equals', value: 'Denver' }], + }) + ); + assert.equal(results.includes(6), false); + }); + it('parses dot-notation REST query for nested path', () => { + const q = parseQuery('metadata.city=Denver'); + assert.deepEqual(q.conditions[0].attribute, ['metadata', 'city']); + }); + it('combines nested path filter with another indexed condition', async function () { + const results = await collectIds( + Items.search({ + operator: 'and', + conditions: [ + { attribute: 'category', comparator: 'equals', value: 'A' }, + { attribute: ['metadata', 'city'], comparator: 'equals', value: 'Denver' }, + ], + }) + ); + assert.deepEqual(results, [1, 
3]); + }); + it('supports starts_with on nested path', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: ['metadata', 'region', 'state'], comparator: 'starts_with', value: 'C' }], + }) + ); + assert.deepEqual(results, [1, 2, 3]); + }); + }); + + describe('Multi-value association: independent vs chained conditions', () => { + it('independent conditions on multi-value attr: any element matches each (different elements OK)', async function () { + // people with SOME child > 10 AND SOME child < 15 (could be different children) + const results = await collectIds( + People.search({ + operator: 'and', + allowFullScan: true, + conditions: [ + { attribute: ['children', 'age'], comparator: 'gt', value: 10 }, + { attribute: ['children', 'age'], comparator: 'lt', value: 15 }, + ], + }) + ); + // Alice: children ages [5,12] — 12>10 yes, 5<15 yes (different children) → match + // Bob: [22] — 22>10 yes, 22<15 no → no match + // Carol: [13,30] — 30>10 yes, 13<15 yes → match + // Dave: [8] — 8>10 no → no match + // Eve: [] → no match + assert.deepEqual(results, [1, 3]); + }); + it('chained conditions: same element must satisfy both (collapses to range)', async function () { + // people with SOME child whose age is BOTH > 10 AND < 15 + const results = await collectIds( + People.search({ + allowFullScan: true, + conditions: [ + { + attribute: ['children', 'age'], + comparator: 'gt', + value: 10, + chainedConditions: [{ comparator: 'lt', value: 15 }], + }, + ], + }) + ); + // Alice: child age 12 satisfies 10 8 AND SOME child < 6 (separate elements ok) + const independent = await collectIds( + People.search({ + operator: 'and', + allowFullScan: true, + conditions: [ + { attribute: ['children', 'age'], comparator: 'gt', value: 8 }, + { attribute: ['children', 'age'], comparator: 'lt', value: 6 }, + ], + }) + ); + // Alice: [5,12] — 12>8 yes, 5<6 yes (different children) → match + // no others have a child <6 — 
only Alice + assert.deepEqual(independent, [1]); + + // Now chained: SOME child satisfies BOTH 8 { + it('accepts not_in directly without going through parser', async function () { + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'status', comparator: 'not_in', value: ['active', 'pending', 'cancelled'] }], + }) + ); + assert.deepEqual(results, [3]); // only inactive + }); + }); + + describe('Edge cases', () => { + it('not_in: records with null value match (since null is not in the list)', async function () { + // Add a record with null status to confirm null behavior + await Items.put({ id: 99, status: null, category: 'A', price: 1, description: 'null-status' }); + const results = await collectIds( + Items.search({ + allowFullScan: true, + conditions: [{ attribute: 'status', comparator: 'in', value: ['active', 'pending'], negated: true }], + }) + ); + assert(results.includes(99), 'record with null status should match not_in'); + await Items.delete(99); + }); + it('triple-nested path through array intermediate: some-of at every level', async function () { + // children is an array of objects; each child carries an `items` array of objects, and the filter targets their `kind` field + const Multi = table({ + table: 'Tier1Multi', + database: 'test', + attributes: [{ name: 'id', isPrimaryKey: true, type: 'Int' }, { name: 'children' }], + }); + await Multi.put({ + id: 1, + children: [ + { name: 'A', items: [{ kind: 'red' }, { kind: 'blue' }] }, + { name: 'B', items: [{ kind: 'green' }] }, + ], + }); + await Multi.put({ + id: 2, + children: [{ name: 'C', items: [{ kind: 'green' }] }], + }); + await Multi.put({ id: 3, children: [] }); + + const results = []; + for await (const r of Multi.search({ + allowFullScan: true, + conditions: [{ attribute: ['children', 'items', 'kind'], comparator: 'equals', value: 'red' }], + })) { + results.push(r.id); + } + assert.deepEqual(results.sort(), [1]); + }); + it('preserves backwards-compat: existing queries without (...) 
syntax', async function () { + // Make sure regular equality still works exactly the same + const results = await collectIds( + Items.search({ conditions: [{ attribute: 'status', comparator: 'equals', value: 'active' }] }) + ); + assert.deepEqual(results, [1, 4, 6]); + }); + }); +});