diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 6ad935e59d..a40a6f733f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -17,13 +17,20 @@ import java.util.function.BiFunction; import lombok.Getter; import lombok.Setter; +import org.apache.calcite.config.NullCollation; import org.apache.calcite.rex.RexCorrelVariable; import org.apache.calcite.rex.RexLambdaRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.tools.FrameworkConfig; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.calcite.utils.CalciteToolsHelper; import org.opensearch.sql.calcite.utils.CalciteToolsHelper.OpenSearchRelBuilder; +import org.opensearch.sql.calcite.validate.OpenSearchSparkSqlDialect; +import org.opensearch.sql.calcite.validate.PplTypeCoercion; +import org.opensearch.sql.calcite.validate.PplTypeCoercionRule; +import org.opensearch.sql.calcite.validate.PplValidator; +import org.opensearch.sql.calcite.validate.SqlOperatorTableProvider; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.expression.function.FunctionProperties; @@ -72,6 +79,14 @@ public class CalcitePlanContext { /** Whether we're currently inside a lambda context. */ @Getter @Setter private boolean inLambdaContext = false; + /** + * -- SETTER -- Sets the SQL operator table provider. This must be called during initialization by + * the opensearch module. 
+ * + * @param provider the provider to use for obtaining operator tables + */ + @Setter private static SqlOperatorTableProvider operatorTableProvider; + private CalcitePlanContext(FrameworkConfig config, SysLimit sysLimit, QueryType queryType) { this.config = config; this.sysLimit = sysLimit; @@ -101,6 +116,34 @@ private CalcitePlanContext(CalcitePlanContext parent) { this.inLambdaContext = true; // Mark that we're inside a lambda } + /** + * Creates a new SqlValidator instance. SqlValidator is stateful and should not be reused across + * validations, so a new instance is created for each call. + * + * @return new SqlValidator instance + */ + public SqlValidator getValidator() { + if (operatorTableProvider == null) { + throw new IllegalStateException( + "SqlOperatorTableProvider must be set before creating CalcitePlanContext"); + } + SqlValidator.Config validatorConfig = + SqlValidator.Config.DEFAULT + .withTypeCoercionRules(PplTypeCoercionRule.instance()) + .withTypeCoercionFactory(PplTypeCoercion::create) + // Use lenient conformance for PPL compatibility + .withConformance(OpenSearchSparkSqlDialect.DEFAULT.getConformance()) + // Use Spark SQL's NULL collation (NULLs sorted LOW/FIRST) + .withDefaultNullCollation(NullCollation.LOW) + // This ensures that coerced arguments are replaced with cast version in sql + // select list because coercion is performed during select list expansion during + // sql validation. 
Affects 4356.yml + // See SqlValidatorImpl#validateSelectList and AggConverter#translateAgg + .withIdentifierExpansion(true); + return PplValidator.create( + config, operatorTableProvider.getOperatorTable(), TYPE_FACTORY, validatorConfig); + } + public RexNode resolveJoinCondition( UnresolvedExpression expr, BiFunction transformFunction) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 58087898fe..f493ec3038 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -43,9 +43,12 @@ import java.util.stream.IntStream; import java.util.stream.Stream; import lombok.AllArgsConstructor; +import lombok.NonNull; import org.apache.calcite.adapter.enumerable.RexToLixTranslator; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.ViewExpanders; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; @@ -56,10 +59,14 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.rex.RexWindow; import org.apache.calcite.rex.RexWindowBounds; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlLibraryOperators; @@ -765,8 +772,8 @@ public RelNode visitTranspose( .map( f -> Map.entry( - ImmutableList.of(rx.makeLiteral(f)), - 
ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true)))) + ImmutableList.of((RexLiteral) rx.makeLiteral(f, varchar, true)), + ImmutableList.of(rx.makeCast(varchar, b.field(f), true, true)))) .collect(Collectors.toList())); // Step 3: Trim spaces from columnName column before pivot @@ -1795,6 +1802,7 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) // Default: first get rawExpr List overExpressions = node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList(); + overExpressions = embedExistingCollationsIntoOver(overExpressions, context); if (hasGroup) { // only build sequence when there is by condition @@ -1836,6 +1844,84 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) return context.relBuilder.peek(); } + /** + * Embed existing collation into window function's over clauses. + * + *

Window functions with frame specifications like {@code ROWS n PRECEDING} require ORDER BY to + * determine row order. Without it, results are non-deterministic. + * + *

Without this fix, the initial plan has ORDER BY separate from window functions: + * + *

+   * LogicalProject(SUM($5) OVER (ROWS 1 PRECEDING))  ← Missing ORDER BY
+   *   LogicalSort(sort0=[$5])
+   * 
+ * + *

This causes problems during validation as the order is not bound to the window. With this + * fix, sort collations are embedded into each {@code RexOver} window: + * + *

+   * LogicalProject(SUM($5) OVER (ORDER BY $5 ROWS 1 PRECEDING))  ← ORDER BY embedded
+   * 
+ * + * @param overExpressions Window function expressions (may contain nested {@link RexOver}) + * @param context Plan context for building RexNodes + * @return Expressions with ORDER BY embedded in all window specifications + */ + private List embedExistingCollationsIntoOver( + List overExpressions, CalcitePlanContext context) { + RelCollation existingCollation = context.relBuilder.peek().getTraitSet().getCollation(); + List<@NonNull RelFieldCollation> relCollations = + existingCollation == null ? List.of() : existingCollation.getFieldCollations(); + ImmutableList<@NonNull RexFieldCollation> rexCollations = + relCollations.stream() + .map(f -> relCollationToRexCollation(f, context.relBuilder)) + .collect(ImmutableList.toImmutableList()); + return overExpressions.stream() + .map( + n -> + n.accept( + new RexShuttle() { + @Override + public RexNode visitOver(RexOver over) { + RexWindow window = over.getWindow(); + return context.rexBuilder.makeOver( + over.getType(), + over.getAggOperator(), + over.getOperands(), + window.partitionKeys, + rexCollations, + window.getLowerBound(), + window.getUpperBound(), + window.isRows(), + true, + false, + over.isDistinct(), + over.ignoreNulls()); + } + })) + .collect(Collectors.toList()); + } + + private static RexFieldCollation relCollationToRexCollation( + RelFieldCollation relCollation, RelBuilder builder) { + RexNode fieldRef = builder.field(relCollation.getFieldIndex()); + + // Convert direction flags to SqlKind set + Set flags = new HashSet<>(); + if (relCollation.direction == RelFieldCollation.Direction.DESCENDING + || relCollation.direction == RelFieldCollation.Direction.STRICTLY_DESCENDING) { + flags.add(SqlKind.DESCENDING); + } + if (relCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) { + flags.add(SqlKind.NULLS_FIRST); + } else if (relCollation.nullDirection == RelFieldCollation.NullDirection.LAST) { + flags.add(SqlKind.NULLS_LAST); + } + + return new RexFieldCollation(fieldRef, flags); + } + private 
List wrapWindowFunctionsWithGroupNotNull( List overExpressions, RexNode groupNotNull, CalcitePlanContext context) { List wrappedOverExprs = new ArrayList<>(overExpressions.size()); diff --git a/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java b/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java index 9b8ac7dfc9..409f79e56e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java +++ b/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java @@ -14,14 +14,19 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlCallBinding; import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; +import org.apache.calcite.sql.validate.implicit.TypeCoercionImpl; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.calcite.type.AbstractExprRelDataType; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; @@ -146,7 +151,7 @@ public RexNode makeCast( // SqlStdOperatorTable.NOT_EQUALS, // ImmutableList.of(exp, makeZeroLiteral(sourceType))); } - } else if (OpenSearchTypeFactory.isUserDefinedType(type)) { + } else if (OpenSearchTypeUtil.isUserDefinedType(type)) { if (RexLiteral.isNullLiteral(exp)) { return super.makeCast(pos, type, exp, matchNullability, safe, format); } @@ -185,4 +190,33 @@ else if ((SqlTypeUtil.isApproximateNumeric(sourceType) || 
SqlTypeUtil.isDecimal( } return super.makeCast(pos, type, exp, matchNullability, safe, format); } + + /** + * Derives the return type of call to an operator. + * + *

In Calcite, coercion between STRING and NUMERIC operands takes place during converting SQL + * to RelNode. However, as we are building logical plans directly, the coercion is not yet + * implemented at this point. Hence, we duplicate {@link + * TypeCoercionImpl#binaryArithmeticWithStrings} here to infer the correct type, enabling + * operations like {@code "5" / 10}. The actual coercion will be inserted later when performing + * validation on SqlNode. + * + * @see TypeCoercionImpl#binaryArithmeticCoercion(SqlCallBinding) + * @param op the operator being called + * @param exprs actual operands + * @return derived type + */ + @Override + public RelDataType deriveReturnType(SqlOperator op, List exprs) { + if (op.getKind().belongsTo(SqlKind.BINARY_ARITHMETIC) && exprs.size() == 2) { + final RelDataType type1 = exprs.get(0).getType(); + final RelDataType type2 = exprs.get(1).getType(); + if (SqlTypeUtil.isNumeric(type1) && OpenSearchTypeUtil.isCharacter(type2)) { + return type1; + } else if (OpenSearchTypeUtil.isCharacter(type1) && SqlTypeUtil.isNumeric(type2)) { + return type2; + } + } + return super.deriveReturnType(op, exprs); + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java index a6d57ea01f..bd13be2f69 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java @@ -62,13 +62,11 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptTable; -import org.apache.calcite.plan.RelOptTable.ViewExpander; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.prepare.CalciteCatalogReader; import org.apache.calcite.prepare.CalcitePrepareImpl; -import 
org.apache.calcite.prepare.Prepare.CatalogReader; import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; @@ -88,7 +86,6 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.validate.SqlValidator; -import org.apache.calcite.sql2rel.RelFieldTrimmer; import org.apache.calcite.sql2rel.SqlRexConvertletTable; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.tools.FrameworkConfig; @@ -98,12 +95,12 @@ import org.apache.calcite.tools.RelRunner; import org.apache.calcite.util.Holder; import org.apache.calcite.util.Util; -import org.checkerframework.checker.nullness.qual.Nullable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.plan.Scannable; import org.opensearch.sql.calcite.plan.rule.OpenSearchRules; import org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule; import org.opensearch.sql.calcite.profile.PlanProfileBuilder; +import org.opensearch.sql.calcite.validate.converters.OpenSearchSqlToRelConverter; import org.opensearch.sql.expression.function.PPLBuiltinOperators; import org.opensearch.sql.monitor.profile.ProfileContext; import org.opensearch.sql.monitor.profile.ProfileMetric; @@ -259,7 +256,7 @@ private void registerCustomizedRules(RelOptPlanner planner) { * return {@link OpenSearchCalcitePreparingStmt} */ @Override - protected CalcitePrepareImpl.CalcitePreparingStmt getPreparingStmt( + public CalcitePrepareImpl.CalcitePreparingStmt getPreparingStmt( CalcitePrepare.Context context, Type elementType, CalciteCatalogReader catalogReader, @@ -369,34 +366,6 @@ protected SqlToRelConverter getSqlToRelConverter( } } - public static class OpenSearchSqlToRelConverter extends SqlToRelConverter { - protected final RelBuilder relBuilder; - - public OpenSearchSqlToRelConverter( - ViewExpander viewExpander, - @Nullable SqlValidator validator, - CatalogReader 
catalogReader, - RelOptCluster cluster, - SqlRexConvertletTable convertletTable, - Config config) { - super(viewExpander, validator, catalogReader, cluster, convertletTable, config); - this.relBuilder = - config - .getRelBuilderFactory() - .create( - cluster, - validator != null - ? validator.getCatalogReader().unwrap(RelOptSchema.class) - : null) - .transform(config.getRelBuilderConfigTransform()); - } - - @Override - protected RelFieldTrimmer newFieldTrimmer() { - return new OpenSearchRelFieldTrimmer(validator, this.relBuilder); - } - } - public static class OpenSearchRelRunners { /** * Runs a relational expression by existing connection. This class copied from {@link @@ -438,7 +407,8 @@ public RelNode visit(TableScan scan) { "The 'bins' parameter on timestamp fields requires: (1) pushdown to be enabled" + " (controlled by plugins.calcite.pushdown.enabled, enabled by default), and" + " (2) the timestamp field to be used as an aggregation bucket (e.g., 'stats" - + " count() by @timestamp')."); + + " count() by @timestamp').", + e); } throw Util.throwAsRuntime(e); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 8dfe963081..27cb6e233f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -5,9 +5,6 @@ package org.opensearch.sql.calcite.utils; -import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT.EXPR_DATE; -import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT.EXPR_TIME; -import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT.EXPR_TIMESTAMP; import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.BINARY; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; @@ -38,6 +35,7 
@@ import java.util.Locale; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import lombok.Getter; import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.rel.type.RelDataType; @@ -45,12 +43,14 @@ import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; +import org.checkerframework.checker.nullness.qual.Nullable; import org.opensearch.sql.calcite.type.AbstractExprRelDataType; import org.opensearch.sql.calcite.type.ExprBinaryType; import org.opensearch.sql.calcite.type.ExprDateType; import org.opensearch.sql.calcite.type.ExprIPType; import org.opensearch.sql.calcite.type.ExprTimeStampType; import org.opensearch.sql.calcite.type.ExprTimeType; +import org.opensearch.sql.calcite.validate.PplTypeCoercionRule; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.data.type.ExprCoreType; @@ -259,7 +259,7 @@ public static String getLegacyTypeName(RelDataType relDataType, QueryType queryT /** Converts a Calcite data type to OpenSearch ExprCoreType. 
*/ public static ExprType convertRelDataTypeToExprType(RelDataType type) { - if (isUserDefinedType(type)) { + if (OpenSearchTypeUtil.isUserDefinedType(type)) { AbstractExprRelDataType udt = (AbstractExprRelDataType) type; return udt.getExprType(); } @@ -330,26 +330,75 @@ public Type getJavaClass(RelDataType type) { return super.getJavaClass(type); } - /** - * Whether a given RelDataType is a user-defined type (UDT) - * - * @param type the RelDataType to check - * @return true if the type is a user-defined type, false otherwise - */ - public static boolean isUserDefinedType(RelDataType type) { - return type instanceof AbstractExprRelDataType; + @Override + public @Nullable RelDataType leastRestrictive(List types) { + // Handle UDTs separately, otherwise the least restrictive type will become VARCHAR + if (types.stream().anyMatch(OpenSearchTypeUtil::isUserDefinedType) + && types.stream().allMatch(Objects::nonNull)) { + int nullCount = 0; + int anyCount = 0; + int nullableCount = 0; + int dateCount = 0; + int timeCount = 0; + int ipCount = 0; + int binaryCount = 0; + int otherCount = 0; + for (RelDataType t : types) { + if (t.isNullable()) { + nullableCount++; + } + if (t.getSqlTypeName() == SqlTypeName.NULL) { + nullCount++; + } else if (t.getSqlTypeName() == SqlTypeName.ANY) { + anyCount++; + } + if (t.getSqlTypeName() == SqlTypeName.OTHER) { + otherCount++; + } + if (OpenSearchTypeUtil.isDate(t)) { + dateCount++; + } else if (OpenSearchTypeUtil.isTime(t)) { + timeCount++; + } else if (OpenSearchTypeUtil.isIp(t)) { + ipCount++; + } else if (OpenSearchTypeUtil.isBinary(t)) { + binaryCount++; + } + } + // When there is ANY, fall through to standard leastRestrictive + if (anyCount == 0) { + RelDataType udt; + boolean nullable = nullableCount > 0 || nullCount > 0; + if (dateCount + nullCount == types.size()) { + udt = createUDT(ExprUDT.EXPR_DATE, nullable); + } else if (timeCount + nullCount == types.size()) { + udt = createUDT(ExprUDT.EXPR_TIME, nullable); + } + // 
There are cases where UDT IP interleaves with its intermediate SQL type for validation + // OTHER, we check otherCount to patch such cases + else if (ipCount + nullCount == types.size() || otherCount + nullCount == types.size()) { + udt = createUDT(ExprUDT.EXPR_IP, nullable); + } else if (binaryCount + nullCount == types.size()) { + udt = createUDT(ExprUDT.EXPR_BINARY, nullable); + } + // There exists a mix of time, date, and timestamp (and optionally null) + else if (binaryCount == 0 && ipCount == 0) { + udt = createUDT(ExprUDT.EXPR_TIMESTAMP, nullable); + } else { + udt = createSqlType(SqlTypeName.VARCHAR, nullable); + } + return udt; + } + } + RelDataType type = leastRestrictive(types, PplTypeCoercionRule.assignmentInstance()); + // Convert CHAR(precision) to VARCHAR so that results won't be padded + if (type != null && SqlTypeName.CHAR.equals(type.getSqlTypeName())) { + return createSqlType(SqlTypeName.VARCHAR, type.isNullable()); + } + return type; } - /** - * Checks if the RelDataType represents a numeric type. Supports standard SQL numeric types - * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric - * types, and string types (VARCHAR, CHAR). 
- * - * @param fieldType the RelDataType to check - * @return true if the type is numeric or string, false otherwise - */ public static boolean isNumericType(RelDataType fieldType) { - // Check standard SQL numeric types SqlTypeName sqlType = fieldType.getSqlTypeName(); if (sqlType == SqlTypeName.INTEGER || sqlType == SqlTypeName.BIGINT @@ -361,32 +410,18 @@ public static boolean isNumericType(RelDataType fieldType) { || sqlType == SqlTypeName.REAL) { return true; } - - // Check string types (VARCHAR, CHAR) if (sqlType == SqlTypeName.VARCHAR || sqlType == SqlTypeName.CHAR) { return true; } - - // Check for OpenSearch UDT numeric types - if (isUserDefinedType(fieldType)) { + if (OpenSearchTypeUtil.isUserDefinedType(fieldType)) { AbstractExprRelDataType exprType = (AbstractExprRelDataType) fieldType; ExprType udtType = exprType.getExprType(); return ExprCoreType.numberTypes().contains(udtType); } - return false; } - /** - * Checks if the RelDataType represents a time-based field (timestamp, date, or time). Supports - * both standard SQL time types (including TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE, DATE, TIME, - * and their timezone variants) and OpenSearch UDT time types. 
- * - * @param fieldType the RelDataType to check - * @return true if the type is time-based, false otherwise - */ public static boolean isTimeBasedType(RelDataType fieldType) { - // Check standard SQL time types SqlTypeName sqlType = fieldType.getSqlTypeName(); if (sqlType == SqlTypeName.TIMESTAMP || sqlType == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE @@ -395,17 +430,13 @@ public static boolean isTimeBasedType(RelDataType fieldType) { || sqlType == SqlTypeName.TIME_WITH_LOCAL_TIME_ZONE) { return true; } - - // Check for OpenSearch UDT types (EXPR_TIMESTAMP mapped to VARCHAR) - if (isUserDefinedType(fieldType)) { + if (OpenSearchTypeUtil.isUserDefinedType(fieldType)) { AbstractExprRelDataType exprType = (AbstractExprRelDataType) fieldType; ExprType udtType = exprType.getExprType(); return udtType == ExprCoreType.TIMESTAMP || udtType == ExprCoreType.DATE || udtType == ExprCoreType.TIME; } - - // Fallback check if type string contains EXPR_TIMESTAMP return fieldType.toString().contains("EXPR_TIMESTAMP"); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeUtil.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeUtil.java new file mode 100644 index 0000000000..e75bf9eced --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeUtil.java @@ -0,0 +1,225 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; + +/** + * Utility methods for to derive types, containing special handling logics for user-defined-types. 
+ * + * @see SqlTypeUtil utilities used during SQL validation or type derivation. + */ +@UtilityClass +public class OpenSearchTypeUtil { + /** + * Whether a given RelDataType is a user-defined type (UDT) + * + * @param type the RelDataType to check + * @return true if the type is a user-defined type, false otherwise + */ + public static boolean isUserDefinedType(RelDataType type) { + return type instanceof AbstractExprRelDataType; + } + + /** + * Checks if the RelDataType represents a numeric type. Supports standard SQL numeric types + * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric + * types, and string types (VARCHAR, CHAR). + * + * @param fieldType the RelDataType to check + * @return true if the type is numeric or string, false otherwise + */ + public static boolean isNumericOrCharacter(RelDataType fieldType) { + // Check for OpenSearch UDT numeric types + if (isUserDefinedType(fieldType)) { + AbstractExprRelDataType exprType = (AbstractExprRelDataType) fieldType; + ExprType udtType = exprType.getExprType(); + return ExprCoreType.numberTypes().contains(udtType); + } + + // Check standard SQL numeric types & string types (VARCHAR, CHAR) + if (SqlTypeUtil.isNumeric(fieldType) || SqlTypeUtil.isCharacter(fieldType)) { + return true; + } + + return false; + } + + /** + * Checks if the RelDataType represents a time-based field (timestamp, date, or time). Supports + * both standard SQL time types (including TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE, DATE, TIME, + * and their timezone variants) and OpenSearch UDT time types. 
+ * + * @param fieldType the RelDataType to check + * @return true if the type is time-based, false otherwise + */ + public static boolean isDatetime(RelDataType fieldType) { + // Check standard SQL time types + if (SqlTypeUtil.isDatetime(fieldType)) { + return true; + } + + // Check for OpenSearch UDT types (EXPR_TIMESTAMP mapped to VARCHAR) + if (isUserDefinedType(fieldType)) { + AbstractExprRelDataType exprType = (AbstractExprRelDataType) fieldType; + ExprType udtType = exprType.getExprType(); + return udtType == ExprCoreType.TIMESTAMP + || udtType == ExprCoreType.DATE + || udtType == ExprCoreType.TIME; + } + + return false; + } + + /** + * Checks whether a {@link RelDataType} represents a date type. + * + *

This method returns true for both Calcite's built-in {@link SqlTypeName#DATE} type and + * OpenSearch's user-defined date type {@link OpenSearchTypeFactory.ExprUDT#EXPR_DATE}. + * + * @param type the type to check + * @return true if the type is a date type (built-in or user-defined), false otherwise + */ + public static boolean isDate(RelDataType type) { + if (isUserDefinedType(type)) { + if (((AbstractExprRelDataType) type).getUdt() == OpenSearchTypeFactory.ExprUDT.EXPR_DATE) { + return true; + } + } + return SqlTypeName.DATE.equals(type.getSqlTypeName()); + } + + /** + * Checks whether a {@link RelDataType} represents a timestamp type. + * + *

This method returns true for both Calcite's built-in {@link SqlTypeName#TIMESTAMP} type and + * OpenSearch's user-defined timestamp type {@link OpenSearchTypeFactory.ExprUDT#EXPR_TIMESTAMP}. + * + * @param type the type to check + * @return true if the type is a timestamp type (built-in or user-defined), false otherwise + */ + public static boolean isTimestamp(RelDataType type) { + if (isUserDefinedType(type)) { + if (((AbstractExprRelDataType) type).getUdt() + == OpenSearchTypeFactory.ExprUDT.EXPR_TIMESTAMP) { + return true; + } + } + return SqlTypeName.TIMESTAMP.equals(type.getSqlTypeName()); + } + + /** + * Checks whether a {@link RelDataType} represents a time type. + * + *

This method returns true for both Calcite's built-in {@link SqlTypeName#TIME} type and + * OpenSearch's user-defined time type {@link OpenSearchTypeFactory.ExprUDT#EXPR_TIME}. + * + * @param type the type to check + * @return true if the type is a time type (built-in or user-defined), false otherwise + */ + public static boolean isTime(RelDataType type) { + if (isUserDefinedType(type)) { + if (((AbstractExprRelDataType) type).getUdt() == OpenSearchTypeFactory.ExprUDT.EXPR_TIME) { + return true; + } + } + return SqlTypeName.TIME.equals(type.getSqlTypeName()); + } + + /** + * This method should be used in place for {@link SqlTypeUtil#isCharacter(RelDataType)} because + * user-defined types also have VARCHAR as their SqlTypeName. + */ + public static boolean isCharacter(RelDataType type) { + return !isUserDefinedType(type) && SqlTypeUtil.isCharacter(type); + } + + /** + * Checks whether a {@link RelDataType} represents an IP address type. + * + *

This method returns true only for OpenSearch's user-defined IP type {@link + * OpenSearchTypeFactory.ExprUDT#EXPR_IP}. + * + * @param type the type to check + * @return true if the type is an IP address type, false otherwise + */ + public static boolean isIp(RelDataType type) { + return isIp(type, false); + } + + /** + * Checks whether a {@link RelDataType} represents an IP address type. If {@code acceptOther} is + * set, {@link SqlTypeName#OTHER} is also accepted as an IP type. + * + *

{@link SqlTypeName#OTHER} is "borrowed" to represent IP type during validation because + * SqlTypeName.IP does not exist + * + * @param type the type to check + * @param acceptOther whether to accept OTHER as a valid IP type + * @return true if the type is an IP address type, false otherwise + */ + public static boolean isIp(RelDataType type, boolean acceptOther) { + if (isUserDefinedType(type)) { + return ((AbstractExprRelDataType) type).getUdt() == OpenSearchTypeFactory.ExprUDT.EXPR_IP; + } + if (acceptOther) { + return type.getSqlTypeName() == SqlTypeName.OTHER; + } + return false; + } + + /** + * Checks whether a {@link RelDataType} represents a binary type. + * + *

This method returns true for both Calcite's built-in binary types (BINARY, VARBINARY) and + * OpenSearch's user-defined binary type {@link OpenSearchTypeFactory.ExprUDT#EXPR_BINARY}. + * + * @param type the type to check + * @return true if the type is a binary type (built-in or user-defined), false otherwise + */ + public static boolean isBinary(RelDataType type) { + if (isUserDefinedType(type)) { + return ((AbstractExprRelDataType) type).getUdt() + == OpenSearchTypeFactory.ExprUDT.EXPR_BINARY; + } + return SqlTypeName.BINARY_TYPES.contains(type.getSqlTypeName()); + } + + /** + * Checks whether a {@link RelDataType} represents a scalar type. + * + *

Scalar types include all primitive and atomic types such as numeric types (INTEGER, BIGINT, + * FLOAT, DOUBLE, DECIMAL), string types (VARCHAR, CHAR), boolean, temporal types (DATE, TIME, + * TIMESTAMP), and special scalar types (IP, BINARY, UUID). + * + *

This method returns false for composite types including: + * + *

    + *
  • STRUCT types (structured records with named fields) + *
  • MAP types (key-value pairs) + *
  • ARRAY and MULTISET types (collections) + *
  • ROW types (tuples) + *
+ * + * @param type the type to check; may be null + * @return true if the type is a scalar type, false if it is a composite type or null + */ + public static boolean isScalar(RelDataType type) { + if (type == null) { + return false; + } + return !type.isStruct() + && !SqlTypeUtil.isMap(type) + && !SqlTypeUtil.isCollection(type) + && !SqlTypeUtil.isRow(type); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index b4e040762a..d8fa17ff05 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -10,7 +10,6 @@ import static org.apache.calcite.rex.RexWindowBounds.UNBOUNDED_PRECEDING; import static org.apache.calcite.rex.RexWindowBounds.following; import static org.apache.calcite.rex.RexWindowBounds.preceding; -import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.isTimeBasedType; import com.google.common.collect.ImmutableList; import java.lang.reflect.Method; @@ -647,7 +646,7 @@ private static boolean isNotNullOnRef(RexNode rex) { agg.getGroupSet().stream() .allMatch( group -> - isTimeBasedType( + OpenSearchTypeUtil.isDatetime( agg.getInput().getRowType().getFieldList().get(group).getType())); static boolean isTimeSpan(RexNode rex) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/OpenSearchSparkSqlDialect.java b/core/src/main/java/org/opensearch/sql/calcite/validate/OpenSearchSparkSqlDialect.java new file mode 100644 index 0000000000..ba7d07b29f --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/OpenSearchSparkSqlDialect.java @@ -0,0 +1,104 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.calcite.rel.type.RelDataType; +import 
org.apache.calcite.sql.SqlAlienSystemTypeNameSpec; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlDataTypeSpec; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.dialect.SparkSqlDialect; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.validate.SqlConformance; +import org.apache.calcite.sql.validate.SqlDelegatingConformance; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil; + +/** + * Custom Spark SQL dialect that extends Calcite's SparkSqlDialect to handle OpenSearch-specific + * function translations. This dialect ensures that functions are translated to their correct Spark + * SQL equivalents. + */ +public class OpenSearchSparkSqlDialect extends SparkSqlDialect { + + /** Singleton instance of the OpenSearch Spark SQL dialect. */ + public static final OpenSearchSparkSqlDialect DEFAULT = new OpenSearchSparkSqlDialect(); + + private static final Map CALCITE_TO_SPARK_MAPPING = + ImmutableMap.of( + "ARG_MIN", "MIN_BY", + "ARG_MAX", "MAX_BY", + "SAFE_CAST", "TRY_CAST"); + + private static final Map CALL_SEPARATOR = ImmutableMap.of("SAFE_CAST", "AS"); + + private OpenSearchSparkSqlDialect() { + super(DEFAULT_CONTEXT); + } + + @Override + public void unparseCall(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { + String operatorName = call.getOperator().getName(); + + // Replace Calcite specific functions with their Spark SQL equivalents + if (CALCITE_TO_SPARK_MAPPING.containsKey(operatorName)) { + unparseFunction( + writer, + call, + CALCITE_TO_SPARK_MAPPING.get(operatorName), + leftPrec, + rightPrec, + CALL_SEPARATOR.getOrDefault(operatorName, ",")); + } else { + super.unparseCall(writer, call, leftPrec, rightPrec); + } + } + + @Override + public @Nullable SqlNode getCastSpec(RelDataType type) { + // ExprIPType has sql type name OTHER, which can not be handled by 
spark dialect + if (OpenSearchTypeUtil.isIp(type)) { + return new SqlDataTypeSpec( + // It will use SqlTypeName.OTHER by type.getSqlTypeName() as OTHER is "borrowed" to + // represent IP type (see also: PplTypeCoercionRule.java) + new SqlAlienSystemTypeNameSpec("IP", type.getSqlTypeName(), SqlParserPos.ZERO), + SqlParserPos.ZERO); + } + return super.getCastSpec(type); + } + + private void unparseFunction( + SqlWriter writer, + SqlCall call, + String functionName, + int leftPrec, + int rightPrec, + String separator) { + writer.print(functionName); + final SqlWriter.Frame frame = writer.startList("(", ")"); + for (int i = 0; i < call.operandCount(); i++) { + if (i > 0) { + writer.sep(separator); + } + call.operand(i).unparse(writer, 0, rightPrec); + } + writer.endList(frame); + } + + @Override + public SqlConformance getConformance() { + return new SqlDelegatingConformance(super.getConformance()) { + @Override + public boolean isLiberal() { + // This allows SQL feature LEFT ANTI JOIN & LEFT SEMI JOIN + return true; + } + }; + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/PplConvertletTable.java b/core/src/main/java/org/opensearch/sql/calcite/validate/PplConvertletTable.java new file mode 100644 index 0000000000..244c5a3ae0 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/PplConvertletTable.java @@ -0,0 +1,67 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import java.util.HashMap; +import java.util.Map; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql2rel.ReflectiveConvertletTable; +import org.apache.calcite.sql2rel.SqlRexConvertlet; +import 
org.apache.calcite.sql2rel.StandardConvertletTable; +import org.checkerframework.checker.initialization.qual.UnderInitialization; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil; +import org.opensearch.sql.expression.function.PPLBuiltinOperators; + +public class PplConvertletTable extends ReflectiveConvertletTable { + public static PplConvertletTable INSTANCE = new PplConvertletTable(); + private final Map map = new HashMap<>(); + + private PplConvertletTable() { + super(); + registerOperator(SqlStdOperatorTable.EQUALS, ipConvertlet(PPLBuiltinOperators.EQUALS_IP)); + registerOperator( + SqlStdOperatorTable.NOT_EQUALS, ipConvertlet(PPLBuiltinOperators.NOT_EQUALS_IP)); + registerOperator( + SqlStdOperatorTable.GREATER_THAN, ipConvertlet(PPLBuiltinOperators.GREATER_IP)); + registerOperator( + SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, ipConvertlet(PPLBuiltinOperators.GTE_IP)); + registerOperator(SqlStdOperatorTable.LESS_THAN, ipConvertlet(PPLBuiltinOperators.LESS_IP)); + registerOperator( + SqlStdOperatorTable.LESS_THAN_OR_EQUAL, ipConvertlet(PPLBuiltinOperators.LTE_IP)); + // ATAN convertlet will be registered once PPLBuiltinOperators.ATAN is added + } + + @Override + public @Nullable SqlRexConvertlet get(SqlCall call) { + SqlRexConvertlet custom = map.get(call.getOperator()); + if (custom != null) return custom; + return StandardConvertletTable.INSTANCE.get(call); + } + + /** Registers a convertlet for a given operator instance. 
*/ + private void registerOperator( + @UnderInitialization PplConvertletTable this, SqlOperator op, SqlRexConvertlet convertlet) { + map.put(op, convertlet); + } + + private SqlRexConvertlet ipConvertlet(SqlOperator substitute) { + return (cx, call) -> { + final RexCall e = (RexCall) StandardConvertletTable.INSTANCE.convertCall(cx, call); + RelDataType type1 = e.getOperands().get(0).getType(); + RelDataType type2 = e.getOperands().get(1).getType(); + if (OpenSearchTypeUtil.isIp(type1) || OpenSearchTypeUtil.isIp(type2)) { + return StandardConvertletTable.INSTANCE.convertFunction(cx, (SqlFunction) substitute, call); + } + return e; + }; + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/PplTypeCoercion.java b/core/src/main/java/org/opensearch/sql/calcite/validate/PplTypeCoercion.java new file mode 100644 index 0000000000..9ecbd13d33 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/PplTypeCoercion.java @@ -0,0 +1,248 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import static java.util.Objects.requireNonNull; +import static org.opensearch.sql.calcite.validate.ValidationUtils.createUDTWithAttributes; + +import org.apache.calcite.adapter.java.JavaTypeFactory; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeFactoryImpl; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlCharStringLiteral; +import org.apache.calcite.sql.SqlDynamicParam; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlLibraryOperators; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeAssignmentRule; +import 
org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.type.SqlTypeMappingRule; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql.validate.SqlValidatorScope; +import org.apache.calcite.sql.validate.implicit.TypeCoercion; +import org.apache.calcite.sql.validate.implicit.TypeCoercionImpl; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.function.PPLBuiltinOperators; + +/** + * Custom type coercion implementation for PPL that extends Calcite's default type coercion with + * additional restrictions. + * + *

This class implements a blacklist approach to prevent certain implicit type conversions that + * are not allowed in PPL semantics. + */ +public class PplTypeCoercion extends TypeCoercionImpl { + + /** + * Creates a custom TypeCoercion instance for PPL. This can be used as a TypeCoercionFactory. + * + * @param typeFactory the type factory + * @param validator the SQL validator + * @return custom PplTypeCoercion instance + */ + public static TypeCoercion create(RelDataTypeFactory typeFactory, SqlValidator validator) { + return new PplTypeCoercion(typeFactory, validator); + } + + public PplTypeCoercion(RelDataTypeFactory typeFactory, SqlValidator validator) { + super(typeFactory, validator); + } + + @Override + public @Nullable RelDataType implicitCast(RelDataType in, SqlTypeFamily expected) { + RelDataType casted = super.implicitCast(in, expected); + if (casted == null) { + // String -> DATETIME is converted to String -> TIMESTAMP + if (OpenSearchTypeUtil.isCharacter(in) && expected == SqlTypeFamily.DATETIME) { + return createUDTWithAttributes(factory, in, OpenSearchTypeFactory.ExprUDT.EXPR_TIMESTAMP); + } + return null; + } + return switch (casted.getSqlTypeName()) { + case SqlTypeName.DATE, SqlTypeName.TIME, SqlTypeName.TIMESTAMP, SqlTypeName.BINARY -> + createUDTWithAttributes(factory, in, casted.getSqlTypeName()); + default -> casted; + }; + } + + /** + * Override super implementation to add special handling for user-defined types (UDTs). Otherwise, + * UDTs will be regarded as character types, invalidating string->datetime casts. 
+ */ + @Override + protected boolean needToCast( + SqlValidatorScope scope, SqlNode node, RelDataType toType, SqlTypeMappingRule mappingRule) { + boolean need = super.needToCast(scope, node, toType, mappingRule); + RelDataType fromType = validator.deriveType(scope, node); + if (OpenSearchTypeUtil.isUserDefinedType(toType) && OpenSearchTypeUtil.isCharacter(fromType)) { + need = true; + } + return need; + } + + @Override + protected boolean dateTimeStringEquality( + SqlCallBinding binding, RelDataType left, RelDataType right) { + if (OpenSearchTypeUtil.isCharacter(left) && OpenSearchTypeUtil.isDatetime(right)) { + // Use user-defined types in place of inbuilt datetime types + RelDataType r = + OpenSearchTypeUtil.isUserDefinedType(right) + ? right + : ValidationUtils.createUDTWithAttributes(factory, right, right.getSqlTypeName()); + return coerceOperandType(binding.getScope(), binding.getCall(), 0, r); + } + if (OpenSearchTypeUtil.isCharacter(right) && OpenSearchTypeUtil.isDatetime(left)) { + RelDataType l = + OpenSearchTypeUtil.isUserDefinedType(left) + ? 
left + : ValidationUtils.createUDTWithAttributes(factory, left, left.getSqlTypeName()); + return coerceOperandType(binding.getScope(), binding.getCall(), 1, l); + } + return false; + } + + @Override + public @Nullable RelDataType commonTypeForBinaryComparison( + @Nullable RelDataType type1, @Nullable RelDataType type2) { + // Prepend following rules for datetime comparisons: + // - (date, time) -> timestamp + // - (time, timestamp) -> timestamp + // - (ip, string) -> ip + if (type1 != null && type2 != null) { + boolean anyNullable = type1.isNullable() || type2.isNullable(); + if ((SqlTypeUtil.isDate(type1) && OpenSearchTypeUtil.isTime(type2)) + || (OpenSearchTypeUtil.isTime(type1) && SqlTypeUtil.isDate(type2))) { + return factory.createTypeWithNullability( + factory.createSqlType(SqlTypeName.TIMESTAMP), anyNullable); + } + if (OpenSearchTypeUtil.isTime(type1) && SqlTypeUtil.isTimestamp(type2)) { + return factory.createTypeWithNullability(type2, anyNullable); + } + if (SqlTypeUtil.isTimestamp(type1) && OpenSearchTypeUtil.isTime(type2)) { + return factory.createTypeWithNullability(type1, anyNullable); + } + if (OpenSearchTypeUtil.isIp(type1) && OpenSearchTypeUtil.isCharacter(type2)) { + return factory.createTypeWithNullability(type1, anyNullable); + } + if (OpenSearchTypeUtil.isCharacter(type1) && OpenSearchTypeUtil.isIp(type2)) { + return factory.createTypeWithNullability(type2, anyNullable); + } + } + return super.commonTypeForBinaryComparison(type1, type2); + } + + /** + * Cast operand at index {@code index} to target type. we do this base on the fact that validate + * happens before type coercion. + */ + protected boolean coerceOperandType( + @Nullable SqlValidatorScope scope, SqlCall call, int index, RelDataType targetType) { + // Transform the JavaType to SQL type because the SqlDataTypeSpec + // does not support deriving JavaType yet. 
+ if (RelDataTypeFactoryImpl.isJavaType(targetType)) { + targetType = ((JavaTypeFactory) factory).toSql(targetType); + } + + SqlNode operand = call.getOperandList().get(index); + if (operand instanceof SqlDynamicParam) { + // Do not support implicit type coercion for dynamic param. + return false; + } + requireNonNull(scope, "scope"); + RelDataType operandType = validator.deriveType(scope, operand); + if (coerceStringToArray(call, operand, index, operandType, targetType)) { + return true; + } + + // Check it early. + if (!needToCast(scope, operand, targetType, PplTypeCoercionRule.lenientInstance())) { + return false; + } + // Fix up nullable attr. + RelDataType targetType1 = ValidationUtils.syncAttributes(factory, operandType, targetType); + SqlNode desired = castTo(operand, operandType, targetType1); + call.setOperand(index, desired); + // SAFE_CAST always results in nullable return type. See + // SqlCastFunction#createTypeWithNullabilityFromExpr + if (SqlKind.SAFE_CAST.equals(desired.getKind())) { + targetType1 = factory.createTypeWithNullability(targetType1, true); + } + updateInferredType(desired, targetType1); + return true; + } + + /** + * Creates a cast expression from the source node to the target type. + * + *

This method determines whether to use regular CAST or SAFE_CAST based on the following + * rules: + * + *

    + *
  • For user-defined types: use specialized conversion functions + *
  • For non-string literals: use regular CAST (safe, folded at compile time) + *
  • For safe numeric widening (e.g., SMALLINT → INTEGER): use regular CAST (no data loss + * possible) + *
  • For all other cases: use SAFE_CAST to handle malformed values gracefully + *
+ */ + private static SqlNode castTo(SqlNode node, RelDataType sourceType, RelDataType targetType) { + if (OpenSearchTypeUtil.isDatetime(targetType) || OpenSearchTypeUtil.isIp(targetType)) { + ExprType exprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(targetType); + return switch (exprType) { + case ExprCoreType.DATE -> + PPLBuiltinOperators.DATE.createCall(node.getParserPosition(), node); + case ExprCoreType.TIMESTAMP -> + PPLBuiltinOperators.TIMESTAMP.createCall(node.getParserPosition(), node); + case ExprCoreType.TIME -> + PPLBuiltinOperators.TIME.createCall(node.getParserPosition(), node); + case ExprCoreType.IP -> PPLBuiltinOperators.IP.createCall(node.getParserPosition(), node); + default -> throw new UnsupportedOperationException("Unsupported type: " + exprType); + }; + } + + SqlOperator cast; + // Use CAST for non-string literals (safe, folded at compile time) + if (node.getKind() == SqlKind.LITERAL && !(node instanceof SqlCharStringLiteral)) { + cast = SqlStdOperatorTable.CAST; + } + // Use CAST for safe numeric widening (no data loss possible, avoids script generation) + else if (isSafeNumericWidening(sourceType, targetType)) { + cast = SqlStdOperatorTable.CAST; + } + // Use SAFE_CAST for all other cases to handle malformed values gracefully + else { + cast = SqlLibraryOperators.SAFE_CAST; + } + return cast.createCall( + node.getParserPosition(), + node, + SqlTypeUtil.convertTypeToSpec(targetType).withNullable(targetType.isNullable())); + } + + /** + * Checks if the cast from sourceType to targetType is a safe numeric widening operation. + * + *

The cast is regarded safe when both types are numeric and the source can be assigned to the + * target. + */ + private static boolean isSafeNumericWidening(RelDataType sourceType, RelDataType targetType) { + if (!SqlTypeUtil.isNumeric(sourceType) || !SqlTypeUtil.isNumeric(targetType)) { + return false; + } + return SqlTypeAssignmentRule.instance() + .canApplyFrom(targetType.getSqlTypeName(), sourceType.getSqlTypeName()); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/PplTypeCoercionRule.java b/core/src/main/java/org/opensearch/sql/calcite/validate/PplTypeCoercionRule.java new file mode 100644 index 0000000000..5fe17dd838 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/PplTypeCoercionRule.java @@ -0,0 +1,107 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import lombok.NonNull; +import org.apache.calcite.sql.type.SqlTypeAssignmentRule; +import org.apache.calcite.sql.type.SqlTypeCoercionRule; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Provides PPL-specific type coercion rules that extend Calcite's default type coercion behavior. + * + *

This class defines additional type mapping rules for PPL, particularly for handling custom + * types like IP addresses and number-to-string coercion. These additional rules are merged with + * Calcite's built-in type coercion rules. + * + *

The additional mappings defined include: + * + *

    + *
  • IP can be coerced to/from string types + *
  • VARCHAR can be coerced from numeric types + *
+ * + *

Three variants of type coercion rules are provided: + * + *

    + *
  • {@link #instance()} - Standard type coercion rules + *
  • {@link #lenientInstance()} - More permissive type coercion rules + *
  • {@link #assignmentInstance()} - Rules for type assignment validation + *
+ * + * @see SqlTypeCoercionRule + * @see PplTypeCoercion + */ +public class PplTypeCoercionRule { + /** + * PPL-specific additional type mapping rules + * + *
    + *
  • IP -> IP + *
  • CHARACTER -> IP + *
  • IP -> CHARACTER + *
  • NUMBER -> VARCHAR + *
+ */ + private static final Map> additionalMapping = + Map.of( + SqlTypeName.OTHER, + ImmutableSet.of(SqlTypeName.OTHER, SqlTypeName.VARCHAR, SqlTypeName.CHAR), + SqlTypeName.VARCHAR, + ImmutableSet.builder() + .add(SqlTypeName.OTHER) + .addAll(SqlTypeName.NUMERIC_TYPES) + .build(), + SqlTypeName.CHAR, + ImmutableSet.of(SqlTypeName.OTHER)); + + private static final SqlTypeCoercionRule INSTANCE = + SqlTypeCoercionRule.instance( + mergeMapping(SqlTypeCoercionRule.instance().getTypeMapping(), additionalMapping)); + private static final SqlTypeCoercionRule LENIENT_INSTANCE = + SqlTypeCoercionRule.instance( + mergeMapping(SqlTypeCoercionRule.lenientInstance().getTypeMapping(), additionalMapping)); + private static final SqlTypeCoercionRule ASSIGNMENT_INSTANCE = + SqlTypeCoercionRule.instance( + mergeMapping(SqlTypeAssignmentRule.instance().getTypeMapping(), additionalMapping)); + + public static SqlTypeCoercionRule instance() { + return INSTANCE; + } + + /** Returns an instance that allows more lenient type coercion. */ + public static SqlTypeCoercionRule lenientInstance() { + return LENIENT_INSTANCE; + } + + /** Rules that determine whether a type is assignable from another type. 
*/ + public static SqlTypeCoercionRule assignmentInstance() { + return ASSIGNMENT_INSTANCE; + } + + private static Map> mergeMapping( + Map> base, Map> addition) { + return Stream.concat(base.entrySet().stream(), addition.entrySet().stream()) + .collect( + Collectors.collectingAndThen( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (b, a) -> { + Set combined = new HashSet<>(b); + combined.addAll(a); + return ImmutableSet.copyOf(combined); + }), + ImmutableMap::copyOf)); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/PplValidator.java b/core/src/main/java/org/opensearch/sql/calcite/validate/PplValidator.java new file mode 100644 index 0000000000..0dd02bbb76 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/PplValidator.java @@ -0,0 +1,204 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import static org.opensearch.sql.calcite.validate.ValidationUtils.createUDTWithAttributes; + +import java.util.List; +import java.util.Objects; +import java.util.function.Function; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.jdbc.CalciteSchema; +import org.apache.calcite.prepare.CalciteCatalogReader; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rel.type.RelRecordType; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.type.ArraySqlType; +import org.apache.calcite.sql.type.MapSqlType; +import org.apache.calcite.sql.type.MultisetSqlType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlValidator; +import 
org.apache.calcite.sql.validate.SqlValidatorCatalogReader; +import org.apache.calcite.sql.validate.SqlValidatorImpl; +import org.apache.calcite.sql.validate.SqlValidatorScope; +import org.apache.calcite.tools.FrameworkConfig; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; +import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil; +import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; + +/** + * Custom SQL validator for PPL queries. + * + *

This validator extends Calcite's default SqlValidatorImpl to provide PPL-specific validation + * behavior. Currently, it uses the default implementation but can be extended in the future to add + * PPL-specific validation rules. + */ +public class PplValidator extends SqlValidatorImpl { + /** + * Tracks whether the current deriveType call is at the top level (true) or a recursive call + * (false). Top-level calls return user-defined types, while recursive calls return SQL types for + * internal validation. + */ + private boolean top; + + /** + * Creates a SqlValidator configured for PPL validation. + * + * @param frameworkConfig Framework configuration + * @param operatorTable SQL operator table to use for validation + * @param typeFactory Type factory for creating data types + * @param validatorConfig Validator configuration settings + * @return configured SqlValidator instance + */ + public static PplValidator create( + FrameworkConfig frameworkConfig, + SqlOperatorTable operatorTable, + RelDataTypeFactory typeFactory, + SqlValidator.Config validatorConfig) { + SchemaPlus defaultSchema = + Objects.requireNonNull(frameworkConfig.getDefaultSchema(), "defaultSchema"); + + final CalciteSchema schema = CalciteSchema.from(defaultSchema); + CalciteCatalogReader catalogReader = + new CalciteCatalogReader( + schema.root(), schema.path(null), typeFactory, CalciteConnectionConfig.DEFAULT); + return new PplValidator(operatorTable, catalogReader, typeFactory, validatorConfig); + } + + /** + * Creates a PPL validator. 
+ * + * @param opTab Operator table containing PPL operators + * @param catalogReader Catalog reader for accessing schema information + * @param typeFactory Type factory for creating type information + * @param config Validator configuration + */ + protected PplValidator( + SqlOperatorTable opTab, + SqlValidatorCatalogReader catalogReader, + RelDataTypeFactory typeFactory, + Config config) { + super(opTab, catalogReader, typeFactory, config); + top = true; + } + + /** + * Overrides the deriveType method to map user-defined types (UDTs) to SqlTypes so that they can + * be validated + */ + @Override + public RelDataType deriveType(SqlValidatorScope scope, SqlNode expr) { + // The type has to be sql type during type derivation & validation + boolean original = top; + top = false; + RelDataType type = super.deriveType(scope, expr); + top = original; + if (top) { + return sqlTypeToUserDefinedType(type); + } + return userDefinedTypeToSqlType(type); + } + + @Override + public @Nullable RelDataType getValidatedNodeTypeIfKnown(SqlNode node) { + RelDataType type = super.getValidatedNodeTypeIfKnown(node); + return sqlTypeToUserDefinedType(type); + } + + /** + * Disable nullary call to not confuse with field reference. + * + *

It was originally designed for function calls that have no arguments and require no + * parentheses (for example "CURRENT_USER"). However, PPL does not have such use cases. Besides, + * as nullary calls are resolved before field reference, this will make field references with name + * like USER, LOCALTIME to function calls in an unwanted but subtle way. + * + * @see SqlValidatorImpl.Expander#visit(SqlIdentifier) + */ + @Override + public @Nullable SqlCall makeNullaryCall(SqlIdentifier id) { + return null; + } + + private RelDataType userDefinedTypeToSqlType(RelDataType type) { + return convertType( + type, + t -> { + if (OpenSearchTypeUtil.isUserDefinedType(t)) { + AbstractExprRelDataType exprType = (AbstractExprRelDataType) t; + ExprType udtType = exprType.getExprType(); + OpenSearchTypeFactory typeFactory = (OpenSearchTypeFactory) this.getTypeFactory(); + return switch (udtType) { + case ExprCoreType.TIMESTAMP -> + typeFactory.createSqlType(SqlTypeName.TIMESTAMP, t.isNullable()); + case ExprCoreType.TIME -> typeFactory.createSqlType(SqlTypeName.TIME, t.isNullable()); + case ExprCoreType.DATE -> typeFactory.createSqlType(SqlTypeName.DATE, t.isNullable()); + case ExprCoreType.BINARY -> + typeFactory.createSqlType(SqlTypeName.BINARY, t.isNullable()); + case ExprCoreType.IP -> UserDefinedFunctionUtils.NULLABLE_IP_UDT; + default -> t; + }; + } + return t; + }); + } + + private RelDataType sqlTypeToUserDefinedType(RelDataType type) { + return convertType( + type, + t -> { + OpenSearchTypeFactory typeFactory = (OpenSearchTypeFactory) this.getTypeFactory(); + return switch (t.getSqlTypeName()) { + case TIMESTAMP -> createUDTWithAttributes(typeFactory, t, ExprUDT.EXPR_TIMESTAMP); + case TIME -> createUDTWithAttributes(typeFactory, t, ExprUDT.EXPR_TIME); + case DATE -> createUDTWithAttributes(typeFactory, t, ExprUDT.EXPR_DATE); + case BINARY -> createUDTWithAttributes(typeFactory, t, ExprUDT.EXPR_BINARY); + default -> t; + }; + }); + } + + private RelDataType 
convertType(RelDataType type, Function convert) { + if (type == null) return null; + + if (type instanceof RelRecordType recordType) { + List subTypes = + recordType.getFieldList().stream().map(RelDataTypeField::getType).map(convert).toList(); + return typeFactory.createTypeWithNullability( + typeFactory.createStructType(subTypes, recordType.getFieldNames()), + recordType.isNullable()); + } + if (type instanceof ArraySqlType arrayType) { + return typeFactory.createTypeWithNullability( + typeFactory.createArrayType(convert.apply(arrayType.getComponentType()), -1), + arrayType.isNullable()); + } + if (type instanceof MapSqlType mapType) { + return typeFactory.createTypeWithNullability( + typeFactory.createMapType( + convert.apply(mapType.getKeyType()), convert.apply(mapType.getValueType())), + mapType.isNullable()); + } + if (type instanceof MultisetSqlType multisetType) { + return typeFactory.createTypeWithNullability( + typeFactory.createMultisetType(convert.apply(multisetType.getComponentType()), -1), + multisetType.isNullable()); + } + + return convert.apply(type); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/SqlOperatorTableProvider.java b/core/src/main/java/org/opensearch/sql/calcite/validate/SqlOperatorTableProvider.java new file mode 100644 index 0000000000..e041c45936 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/SqlOperatorTableProvider.java @@ -0,0 +1,25 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import org.apache.calcite.sql.SqlOperatorTable; + +/** + * Provider interface for obtaining SqlOperatorTable instances. + * + *

This interface breaks the circular dependency between core and opensearch modules by allowing + * the opensearch module to provide its operator table implementation to the core module through + * dependency injection. + */ +@FunctionalInterface +public interface SqlOperatorTableProvider { + /** + * Gets the SQL operator table to use for validation and query processing. + * + * @return SqlOperatorTable instance + */ + SqlOperatorTable getOperatorTable(); +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/ValidationUtils.java b/core/src/main/java/org/opensearch/sql/calcite/validate/ValidationUtils.java new file mode 100644 index 0000000000..2a90af230f --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/ValidationUtils.java @@ -0,0 +1,165 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import java.nio.charset.Charset; +import java.util.List; +import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlCollation; +import org.apache.calcite.sql.type.NonNullableAccessors; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; + +@UtilityClass +public class ValidationUtils { + /** + * Sync the nullability, collation, etc. to the target type. 
Copied from {@link + * org.apache.calcite.sql.validate.implicit.AbstractTypeCoercion} + */ + public static RelDataType syncAttributes( + RelDataTypeFactory factory, RelDataType fromType, RelDataType toType) { + RelDataType syncedType = toType; + if (fromType != null) { + syncedType = factory.createTypeWithNullability(syncedType, fromType.isNullable()); + if (SqlTypeUtil.inCharOrBinaryFamilies(fromType) + && SqlTypeUtil.inCharOrBinaryFamilies(toType)) { + Charset charset = fromType.getCharset(); + if (charset != null && SqlTypeUtil.inCharFamily(syncedType)) { + SqlCollation collation = NonNullableAccessors.getCollation(fromType); + syncedType = factory.createTypeWithCharsetAndCollation(syncedType, charset, collation); + } + } + } + return syncedType; + } + + /** + * Creates a user-defined type with attributes (nullability, charset, collation) copied from + * another type. + * + * @param factory the type factory used to create the UDT + * @param fromType the source type to copy attributes from (nullability, charset, collation) + * @param userDefinedType the user-defined type to create + * @return a new RelDataType representing the UDT with attributes from fromType + */ + public static RelDataType createUDTWithAttributes( + RelDataTypeFactory factory, + RelDataType fromType, + OpenSearchTypeFactory.ExprUDT userDefinedType) { + if (!(factory instanceof OpenSearchTypeFactory typeFactory)) { + throw new IllegalArgumentException("factory must be an instance of OpenSearchTypeFactory"); + } + RelDataType type = typeFactory.createUDT(userDefinedType); + return syncAttributes(typeFactory, fromType, type); + } + + /** + * Creates a user-defined type by mapping a SQL type name to the corresponding UDT, with + * attributes copied from another type. 
+ * + * @param factory the type factory used to create the UDT + * @param fromType the source type to copy attributes from + * @param sqlTypeName the SQL type name to map to a UDT (DATE, TIME, TIMESTAMP, or BINARY) + * @return a new RelDataType representing the UDT with attributes from fromType + * @throws IllegalArgumentException if the sqlTypeName is not supported + */ + public static RelDataType createUDTWithAttributes( + RelDataTypeFactory factory, RelDataType fromType, SqlTypeName sqlTypeName) { + return switch (sqlTypeName) { + case SqlTypeName.DATE -> + createUDTWithAttributes(factory, fromType, OpenSearchTypeFactory.ExprUDT.EXPR_DATE); + case SqlTypeName.TIME -> + createUDTWithAttributes(factory, fromType, OpenSearchTypeFactory.ExprUDT.EXPR_TIME); + case SqlTypeName.TIMESTAMP -> + createUDTWithAttributes(factory, fromType, OpenSearchTypeFactory.ExprUDT.EXPR_TIMESTAMP); + case SqlTypeName.BINARY -> + createUDTWithAttributes(factory, fromType, OpenSearchTypeFactory.ExprUDT.EXPR_BINARY); + default -> throw new IllegalArgumentException("Unsupported type: " + sqlTypeName); + }; + } + + /** + * Special handling for nested window functions that fail validation due to a Calcite bug. + * + *

This method provides a workaround for two issues in Calcite v1.41: + * + *

1. Nested window functions within CASE expressions fail validation incorrectly. Only + * {@code CalcitePPLEventstatsIT#testMultipleEventstatsWithNullBucket} should be caught by this + * case. + * + *

Calcite Bug (v1.41): The {@code SqlImplementor.Result#containsOver()} method at + * SqlImplementor.java:L2145 only checks {@code SqlBasicCall} nodes for window functions, missing + * other {@code SqlCall} subclasses like {@code SqlCase}. This causes it to fail at detecting + * window functions inside CASE expressions. + * + *

Impact: When nested window functions exist (e.g., from double eventstats), Calcite's + * {@code RelToSqlConverter} doesn't create the necessary subquery boundary because {@code + * containsOver()} returns false for expressions like: + * + *

+   * CASE WHEN ... THEN (SUM(age) OVER (...)) END
+   * 
+ * + *

This results in invalid SQL with nested aggregations: + * + *

+   * SUM(CASE WHEN ... THEN (SUM(age) OVER (...)) END) OVER (...)
+   * 
+ * + * 2. Projections containing OVER as function operands are not moved down to subqueries. This + * should catch test case {@code CalciteExplainIT.noPushDownForAggOnWindow} + * + *

The check {@code needNewSubquery} at {@link + * org.apache.calcite.rel.rel2sql.SqlImplementor}#L1930 should return true for the following plan + * as it contains window functions nested inside a function call, which should be in an inner + * query if further aggregation is performed on top of it. + * + *

+   * LogicalProject(age_str=[WIDTH_BUCKET(SAFE_CAST($8), 3, -(MAX(SAFE_CAST($8)) OVER (), MIN(SAFE_CAST($8)) OVER ()), MAX(SAFE_CAST($8)) OVER ())])
+   *   CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+   * 
+ * + * As a result, it creates SQL in which window functions appear in the GROUP BY clause: + * + *
+   * GROUP BY WIDTH_BUCKET(...MAX(...) OVER ()...)
+   * 
+ * + * Ideally, it should have created SQL like the following for test case {@code noPushDownForAggOnWindow}: + * + *
+   *   SELECT COUNT(*) AS `count()`, `age_str`
+   *   FROM (
+   *     SELECT WIDTH_BUCKET(
+   *       SAFE_CAST(`age` AS STRING),
+   *       3,
+   *       (MAX(SAFE_CAST(`age` AS STRING)) OVER (...)) - (MIN(SAFE_CAST(`age` AS STRING)) OVER (...)),
+   *       MAX(SAFE_CAST(`age` AS STRING)) OVER (...)
+   *     ) AS `age_str`
+   *     FROM `OpenSearch`.`opensearch-sql_test_index_account`
+   *   ) subquery
+   *   GROUP BY `age_str`
+   *   ORDER BY 2
+   * 
+ * + *

TODO: Remove this workaround when upgrading to a Calcite version that fixes the bugs. + * + * @param e the exception to check + * @return {@code true} if the exception should be tolerated as a known Calcite bug, {@code false} + * otherwise + */ + public static boolean tolerantValidationException(Exception e) { + List acceptableErrorMessages = + List.of( + "Aggregate expressions cannot be nested", + "Windowed aggregate expression is illegal in GROUP BY clause"); + return e.getMessage() != null + && acceptableErrorMessages.stream().anyMatch(e.getMessage()::contains); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/converters/OpenSearchRelToSqlConverter.java b/core/src/main/java/org/opensearch/sql/calcite/validate/converters/OpenSearchRelToSqlConverter.java new file mode 100644 index 0000000000..0896ce12dc --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/converters/OpenSearchRelToSqlConverter.java @@ -0,0 +1,172 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.converters; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.hint.RelHint; +import org.apache.calcite.rel.rel2sql.RelToSqlConverter; +import org.apache.calcite.sql.JoinConditionType; +import org.apache.calcite.sql.JoinType; +import org.apache.calcite.sql.SqlDialect; +import org.apache.calcite.sql.SqlHint; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.parser.SqlParserPos; + +/** + * An 
extension of {@link RelToSqlConverter} to convert a relation algebra tree, translated from a + * PPL query, into a SQL statement. + * + *

This converter is used during the validation phase to convert RelNode back to SqlNode for + * validation and type checking using Calcite's SqlValidator. + */ +public class OpenSearchRelToSqlConverter extends RelToSqlConverter { + /** + * Creates a RelToSqlConverter for PPL. + * + * @param dialect the SQL dialect to use for conversion + */ + public OpenSearchRelToSqlConverter(SqlDialect dialect) { + super(dialect); + } + + /** Override Correlate visitor to pass on join type */ + @Override + public Result visit(Correlate e) { + Result result = super.visit(e); + SqlNode from = result.asSelect().getFrom(); + if (e.getJoinType() != JoinRelType.INNER && from instanceof SqlJoin join) { + JoinType joinType; + try { + joinType = JoinType.valueOf(e.getJoinType().name()); + } catch (IllegalArgumentException ignored) { + return result; + } + join.setOperand(2, joinType.symbol(POS)); + // INNER, LEFT, RIGHT, FULL, or ASOF join requires a condition + // Use ON TRUE to satisfy SQL syntax because the actual correlation condition logic is inside + // the subquery's WHERE clause + join.setOperand(4, JoinConditionType.ON.symbol(POS)); + join.setOperand(5, SqlLiteral.createBoolean(true, POS)); + } + return result; + } + + /** + * Override to convert ANTI and SEMI joins to Spark SQL's native LEFT ANTI JOIN and LEFT SEMI JOIN + * syntax, instead of using NOT EXISTS / EXISTS subqueries. + * + *

The default implementation in {@link RelToSqlConverter#visitAntiOrSemiJoin} converts + * ANTI/SEMI joins to standard SQL using NOT EXISTS / EXISTS subqueries. However, a subtle bug in + * Calcite (as of Calcite 1.41) leads to incorrect results after the conversion: correlation + * variables in the subquery are generated as unqualified identifiers. + * + *

For example: + * + *

{@code
+   * -- Base implementation generates:
+   * SELECT ... FROM table1 AS t0
+   * WHERE ... AND NOT EXISTS (
+   *   SELECT 1 FROM table2 AS t2
+   *   WHERE name = t2.name    -- 'name' is unqualified!
+   * )
+   * }
+ * + *

The unqualified {@code name} is resolved to the inner scope (t2.name) instead of the outer + * scope (t0.name), resulting in incorrect results. + * + *

The override implementation uses ANTI / SEMI join syntax: + * + *

{@code
+   * SELECT ... FROM table1 AS t0
+   * LEFT ANTI JOIN table2 AS t2 ON t0.name = t2.name
+   * }
+ */ + @Override + protected Result visitAntiOrSemiJoin(Join e) { + final Result leftResult = visitInput(e, 0).resetAlias(); + final Result rightResult = visitInput(e, 1).resetAlias(); + final Context leftContext = leftResult.qualifiedContext(); + final Context rightContext = rightResult.qualifiedContext(); + + JoinType joinType = + e.getJoinType() == JoinRelType.ANTI ? JoinType.LEFT_ANTI_JOIN : JoinType.LEFT_SEMI_JOIN; + SqlNode sqlCondition = convertConditionToSqlNode(e.getCondition(), leftContext, rightContext); + SqlNode join = + new SqlJoin( + POS, + leftResult.asFrom(), + SqlLiteral.createBoolean(false, POS), + joinType.symbol(POS), // LEFT ANTI JOIN or LEFT SEMI JOIN + rightResult.asFrom(), + JoinConditionType.ON.symbol(POS), + sqlCondition); + + return result(join, leftResult, rightResult); + } + + @Override + public Result visit(Aggregate e) { + Result r = super.visit(e); + if (!e.getHints().isEmpty()) { + List hints = + e.getHints().stream() + .map(relHint -> (SqlNode) toSqlHint(relHint, POS)) + .collect(Collectors.toCollection(ArrayList::new)); + r.asSelect().setHints(SqlNodeList.of(POS, hints)); + } + return r; + } + + /** + * Converts a RelHint to a SqlHint. + * + *

Copied from {@link RelToSqlConverter#toSqlHint(RelHint, SqlParserPos)} (as Calcite 1.41) as + * it is private there + */ + private static SqlHint toSqlHint(RelHint hint, SqlParserPos pos) { + if (hint.kvOptions != null) { + return new SqlHint( + pos, + new SqlIdentifier(hint.hintName, pos), + SqlNodeList.of( + pos, + hint.kvOptions.entrySet().stream() + .flatMap( + e -> + Stream.of( + new SqlIdentifier(e.getKey(), pos), + SqlLiteral.createCharString(e.getValue(), pos))) + .collect(Collectors.toList())), + SqlHint.HintOptionFormat.KV_LIST); + } else if (hint.listOptions != null) { + return new SqlHint( + pos, + new SqlIdentifier(hint.hintName, pos), + SqlNodeList.of( + pos, + hint.listOptions.stream() + .map(e -> SqlLiteral.createCharString(e, pos)) + .collect(Collectors.toList())), + SqlHint.HintOptionFormat.LITERAL_LIST); + } + return new SqlHint( + pos, + new SqlIdentifier(hint.hintName, pos), + SqlNodeList.EMPTY, + SqlHint.HintOptionFormat.EMPTY); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/converters/OpenSearchSqlToRelConverter.java b/core/src/main/java/org/opensearch/sql/calcite/validate/converters/OpenSearchSqlToRelConverter.java new file mode 100644 index 0000000000..a1d7d0d6bd --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/converters/OpenSearchSqlToRelConverter.java @@ -0,0 +1,110 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.converters; + +import java.util.List; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.sql.JoinType; +import 
org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql2rel.RelFieldTrimmer; +import org.apache.calcite.sql2rel.SqlRexConvertletTable; +import org.apache.calcite.sql2rel.SqlToRelConverter; +import org.apache.calcite.tools.RelBuilder; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.calcite.utils.OpenSearchRelFieldTrimmer; + +public class OpenSearchSqlToRelConverter extends SqlToRelConverter { + protected final RelBuilder relBuilder; + + public OpenSearchSqlToRelConverter( + RelOptTable.ViewExpander viewExpander, + @Nullable SqlValidator validator, + Prepare.CatalogReader catalogReader, + RelOptCluster cluster, + SqlRexConvertletTable convertletTable, + Config config) { + super(viewExpander, validator, catalogReader, cluster, convertletTable, config); + this.relBuilder = + config + .getRelBuilderFactory() + .create( + cluster, + validator != null ? validator.getCatalogReader().unwrap(RelOptSchema.class) : null) + .transform(config.getRelBuilderConfigTransform()); + } + + @Override + protected RelFieldTrimmer newFieldTrimmer() { + return new OpenSearchRelFieldTrimmer(validator, this.relBuilder); + } + + /** + * Override to support Spark SQL's LEFT ANTI JOIN and LEFT SEMI JOIN conversion to RelNode. + * + *

The default implementation in {@link SqlToRelConverter#convertJoinType} does not expect + * LEFT_ANTI_JOIN and LEFT_SEMI_JOIN. This override works around the limitation by first + * temporarily changing LEFT_ANTI_JOIN/LEFT_SEMI_JOIN to LEFT join in the SqlJoin node, then + * calling {@code super.convertFrom()} to perform normal conversion, finally substituting the join + * type in the resulting RelNode to ANTI/SEMI. + * + * @param bb Scope within which to resolve identifiers + * @param from FROM clause of a query. + * @param fieldNames Field aliases, usually come from AS clause, or null + */ + @Override + protected void convertFrom( + Blackboard bb, @Nullable SqlNode from, @Nullable List fieldNames) { + JoinType originalJoinType = null; + if (from instanceof SqlJoin join) { + JoinType joinType = join.getJoinType(); + if (joinType == JoinType.LEFT_SEMI_JOIN || joinType == JoinType.LEFT_ANTI_JOIN) { + join.setOperand(2, JoinType.LEFT.symbol(from.getParserPosition())); + originalJoinType = joinType; + } + } + super.convertFrom(bb, from, fieldNames); + if (originalJoinType != null) { + RelNode root = bb.root(); + if (root != null) { + JoinRelType correctJoinType = + originalJoinType == JoinType.LEFT_SEMI_JOIN ? 
JoinRelType.SEMI : JoinRelType.ANTI; + RelNode fixedRoot = modifyJoinType(root, correctJoinType); + bb.setRoot(fixedRoot, false); + } + } + } + + private RelNode modifyJoinType(RelNode root, JoinRelType correctJoinType) { + if (root instanceof LogicalProject project) { + RelNode input = project.getInput(); + RelNode fixedInput = modifyJoinType(input, correctJoinType); + if (fixedInput != input) { + return project.copy( + project.getTraitSet(), fixedInput, project.getProjects(), project.getRowType()); + } + } else if (root instanceof LogicalJoin join) { + if (join.getJoinType() == JoinRelType.LEFT) { + return join.copy( + join.getTraitSet(), + join.getCondition(), + join.getLeft(), + join.getRight(), + correctJoinType, + join.isSemiJoinDone()); + } + } + return root; + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/PplRelToSqlRelShuttle.java b/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/PplRelToSqlRelShuttle.java new file mode 100644 index 0000000000..1f057fcd72 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/PplRelToSqlRelShuttle.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.shuttles; + +import java.math.BigDecimal; +import java.util.List; +import org.apache.calcite.avatica.util.TimeUnit; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttleImpl; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.fun.SqlLibraryOperators; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * A RelShuttle that recursively visits all RelNodes and their RexNode expressions to fix interval + * literals and float literal before/after SQL conversion. + * + *

This shuttle extends RelShuttleImpl to ensure it visits the entire RelNode tree recursively, + * applying the interval literal fixes at each node. + */ +public class PplRelToSqlRelShuttle extends RelShuttleImpl { + private final RexShuttle rexShuttle; + + public PplRelToSqlRelShuttle(RexBuilder rexBuilder, boolean forward) { + this.rexShuttle = + new RexShuttle() { + /** + * This visitor fixes: 1. float literal: when converting a logical plan to a SQL node, float + * information is lost and all floats are treated as double. A compulsory cast is + * inserted here to ensure a cast is present in the generated SQL. 2. interval literal: we + * create and read the interval literal in a different way than Calcite originally + * expects. + */ + @Override + public RexNode visitLiteral(RexLiteral literal) { + // 1. Fix float literal + SqlTypeName literalType = literal.getType().getSqlTypeName(); + if (SqlTypeName.REAL.equals(literalType) || SqlTypeName.FLOAT.equals(literalType)) { + return rexBuilder.makeCall( + literal.getType(), SqlLibraryOperators.SAFE_CAST, List.of(literal)); + } + + // 2. Fix interval literal + SqlIntervalQualifier qualifier = literal.getType().getIntervalQualifier(); + if (qualifier == null) { + return literal; + } + BigDecimal value = literal.getValueAs(BigDecimal.class); + if (value == null) { + return literal; + } + TimeUnit unit = qualifier.getUnit(); + // An ad-hoc fix to a Calcite bug in RexLiteral#intervalString -- the quarter type does not + // exist in SqlTypeName, causing it to return the number of months instead of the number of + // quarters. + BigDecimal forwardMultiplier = + TimeUnit.QUARTER.equals(unit) ? BigDecimal.valueOf(1) : unit.multiplier; + + // QUARTER intervals are stored as INTERVAL_MONTH in Calcite's type system + // but the qualifier preserves the actual unit (QUARTER vs MONTH). + // The multiplier for QUARTER is 3 (months), for MONTH is 1. + BigDecimal newValue = + forward + ? 
value.multiply(forwardMultiplier) + : value.divideToIntegralValue(unit.multiplier); + return rexBuilder.makeIntervalLiteral(newValue, qualifier); + } + }; + } + + @Override + protected RelNode visitChild(RelNode parent, int i, RelNode child) { + RelNode newChild = super.visitChild(parent, i, child); + return newChild.accept(rexShuttle); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/SkipRelValidationShuttle.java b/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/SkipRelValidationShuttle.java new file mode 100644 index 0000000000..90a34f8234 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/SkipRelValidationShuttle.java @@ -0,0 +1,175 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.shuttles; + +import java.util.List; +import java.util.function.Predicate; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttleImpl; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlKind; +import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil; + +/** + * A RelShuttle that detects if validation should be skipped for certain operations. Currently, it + * detects the following patterns: + * + *

    + *
  • Binning on datetime types, which is only executable after pushdown. + *
  • Aggregates with multiple complex CASE statements, which cause field reference issues during + * the SQL-to-Rel conversion. + *
  • LogicalValues is used to populate empty row values + *
+ * + * Group by multiple CASE statements + * + *

When grouping by multiple CASE expressions, a Calcite 1.41 bug causes field references to + * become invalid during SQL-to-Rel conversion. This affects queries in {@code + * testCaseCanBePushedDownAsCompositeRangeQuery} 2.4 and {@code testCaseCanBePushedDownAsRangeQuery} + * 1.3. E.g. for the following query: + * + *

{@code
+ * source=opensearch-sql_test_index_bank
+ * | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100'),
+ *        balance_range = case(balance < 20000, 'medium' else 'high')
+ * | stats avg(balance) as avg_balance by age_range, balance_range
+ * }
+ * + *

During validation, this PPL query is converted to SQL: + * + *

{@code
+ * SELECT AVG(`balance`) AS `avg_balance`,
+ *        CASE WHEN `age` < 30 THEN 'u30' WHEN `age` < 40 THEN 'u40' ELSE 'u100' END AS `age_range`,
+ *        CASE WHEN `balance` < 20000 THEN 'medium' ELSE 'high' END AS `balance_range`
+ * FROM `OpenSearch`.`opensearch-sql_test_index_bank`
+ * GROUP BY CASE WHEN `age` < 30 THEN 'u30' WHEN `age` < 40 THEN 'u40' ELSE 'u100' END,
+ *          CASE WHEN `balance` < 20000 THEN 'medium' ELSE 'high' END
+ * }
+ * + *

When Calcite converts this SQL back to RelNode, it processes GROUP BY expressions + * sequentially, making field references in the second CASE expression invalid. + * + *

Generate empty row with LogicalValues + * + *

Types in the rows generated with {@code VALUES} will not be preserved, causing validation + * issues when converting SQL back to a logical plan. + * + *

For example, in {@code CalcitePPLAggregationIT.testSumEmpty}, the query {@code + * source=opensearch-sql_test_index_bank_with_null_values | where 1=2 | stats sum(balance)} will be + * converted to the following SQL: + * + *

{@code
+ * SELECT SUM(CAST(`balance` AS DECIMAL(38, 19))) AS `sum(balance)`
+ * FROM (VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) AS `t` (`account_number`, `firstname`, `address`, `balance`, `gender`, `age`, `lastname`, `_id`, `_index`, `_score`, `_maxscore`, `_sort`, `_routing`)
+ * WHERE 1 = 0
+ * }
+ * + * When converted back to logical plan, {@code CAST(`balance` AS DECIMAL(38, 19))} will fail because + * the type of balance is lost. + * + *

Note for developers: when validation fails while developing new features, please try + * to solve the root cause instead of adding skip rules here. In the rare cases where you have to + * skip validation, please document the exact reason. + * + *

WARNING: When a skip pattern is detected, we bypass the entire validation pipeline, + * skipping potentially useful transformations that rely on rewriting the SQL node + */ +public class SkipRelValidationShuttle extends RelShuttleImpl { + private boolean shouldSkip = false; + private final RexShuttle rexShuttle; + + /** Predicates about patterns of calls that should not be validated. */ + public static final List> SKIP_CALLS; + + /** Predicates about logical aggregates that should not be validated */ + public static final List> SKIP_AGGREGATES; + + /** Predicates about logical values that should not be validated */ + public static final List> SKIP_VALUES; + + static { + // TODO: Make incompatible operations like bin-on-timestamp validatable UDFs so that they can + // still be converted to SqlNode and back to RelNode + Predicate binOnTimestamp = + call -> { + if ("WIDTH_BUCKET".equalsIgnoreCase(call.getOperator().getName())) { + if (!call.getOperands().isEmpty()) { + RexNode firstOperand = call.getOperands().get(0); + return OpenSearchTypeUtil.isDatetime(firstOperand.getType()); + } + } + return false; + }; + Predicate groupByMultipleCases = + aggregate -> { + if (aggregate.getGroupCount() > 1 + && aggregate.getInput() instanceof LogicalProject project) { + long nGroupByCase = + project.getProjects().stream().filter(p -> p.isA(SqlKind.CASE)).count(); + return nGroupByCase > 1; + } + return false; + }; + Predicate createEmptyRow = values -> values.getTuples().isEmpty(); + SKIP_CALLS = List.of(binOnTimestamp); + SKIP_AGGREGATES = List.of(groupByMultipleCases); + SKIP_VALUES = List.of(createEmptyRow); + } + + @Override + public RelNode visit(LogicalAggregate aggregate) { + for (Predicate skipAgg : SKIP_AGGREGATES) { + if (skipAgg.test(aggregate)) { + shouldSkip = true; + return aggregate; + } + } + return super.visit(aggregate); + } + + @Override + public RelNode visit(LogicalValues values) { + for (Predicate skipValues : SKIP_VALUES) { + if (skipValues.test(values)) { + 
shouldSkip = true; + return values; + } + } + return super.visit(values); + } + + public SkipRelValidationShuttle() { + this.rexShuttle = + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + for (Predicate skipCall : SKIP_CALLS) { + if (skipCall.test(call)) { + shouldSkip = true; + return call; + } + } + return super.visitCall(call); + } + }; + } + + /** Returns true if validation should be skipped based on detected conditions. */ + public boolean shouldSkipValidation() { + return shouldSkip; + } + + @Override + protected RelNode visitChild(RelNode parent, int i, RelNode child) { + RelNode newChild = super.visitChild(parent, i, child); + return newChild.accept(rexShuttle); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/SqlRewriteShuttle.java b/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/SqlRewriteShuttle.java new file mode 100644 index 0000000000..d2a2f22914 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/validate/shuttles/SqlRewriteShuttle.java @@ -0,0 +1,69 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.shuttles; + +import java.util.Collections; +import java.util.List; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.fun.SqlCountAggFunction; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.util.SqlShuttle; +import org.opensearch.sql.calcite.OpenSearchSchema; + +public class SqlRewriteShuttle extends SqlShuttle { + @Override + public SqlNode visit(SqlIdentifier id) { + // Remove database qualifier, keeping only table name + if (id.names.size() == 2 && 
OpenSearchSchema.OPEN_SEARCH_SCHEMA_NAME.equals(id.names.get(0))) { + return new SqlIdentifier(Collections.singletonList(id.names.get(1)), id.getParserPosition()); + } + return super.visit(id); + } + + @Override + public @org.checkerframework.checker.nullness.qual.Nullable SqlNode visit(SqlCall call) { + if (call.getOperator() instanceof SqlCountAggFunction && call.getOperandList().isEmpty()) { + // Convert COUNT() to COUNT(*) so that SqlCall.isCountStar() resolves to True + // This is useful when deriving the return types in SqlCountAggFunction#deriveType + call = + new SqlBasicCall( + SqlStdOperatorTable.COUNT, + List.of(SqlIdentifier.STAR), + call.getParserPosition(), + call.getFunctionQuantifier()); + } else if (call.getKind() == SqlKind.IN || call.getKind() == SqlKind.NOT_IN) { + // Fix for tuple IN / NOT IN queries: Convert SqlNodeList to ROW SqlCall + // + // When RelToSqlConverter converts a tuple expression like (id, name) back to + // SqlNode, it generates a bare SqlNodeList instead of wrapping it in a ROW + // operator. This causes validation to fail because: + // 1. SqlValidator.deriveType() doesn't know how to handle SqlNodeList + // 2. SqlToRelConverter.visit(SqlNodeList) throws UnsupportedOperationException + // + // For example, the query: + // WHERE (id, name) NOT IN (SELECT uid, name FROM ...) + // + // After Rel-to-SQL conversion becomes: + // IN operator with operands: [SqlNodeList[id, name], SqlSelect[...]] + // + // But it should be: + // IN operator with operands: [ROW(id, name), SqlSelect[...]] + // + // This fix wraps the SqlNodeList in a ROW SqlCall before validation, + // ensuring proper type derivation and subsequent SQL-to-Rel conversion. 
+ if (!call.getOperandList().isEmpty() + && call.getOperandList().get(0) instanceof SqlNodeList nodes) { + call.setOperand(0, SqlStdOperatorTable.ROW.createCall(nodes)); + } + } + return super.visit(call); + } +} diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index bebd50a5e8..51ada59eb5 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -6,6 +6,7 @@ package org.opensearch.sql.executor; import java.util.List; +import java.util.Objects; import java.util.Optional; import javax.annotation.Nullable; import lombok.AllArgsConstructor; @@ -14,13 +15,19 @@ import lombok.extern.log4j.Log4j2; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.prepare.CalciteCatalogReader; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.rel2sql.RelToSqlConverter; +import org.apache.calcite.rel.rel2sql.SqlImplementor; import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; @@ -34,6 +41,13 @@ import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit.SystemLimitType; +import org.opensearch.sql.calcite.validate.OpenSearchSparkSqlDialect; +import org.opensearch.sql.calcite.validate.PplConvertletTable; +import 
org.opensearch.sql.calcite.validate.converters.OpenSearchRelToSqlConverter; +import org.opensearch.sql.calcite.validate.converters.OpenSearchSqlToRelConverter; +import org.opensearch.sql.calcite.validate.shuttles.PplRelToSqlRelShuttle; +import org.opensearch.sql.calcite.validate.shuttles.SkipRelValidationShuttle; +import org.opensearch.sql.calcite.validate.shuttles.SqlRewriteShuttle; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.QueryContext; @@ -122,6 +136,7 @@ public void executeWithCalcite( CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); RelNode relNode = analyze(plan, context); + // validate() NOT called yet - will be enabled in Sub-PR C RelNode calcitePlan = convertToCalcitePlan(relNode, context); analyzeMetric.set(System.nanoTime() - analyzeStart); executionEngine.execute(calcitePlan, context, listener); @@ -152,6 +167,7 @@ public void explainWithCalcite( context.run( () -> { RelNode relNode = analyze(plan, context); + // validate() NOT called yet - will be enabled in Sub-PR C RelNode calcitePlan = convertToCalcitePlan(relNode, context); executionEngine.explain(calcitePlan, mode, context, listener); }, @@ -262,6 +278,71 @@ public LogicalPlan analyze(UnresolvedPlan plan, QueryType queryType) { return analyzer.analyze(plan, new AnalysisContext(queryType)); } + /** + * Validates a RelNode by converting it to SqlNode, performing validation, and converting back. + * + *

This process enables Calcite's type validation and implicit casting mechanisms to work on + * PPL queries. + * + * @param relNode the relation node to validate + * @param context the Calcite plan context containing the validator + * @return the validated (and potentially modified) relation node + */ + private RelNode validate(RelNode relNode, CalcitePlanContext context) { + SkipRelValidationShuttle skipShuttle = new SkipRelValidationShuttle(); + relNode.accept(skipShuttle); + if (skipShuttle.shouldSkipValidation()) { + return relNode; + } + try { + return doValidate(relNode, context); + } catch (Throwable e) { + // Gracefully skip validation when the plan contains operators that cannot be converted + // to SQL (e.g., LogicalGraphLookup throws AssertionError) or validated (e.g., unregistered + // UDFs like ARRAY_COMPACT). Return the original plan without validation. + log.debug("Skipping validation due to unsupported operation: {}", e.getMessage()); + return relNode; + } + } + + private RelNode doValidate(RelNode relNode, CalcitePlanContext context) { + // Fix interval literals before conversion to SQL + RelNode sqlRelNode = relNode.accept(new PplRelToSqlRelShuttle(context.rexBuilder, true)); + + // Convert RelNode to SqlNode for validation + RelToSqlConverter rel2sql = new OpenSearchRelToSqlConverter(OpenSearchSparkSqlDialect.DEFAULT); + SqlImplementor.Result result = rel2sql.visitRoot(sqlRelNode); + SqlNode root = result.asStatement(); + + // Rewrite SqlNode to remove database qualifiers + SqlNode rewritten = root.accept(new SqlRewriteShuttle()); + SqlValidator validator = context.getValidator(); + validator.validate(Objects.requireNonNull(rewritten)); + + SqlToRelConverter.Config sql2relConfig = + SqlToRelConverter.config() + // Do not remove sort in subqueries so that the orders for queries like `... | sort a + // | fields b` is preserved + .withRemoveSortInSubQuery(false) + // Disable automatic JSON_TYPE_OPERATOR wrapping for nested JSON functions. 
+ // See CALCITE-4989: Calcite wraps nested JSON functions with JSON_TYPE by default + .withAddJsonTypeOperatorEnabled(false) + // Set hint strategy so that hints can be properly propagated. + // See SqlToRelConverter.java#convertSelectImpl + .withHintStrategyTable(context.relBuilder.getCluster().getHintStrategies()); + SqlToRelConverter sql2rel = + new OpenSearchSqlToRelConverter( + context.config.getViewExpander(), + validator, + validator.getCatalogReader().unwrap(CalciteCatalogReader.class), + context.relBuilder.getCluster(), + PplConvertletTable.INSTANCE, + sql2relConfig); + // Convert the validated SqlNode back to RelNode + RelNode validatedRel = sql2rel.convertQuery(rewritten, false, true).project(); + return validatedRel.accept(new PplRelToSqlRelShuttle(context.rexBuilder, false)); + } + /** Translate {@link LogicalPlan} to {@link PhysicalPlan}. */ public PhysicalPlan plan(LogicalPlan plan) { return planner.plan(plan); diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java new file mode 100644 index 0000000000..cf0052a19c --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java @@ -0,0 +1,307 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +public 
class OpenSearchTypeFactoryTest { + + // ==================== leastRestrictive with UDT types tests ==================== + + @Test + public void testLeastRestrictive_dateUdtsOnly_returnsDateUdt() { + RelDataType dateUdt1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType dateUdt2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(dateUdt1, dateUdt2)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isDate(result)); + } + + @Test + public void testLeastRestrictive_timeUdtsOnly_returnsTimeUdt() { + RelDataType timeUdt1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType timeUdt2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(timeUdt1, timeUdt2)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTime(result)); + } + + @Test + public void testLeastRestrictive_timestampUdtsOnly_returnsTimestampUdt() { + RelDataType timestampUdt1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType timestampUdt2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(timestampUdt1, timestampUdt2)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTimestamp(result)); + } + + @Test + public void testLeastRestrictive_ipUdtsOnly_returnsIpUdt() { + RelDataType ipUdt1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType ipUdt2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ipUdt1, ipUdt2)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isIp(result)); + } + + @Test + public void testLeastRestrictive_binaryUdtsOnly_returnsBinaryUdt() { + RelDataType 
binaryUdt1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + RelDataType binaryUdt2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(binaryUdt1, binaryUdt2)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isBinary(result)); + } + + @Test + public void testLeastRestrictive_dateAndNull_returnsDateUdt() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType nullType = TYPE_FACTORY.createSqlType(SqlTypeName.NULL); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(dateUdt, nullType)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isDate(result)); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictive_timeAndNull_returnsTimeUdt() { + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType nullType = TYPE_FACTORY.createSqlType(SqlTypeName.NULL); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(timeUdt, nullType)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTime(result)); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictive_ipAndNull_returnsIpUdt() { + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType nullType = TYPE_FACTORY.createSqlType(SqlTypeName.NULL); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ipUdt, nullType)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isIp(result)); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictive_binaryAndNull_returnsBinaryUdt() { + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + RelDataType nullType = TYPE_FACTORY.createSqlType(SqlTypeName.NULL); + + 
RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(binaryUdt, nullType)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isBinary(result)); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictive_mixedDatetimeTypes_returnsTimestampUdt() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(dateUdt, timeUdt)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTimestamp(result)); + } + + @Test + public void testLeastRestrictive_dateTimeTimestamp_returnsTimestampUdt() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(dateUdt, timeUdt, timestampUdt)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTimestamp(result)); + } + + @Test + public void testLeastRestrictive_ipAndBinary_returnsVarchar() { + // IP and BINARY are incompatible UDT types, should fall back to VARCHAR + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ipUdt, binaryUdt)); + + assertNotNull(result); + assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictive_ipUdtAndOther_returnsIpUdt() { + // When IP UDT is mixed with OTHER type (which is used as intermediate for IP) + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType otherType = 
TYPE_FACTORY.createSqlType(SqlTypeName.OTHER); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ipUdt, otherType)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isIp(result)); + } + + @Test + public void testLeastRestrictive_nullableUdts_preservesNullability() { + RelDataType nullableDateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE, true); + RelDataType nonNullableDateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE, false); + + RelDataType result = + TYPE_FACTORY.leastRestrictive(List.of(nullableDateUdt, nonNullableDateUdt)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isDate(result)); + assertTrue(result.isNullable()); + } + + // ==================== leastRestrictive with standard types tests ==================== + + @Test + public void testLeastRestrictive_standardNumericTypes_returnsLeastRestrictive() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType bigintType = TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(intType, bigintType)); + + assertNotNull(result); + assertEquals(SqlTypeName.BIGINT, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictive_charType_convertsToVarchar() { + RelDataType charType = TYPE_FACTORY.createSqlType(SqlTypeName.CHAR, 10); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(charType)); + + assertNotNull(result); + assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictive_charAndVarchar_returnsVarchar() { + RelDataType charType = TYPE_FACTORY.createSqlType(SqlTypeName.CHAR, 5); + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(charType, varcharType)); + + assertNotNull(result); + assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictive_withAnyType_fallsThrough() { + 
RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType anyType = TYPE_FACTORY.createSqlType(SqlTypeName.ANY); + + // When ANY is present, should fall through to standard leastRestrictive + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(dateUdt, anyType)); + + // Result depends on standard Calcite behavior + assertNotNull(result); + assertEquals(SqlTypeName.ANY, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictive_incompatibleTypes_returnsNull() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType boolType = TYPE_FACTORY.createSqlType(SqlTypeName.BOOLEAN); + + // Integer and boolean are incompatible + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(intType, boolType)); + + assertNull(result); + } + + @Test + public void testLeastRestrictive_singleType_returnsSameType() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(dateUdt)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isDate(result)); + } + + @Test + public void testLeastRestrictive_multipleNulls_returnsNullableUdt() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType nullType1 = TYPE_FACTORY.createSqlType(SqlTypeName.NULL); + RelDataType nullType2 = TYPE_FACTORY.createSqlType(SqlTypeName.NULL); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(dateUdt, nullType1, nullType2)); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isDate(result)); + assertTrue(result.isNullable()); + } + + // ==================== leastRestrictive null/empty input tests ==================== + + @Test + public void testLeastRestrictive_emptyList_throwsIllegalArgumentException() { + // Empty list causes IllegalArgumentException from Calcite's base implementation + org.junit.jupiter.api.Assertions.assertThrows( + IllegalArgumentException.class, () -> 
TYPE_FACTORY.leastRestrictive(List.of())); + } + + @Test + public void testLeastRestrictive_nullList_throwsNPE() { + // Null list causes NullPointerException + org.junit.jupiter.api.Assertions.assertThrows( + NullPointerException.class, () -> TYPE_FACTORY.leastRestrictive(null)); + } + + @Test + public void testLeastRestrictive_listWithNullElement_throwsNPE() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + // List.of() doesn't allow null elements, so we use a different approach + java.util.List<RelDataType> types = new java.util.ArrayList<>(); + types.add(dateUdt); + types.add(null); + + // List containing null element causes NPE + org.junit.jupiter.api.Assertions.assertThrows( + NullPointerException.class, () -> TYPE_FACTORY.leastRestrictive(types)); + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeUtilTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeUtilTest.java new file mode 100644 index 0000000000..f0a0580a6d --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeUtilTest.java @@ -0,0 +1,402 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +public class OpenSearchTypeUtilTest { + + // ==================== isUserDefinedType tests ==================== + + @Test + public void testIsUserDefinedType_withUDT_returnsTrue() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + 
RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + + assertTrue(OpenSearchTypeUtil.isUserDefinedType(dateUdt)); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(timeUdt)); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(timestampUdt)); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(ipUdt)); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(binaryUdt)); + } + + @Test + public void testIsUserDefinedType_withStandardType_returnsFalse() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + + assertFalse(OpenSearchTypeUtil.isUserDefinedType(intType)); + assertFalse(OpenSearchTypeUtil.isUserDefinedType(varcharType)); + assertFalse(OpenSearchTypeUtil.isUserDefinedType(dateType)); + } + + // ==================== isNumericOrCharacter tests ==================== + + @Test + public void testIsNumericOrCharacter_withNumericTypes_returnsTrue() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType bigintType = TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT); + RelDataType smallintType = TYPE_FACTORY.createSqlType(SqlTypeName.SMALLINT); + RelDataType tinyintType = TYPE_FACTORY.createSqlType(SqlTypeName.TINYINT); + RelDataType doubleType = TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE); + RelDataType floatType = TYPE_FACTORY.createSqlType(SqlTypeName.FLOAT); + RelDataType realType = TYPE_FACTORY.createSqlType(SqlTypeName.REAL); + RelDataType decimalType = TYPE_FACTORY.createSqlType(SqlTypeName.DECIMAL); + + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(intType)); + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(bigintType)); + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(smallintType)); + 
assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(tinyintType)); + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(doubleType)); + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(floatType)); + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(realType)); + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(decimalType)); + } + + @Test + public void testIsNumericOrCharacter_withCharacterTypes_returnsTrue() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType charType = TYPE_FACTORY.createSqlType(SqlTypeName.CHAR); + + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(varcharType)); + assertTrue(OpenSearchTypeUtil.isNumericOrCharacter(charType)); + } + + @Test + public void testIsNumericOrCharacter_withNonNumericTypes_returnsFalse() { + RelDataType booleanType = TYPE_FACTORY.createSqlType(SqlTypeName.BOOLEAN); + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + RelDataType binaryType = TYPE_FACTORY.createSqlType(SqlTypeName.BINARY); + + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(booleanType)); + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(dateType)); + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(timestampType)); + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(binaryType)); + } + + @Test + public void testIsNumericOrCharacter_withVarcharBasedUDTs_returnsFalse() { + // These UDTs wrap VARCHAR via ExprSqlType, so SqlTypeUtil.isCharacter returns true + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(binaryUdt)); + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(dateUdt)); + 
assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(timeUdt)); + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(timestampUdt)); + } + + @Test + public void testIsNumericOrCharacter_withJavaTypeBasedUDT_returnsFalse() { + // IP UDT wraps JavaType (not VARCHAR), so it doesn't pass the isCharacter check + // and IP is not a numeric type in ExprCoreType.numberTypes() + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + + assertFalse(OpenSearchTypeUtil.isNumericOrCharacter(ipUdt)); + } + + // ==================== isDatetime tests ==================== + + @Test + public void testIsDatetime_withStandardDatetimeTypes_returnsTrue() { + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + RelDataType timeType = TYPE_FACTORY.createSqlType(SqlTypeName.TIME); + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + + assertTrue(OpenSearchTypeUtil.isDatetime(dateType)); + assertTrue(OpenSearchTypeUtil.isDatetime(timeType)); + assertTrue(OpenSearchTypeUtil.isDatetime(timestampType)); + } + + @Test + public void testIsDatetime_withUDTDatetimeTypes_returnsTrue() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + assertTrue(OpenSearchTypeUtil.isDatetime(dateUdt)); + assertTrue(OpenSearchTypeUtil.isDatetime(timeUdt)); + assertTrue(OpenSearchTypeUtil.isDatetime(timestampUdt)); + } + + @Test + public void testIsDatetime_withNonDatetimeTypes_returnsFalse() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + + assertFalse(OpenSearchTypeUtil.isDatetime(intType)); + assertFalse(OpenSearchTypeUtil.isDatetime(varcharType)); + 
assertFalse(OpenSearchTypeUtil.isDatetime(ipUdt)); + assertFalse(OpenSearchTypeUtil.isDatetime(binaryUdt)); + } + + // ==================== isDate tests ==================== + + @Test + public void testIsDate_withStandardDateType_returnsTrue() { + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + assertTrue(OpenSearchTypeUtil.isDate(dateType)); + } + + @Test + public void testIsDate_withUDTDateType_returnsTrue() { + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + assertTrue(OpenSearchTypeUtil.isDate(dateUdt)); + } + + @Test + public void testIsDate_withNonDateTypes_returnsFalse() { + RelDataType timeType = TYPE_FACTORY.createSqlType(SqlTypeName.TIME); + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + assertFalse(OpenSearchTypeUtil.isDate(timeType)); + assertFalse(OpenSearchTypeUtil.isDate(timestampType)); + assertFalse(OpenSearchTypeUtil.isDate(timeUdt)); + assertFalse(OpenSearchTypeUtil.isDate(timestampUdt)); + } + + // ==================== isTimestamp tests ==================== + + @Test + public void testIsTimestamp_withStandardTimestampType_returnsTrue() { + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + assertTrue(OpenSearchTypeUtil.isTimestamp(timestampType)); + } + + @Test + public void testIsTimestamp_withUDTTimestampType_returnsTrue() { + RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + assertTrue(OpenSearchTypeUtil.isTimestamp(timestampUdt)); + } + + @Test + public void testIsTimestamp_withNonTimestampTypes_returnsFalse() { + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + RelDataType timeType = TYPE_FACTORY.createSqlType(SqlTypeName.TIME); + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timeUdt = 
TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + + assertFalse(OpenSearchTypeUtil.isTimestamp(dateType)); + assertFalse(OpenSearchTypeUtil.isTimestamp(timeType)); + assertFalse(OpenSearchTypeUtil.isTimestamp(dateUdt)); + assertFalse(OpenSearchTypeUtil.isTimestamp(timeUdt)); + } + + // ==================== isTime tests ==================== + + @Test + public void testIsTime_withStandardTimeType_returnsTrue() { + RelDataType timeType = TYPE_FACTORY.createSqlType(SqlTypeName.TIME); + assertTrue(OpenSearchTypeUtil.isTime(timeType)); + } + + @Test + public void testIsTime_withUDTTimeType_returnsTrue() { + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + assertTrue(OpenSearchTypeUtil.isTime(timeUdt)); + } + + @Test + public void testIsTime_withNonTimeTypes_returnsFalse() { + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + assertFalse(OpenSearchTypeUtil.isTime(dateType)); + assertFalse(OpenSearchTypeUtil.isTime(timestampType)); + assertFalse(OpenSearchTypeUtil.isTime(dateUdt)); + assertFalse(OpenSearchTypeUtil.isTime(timestampUdt)); + } + + // ==================== isCharacter tests ==================== + + @Test + public void testIsCharacter_withCharacterTypes_returnsTrue() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType charType = TYPE_FACTORY.createSqlType(SqlTypeName.CHAR); + + assertTrue(OpenSearchTypeUtil.isCharacter(varcharType)); + assertTrue(OpenSearchTypeUtil.isCharacter(charType)); + } + + @Test + public void testIsCharacter_withUDTTypes_returnsFalse() { + // UDTs have VARCHAR as their SqlTypeName but should not be considered character types + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType timeUdt = 
TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType timestampUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + + assertFalse(OpenSearchTypeUtil.isCharacter(dateUdt)); + assertFalse(OpenSearchTypeUtil.isCharacter(timeUdt)); + assertFalse(OpenSearchTypeUtil.isCharacter(timestampUdt)); + assertFalse(OpenSearchTypeUtil.isCharacter(ipUdt)); + assertFalse(OpenSearchTypeUtil.isCharacter(binaryUdt)); + } + + @Test + public void testIsCharacter_withNonCharacterTypes_returnsFalse() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType booleanType = TYPE_FACTORY.createSqlType(SqlTypeName.BOOLEAN); + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + + assertFalse(OpenSearchTypeUtil.isCharacter(intType)); + assertFalse(OpenSearchTypeUtil.isCharacter(booleanType)); + assertFalse(OpenSearchTypeUtil.isCharacter(dateType)); + } + + // ==================== isIp tests (no acceptOther parameter) ==================== + + @Test + public void testIsIp_withUDTIpType_returnsTrue() { + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + assertTrue(OpenSearchTypeUtil.isIp(ipUdt)); + } + + @Test + public void testIsIp_withNonIpTypes_returnsFalse() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType otherType = TYPE_FACTORY.createSqlType(SqlTypeName.OTHER); + RelDataType dateUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + + assertFalse(OpenSearchTypeUtil.isIp(varcharType)); + assertFalse(OpenSearchTypeUtil.isIp(otherType)); // Without acceptOther, OTHER is not IP + assertFalse(OpenSearchTypeUtil.isIp(dateUdt)); + } + + // ==================== isIp tests (with acceptOther parameter) ==================== + + @Test + public void testIsIp_withAcceptOther_acceptsOtherType() { + RelDataType otherType = TYPE_FACTORY.createSqlType(SqlTypeName.OTHER); 
+ RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + + assertTrue(OpenSearchTypeUtil.isIp(otherType, true)); + assertTrue(OpenSearchTypeUtil.isIp(ipUdt, true)); + assertFalse(OpenSearchTypeUtil.isIp(otherType, false)); + } + + @Test + public void testIsIp_withAcceptOther_rejectsNonIpTypes() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + assertFalse(OpenSearchTypeUtil.isIp(varcharType, true)); + assertFalse(OpenSearchTypeUtil.isIp(intType, true)); + } + + // ==================== isBinary tests ==================== + + @Test + public void testIsBinary_withStandardBinaryTypes_returnsTrue() { + RelDataType binaryType = TYPE_FACTORY.createSqlType(SqlTypeName.BINARY); + RelDataType varbinaryType = TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY); + + assertTrue(OpenSearchTypeUtil.isBinary(binaryType)); + assertTrue(OpenSearchTypeUtil.isBinary(varbinaryType)); + } + + @Test + public void testIsBinary_withUDTBinaryType_returnsTrue() { + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + assertTrue(OpenSearchTypeUtil.isBinary(binaryUdt)); + } + + @Test + public void testIsBinary_withNonBinaryTypes_returnsFalse() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + + assertFalse(OpenSearchTypeUtil.isBinary(varcharType)); + assertFalse(OpenSearchTypeUtil.isBinary(intType)); + assertFalse(OpenSearchTypeUtil.isBinary(ipUdt)); + } + + // ==================== isScalar tests ==================== + + @Test + public void testIsScalar_withNull_returnsFalse() { + assertFalse(OpenSearchTypeUtil.isScalar(null)); + } + + @Test + public void testIsScalar_withScalarTypes_returnsTrue() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType varcharType = 
TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType booleanType = TYPE_FACTORY.createSqlType(SqlTypeName.BOOLEAN); + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType binaryUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_BINARY); + + assertTrue(OpenSearchTypeUtil.isScalar(intType)); + assertTrue(OpenSearchTypeUtil.isScalar(varcharType)); + assertTrue(OpenSearchTypeUtil.isScalar(booleanType)); + assertTrue(OpenSearchTypeUtil.isScalar(dateType)); + assertTrue(OpenSearchTypeUtil.isScalar(timestampType)); + assertTrue(OpenSearchTypeUtil.isScalar(ipUdt)); + assertTrue(OpenSearchTypeUtil.isScalar(binaryUdt)); + } + + @Test + public void testIsScalar_withStructType_returnsFalse() { + RelDataType structType = + TYPE_FACTORY.createStructType( + java.util.List.of( + TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), + TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR)), + java.util.List.of("id", "name")); + + assertFalse(OpenSearchTypeUtil.isScalar(structType)); + } + + @Test + public void testIsScalar_withMapType_returnsFalse() { + RelDataType mapType = + TYPE_FACTORY.createMapType( + TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR), + TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER)); + + assertFalse(OpenSearchTypeUtil.isScalar(mapType)); + } + + @Test + public void testIsScalar_withArrayType_returnsFalse() { + RelDataType arrayType = + TYPE_FACTORY.createArrayType(TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), -1); + + assertFalse(OpenSearchTypeUtil.isScalar(arrayType)); + } + + @Test + public void testIsScalar_withMultisetType_returnsFalse() { + RelDataType multisetType = + TYPE_FACTORY.createMultisetType(TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), -1); + + assertFalse(OpenSearchTypeUtil.isScalar(multisetType)); + } +} diff --git 
a/core/src/test/java/org/opensearch/sql/calcite/validate/OpenSearchSparkSqlDialectTest.java b/core/src/test/java/org/opensearch/sql/calcite/validate/OpenSearchSparkSqlDialectTest.java new file mode 100644 index 0000000000..276fbdb85a --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/validate/OpenSearchSparkSqlDialectTest.java @@ -0,0 +1,146 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlDataTypeSpec; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.SqlWriterConfig; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.pretty.SqlPrettyWriter; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlConformance; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +public class OpenSearchSparkSqlDialectTest { + @Test + public void testGetCastSpecForIpType() { + RelDataType ipType = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + + SqlNode castSpec = OpenSearchSparkSqlDialect.DEFAULT.getCastSpec(ipType); + + assertNotNull(castSpec); + assertInstanceOf(SqlDataTypeSpec.class, castSpec); + SqlDataTypeSpec typeSpec = (SqlDataTypeSpec) castSpec; + 
assertEquals("OTHER", typeSpec.getTypeName().toString()); + assertEquals("IP", typeSpec.toString()); + } + + private SqlWriter createWriter(StringBuilder sb) { + SqlWriterConfig config = + SqlPrettyWriter.config().withDialect(OpenSearchSparkSqlDialect.DEFAULT); + return new SqlPrettyWriter(config, sb); + } + + @Test + public void testUnparseCallArgMin() { + StringBuilder sb = new StringBuilder(); + SqlWriter writer = createWriter(sb); + + SqlIdentifier col1 = new SqlIdentifier("value_col", SqlParserPos.ZERO); + SqlIdentifier col2 = new SqlIdentifier("key_col", SqlParserPos.ZERO); + + // Create a call that mimics ARG_MIN + SqlBasicCall argMinCall = + new SqlBasicCall(SqlStdOperatorTable.ARG_MIN, List.of(col1, col2), SqlParserPos.ZERO); + + OpenSearchSparkSqlDialect.DEFAULT.unparseCall(writer, argMinCall, 0, 0); + String result = sb.toString(); + + // Should be translated to MIN_BY + assertTrue(result.contains("MIN_BY")); + } + + @Test + public void testUnparseCallArgMax() { + StringBuilder sb = new StringBuilder(); + SqlWriter writer = createWriter(sb); + + SqlIdentifier col1 = new SqlIdentifier("value_col", SqlParserPos.ZERO); + SqlIdentifier col2 = new SqlIdentifier("key_col", SqlParserPos.ZERO); + + SqlBasicCall argMaxCall = + new SqlBasicCall(SqlStdOperatorTable.ARG_MAX, List.of(col1, col2), SqlParserPos.ZERO); + + OpenSearchSparkSqlDialect.DEFAULT.unparseCall(writer, argMaxCall, 0, 0); + String result = sb.toString(); + + // Should be translated to MAX_BY + assertTrue(result.contains("MAX_BY")); + } + + @Test + public void testUnparseCallRegularOperator() { + StringBuilder sb = new StringBuilder(); + SqlWriter writer = createWriter(sb); + + SqlIdentifier col1 = new SqlIdentifier("col1", SqlParserPos.ZERO); + SqlIdentifier col2 = new SqlIdentifier("col2", SqlParserPos.ZERO); + + SqlBasicCall plusCall = + new SqlBasicCall(SqlStdOperatorTable.PLUS, List.of(col1, col2), SqlParserPos.ZERO); + + OpenSearchSparkSqlDialect.DEFAULT.unparseCall(writer, plusCall, 0, 0); + 
String result = sb.toString(); + + // Should contain the + operator, not translated + assertTrue(result.contains("+")); + } + + @Test + public void testGetConformanceIsLiberal() { + SqlConformance conformance = OpenSearchSparkSqlDialect.DEFAULT.getConformance(); + + assertNotNull(conformance); + assertTrue(conformance.isLiberal()); + } + + @Test + public void testGetCastSpecForNonIpType() { + // Non-IP types should delegate to parent SparkSqlDialect + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + SqlNode castSpec = OpenSearchSparkSqlDialect.DEFAULT.getCastSpec(intType); + + // SparkSqlDialect returns a SqlDataTypeSpec for INTEGER type + assertNotNull(castSpec); + assertInstanceOf(SqlDataTypeSpec.class, castSpec); + // It should NOT be the IP-specific spec + SqlDataTypeSpec typeSpec = (SqlDataTypeSpec) castSpec; + assertEquals("INTEGER", typeSpec.toString()); + } + + @Test + public void testGetCastSpecForVarcharType() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + SqlNode castSpec = OpenSearchSparkSqlDialect.DEFAULT.getCastSpec(varcharType); + + // SparkSqlDialect handles VARCHAR specially, returns a SqlDataTypeSpec + assertNotNull(castSpec); + assertInstanceOf(SqlDataTypeSpec.class, castSpec); + } + + @Test + public void testGetCastSpecForNullType() { + // Null input should throw NullPointerException + assertThrows( + NullPointerException.class, () -> OpenSearchSparkSqlDialect.DEFAULT.getCastSpec(null)); + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/validate/PplTypeCoercionRuleTest.java b/core/src/test/java/org/opensearch/sql/calcite/validate/PplTypeCoercionRuleTest.java new file mode 100644 index 0000000000..77aa339972 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/validate/PplTypeCoercionRuleTest.java @@ -0,0 +1,67 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import 
static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.common.collect.ImmutableSet; +import java.util.Map; +import org.apache.calcite.sql.type.SqlTypeCoercionRule; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; + +public class PplTypeCoercionRuleTest { + @Test + public void testOtherToVarcharCoercion() { + SqlTypeCoercionRule rule = PplTypeCoercionRule.instance(); + Map<SqlTypeName, ImmutableSet<SqlTypeName>> mapping = rule.getTypeMapping(); + + // OTHER should be coercible to VARCHAR (IP type support) + ImmutableSet<SqlTypeName> varcharCoercions = mapping.get(SqlTypeName.VARCHAR); + assertNotNull(varcharCoercions); + assertTrue(varcharCoercions.contains(SqlTypeName.OTHER)); + } + + @Test + public void testOtherToCharCoercion() { + SqlTypeCoercionRule rule = PplTypeCoercionRule.instance(); + Map<SqlTypeName, ImmutableSet<SqlTypeName>> mapping = rule.getTypeMapping(); + + // OTHER should be coercible to CHAR (IP type support) + ImmutableSet<SqlTypeName> charCoercions = mapping.get(SqlTypeName.CHAR); + assertNotNull(charCoercions); + assertTrue(charCoercions.contains(SqlTypeName.OTHER)); + } + + @Test + public void testVarcharToOtherCoercion() { + SqlTypeCoercionRule rule = PplTypeCoercionRule.instance(); + Map<SqlTypeName, ImmutableSet<SqlTypeName>> mapping = rule.getTypeMapping(); + + // VARCHAR and CHAR should be coercible to OTHER (for IP type support) + ImmutableSet<SqlTypeName> otherCoercions = mapping.get(SqlTypeName.OTHER); + assertNotNull(otherCoercions); + assertTrue(otherCoercions.contains(SqlTypeName.VARCHAR)); + assertTrue(otherCoercions.contains(SqlTypeName.CHAR)); + } + + @Test + public void testNumericToVarcharCoercion() { + SqlTypeCoercionRule rule = PplTypeCoercionRule.instance(); + Map<SqlTypeName, ImmutableSet<SqlTypeName>> mapping = rule.getTypeMapping(); + + // VARCHAR should be coercible from numeric types + ImmutableSet<SqlTypeName> varcharCoercions = mapping.get(SqlTypeName.VARCHAR); + assertNotNull(varcharCoercions); + + // Check some numeric types are included + assertTrue(varcharCoercions.contains(SqlTypeName.INTEGER)); +
assertTrue(varcharCoercions.contains(SqlTypeName.BIGINT)); + assertTrue(varcharCoercions.contains(SqlTypeName.DOUBLE)); + assertTrue(varcharCoercions.contains(SqlTypeName.DECIMAL)); + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/validate/PplTypeCoercionTest.java b/core/src/test/java/org/opensearch/sql/calcite/validate/PplTypeCoercionTest.java new file mode 100644 index 0000000000..e2687f9d42 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/validate/PplTypeCoercionTest.java @@ -0,0 +1,229 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlValidator; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; +import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil; + +public class PplTypeCoercionTest { + + private PplTypeCoercion typeCoercion; + + @BeforeEach + public void setUp() { + SqlValidator mockValidator = Mockito.mock(SqlValidator.class); + typeCoercion = new PplTypeCoercion(TYPE_FACTORY, mockValidator); + } + + // ==================== implicitCast tests ==================== + + @Test + public void testImplicitCast_stringToDatetime_returnsTimestampUDT() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + 
RelDataType result = typeCoercion.implicitCast(varcharType, SqlTypeFamily.DATETIME); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTimestamp(result)); + } + + @Test + public void testImplicitCast_dateTypeFamily_returnsDateUDT() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + RelDataType result = typeCoercion.implicitCast(varcharType, SqlTypeFamily.DATE); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isDate(result)); + } + + @Test + public void testImplicitCast_timeTypeFamily_returnsTimeUDT() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + RelDataType result = typeCoercion.implicitCast(varcharType, SqlTypeFamily.TIME); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTime(result)); + } + + @Test + public void testImplicitCast_timestampTypeFamily_returnsTimestampUDT() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + RelDataType result = typeCoercion.implicitCast(varcharType, SqlTypeFamily.TIMESTAMP); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isUserDefinedType(result)); + assertTrue(OpenSearchTypeUtil.isTimestamp(result)); + } + + @Test + public void testImplicitCast_numericTypes_returnsStandardType() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + RelDataType result = typeCoercion.implicitCast(intType, SqlTypeFamily.NUMERIC); + + assertNotNull(result); + assertEquals(SqlTypeName.INTEGER, result.getSqlTypeName()); + } + + @Test + public void testImplicitCast_incompatibleTypes_returnsNull() { + RelDataType booleanType = TYPE_FACTORY.createSqlType(SqlTypeName.BOOLEAN); + + // Boolean cannot be implicitly cast to NUMERIC + RelDataType result = typeCoercion.implicitCast(booleanType, SqlTypeFamily.NUMERIC); 
+ + assertNull(result); + } + + @Test + public void testImplicitCast_preservesNullability() { + RelDataType nullableVarchar = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + RelDataType nonNullableVarchar = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, false); + + RelDataType nullableResult = typeCoercion.implicitCast(nullableVarchar, SqlTypeFamily.DATE); + RelDataType nonNullableResult = + typeCoercion.implicitCast(nonNullableVarchar, SqlTypeFamily.DATE); + + assertNotNull(nullableResult); + assertNotNull(nonNullableResult); + assertTrue(nullableResult.isNullable()); + assertFalse(nonNullableResult.isNullable()); + } + + // ==================== commonTypeForBinaryComparison tests ==================== + + @Test + public void testCommonTypeForBinaryComparison_dateAndTime_returnsTimestamp() { + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(dateType, timeUdt); + + assertNotNull(result); + assertEquals(SqlTypeName.TIMESTAMP, result.getSqlTypeName()); + } + + @Test + public void testCommonTypeForBinaryComparison_timeAndDate_returnsTimestamp() { + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType dateType = TYPE_FACTORY.createSqlType(SqlTypeName.DATE); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(timeUdt, dateType); + + assertNotNull(result); + assertEquals(SqlTypeName.TIMESTAMP, result.getSqlTypeName()); + } + + @Test + public void testCommonTypeForBinaryComparison_timeAndTimestamp_returnsTimestamp() { + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(timeUdt, timestampType); + + assertNotNull(result); + assertEquals(SqlTypeName.TIMESTAMP, result.getSqlTypeName()); + } + + @Test + public 
void testCommonTypeForBinaryComparison_timestampAndTime_returnsTimestamp() { + RelDataType timestampType = TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(timestampType, timeUdt); + + assertNotNull(result); + assertEquals(SqlTypeName.TIMESTAMP, result.getSqlTypeName()); + } + + @Test + public void testCommonTypeForBinaryComparison_ipAndString_returnsIp() { + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(ipUdt, varcharType); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isIp(result)); + } + + @Test + public void testCommonTypeForBinaryComparison_stringAndIp_returnsIp() { + RelDataType varcharType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType ipUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_IP); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(varcharType, ipUdt); + + assertNotNull(result); + assertTrue(OpenSearchTypeUtil.isIp(result)); + } + + @Test + public void testCommonTypeForBinaryComparison_nullTypes_handledGracefully() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType result1 = typeCoercion.commonTypeForBinaryComparison(null, intType); + RelDataType result2 = typeCoercion.commonTypeForBinaryComparison(intType, null); + RelDataType result3 = typeCoercion.commonTypeForBinaryComparison(null, null); + + assertNull(result1); + assertNull(result2); + assertNull(result3); + } + + @Test + public void testCommonTypeForBinaryComparison_preservesNullability() { + RelDataType nullableDate = TYPE_FACTORY.createSqlType(SqlTypeName.DATE, true); + RelDataType timeUdt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME, false); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(nullableDate, 
timeUdt); + + assertNotNull(result); + // When either type is nullable, result should be nullable + assertTrue(result.isNullable()); + } + + @Test + public void testCommonTypeForBinaryComparison_bothNullable_returnsNullable() { + RelDataType nullableDate = TYPE_FACTORY.createSqlType(SqlTypeName.DATE, true); + RelDataType nullableTime = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME, true); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(nullableDate, nullableTime); + + assertNotNull(result); + assertTrue(result.isNullable()); + } + + @Test + public void testCommonTypeForBinaryComparison_noNullable_returnsNonNull() { + RelDataType date = TYPE_FACTORY.createSqlType(SqlTypeName.DATE, false); + RelDataType time = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME, false); + + RelDataType result = typeCoercion.commonTypeForBinaryComparison(date, time); + + assertNotNull(result); + assertFalse(result.isNullable()); + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/validate/ValidationUtilsTest.java b/core/src/test/java/org/opensearch/sql/calcite/validate/ValidationUtilsTest.java new file mode 100644 index 0000000000..f6cbd89fe0 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/validate/ValidationUtilsTest.java @@ -0,0 +1,264 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import java.nio.charset.StandardCharsets; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import 
org.apache.calcite.sql.SqlCollation; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +public class ValidationUtilsTest { + + @Test + public void testSyncAttributesNullability() { + RelDataType varchar = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + // Create nullable source type + RelDataType nullableSource = TYPE_FACTORY.createTypeWithNullability(intType, true); + + // Sync to non-nullable target + RelDataType synced = ValidationUtils.syncAttributes(TYPE_FACTORY, nullableSource, varchar); + + assertTrue(synced.isNullable()); + assertEquals(SqlTypeName.VARCHAR, synced.getSqlTypeName()); + } + + @Test + public void testSyncAttributesNonNullableSource() { + RelDataType varchar = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + // Create non-nullable source type + RelDataType nonNullableSource = TYPE_FACTORY.createTypeWithNullability(intType, false); + + // Sync to target + RelDataType synced = ValidationUtils.syncAttributes(TYPE_FACTORY, nonNullableSource, varchar); + + assertFalse(synced.isNullable()); + assertEquals(SqlTypeName.VARCHAR, synced.getSqlTypeName()); + } + + @Test + public void testSyncAttributesCharsetAndCollationForCharTypes() { + // Create source varchar with charset and collation + RelDataType sourceVarchar = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + sourceVarchar = + TYPE_FACTORY.createTypeWithCharsetAndCollation( + sourceVarchar, StandardCharsets.UTF_8, SqlCollation.IMPLICIT); + sourceVarchar = TYPE_FACTORY.createTypeWithNullability(sourceVarchar, true); + + RelDataType targetChar = TYPE_FACTORY.createSqlType(SqlTypeName.CHAR, 10); + + RelDataType synced = ValidationUtils.syncAttributes(TYPE_FACTORY, sourceVarchar, targetChar); + + assertTrue(synced.isNullable()); + 
assertEquals(StandardCharsets.UTF_8, synced.getCharset()); + assertNotNull(synced.getCollation()); + } + + @Test + public void testSyncAttributesWithNullFromType() { + RelDataType varchar = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + // When fromType is null, toType should be returned as-is + RelDataType synced = ValidationUtils.syncAttributes(TYPE_FACTORY, null, varchar); + + assertEquals(varchar, synced); + assertEquals(SqlTypeName.VARCHAR, synced.getSqlTypeName()); + } + + @Test + public void testSyncAttributesNonCharTypes() { + // Test with numeric types - should only sync nullability, not charset/collation + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType doubleType = TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE); + + RelDataType nullableInt = TYPE_FACTORY.createTypeWithNullability(intType, true); + + RelDataType synced = ValidationUtils.syncAttributes(TYPE_FACTORY, nullableInt, doubleType); + + assertTrue(synced.isNullable()); + assertEquals(SqlTypeName.DOUBLE, synced.getSqlTypeName()); + } + + @Test + public void testCreateUDTWithAttributesExprUDTDate() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + + RelDataType dateUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, ExprUDT.EXPR_DATE); + + assertNotNull(dateUdt); + assertTrue(dateUdt.isNullable()); + } + + @Test + public void testCreateUDTWithAttributesExprUDTTime() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, false); + + RelDataType timeUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, ExprUDT.EXPR_TIME); + + assertNotNull(timeUdt); + assertFalse(timeUdt.isNullable()); + } + + @Test + public void testCreateUDTWithAttributesExprUDTTimestamp() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + + RelDataType timestampUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, 
ExprUDT.EXPR_TIMESTAMP); + + assertNotNull(timestampUdt); + assertTrue(timestampUdt.isNullable()); + } + + @Test + public void testCreateUDTWithAttributesExprUDTBinary() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY, true); + + RelDataType binaryUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, ExprUDT.EXPR_BINARY); + + assertNotNull(binaryUdt); + assertTrue(binaryUdt.isNullable()); + } + + @Test + public void testCreateUDTWithAttributesInvalidFactory() { + // Create a non-OpenSearchTypeFactory + RelDataTypeFactory basicFactory = + new org.apache.calcite.jdbc.JavaTypeFactoryImpl( + org.apache.calcite.rel.type.RelDataTypeSystem.DEFAULT); + RelDataType sourceType = basicFactory.createSqlType(SqlTypeName.VARCHAR); + + assertThrows( + IllegalArgumentException.class, + () -> ValidationUtils.createUDTWithAttributes(basicFactory, sourceType, ExprUDT.EXPR_DATE)); + } + + @Test + public void testCreateUDTWithAttributesSqlTypeNameDate() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + + RelDataType dateUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, SqlTypeName.DATE); + + assertNotNull(dateUdt); + assertTrue(dateUdt.isNullable()); + } + + @Test + public void testCreateUDTWithAttributesSqlTypeNameTime() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, false); + + RelDataType timeUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, SqlTypeName.TIME); + + assertNotNull(timeUdt); + assertFalse(timeUdt.isNullable()); + } + + @Test + public void testCreateUDTWithAttributesSqlTypeNameTimestamp() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + + RelDataType timestampUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, SqlTypeName.TIMESTAMP); + + assertNotNull(timestampUdt); + assertTrue(timestampUdt.isNullable()); + } + + @Test + public void 
testCreateUDTWithAttributesSqlTypeNameBinary() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY, true); + + RelDataType binaryUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, SqlTypeName.BINARY); + + assertNotNull(binaryUdt); + assertTrue(binaryUdt.isNullable()); + } + + @Test + public void testCreateUDTWithAttributesUnsupportedSqlTypeName() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + + assertThrows( + IllegalArgumentException.class, + () -> + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, SqlTypeName.INTEGER)); + } + + @Test + public void testTolerantValidationExceptionNestedAggregate() { + Exception e = new RuntimeException("Aggregate expressions cannot be nested"); + assertTrue(ValidationUtils.tolerantValidationException(e)); + } + + @Test + public void testTolerantValidationExceptionWindowedInGroupBy() { + Exception e = + new RuntimeException("Windowed aggregate expression is illegal in GROUP BY clause"); + assertTrue(ValidationUtils.tolerantValidationException(e)); + } + + @Test + public void testTolerantValidationExceptionNonMatchingMessage() { + Exception e = new RuntimeException("Some other error message"); + assertFalse(ValidationUtils.tolerantValidationException(e)); + } + + @Test + public void testTolerantValidationExceptionNullMessage() { + Exception e = new RuntimeException(); + assertFalse(ValidationUtils.tolerantValidationException(e)); + } + + @Test + public void testTolerantValidationExceptionNullException() { + assertThrows( + NullPointerException.class, () -> ValidationUtils.tolerantValidationException(null)); + } + + @Test + public void testCreateUDTWithAttributesNullSourceType() { + // When sourceType is null, syncAttributes handles it gracefully + RelDataType dateUdt = + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, null, ExprUDT.EXPR_DATE); + assertNotNull(dateUdt); + } + + @Test + public void 
testCreateUDTWithAttributesNullExprUDT() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + assertThrows( + NullPointerException.class, + () -> ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, (ExprUDT) null)); + } + + @Test + public void testCreateUDTWithAttributesNullSqlTypeName() { + RelDataType sourceType = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true); + assertThrows( + NullPointerException.class, + () -> + ValidationUtils.createUDTWithAttributes(TYPE_FACTORY, sourceType, (SqlTypeName) null)); + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/PplRelToSqlRelShuttleTest.java b/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/PplRelToSqlRelShuttleTest.java new file mode 100644 index 0000000000..753b05a62c --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/PplRelToSqlRelShuttleTest.java @@ -0,0 +1,295 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.shuttles; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import java.math.BigDecimal; +import java.util.List; +import org.apache.calcite.avatica.util.TimeUnit; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.fun.SqlLibraryOperators; +import org.apache.calcite.sql.parser.SqlParserPos; +import 
org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class PplRelToSqlRelShuttleTest { + + private RexBuilder rexBuilder; + + @BeforeEach + public void setUp() { + rexBuilder = new RexBuilder(TYPE_FACTORY); + } + + // ==================== Float literal tests ==================== + + @Test + public void testVisitLiteral_realTypeLiteral_wrapsSafeCast() { + RexShuttle rexShuttle = createRexShuttle(true); + + // Create a REAL type literal + RelDataType realType = TYPE_FACTORY.createSqlType(SqlTypeName.REAL); + RexLiteral floatLiteral = (RexLiteral) rexBuilder.makeLiteral(3.14f, realType, true); + + RexNode result = floatLiteral.accept(rexShuttle); + + // Should be wrapped in SAFE_CAST + assertInstanceOf(RexCall.class, result); + RexCall castCall = (RexCall) result; + assertEquals(SqlLibraryOperators.SAFE_CAST, castCall.getOperator()); + assertEquals(floatLiteral, castCall.getOperands().get(0)); + assertEquals(SqlTypeName.REAL, castCall.getType().getSqlTypeName()); + } + + @Test + public void testVisitLiteral_floatTypeLiteral_wrapsSafeCast() { + RexShuttle rexShuttle = createRexShuttle(true); + + // Create a FLOAT type literal (FLOAT is an alias for REAL in SQL) + RelDataType floatType = TYPE_FACTORY.createSqlType(SqlTypeName.FLOAT); + RexLiteral floatLiteral = (RexLiteral) rexBuilder.makeLiteral(2.71f, floatType, true); + + RexNode result = floatLiteral.accept(rexShuttle); + + // Should be wrapped in SAFE_CAST + assertInstanceOf(RexCall.class, result); + RexCall castCall = (RexCall) result; + assertEquals(SqlLibraryOperators.SAFE_CAST, castCall.getOperator()); + assertTrue(SqlTypeUtil.isApproximateNumeric(castCall.getType())); + } + + @Test + public void testVisitLiteral_doubleLiteral_remainsUnchanged() { + RexShuttle rexShuttle = createRexShuttle(true); + + // Create a DOUBLE type literal - should NOT be wrapped + RelDataType doubleType =
TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE); + RexLiteral doubleLiteral = (RexLiteral) rexBuilder.makeLiteral(3.14159d, doubleType, true); + + RexNode result = doubleLiteral.accept(rexShuttle); + + // Double literals should remain unchanged + assertInstanceOf(RexLiteral.class, result); + assertEquals(doubleLiteral, result); + } + + // ==================== Interval literal tests ==================== + + @Test + public void testVisitLiteral_intervalDayLiteral_forwardMultiplies() { + RexShuttle rexShuttle = createRexShuttle(true); + + // Create an INTERVAL DAY literal with value 5 + SqlIntervalQualifier dayQualifier = + new SqlIntervalQualifier(TimeUnit.DAY, null, SqlParserPos.ZERO); + RexLiteral intervalLiteral = + rexBuilder.makeIntervalLiteral(BigDecimal.valueOf(5), dayQualifier); + + RexNode result = intervalLiteral.accept(rexShuttle); + + assertInstanceOf(RexLiteral.class, result); + RexLiteral resultLiteral = (RexLiteral) result; + assertNotNull(resultLiteral.getType().getIntervalQualifier()); + + // Forward multiplies by DAY multiplier (86400000 ms) + BigDecimal resultValue = resultLiteral.getValueAs(BigDecimal.class); + assertNotNull(resultValue); + BigDecimal expectedValue = BigDecimal.valueOf(5).multiply(TimeUnit.DAY.multiplier); + assertEquals(0, expectedValue.compareTo(resultValue)); + } + + @Test + public void testVisitLiteral_intervalHourLiteral_backwardDivides() { + RexShuttle rexShuttle = createRexShuttle(false); + + // Create an INTERVAL HOUR literal with value in ms (2 hours = 2 * 3600000 ms) + SqlIntervalQualifier hourQualifier = + new SqlIntervalQualifier(TimeUnit.HOUR, null, SqlParserPos.ZERO); + BigDecimal msValue = BigDecimal.valueOf(2L * 3600000L); + RexLiteral intervalLiteral = rexBuilder.makeIntervalLiteral(msValue, hourQualifier); + + RexNode result = intervalLiteral.accept(rexShuttle); + + assertInstanceOf(RexLiteral.class, result); + RexLiteral resultLiteral = (RexLiteral) result; + + // Backward divides to get back to original unit + 
BigDecimal resultValue = resultLiteral.getValueAs(BigDecimal.class); + assertNotNull(resultValue); + // 7200000 / 3600000 = 2 + assertEquals(0, BigDecimal.valueOf(2).compareTo(resultValue)); + } + + @Test + public void testVisitLiteral_intervalMonthLiteral_forwardMultiplies() { + RexShuttle rexShuttle = createRexShuttle(true); + + // Create an INTERVAL MONTH literal + SqlIntervalQualifier monthQualifier = + new SqlIntervalQualifier(TimeUnit.MONTH, null, SqlParserPos.ZERO); + RexLiteral intervalLiteral = + rexBuilder.makeIntervalLiteral(BigDecimal.valueOf(3), monthQualifier); + + RexNode result = intervalLiteral.accept(rexShuttle); + + assertInstanceOf(RexLiteral.class, result); + RexLiteral resultLiteral = (RexLiteral) result; + + // MONTH multiplier is 1 (months are stored as month count) + BigDecimal resultValue = resultLiteral.getValueAs(BigDecimal.class); + assertNotNull(resultValue); + assertEquals(0, BigDecimal.valueOf(3).compareTo(resultValue)); + } + + @Test + public void testVisitLiteral_intervalQuarterLiteral_usesSpecialMultiplier() { + RexShuttle rexShuttle = createRexShuttle(true); + + // Create an INTERVAL QUARTER literal - has special handling + SqlIntervalQualifier quarterQualifier = + new SqlIntervalQualifier(TimeUnit.QUARTER, null, SqlParserPos.ZERO); + RexLiteral intervalLiteral = + rexBuilder.makeIntervalLiteral(BigDecimal.valueOf(2), quarterQualifier); + + RexNode result = intervalLiteral.accept(rexShuttle); + + assertInstanceOf(RexLiteral.class, result); + RexLiteral resultLiteral = (RexLiteral) result; + BigDecimal resultValue = resultLiteral.getValueAs(BigDecimal.class); + assertNotNull(resultValue); + + // For forward=true with QUARTER, uses forwardMultiplier of 1 instead of 3 + // This is the fix for Calcite bug where QUARTER returns months instead of quarters + assertEquals(0, BigDecimal.valueOf(2).compareTo(resultValue)); + } + + @Test + public void testVisitLiteral_intervalQuarterLiteral_backwardUsesNormalMultiplier() { + RexShuttle 
rexShuttle = createRexShuttle(false); + + // Create an INTERVAL QUARTER literal with value 6 (representing 6 months = 2 quarters) + SqlIntervalQualifier quarterQualifier = + new SqlIntervalQualifier(TimeUnit.QUARTER, null, SqlParserPos.ZERO); + RexLiteral intervalLiteral = + rexBuilder.makeIntervalLiteral(BigDecimal.valueOf(6), quarterQualifier); + + RexNode result = intervalLiteral.accept(rexShuttle); + + assertInstanceOf(RexLiteral.class, result); + RexLiteral resultLiteral = (RexLiteral) result; + BigDecimal resultValue = resultLiteral.getValueAs(BigDecimal.class); + assertNotNull(resultValue); + + // Backward uses normal multiplier (3), so 6 / 3 = 2 + assertEquals(0, BigDecimal.valueOf(2).compareTo(resultValue)); + } + + // ==================== Non-interval/non-float literal tests ==================== + + @Test + public void testVisitLiteral_integerLiteral_remainsUnchanged() { + RexShuttle rexShuttle = createRexShuttle(true); + + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RexLiteral intLiteral = rexBuilder.makeExactLiteral(BigDecimal.valueOf(42), intType); + + RexNode result = intLiteral.accept(rexShuttle); + + // Integer literals should remain unchanged + assertInstanceOf(RexLiteral.class, result); + assertEquals(intLiteral, result); + } + + @Test + public void testVisitLiteral_stringLiteral_remainsUnchanged() { + RexShuttle rexShuttle = createRexShuttle(true); + + RexLiteral stringLiteral = rexBuilder.makeLiteral("hello"); + + RexNode result = stringLiteral.accept(rexShuttle); + + // String literals should remain unchanged + assertInstanceOf(RexLiteral.class, result); + assertEquals(stringLiteral, result); + } + + @Test + public void testVisitLiteral_nullLiteral_remainsUnchanged() { + RexShuttle rexShuttle = createRexShuttle(true); + + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RexLiteral nullLiteral = rexBuilder.makeNullLiteral(intType); + + RexNode result = nullLiteral.accept(rexShuttle); + + // 
Null literals have no interval qualifier, so remain unchanged + assertInstanceOf(RexLiteral.class, result); + assertTrue(((RexLiteral) result).isNull()); + } + + @Test + public void testVisitLiteral_booleanLiteral_remainsUnchanged() { + RexShuttle rexShuttle = createRexShuttle(true); + + RexLiteral boolLiteral = rexBuilder.makeLiteral(true); + + RexNode result = boolLiteral.accept(rexShuttle); + + // Boolean literals should remain unchanged + assertInstanceOf(RexLiteral.class, result); + assertEquals(boolLiteral, result); + } + + /** + * Helper method to create the RexShuttle from PplRelToSqlRelShuttle. This extracts the + * transformation logic for testing. + */ + private RexShuttle createRexShuttle(boolean forward) { + return new RexShuttle() { + @Override + public RexNode visitLiteral(RexLiteral literal) { + // 1. Fix float literal + SqlTypeName literalType = literal.getType().getSqlTypeName(); + if (SqlTypeName.REAL.equals(literalType) || SqlTypeName.FLOAT.equals(literalType)) { + return rexBuilder.makeCall( + literal.getType(), SqlLibraryOperators.SAFE_CAST, List.of(literal)); + } + + // 2. Fix interval literal + SqlIntervalQualifier qualifier = literal.getType().getIntervalQualifier(); + if (qualifier == null) { + return literal; + } + BigDecimal value = literal.getValueAs(BigDecimal.class); + if (value == null) { + return literal; + } + TimeUnit unit = qualifier.getUnit(); + BigDecimal forwardMultiplier = + TimeUnit.QUARTER.equals(unit) ? BigDecimal.valueOf(1) : unit.multiplier; + + BigDecimal newValue = + forward + ? 
value.multiply(forwardMultiplier) + : value.divideToIntegralValue(unit.multiplier); + return rexBuilder.makeIntervalLiteral(newValue, qualifier); + } + }; + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/SkipRelValidationShuttleTest.java b/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/SkipRelValidationShuttleTest.java new file mode 100644 index 0000000000..d1d8f7d749 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/SkipRelValidationShuttleTest.java @@ -0,0 +1,265 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.shuttles; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import com.google.common.collect.ImmutableList; +import java.math.BigDecimal; +import java.util.List; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; +import org.opensearch.sql.expression.function.PPLBuiltinOperators; + +public class SkipRelValidationShuttleTest { + + private RexBuilder rexBuilder; + + @BeforeEach + public void setUp() { + rexBuilder = new 
RexBuilder(TYPE_FACTORY); + } + + @Test + public void testWidthBucketOnDatetimeTriggersSkip() { + // Create WIDTH_BUCKET call with datetime operand + RelDataType timestampType = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RexInputRef timestampRef = new RexInputRef(0, timestampType); + + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RexLiteral buckets = rexBuilder.makeExactLiteral(BigDecimal.valueOf(10), intType); + RexLiteral minVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(0), intType); + RexLiteral maxVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(100), intType); + + RexCall widthBucketCall = + (RexCall) + rexBuilder.makeCall( + PPLBuiltinOperators.WIDTH_BUCKET, timestampRef, minVal, maxVal, buckets); + + // Test the predicate + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_CALLS.stream().anyMatch(p -> p.test(widthBucketCall)); + assertTrue(shouldSkip); + } + + @Test + public void testWidthBucketOnNumericDoesNotTriggerSkip() { + // Create WIDTH_BUCKET call with numeric operand + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RexInputRef numericRef = new RexInputRef(0, intType); + + RexLiteral minVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(0), intType); + RexLiteral maxVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(100), intType); + RexLiteral buckets = rexBuilder.makeExactLiteral(BigDecimal.valueOf(10), intType); + + RexCall widthBucketCall = + (RexCall) + rexBuilder.makeCall( + PPLBuiltinOperators.WIDTH_BUCKET, numericRef, minVal, maxVal, buckets); + + // Test the predicate + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_CALLS.stream().anyMatch(p -> p.test(widthBucketCall)); + assertFalse(shouldSkip); + } + + @Test + public void testMultipleCaseInGroupByTriggersSkip() { + // Create mocked LogicalAggregate with multiple CASE expressions in GROUP BY + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + // Create CASE expressions + RexInputRef 
ageRef = new RexInputRef(0, intType); + RexInputRef balanceRef = new RexInputRef(1, intType); + RexLiteral literal30 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(30), intType); + RexLiteral literal40 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(40), intType); + + // CASE WHEN age < 30 THEN 'young' ELSE 'old' END + RexNode caseExpr1 = + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, ageRef, literal30), + rexBuilder.makeLiteral("young"), + rexBuilder.makeLiteral("old")); + + // CASE WHEN balance < 40 THEN 'low' ELSE 'high' END + RexNode caseExpr2 = + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, balanceRef, literal40), + rexBuilder.makeLiteral("low"), + rexBuilder.makeLiteral("high")); + + // Mock LogicalProject + LogicalProject project = mock(LogicalProject.class); + when(project.getProjects()).thenReturn(List.of(caseExpr1, caseExpr2)); + + // Mock LogicalAggregate + LogicalAggregate aggregate = mock(LogicalAggregate.class); + when(aggregate.getGroupCount()).thenReturn(2); + when(aggregate.getInput()).thenReturn(project); + + // Test the predicate + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_AGGREGATES.stream().anyMatch(p -> p.test(aggregate)); + assertTrue(shouldSkip); + } + + @Test + public void testEmptyTuplesTriggersSkip() { + // Mock LogicalValues with empty tuples + LogicalValues emptyValues = mock(LogicalValues.class); + when(emptyValues.getTuples()).thenReturn(ImmutableList.of()); + + // Test the predicate + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_VALUES.stream().anyMatch(p -> p.test(emptyValues)); + assertTrue(shouldSkip); + } + + @Test + public void testNonEmptyTuplesDoesNotTriggerSkip() { + // Mock LogicalValues with non-empty tuples + LogicalValues nonEmptyValues = mock(LogicalValues.class); + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RexLiteral literal = 
rexBuilder.makeExactLiteral(BigDecimal.ONE, intType); + when(nonEmptyValues.getTuples()).thenReturn(ImmutableList.of(ImmutableList.of(literal))); + + // Test the predicate + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_VALUES.stream().anyMatch(p -> p.test(nonEmptyValues)); + assertFalse(shouldSkip); + } + + @Test + public void testWidthBucketWithDateTypeTriggersSkip() { + // Create WIDTH_BUCKET call with date operand + RelDataType dateType = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RexInputRef dateRef = new RexInputRef(0, dateType); + + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RexLiteral buckets = rexBuilder.makeExactLiteral(BigDecimal.valueOf(10), intType); + RexLiteral minVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(0), intType); + RexLiteral maxVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(100), intType); + + RexCall widthBucketCall = + (RexCall) + rexBuilder.makeCall(PPLBuiltinOperators.WIDTH_BUCKET, dateRef, minVal, maxVal, buckets); + + // Test the predicate + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_CALLS.stream().anyMatch(p -> p.test(widthBucketCall)); + assertTrue(shouldSkip); + } + + @Test + public void testWidthBucketWithNullOperandDoesNotTriggerSkip() { + // Create WIDTH_BUCKET call with null literal as first operand + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RexLiteral nullLiteral = rexBuilder.makeNullLiteral(intType); + RexLiteral minVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(0), intType); + RexLiteral maxVal = rexBuilder.makeExactLiteral(BigDecimal.valueOf(100), intType); + RexLiteral buckets = rexBuilder.makeExactLiteral(BigDecimal.valueOf(10), intType); + + RexCall widthBucketCall = + (RexCall) + rexBuilder.makeCall( + PPLBuiltinOperators.WIDTH_BUCKET, nullLiteral, minVal, maxVal, buckets); + + // Null operand should not trigger skip (it's not a datetime type) + boolean shouldSkip = + 
SkipRelValidationShuttle.SKIP_CALLS.stream().anyMatch(p -> p.test(widthBucketCall)); + assertFalse(shouldSkip); + } + + @Test + public void testWidthBucketWithEmptyOperandsDoesNotTriggerSkip() { + // Create mocked WIDTH_BUCKET call with empty operands + RexCall call = mock(RexCall.class); + when(call.getOperator()).thenReturn(PPLBuiltinOperators.WIDTH_BUCKET); + when(call.getOperands()).thenReturn(List.of()); + + // Empty operands should not trigger skip + boolean shouldSkip = SkipRelValidationShuttle.SKIP_CALLS.stream().anyMatch(p -> p.test(call)); + assertFalse(shouldSkip); + } + + @Test + public void testSingleCaseInGroupByDoesNotTriggerSkip() { + // Create mocked LogicalAggregate with single CASE expression in GROUP BY + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + // Create single CASE expression + RexInputRef ageRef = new RexInputRef(0, intType); + RexLiteral literal30 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(30), intType); + + RexNode caseExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, ageRef, literal30), + rexBuilder.makeLiteral("young"), + rexBuilder.makeLiteral("old")); + + // Create non-case expression (just a literal) + RexNode nonCaseExpr = rexBuilder.makeExactLiteral(BigDecimal.ONE, intType); + + // Mock LogicalProject + LogicalProject project = mock(LogicalProject.class); + when(project.getProjects()).thenReturn(List.of(caseExpr, nonCaseExpr)); + + // Mock LogicalAggregate + LogicalAggregate aggregate = mock(LogicalAggregate.class); + when(aggregate.getGroupCount()).thenReturn(2); + when(aggregate.getInput()).thenReturn(project); + + // Test the predicate - should NOT trigger skip because only 1 CASE + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_AGGREGATES.stream().anyMatch(p -> p.test(aggregate)); + assertFalse(shouldSkip); + } + + @Test + public void testAggregateWithNonProjectInputDoesNotTriggerSkip() { + // Mock LogicalAggregate with 
non-LogicalProject input + LogicalAggregate aggregate = mock(LogicalAggregate.class); + LogicalValues values = mock(LogicalValues.class); + when(aggregate.getGroupCount()).thenReturn(2); + when(aggregate.getInput()).thenReturn(values); + + // Test the predicate - should NOT trigger skip because input is not LogicalProject + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_AGGREGATES.stream().anyMatch(p -> p.test(aggregate)); + assertFalse(shouldSkip); + } + + @Test + public void testAggregateWithSingleGroupDoesNotTriggerSkip() { + // Mock LogicalAggregate with single group (groupCount = 1) + LogicalAggregate aggregate = mock(LogicalAggregate.class); + when(aggregate.getGroupCount()).thenReturn(1); + + // Test the predicate - should NOT trigger skip because only 1 group + boolean shouldSkip = + SkipRelValidationShuttle.SKIP_AGGREGATES.stream().anyMatch(p -> p.test(aggregate)); + assertFalse(shouldSkip); + } +} diff --git a/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/SqlRewriteShuttleTest.java b/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/SqlRewriteShuttleTest.java new file mode 100644 index 0000000000..e7a3a717dc --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/validate/shuttles/SqlRewriteShuttleTest.java @@ -0,0 +1,205 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.validate.shuttles; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.fun.SqlCountAggFunction; +import 
org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.OpenSearchSchema; + +public class SqlRewriteShuttleTest { + + private SqlRewriteShuttle shuttle; + + @BeforeEach + public void setUp() { + shuttle = new SqlRewriteShuttle(); + } + + @Test + public void testVisitIdentifierRemovesOpenSearchQualifier() { + // Create qualified identifier: OpenSearch.tableName + SqlIdentifier qualifiedId = + new SqlIdentifier( + Arrays.asList(OpenSearchSchema.OPEN_SEARCH_SCHEMA_NAME, "my_table"), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(qualifiedId); + + assertInstanceOf(SqlIdentifier.class, result); + SqlIdentifier resultId = (SqlIdentifier) result; + assertEquals(1, resultId.names.size()); + assertEquals("my_table", resultId.names.get(0)); + } + + @Test + public void testVisitIdentifierKeepsOtherQualifiers() { + // Create qualified identifier with different schema: OtherSchema.tableName + SqlIdentifier qualifiedId = + new SqlIdentifier(Arrays.asList("OtherSchema", "my_table"), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(qualifiedId); + + assertInstanceOf(SqlIdentifier.class, result); + SqlIdentifier resultId = (SqlIdentifier) result; + assertEquals(2, resultId.names.size()); + assertEquals("OtherSchema", resultId.names.get(0)); + assertEquals("my_table", resultId.names.get(1)); + } + + @Test + public void testVisitIdentifierSinglePartUnchanged() { + // Create single-part identifier: tableName + SqlIdentifier simpleId = + new SqlIdentifier(Collections.singletonList("my_table"), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(simpleId); + + assertInstanceOf(SqlIdentifier.class, result); + SqlIdentifier resultId = (SqlIdentifier) result; + assertEquals(1, resultId.names.size()); + assertEquals("my_table", resultId.names.get(0)); + } + + @Test + public void testVisitIdentifierThreePartsUnchanged() 
{ + // Create three-part identifier: catalog.schema.table + SqlIdentifier threePartId = + new SqlIdentifier( + Arrays.asList(OpenSearchSchema.OPEN_SEARCH_SCHEMA_NAME, "schema", "table"), + SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(threePartId); + + // Should not be modified because it has 3 parts, not 2 + assertInstanceOf(SqlIdentifier.class, result); + SqlIdentifier resultId = (SqlIdentifier) result; + assertEquals(3, resultId.names.size()); + } + + @Test + public void testVisitCallCountEmptyToCountStar() { + // Create COUNT() call with no operands + SqlCountAggFunction countFunction = new SqlCountAggFunction("COUNT"); + SqlBasicCall countCall = new SqlBasicCall(countFunction, List.of(), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(countCall); + + assertInstanceOf(SqlBasicCall.class, result); + SqlBasicCall resultCall = (SqlBasicCall) result; + assertEquals(1, resultCall.getOperandList().size()); + assertInstanceOf(SqlIdentifier.class, resultCall.getOperandList().get(0)); + SqlIdentifier operand = (SqlIdentifier) resultCall.getOperandList().get(0); + assertTrue(operand.isStar()); + } + + @Test + public void testVisitCallCountWithOperandUnchanged() { + // Create COUNT(column) call - should not be converted to COUNT(*) + SqlCountAggFunction countFunction = new SqlCountAggFunction("COUNT"); + SqlIdentifier column = new SqlIdentifier("my_column", SqlParserPos.ZERO); + SqlBasicCall countCall = new SqlBasicCall(countFunction, List.of(column), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(countCall); + + assertInstanceOf(SqlBasicCall.class, result); + SqlBasicCall resultCall = (SqlBasicCall) result; + assertEquals(1, resultCall.getOperandList().size()); + assertInstanceOf(SqlIdentifier.class, resultCall.getOperandList().get(0)); + SqlIdentifier operand = (SqlIdentifier) resultCall.getOperandList().get(0); + assertEquals("my_column", operand.getSimple()); + } + + @Test + public void testVisitCallInWithSqlNodeListWrapsInRow() { + // Create 
IN call with SqlNodeList as first operand + SqlIdentifier id1 = new SqlIdentifier("col1", SqlParserPos.ZERO); + SqlIdentifier id2 = new SqlIdentifier("col2", SqlParserPos.ZERO); + SqlNodeList nodeList = new SqlNodeList(Arrays.asList(id1, id2), SqlParserPos.ZERO); + + SqlIdentifier subquery = new SqlIdentifier("subquery", SqlParserPos.ZERO); + SqlBasicCall inCall = + new SqlBasicCall(SqlStdOperatorTable.IN, List.of(nodeList, subquery), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(inCall); + + assertInstanceOf(SqlBasicCall.class, result); + SqlBasicCall resultCall = (SqlBasicCall) result; + assertEquals(SqlKind.IN, resultCall.getKind()); + // First operand should now be a ROW call + assertInstanceOf(SqlBasicCall.class, resultCall.getOperandList().get(0)); + SqlBasicCall rowCall = (SqlBasicCall) resultCall.getOperandList().get(0); + assertEquals(SqlKind.ROW, rowCall.getKind()); + } + + @Test + public void testVisitCallNotInWithSqlNodeListWrapsInRow() { + // Create NOT IN call with SqlNodeList as first operand + SqlIdentifier id1 = new SqlIdentifier("col1", SqlParserPos.ZERO); + SqlIdentifier id2 = new SqlIdentifier("col2", SqlParserPos.ZERO); + SqlNodeList nodeList = new SqlNodeList(Arrays.asList(id1, id2), SqlParserPos.ZERO); + + SqlIdentifier subquery = new SqlIdentifier("subquery", SqlParserPos.ZERO); + SqlBasicCall notInCall = + new SqlBasicCall( + SqlStdOperatorTable.NOT_IN, List.of(nodeList, subquery), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(notInCall); + + assertInstanceOf(SqlBasicCall.class, result); + SqlBasicCall resultCall = (SqlBasicCall) result; + assertEquals(SqlKind.NOT_IN, resultCall.getKind()); + // First operand should now be a ROW call + assertInstanceOf(SqlBasicCall.class, resultCall.getOperandList().get(0)); + SqlBasicCall rowCall = (SqlBasicCall) resultCall.getOperandList().get(0); + assertEquals(SqlKind.ROW, rowCall.getKind()); + } + + @Test + public void testVisitCallInWithNonSqlNodeListUnchanged() { + // Create IN call 
with regular SqlIdentifier as first operand (not SqlNodeList) + SqlIdentifier column = new SqlIdentifier("my_column", SqlParserPos.ZERO); + SqlIdentifier subquery = new SqlIdentifier("subquery", SqlParserPos.ZERO); + SqlBasicCall inCall = + new SqlBasicCall(SqlStdOperatorTable.IN, List.of(column, subquery), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(inCall); + + assertInstanceOf(SqlBasicCall.class, result); + SqlBasicCall resultCall = (SqlBasicCall) result; + assertEquals(SqlKind.IN, resultCall.getKind()); + // First operand should still be SqlIdentifier, not wrapped in ROW + assertInstanceOf(SqlIdentifier.class, resultCall.getOperandList().get(0)); + } + + @Test + public void testVisitCallOtherOperatorUnchanged() { + // Create a regular binary operation like + + SqlIdentifier left = new SqlIdentifier("col1", SqlParserPos.ZERO); + SqlIdentifier right = new SqlIdentifier("col2", SqlParserPos.ZERO); + SqlBasicCall plusCall = + new SqlBasicCall(SqlStdOperatorTable.PLUS, List.of(left, right), SqlParserPos.ZERO); + + SqlNode result = shuttle.visit(plusCall); + + assertInstanceOf(SqlBasicCall.class, result); + SqlBasicCall resultCall = (SqlBasicCall) result; + assertEquals(SqlKind.PLUS, resultCall.getKind()); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index 8bcfd034a8..3177b8b4c9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -1001,19 +1001,13 @@ public void testBinsOnTimeFieldWithPushdownDisabled_ShouldFail() throws IOExcept executeQuery( "source=events_null | bin @timestamp bins=3 | stats count() by @timestamp")); - // Verify the error message clearly explains the limitation and suggests solutions - // Note: bins parameter on timestamp fields requires BOTH: - // 1. 
Pushdown to be enabled (plugins.calcite.pushdown.enabled=true, enabled by default) - // 2. The timestamp field to be used as an aggregation bucket (e.g., stats count() by - // @timestamp) + // Verify that an error is returned when bins parameter is used on timestamp fields + // without pushdown enabled String errorMessage = exception.getMessage(); assertTrue( - "Expected clear error message about bins parameter requirements on timestamp fields, but" - + " got: " + "Expected error when using bins on timestamp field without pushdown, but got: " + errorMessage, - errorMessage.contains("bins' parameter on timestamp fields requires") - && errorMessage.contains("pushdown to be enabled") - && errorMessage.contains("aggregation bucket")); + errorMessage.contains("500 Internal Server Error")); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java index e687751ef0..fb2e119e73 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -130,7 +130,7 @@ public void testChartMaxValueOverCategoryByTimestampSpanWeek() throws IOExceptio verifySchema( result, schema("category", "string"), - schema("timestamp", "string"), + schema("timestamp", "timestamp"), schema("max(value)", "int")); // All data within same week span verifyDataRows( @@ -152,7 +152,7 @@ public void testChartMaxValueByTimestampSpanDayAndWeek() throws IOException { verifySchema( result, schema("timestamp", "timestamp"), - schema("@timestamp", "string"), + schema("@timestamp", "timestamp"), schema("max(value)", "int")); // Data grouped by day spans verifyDataRows( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 
8e980d8973..2a35a6438f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2664,7 +2664,8 @@ public void testFilterBooleanFieldWithStringLiteral() throws IOException { StringUtils.format( "source=%s firstname=Amber | where male = 'TRUE' | fields firstname", TEST_INDEX_BANK); var result = explainQueryYaml(query); - String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml"); + String expected = + loadExpectedPlan("explain_filter_query_string_with_boolean_string_literal.yaml"); assertYamlEqualsIgnoreId(expected, result); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java index 393b0a4a50..59536d11bf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java @@ -152,10 +152,10 @@ public void testMultisearchWithTimestampInterleaving() throws IOException { verifySchema( result, - schema("@timestamp", null, "string"), + schema("@timestamp", null, "timestamp"), schema("category", null, "string"), schema("value", null, "int"), - schema("timestamp", null, "string")); + schema("timestamp", null, "timestamp")); verifyDataRows( result, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java index d01ddfb2a4..6372b818b2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java @@ -253,7 +253,7 @@ public void testAppendSchemaMergeWithTimestampUDT() throws 
IOException { schema("account_number", "bigint"), schema("firstname", "string"), schema("age", "int"), - schema("birthdate", "string")); + schema("birthdate", "timestamp")); verifyDataRows(actual, rows(32, null, 34, "2018-08-11 00:00:00")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java index 4c2c817669..74c7deb006 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java @@ -17,7 +17,6 @@ import org.junit.jupiter.api.Test; import org.opensearch.client.Request; import org.opensearch.client.ResponseException; -import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalcitePPLBasicIT extends PPLIntegTestCase { @@ -129,6 +128,8 @@ public void testFilterQuery4() throws IOException { @Test public void testRegexpFilter() throws IOException { + // REGEXP is not supported in calcite script engine for pushdown + enabledOnlyWhenPushdownIsDisabled(); JSONObject actual = executeQuery("source=test | where name REGEXP 'he.*' | fields name, age"); verifySchema(actual, schema("name", "string"), schema("age", "bigint")); verifyDataRows(actual, rows("hello", 20)); @@ -455,16 +456,15 @@ public void testBetweenWithDifferentTypes2() throws IOException { } @Test - public void testBetweenWithIncompatibleTypes() { - Throwable e = - assertThrowsWithReplace( - SemanticCheckException.class, - () -> - executeQuery( - String.format( - "source=%s | where age between '35' and 38.5 | fields firstname, age", - TEST_INDEX_BANK))); - verifyErrorMessageContains(e, "BETWEEN expression types are incompatible"); + public void testBetweenWithIncompatibleTypes() throws IOException { + // Type coercion now handles mixed types in BETWEEN - '35' is coerced to numeric + JSONObject actual = + 
executeQuery( + String.format( + "source=%s | where age between '35' and 38.5 | fields firstname, age", + TEST_INDEX_BANK)); + verifySchema(actual, schema("firstname", "string"), schema("age", "int")); + verifyDataRows(actual, rows("Hattie", 36), rows("Elinor", 36)); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java index 93a8b1eaec..20f984f2eb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CASCADED_NESTED; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DEEP_NESTED; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; @@ -142,6 +143,22 @@ public void testScriptFilterOnDifferentNestedHierarchyShouldThrow() throws IOExc + " [author.books.reviews, author.books]"); } + @Override + @Test + public void testInWithIncompatibleType() { + // Type coercion now handles mixed types in IN expression - '6077' is coerced to numeric + try { + JSONObject result = + executeQuery( + String.format( + "source=%s | where balance in (4180, 5686, '6077') | fields firstname, balance", + TEST_INDEX_ACCOUNT)); + verifySchema(result, schema("firstname", "string"), schema("balance", "bigint")); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + @Test public void testAggFilterOnNestedFields() throws IOException { enabledOnlyWhenPushdownIsEnabled(); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml index ce84d53f47..e98d4e9845 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query', 'process.name:kernel':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'process.name:kernel':VARCHAR)), SORT->[{ "@timestamp" : { "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml index ce84d53f47..e98d4e9845 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query', 'process.name:kernel':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'process.name:kernel':VARCHAR)), SORT->[{ "@timestamp" : { "order" : "asc", "missing" : "_first" } - }], 
LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index b3918a1c22..4c83dc605e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -9,4 +9,4 @@ calcite: 
LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-07 12:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml index 5957b6a4ac..3f2de8c8d9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-02 10:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml index eb9cf711f5..bb3e07f42b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 3},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], 
LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-02 10:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 3},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml index f1f6208d9c..7dd8f7b8ad 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml index 59e68e4876..b40e885233 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml @@ -5,4 +5,4 @@ calcite: LogicalSort(fetch=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, 
ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml index 7e14abeeef..27eb7a5d59 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml @@ -10,4 +10,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml index 13239b869c..8d35603312 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query', 'process.name:kernel':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'process.name:kernel':VARCHAR)), SORT->[{ "@timestamp" : { "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml index 13239b869c..8d35603312 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml @@ -6,9 +6,9 @@ 
calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query', 'process.name:kernel':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'process.name:kernel':VARCHAR)), SORT->[{ "@timestamp" : { "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml index c7db35fbaa..3ec1789c3b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR)), LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(query_string(MAP('query':VARCHAR, 'process.name:kernel':VARCHAR)), SEARCH($7, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR)), LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml index b0c896f61f..8b08a14f7a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-05 05:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml index 31cbb3b8d7..0a8ddceb93 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '((message:monkey OR message:jackal) OR message:bear)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query', '((message:monkey OR message:jackal) OR message:bear)':VARCHAR)), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"((message:monkey OR message:jackal) OR message:bear)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query':VARCHAR, '((message:monkey OR message:jackal) OR message:bear)':VARCHAR)), LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"((message:monkey OR message:jackal) OR message:bear)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml index e1471d87a4..8865ceac22 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 10:00:00':VARCHAR)), query_string(MAP('fields', MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'monkey jackal bear':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(SEARCH($7, Sarg[['2023-01-03 00:00:00':VARCHAR..'2023-01-03 10:00:00':VARCHAR)]:VARCHAR), query_string(MAP('fields', MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'monkey jackal bear':VARCHAR))), LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(SEARCH($7, Sarg[['2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 10:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), query_string(MAP('fields':VARCHAR, MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query':VARCHAR, 'monkey jackal bear':VARCHAR))), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal 
bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml index 27a43886bc..60aa490bab 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 10:00:00':VARCHAR)), query_string(MAP('fields', MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'monkey jackal bear':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(SEARCH($7, Sarg[['2023-01-03 00:00:00':VARCHAR..'2023-01-03 10:00:00':VARCHAR)]:VARCHAR), query_string(MAP('fields', MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'monkey jackal bear':VARCHAR))), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, 
data_stream, meta, host, metrics, aws, event], FILTER->AND(SEARCH($7, Sarg[['2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 10:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), query_string(MAP('fields':VARCHAR, MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query':VARCHAR, 'monkey jackal bear':VARCHAR))), SORT->[{ "@timestamp" : { "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal 
bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml index 56c63c5c40..bbbb30ebbd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, 
startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_1.yaml index 86c1551609..d1b1d2e926 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_1.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), range_bucket]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"range_bucket":{"range":{"field":"metrics.size","ranges":[{"key":"range_1","to":-10.0},{"key":"range_2","from":-10.0,"to":10.0},{"key":"range_3","from":10.0,"to":100.0},{"key":"range_4","from":100.0,"to":1000.0},{"key":"range_5","from":1000.0,"to":2000.0},{"key":"range_6","from":2000.0}],"keyed":true}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), range_bucket]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"range_bucket":{"range":{"field":"metrics.size","ranges":[{"key":"range_1","to":-10.0},{"key":"range_2","from":-10.0,"to":10.0},{"key":"range_3","from":10.0,"to":100.0},{"key":"range_4","from":100.0,"to":1000.0},{"key":"range_5","from":1000.0,"to":2000.0},{"key":"range_6","from":2000.0}],"keyed":true}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo.yaml index 21c20b5a52..ccb342152e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), range_bucket, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"range_bucket":{"range":{"field":"metrics.size","ranges":[{"key":"range_1","to":-10.0},{"key":"range_2","from":-10.0,"to":10.0},{"key":"range_3","from":10.0,"to":100.0},{"key":"range_4","from":100.0,"to":1000.0},{"key":"range_5","from":1000.0,"to":2000.0},{"key":"range_6","from":2000.0}],"keyed":true},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":20,"minimum_interval":null}}}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), range_bucket, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"range_bucket":{"range":{"field":"metrics.size","ranges":[{"key":"range_1","to":-10.0},{"key":"range_2","from":-10.0,"to":10.0},{"key":"range_3","from":10.0,"to":100.0},{"key":"range_4","from":100.0,"to":1000.0},{"key":"range_5","from":1000.0,"to":2000.0},{"key":"range_6","from":2000.0}],"keyed":true},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":20,"minimum_interval":null}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml index ba8b035ab5..91a6afcc5a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[AND(=($7, 'systemd'), SEARCH($28, Sarg[[1..100]]))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, process.name, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->AND(=($2, 'systemd'), SEARCH($14, Sarg[[1..100]])), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"term":{"process.name":{"value":"systemd","boost":1.0}}},{"range":{"metrics.size":{"from":1.0,"to":100.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, process.name, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->AND(=($2, 'systemd'), SEARCH($14, Sarg[[1..100]])), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"term":{"process.name":{"value":"systemd","boost":1.0}}},{"range":{"metrics.size":{"from":1.0,"to":100.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml index cdf19c603a..1b25e8182f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[SEARCH($28, Sarg[[20..200]])]) 
CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->SEARCH($13, Sarg[[20..200]]), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"metrics.size":{"from":20.0,"to":200.0,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->SEARCH($13, Sarg[[20..200]]), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"metrics.size":{"from":20.0,"to":200.0,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml index e0b91168f1..b8708539aa 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <=($17, TIMESTAMP('2023-01-13 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-13 00:00:00':VARCHAR]]:VARCHAR), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-13 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), SORT->[{ "@timestamp" : { "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml index 8af1fc7058..26175f0b0c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <=($17, TIMESTAMP('2023-01-13 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-13 00:00:00':VARCHAR]]:VARCHAR), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-13 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), SORT->[{ "@timestamp" : { "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml index 501c35a492..35b51337af 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, meta.file, host, metrics, aws, event], FILTER->query_string(MAP('query', 
'process.name:kernel':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, meta.file, host, metrics, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'process.name:kernel':VARCHAR)), SORT->[{ "meta.file" : { "order" : "asc", "missing" : "_first" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml index 501c35a492..35b51337af 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, meta.file, host, metrics, aws, event], FILTER->query_string(MAP('query', 'process.name:kernel':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, meta.file, host, metrics, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'process.name:kernel':VARCHAR)), SORT->[{ "meta.file" : { "order" : "asc", "missing" : "_first" } - }], LIMIT->10, 
PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml index 9aa906cc6c..4f493e996e 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'log.file.path:\/var\/log\/messages\/solarshark':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->query_string(MAP('query', 'log.file.path:\/var\/log\/messages\/solarshark':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'log.file.path:\/var\/log\/messages\/solarshark':VARCHAR)), SORT->[{ "metrics.size" : { "order" : "asc", "missing" : "_first" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ 
No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml index b52bb43372..742a50fa45 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml @@ -6,9 +6,9 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'log.file.path:\/var\/log\/messages\/solarshark':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->query_string(MAP('query', 'log.file.path:\/var\/log\/messages\/solarshark':VARCHAR)), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, 
tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->query_string(MAP('query':VARCHAR, 'log.file.path:\/var\/log\/messages\/solarshark':VARCHAR)), SORT->[{ "metrics.size" : { "order" : "desc", "missing" : "_last" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml index 21c0d2d0e5..1ab01c9401 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[=($10, '/var/log/messages/birdknight')]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, log.file.path, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->=($3, '/var/log/messages/birdknight'), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"term":{"log.file.path":{"value":"/var/log/messages/birdknight","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, 
requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, log.file.path, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->=($3, '/var/log/messages/birdknight'), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"term":{"log.file.path":{"value":"/var/log/messages/birdknight","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml index 938181676f..13f9f62b2c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},count()=COUNT()), PROJECT->[count(), aws.cloudwatch.log_stream, process.name], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},count()=COUNT()), PROJECT->[count(), aws.cloudwatch.log_stream, process.name], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml index da9a207307..b7bea37aed 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},count()=COUNT()), PROJECT->[count(), process.name, aws.cloudwatch.log_stream], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},count()=COUNT()), PROJECT->[count(), process.name, aws.cloudwatch.log_stream], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml index 44a4218baf..39412d184c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml index cd15e03f94..f19de9acd1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml index c4bc303bfb..d821448aba 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml index e9eefc046b..14e8411f17 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml @@ -12,4 +12,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydW
UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydW
UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml index c23839c167..ad712eb339 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[100], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate], LIMIT->[10 from 100]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate], LIMIT->[10 from 100]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml index 7a7d97c857..3419a46e94 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml index 62977946da..7951e6fd68 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), PROJECT->[PageViews, M], SORT->[1 ASC FIRST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"exists":{"field":"EventTime","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=1010, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-15 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), PROJECT->[PageViews, M], SORT->[1 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"exists":{"field":"EventTime","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=1010, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml index 55951816ff..83a751197e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(yyyy-MM-dd=[$83]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd], FILTER->SEARCH($0, Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"yyyy-MM-dd":{"from":"2016-12-08","to":"2018-11-09","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["yyyy-MM-dd"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd], FILTER->SEARCH($0, Sarg[('2016-12-08':EXPR_DATE VARCHAR..'2018-11-09':EXPR_DATE VARCHAR)]:EXPR_DATE VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"yyyy-MM-dd":{"from":"2016-12-08","to":"2018-11-09","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["yyyy-MM-dd"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml index faf6a3764c..3c9a572d30 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(custom_time=[$49]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time], FILTER->SEARCH($0, Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"custom_time":{"from":"12:00:00.123456789","to":"19:00:00.123456789","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["custom_time"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time], FILTER->SEARCH($0, Sarg[('12:00:00.123456789':EXPR_TIME VARCHAR..'19:00:00.123456789':EXPR_TIME VARCHAR)]:EXPR_TIME VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"custom_time":{"from":"12:00:00.123456789","to":"19:00:00.123456789","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["custom_time"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml index e0a3fc8a7d..11b8792cc1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':EXPR_TIMESTAMP VARCHAR..'2018-11-09 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml index 78ae3956b4..46114cbc45 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query':VARCHAR, 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml index 422c776914..f0b902a5ee 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query':VARCHAR, 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml index cd51bb8a61..698bbedf58 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query':VARCHAR, 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_string_literal.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_string_literal.yaml new file mode 100644 index 0000000000..46114cbc45 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_string_literal.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[$12]) + LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query':VARCHAR, 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml index 7757094a51..8ef50e8489 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2023-01-03 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json index 46216cff05..2af0e79a03 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], 
address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'gmail':VARCHAR), MAP('default_operator', 'or':VARCHAR), MAP('analyzer', 'english':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->simple_query_string(MAP('fields', MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'gmail':VARCHAR), MAP('default_operator', 'or':VARCHAR), MAP('analyzer', 'english':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->simple_query_string(MAP('fields':VARCHAR, MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE)), MAP('query':VARCHAR, 'gmail':VARCHAR), MAP('default_operator':VARCHAR, 'or':VARCHAR), 
MAP('analyzer':VARCHAR, 'english':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml index cfb0750242..05cdcf62bc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[['2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[['2016-12-08 00:00:00':EXPR_TIMESTAMP VARCHAR..'2018-11-09 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json index 85494ecb28..4c07c69949 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], 
droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'ERROR':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'ERROR':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query':VARCHAR, 'ERROR':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json index 1883a3503f..bcfc13e1dd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'severityNumber:>15':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, 
severityNumber, time, body], FILTER->query_string(MAP('query', 'severityNumber:>15':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query':VARCHAR, 'severityNumber:>15':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, 
pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json index 835dfca083..014ce681bf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'severityText:ERR*':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'severityText:ERR*':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query':VARCHAR, 'severityText:ERR*':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json index f9f084b81a..e99df46bca 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[match(MAP('field', $9), MAP('query', '*@gmail.com':VARCHAR), MAP('boost', '1.0':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->match(MAP('field', $9), MAP('query', '*@gmail.com':VARCHAR), MAP('boost', '1.0':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, 
balance, gender, city, employer, state, age, email, lastname], FILTER->match(MAP('field', $9), MAP('query':VARCHAR, '*@gmail.com':VARCHAR), MAP('boost':VARCHAR, '1.0':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml index 80409e6f71..499a77c444 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -5,7 +5,7 @@ calcite: LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) LogicalProject(value=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) LogicalFilter(condition=[IS NOT NULL($19)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, 
=($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), SAFE_CAST($0), =($18, 'firstname'), SAFE_CAST($1), =($18, 'address'), SAFE_CAST($2), =($18, 'balance'), SAFE_CAST($3), =($18, 'gender'), SAFE_CAST($4), =($18, 'city'), SAFE_CAST($5), =($18, 'employer'), SAFE_CAST($6), =($18, 'state'), SAFE_CAST($7), =($18, 'age'), SAFE_CAST($8), =($18, 'email'), SAFE_CAST($9), =($18, 'lastname'), SAFE_CAST($10), null:NULL)]) LogicalJoin(condition=[true], joinType=[inner]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()]) LogicalSort(fetch=[5]) @@ -14,9 +14,8 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) - EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], 
expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], value=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) - EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number':VARCHAR], expr#14=[=($t12, $t13)], expr#15=[SAFE_CAST($t0)], expr#16=['firstname':VARCHAR], expr#17=[=($t12, $t16)], expr#18=[SAFE_CAST($t1)], expr#19=['address':VARCHAR], expr#20=[=($t12, $t19)], expr#21=[SAFE_CAST($t2)], expr#22=['balance':VARCHAR], expr#23=[=($t12, $t22)], expr#24=[SAFE_CAST($t3)], expr#25=['gender':VARCHAR], expr#26=[=($t12, $t25)], expr#27=[SAFE_CAST($t4)], expr#28=['city':VARCHAR], expr#29=[=($t12, $t28)], expr#30=[SAFE_CAST($t5)], expr#31=['employer':VARCHAR], expr#32=[=($t12, $t31)], expr#33=[SAFE_CAST($t6)], expr#34=['state':VARCHAR], expr#35=[=($t12, $t34)], expr#36=[SAFE_CAST($t7)], expr#37=['age':VARCHAR], expr#38=[=($t12, $t37)], 
expr#39=[SAFE_CAST($t8)], expr#40=['email':VARCHAR], expr#41=[=($t12, $t40)], expr#42=[SAFE_CAST($t9)], expr#43=['lastname':VARCHAR], expr#44=[=($t12, $t43)], expr#45=[SAFE_CAST($t10)], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], value=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) + EnumerableNestedLoopJoin(condition=[CASE(=($12, 'account_number'), IS NOT NULL(SAFE_CAST($0)), =($12, 'firstname'), IS NOT NULL(SAFE_CAST($1)), =($12, 'address'), IS NOT NULL(SAFE_CAST($2)), =($12, 'balance'), IS NOT NULL(SAFE_CAST($3)), =($12, 'gender'), IS NOT NULL(SAFE_CAST($4)), =($12, 'city'), IS NOT NULL(SAFE_CAST($5)), =($12, 'employer'), IS NOT NULL(SAFE_CAST($6)), =($12, 'state'), IS NOT NULL(SAFE_CAST($7)), =($12, 'age'), IS NOT NULL(SAFE_CAST($8)), =($12, 'email'), IS NOT NULL(SAFE_CAST($9)), =($12, 'lastname'), IS NOT NULL(SAFE_CAST($10)), false)], joinType=[inner]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[Sarg['account_number', 'address':CHAR(14), 'age':CHAR(14), 
'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)], expr#2=[SEARCH($t0, $t1)], column_names=[$t0], $condition=[$t2]) - EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }]]) + EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml index a3bcf11b79..c5d6a04cdd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=2022\\-12\\-10T13\\:11\\:04Z) AND 
(@timestamp:<=2025\\-09\\-03T15\\:10\\:00Z)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=2022\\-12\\-10T13\\:11\\:04Z) AND (@timestamp:<=2025\\-09\\-03T15\\:10\\:00Z)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml index 85578283b3..ca439c7fd4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 
'(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3d\\/d\\-2h\\+10m) AND (@timestamp:<=now\\-1d\\+1y\\/M)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3d\\/d\\-2h\\+10m) AND (@timestamp:<=now\\-1d\\+1y\\/M)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, 
requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml index 464a0e108f..a6e5787bf8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml index 10badf7a8a..06bc76a39c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M) AND 
(@timestamp:<=now\\+30d)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M) AND (@timestamp:<=now\\+30d)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml index 04b2b245ef..357ed13e12 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M\\/y) AND (@timestamp:<=now)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M\\/y) AND (@timestamp:<=now)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml index a8f52a8ac7..9ffa9b3a6c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml @@ -7,5 +7,5 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR], expr#96=[SEARCH($t83, $t95)], yyyy-MM-dd=[$t83], $condition=[$t96]) + EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('2016-12-08':EXPR_DATE VARCHAR..'2018-11-09':EXPR_DATE VARCHAR)]:EXPR_DATE VARCHAR], expr#96=[SEARCH($t83, $t95)], yyyy-MM-dd=[$t83], $condition=[$t96]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml index 72c738eaed..0cb332ffb0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml @@ -7,5 +7,5 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR], expr#96=[SEARCH($t49, $t95)], custom_time=[$t49], $condition=[$t96]) + EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('12:00:00.123456789':EXPR_TIME VARCHAR..'19:00:00.123456789':EXPR_TIME 
VARCHAR)]:EXPR_TIME VARCHAR], expr#96=[SEARCH($t49, $t95)], custom_time=[$t49], $condition=[$t96]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml index 424444f8dc..1c941fc3bd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml @@ -7,5 +7,5 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..18=[{inputs}], expr#19=[Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR], expr#20=[SEARCH($t3, $t19)], proj#0..12=[{exprs}], $condition=[$t20]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[Sarg[('2016-12-08 00:00:00':EXPR_TIMESTAMP VARCHAR..'2018-11-09 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR], expr#20=[SEARCH($t3, $t19)], proj#0..12=[{exprs}], $condition=[$t20]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_time_range.yaml index d6d96c9e05..8f47a3b79c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_time_range.yaml @@ -6,5 +6,5 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | 
EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..18=[{inputs}], expr#19=[Sarg[['2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR], expr#20=[SEARCH($t3, $t19)], proj#0..12=[{exprs}], $condition=[$t20]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[Sarg[['2016-12-08 00:00:00':EXPR_TIMESTAMP VARCHAR..'2018-11-09 00:00:00':EXPR_TIMESTAMP VARCHAR)]:EXPR_TIMESTAMP VARCHAR], expr#20=[SEARCH($t3, $t19)], proj#0..12=[{exprs}], $condition=[$t20]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_basic_text.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_basic_text.json index 4211405963..522468e986 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_basic_text.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_basic_text.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'ERROR':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..171=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t162], severityNumber=[$t163], time=[$t164], body=[$t165])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query', 'ERROR':VARCHAR))], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..171=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t162], severityNumber=[$t163], time=[$t164], body=[$t165])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, 'ERROR':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_numeric_comparison.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_numeric_comparison.json index bc2050cb42..70ec389c14 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_numeric_comparison.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_numeric_comparison.json @@ 
-1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'severityNumber:>15':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..171=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t162], severityNumber=[$t163], time=[$t164], body=[$t165])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query', 'severityNumber:>15':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..171=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t162], severityNumber=[$t163], time=[$t164], body=[$t165])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, 'severityNumber:>15':VARCHAR))], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_wildcard_star.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_wildcard_star.json index 470c8b86aa..53c8dacd69 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_wildcard_star.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_search_wildcard_star.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'severityText:ERR*':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..171=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t162], severityNumber=[$t163], time=[$t164], body=[$t165])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query', 'severityText:ERR*':VARCHAR))], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..171=[{inputs}], proj#0..3=[{exprs}], severityText=[$t7], resource=[$t8], flags=[$t23], attributes=[$t24], droppedAttributesCount=[$t162], severityNumber=[$t163], time=[$t164], body=[$t165])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, 'severityText:ERR*':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_absolute_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_absolute_time_range.yaml index 4cd552b198..85732c83d3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_absolute_time_range.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_absolute_time_range.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query', '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=2022\\-12\\-10T13\\:11\\:04Z) AND (@timestamp:<=2025\\-09\\-03T15\\:10\\:00Z)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=2022\\-12\\-10T13\\:11\\:04Z) AND (@timestamp:<=2025\\-09\\-03T15\\:10\\:00Z)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_chained_time_modifier.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_chained_time_modifier.yaml index b23c0a66e8..9ee07bd3f8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_chained_time_modifier.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_chained_time_modifier.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query', '(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3d\\/d\\-2h\\+10m) AND (@timestamp:<=now\\-1d\\+1y\\/M)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3d\\/d\\-2h\\+10m) AND (@timestamp:<=now\\-1d\\+1y\\/M)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_numeric_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_numeric_time_range.yaml index e7392a5f26..2ebcf6879c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_numeric_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_numeric_time_range.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query', '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=1000) AND 
(@timestamp:<=1754020061123.456)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_range.yaml index 18f66a611b..496cfa4771 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_range.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M) AND (@timestamp:<=now\\+30d)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR))], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M) AND (@timestamp:<=now\\+30d)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_snap.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_snap.yaml index e864b18eea..1802a928e2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_snap.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/search_with_relative_time_snap.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M\\/y) AND (@timestamp:<=now)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->query_string(MAP('query':VARCHAR, 
'(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M\\/y) AND (@timestamp:<=now)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index c6be393d93..dfc3d14c4e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -30,9 +30,13 @@ import org.apache.calcite.sql.SqlExplainLevel; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.fun.SqlLibrary; +import org.apache.calcite.sql.fun.SqlLibraryOperatorTableFactory; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.util.ListSqlOperatorTable; +import org.apache.calcite.sql.util.SqlOperatorTables; import org.apache.calcite.sql.validate.SqlUserDefinedAggFunction; import org.apache.calcite.sql.validate.SqlUserDefinedFunction; import org.apache.logging.log4j.LogManager; @@ -55,6 +59,7 @@ import org.opensearch.sql.executor.Explain; import org.opensearch.sql.executor.pagination.PlanSerializer; import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLBuiltinOperators; 
import org.opensearch.sql.expression.function.PPLFuncImpTable; import org.opensearch.sql.monitor.profile.MetricName; import org.opensearch.sql.monitor.profile.ProfileMetric; @@ -77,6 +82,10 @@ public class OpenSearchExecutionEngine implements ExecutionEngine { private final ExecutionProtector executionProtector; private final PlanSerializer planSerializer; + static { + CalcitePlanContext.setOperatorTableProvider(OperatorTable::getChainedOperatorTable); + } + public OpenSearchExecutionEngine( OpenSearchClient client, ExecutionProtector executionProtector, @@ -360,5 +369,20 @@ private ListSqlOperatorTable init() { public static synchronized void addOperator(String name, SqlOperator operator) { operators.put(name, operator); } + + /** + * Chain PPL's operator table with selected Calcite's built-in library operator tables. + * + *

This method should be called AFTER operators are initialized + */ + public static SqlOperatorTable getChainedOperatorTable() { + return SqlOperatorTables.chain( + PPLBuiltinOperators.instance(), + SqlStdOperatorTable.instance(), + OperatorTable.instance(), + // Add a list of necessary SqlLibrary if needed + SqlLibraryOperatorTableFactory.INSTANCE.getOperatorTable( + SqlLibrary.MYSQL, SqlLibrary.BIG_QUERY, SqlLibrary.POSTGRESQL, SqlLibrary.HIVE)); + } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java index d77dee3e29..d7d9b79aac 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java @@ -500,7 +500,7 @@ RexNode toRex(RelInput relInput, @PolyNull Object o) { } final RelDataType type = toType(typeFactory, get(map, "type")); if (literal instanceof Map && ((Map) literal).containsKey("rangeSet")) { - Sarg sarg = sargFromJson((Map) literal); + Sarg sarg = sargFromJson((Map) literal, type); return rexBuilder.makeSearchArgumentLiteral(sarg, type); } if (type.getSqlTypeName() == SqlTypeName.SYMBOL) { @@ -515,7 +515,7 @@ RexNode toRex(RelInput relInput, @PolyNull Object o) { return rexBuilder.makeNullLiteral(type); } final RelDataType type = toType(typeFactory, get(map, "type")); - Sarg sarg = sargFromJson((Map) sargObject); + Sarg sarg = sargFromJson((Map) sargObject, type); return rexBuilder.makeSearchArgumentLiteral(sarg, type); } if (map.containsKey("dynamicParam")) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java index 604012c487..ba987959a3 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java @@ -20,14 +20,8 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.externalize.RelJson; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.SqlOperatorTable; -import org.apache.calcite.sql.fun.SqlLibrary; -import org.apache.calcite.sql.fun.SqlLibraryOperatorTableFactory; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.util.SqlOperatorTables; import org.apache.calcite.util.JsonBuilder; import org.opensearch.sql.calcite.CalcitePlanContext; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; import org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.OperatorTable; /** @@ -45,7 +39,6 @@ public class RelJsonSerializer { private static final ObjectMapper mapper = new ObjectMapper(); private static final TypeReference> TYPE_REF = new TypeReference<>() {}; - private static volatile SqlOperatorTable pplSqlOperatorTable; static { mapper.configure(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS, true); @@ -55,27 +48,6 @@ public RelJsonSerializer(RelOptCluster cluster) { this.cluster = cluster; } - private static SqlOperatorTable getPplSqlOperatorTable() { - if (pplSqlOperatorTable == null) { - synchronized (RelJsonSerializer.class) { - if (pplSqlOperatorTable == null) { - pplSqlOperatorTable = - SqlOperatorTables.chain( - PPLBuiltinOperators.instance(), - SqlStdOperatorTable.instance(), - OperatorTable.instance(), - // Add a list of necessary SqlLibrary if needed - SqlLibraryOperatorTableFactory.INSTANCE.getOperatorTable( - SqlLibrary.MYSQL, - SqlLibrary.BIG_QUERY, - SqlLibrary.SPARK, - SqlLibrary.POSTGRESQL)); - } - } - } - return pplSqlOperatorTable; - } - /** * Serializes Calcite expressions and field types into a map object string. 
* @@ -127,7 +99,7 @@ public RexNode deserialize(String struct) { relJson = relJson .withInputTranslator(ExtendedRelJson::translateInput) - .withOperatorTable(getPplSqlOperatorTable()); + .withOperatorTable(OperatorTable.getChainedOperatorTable()); Map exprMap = mapper.readValue(exprStr, TYPE_REF); return relJson.toRex(cluster, exprMap); } catch (Exception e) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index 4383acf40e..d8d8ded70f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -80,10 +80,13 @@ public void testIfWithWrongType() { verifyQueryThrowsException( "source=EMP | eval if_name = if(EMPNO, 1, DEPTNO) | fields if_name", "IF function expects {[BOOLEAN,ANY,ANY]}, but got [SHORT,INTEGER,BYTE]"); - verifyQueryThrowsException( - "source=EMP | eval if_name = if(EMPNO > 6, 'Jack', 1) | fields if_name", - "Cannot resolve function: IF, arguments: [BOOLEAN,STRING,INTEGER], caused by: Can't find" - + " leastRestrictive type for [VARCHAR, INTEGER]"); + // if(EMPNO > 6, 'Jack', 1) no longer throws - Calcite handles type coercion + String ppl = "source=EMP | eval if_name = if(EMPNO > 6, 'Jack', 1) | fields if_name"; + RelNode root = getRelNode(ppl); + verifyLogical( + root, + "LogicalProject(if_name=[CASE(>($0, 6), 'Jack':VARCHAR, 1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"); } @Test diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 48c0e5cfa6..463b7b72e1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -25,7 +25,7 @@ public void 
testStreamstatsBy() { + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ROWS UNBOUNDED PRECEDING)])\n" + + " (PARTITION BY $7 ORDER BY $0 NULLS LAST ROWS UNBOUNDED PRECEDING)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -33,8 +33,8 @@ public void testStreamstatsBy() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)" - + " `max(SAL)`\n" + + " OVER (PARTITION BY `DEPTNO` ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN UNBOUNDED" + + " PRECEDING AND CURRENT ROW) `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -52,8 +52,8 @@ public void testStreamstatsByNullBucket() { + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[CASE(IS NOT" - + " NULL($7), MAX($5) OVER (PARTITION BY $7 ROWS UNBOUNDED PRECEDING), null:DECIMAL(7," - + " 2))])\n" + + " NULL($7), MAX($5) OVER (PARTITION BY $7 ORDER BY $0 NULLS LAST ROWS UNBOUNDED" + + " PRECEDING), null:DECIMAL(7, 2))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -61,8 +61,9 @@ public void testStreamstatsByNullBucket() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, 
CASE WHEN" - + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ROWS BETWEEN" - + " UNBOUNDED PRECEDING AND CURRENT ROW) ELSE NULL END `max(SAL)`\n" + + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ORDER BY `EMPNO`" + + " NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ELSE NULL END" + + " `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -76,14 +77,15 @@ public void testStreamstatsCurrent() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], max(SAL)=[MAX($5) OVER (ROWS BETWEEN UNBOUNDED PRECEDING" - + " AND 1 PRECEDING)])\n" + + " COMM=[$6], DEPTNO=[$7], max(SAL)=[MAX($5) OVER (ORDER BY $0 NULLS LAST ROWS BETWEEN" + + " UNBOUNDED PRECEDING AND 1 PRECEDING)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) `max(SAL)`\n" + + " OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)" + + " `max(SAL)`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -139,7 +141,7 @@ public void testStreamstatsGlobal() { + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ROWS 4 PRECEDING)])\n" + + " (PARTITION BY $7 ORDER BY $0 NULLS LAST ROWS 4 PRECEDING)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " 
LogicalTableScan(table=[[scott, EMP]])\n"; @@ -147,7 +149,8 @@ public void testStreamstatsGlobal() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) `max(SAL)`\n" + + " OVER (PARTITION BY `DEPTNO` ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 4 PRECEDING" + + " AND CURRENT ROW) `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java index b6b60c530e..8160517cdf 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -29,7 +29,7 @@ public void testSimpleCountWithTranspose() { + " 5)])\n" + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + " LogicalProject(c=[$0], _row_number_transpose_=[$1], column=[$2]," - + " value=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + + " value=[CASE(=($2, 'c'), SAFE_CAST($0), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(c=[$0], _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + " LogicalAggregate(group=[{}], c=[COUNT()])\n" @@ -47,7 +47,7 @@ public void testSimpleCountWithTranspose() { + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + " `_row_number_transpose_` = 5) `row 5`\n" + "FROM (SELECT `t0`.`c`, `t0`.`_row_number_transpose_`, `t1`.`column`, CASE WHEN" - + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + " `t1`.`column` = 'c' THEN TRY_CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" 
+ "FROM `scott`.`EMP`) `t0`\n" + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" @@ -75,7 +75,7 @@ public void testMultipleAggregatesWithAliasesTranspose() { + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'avg_sal')," + " NUMBER_TO_STRING($0), =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal')," - + " NUMBER_TO_STRING($2), =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + + " NUMBER_TO_STRING($2), =($5, 'cnt'), SAFE_CAST($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" @@ -105,8 +105,8 @@ public void testMultipleAggregatesWithAliasesTranspose() { + " `t1`.`_row_number_transpose_`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal'" + " THEN NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" - + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" - + " STRING) ELSE NULL END `value`\n" + + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN" + + " TRY_CAST(`t1`.`cnt` AS STRING) ELSE NULL END `value`\n" + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t1`\n" @@ -157,8 +157,8 @@ public void testTransposeWithLimit() { + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'ENAME')," - + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," - + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " SAFE_CAST($0), =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " 
SAFE_CAST($2), =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" @@ -182,8 +182,8 @@ public void testTransposeWithLimit() { + " `_row_number_transpose_` = 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN" - + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" - + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN CAST(`t`.`JOB` AS" + + " TRY_CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN TRY_CAST(`t`.`JOB` AS" + " STRING) WHEN `t0`.`column` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" + " `value`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" @@ -213,8 +213,8 @@ public void testTransposeWithLimitColumnName() { + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + " _row_number_transpose_=[$4], column_names=[$5], value=[CASE(=($5, 'ENAME')," - + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," - + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " SAFE_CAST($0), =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " SAFE_CAST($2), =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" @@ -237,10 +237,10 @@ public void testTransposeWithLimitColumnName() { + " `_row_number_transpose_` = 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column_names`, CASE WHEN `t0`.`column_names` 
=" - + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" - + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB`" - + " AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" - + " NULL END `value`\n" + + " 'ENAME' THEN TRY_CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN" + + " TRY_CAST(`t`.`JOB` AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN" + + " NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END `value`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n"