From 4bc4dec848eb4c052be4f739c1ed3102f315695b Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 12 May 2026 23:33:04 -0700 Subject: [PATCH 1/4] =?UTF-8?q?Add=20geometry=E2=86=94Box2D=20Catalyst=20c?= =?UTF-8?q?ast=20(#2927)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spark's `Cast.canCast` rejects arbitrary UDT-to-UDT casts, so today users have to call `ST_Box2D(geom)` / `ST_GeomFromBox2D(box)` explicitly. This PR teaches Catalyst to handle the conversions as ordinary SQL casts. Implementation: - New `Box2DCastResolutionRule` analyzer rule rewrites `Cast(geom, Box2DUDT)` → `ST_Box2D(geom)` and `Cast(box, GeometryUDT)` → `ST_GeomFromBox2D(box)`. It runs before `CheckAnalysis`, so the downstream optimizer/codegen see the rewritten expression and never observe the rejected Cast. - Registered via `SparkSessionExtensions.injectResolutionRule` from `SedonaSqlExtensions`. - `SedonaSqlAstBuilder.visitPrimitiveDataType` now recognizes `BOX2D` as a type keyword across all supported Spark versions, so SQL `CAST(... AS box2d)` parses to `Box2DUDT`. Tests: - Unit test of the rule (rewrite for both directions, no-op for unrelated casts). - SQL end-to-end suite under spark-3.5 covering SQL CAST in both directions, DataFrame `.cast(Box2DUDT)` / `.cast(GeometryUDT())`, round-trip Geometry → Box2D → Geometry, and NULL propagation. Closes #2927. 
--- .../sedona/sql/SedonaSqlExtensions.scala | 6 ++ .../sedona_sql/expressions/Constructors.scala | 5 +- .../Box2DCastResolutionRule.scala | 57 ++++++++++++ .../sql/Box2DCastResolutionRuleSuite.scala | 70 +++++++++++++++ .../sql/parser/SedonaSqlAstBuilder.scala | 8 +- .../sql/parser/SedonaSqlAstBuilder.scala | 8 +- .../apache/sedona/sql/Box2DCastSuite.scala | 89 +++++++++++++++++++ .../sql/parser/SedonaSqlAstBuilder.scala | 8 +- .../sql/parser/SedonaSqlAstBuilder.scala | 8 +- 9 files changed, 240 insertions(+), 19 deletions(-) create mode 100644 spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/optimization/Box2DCastResolutionRule.scala create mode 100644 spark/common/src/test/scala/org/apache/sedona/sql/Box2DCastResolutionRuleSuite.scala create mode 100644 spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/SedonaSqlExtensions.scala b/spark/common/src/main/scala/org/apache/sedona/sql/SedonaSqlExtensions.scala index 2856a735bf3..33182a7f90a 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/SedonaSqlExtensions.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/SedonaSqlExtensions.scala @@ -22,6 +22,7 @@ import org.apache.sedona.spark.SedonaContext import org.apache.spark.SparkContext import org.apache.spark.sql.SparkSessionExtensions import org.apache.spark.sql.parser.ParserFactory +import org.apache.spark.sql.sedona_sql.optimization.Box2DCastResolutionRule import org.slf4j.{Logger, LoggerFactory} class SedonaSqlExtensions extends (SparkSessionExtensions => Unit) { @@ -36,6 +37,11 @@ class SedonaSqlExtensions extends (SparkSessionExtensions => Unit) { _ => () }) + // Resolve geometry↔Box2D casts during analysis so the analyzer accepts + // `CAST(geom AS box2d)` / `CAST(box AS geometry)` despite Spark's stock cast resolver + // refusing arbitrary UDT-to-UDT casts. 
+ e.injectResolutionRule(_ => new Box2DCastResolutionRule) + // Inject Sedona SQL parser if (enableParser) { // Try to inject the Sedona SQL parser but gracefully handle initialization failures. diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala index 57549812512..beac34727dd 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala @@ -554,9 +554,8 @@ private[apache] case class ST_MakeBox2D(inputExpressions: Seq[Expression]) /** * Convert a Box2D to a closed rectangular polygon Geometry. Equivalent to PostGIS {@code - * box2d::geometry}. Exposed as a function rather than a Catalyst implicit cast because UDT-to-UDT - * implicit casts require Catalyst-level work; ST_GeomFromBox2D lives alongside the other - * ST_GeomFrom* constructors. + * box2d::geometry}. `CAST(box AS geometry)` is also accepted (resolved to this expression by the + * Box2D cast resolution rule). * * @param inputExpressions */ diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/optimization/Box2DCastResolutionRule.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/optimization/Box2DCastResolutionRule.scala new file mode 100644 index 00000000000..580fd952752 --- /dev/null +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/optimization/Box2DCastResolutionRule.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.spark.sql.sedona_sql.optimization + +import org.apache.spark.sql.catalyst.expressions.Cast +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} +import org.apache.spark.sql.sedona_sql.expressions.{ST_Box2D, ST_GeomFromBox2D} + +/** + * Analyzer rule that resolves Catalyst casts between Sedona UDTs that Spark's stock cast resolver + * does not handle. Specifically: + * + * - `CAST(geom AS box2d)` → `ST_Box2D(geom)` (planar bounding box of the geometry) + * - `CAST(box AS geometry)` → `ST_GeomFromBox2D(box)` (rectangular polygon from a Box2D) + * + * Spark's `Cast.canCast` returns `false` for arbitrary UDT-to-UDT casts, so without this rule the + * analyzer would reject the cast. We rewrite during analysis (before `CheckAnalysis`) so the + * downstream optimizer and codegen path see the expression tree of an ordinary Sedona expression. + * + * Implicit type coercion (e.g. passing a Geometry into a Box2D-typed function argument without an + * explicit cast) is intentionally out of scope here; it requires hooking into Catalyst's type + * coercion rules and is tracked separately. 
+ */ +class Box2DCastResolutionRule extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressions { + case c: Cast + if c.child.resolved + && c.child.dataType.isInstanceOf[GeometryUDT] + && c.dataType.isInstanceOf[Box2DUDT] => + ST_Box2D(Seq(c.child)) + + case c: Cast + if c.child.resolved + && c.child.dataType.isInstanceOf[Box2DUDT] + && c.dataType.isInstanceOf[GeometryUDT] => + ST_GeomFromBox2D(Seq(c.child)) + } +} diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/Box2DCastResolutionRuleSuite.scala b/spark/common/src/test/scala/org/apache/sedona/sql/Box2DCastResolutionRuleSuite.scala new file mode 100644 index 00000000000..322dd370fac --- /dev/null +++ b/spark/common/src/test/scala/org/apache/sedona/sql/Box2DCastResolutionRuleSuite.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.sql + +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Cast, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} +import org.apache.spark.sql.sedona_sql.expressions.{ST_Box2D, ST_GeomFromBox2D} +import org.apache.spark.sql.sedona_sql.optimization.Box2DCastResolutionRule +import org.apache.spark.sql.types.LongType +import org.scalatest.funspec.AnyFunSpec + +class Box2DCastResolutionRuleSuite extends AnyFunSpec { + + private val rule = new Box2DCastResolutionRule + + private def projectExprPlan(input: AttributeReference, expr: Expression): LogicalPlan = { + val rel = LocalRelation(input) + Project(Seq(Alias(expr, "out")()), rel) + } + + describe("Box2DCastResolutionRule") { + it("rewrites Cast(geometry-typed expression, Box2DUDT) into ST_Box2D") { + val geomAttr = AttributeReference("g", GeometryUDT(), nullable = true)() + val cast = Cast(geomAttr, Box2DUDT) + val rewritten = rule(projectExprPlan(geomAttr, cast)) + val outExpr = + rewritten.asInstanceOf[Project].projectList.head.asInstanceOf[Alias].child + assert(outExpr.isInstanceOf[ST_Box2D]) + assert(outExpr.asInstanceOf[ST_Box2D].inputExpressions == Seq(geomAttr)) + assert(outExpr.dataType.isInstanceOf[Box2DUDT]) + } + + it("rewrites Cast(Box2D-typed expression, GeometryUDT) into ST_GeomFromBox2D") { + val boxAttr = AttributeReference("b", Box2DUDT, nullable = true)() + val cast = Cast(boxAttr, GeometryUDT()) + val rewritten = rule(projectExprPlan(boxAttr, cast)) + val outExpr = + rewritten.asInstanceOf[Project].projectList.head.asInstanceOf[Alias].child + assert(outExpr.isInstanceOf[ST_GeomFromBox2D]) + assert(outExpr.asInstanceOf[ST_GeomFromBox2D].inputExpressions == Seq(boxAttr)) + assert(outExpr.dataType.isInstanceOf[GeometryUDT]) + } + + it("leaves unrelated casts untouched") { + val geomAttr = AttributeReference("g", 
GeometryUDT(), nullable = true)() + val cast = Cast(Literal(1), LongType) + val rewritten = rule(projectExprPlan(geomAttr, cast)) + val outExpr = + rewritten.asInstanceOf[Project].projectList.head.asInstanceOf[Alias].child + assert(outExpr.isInstanceOf[Cast]) + } + } +} diff --git a/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index b56ed11c875..d92f55b29fa 100644 --- a/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -20,19 +20,19 @@ package org.apache.sedona.sql.parser import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Override the method to handle the geometry data type - * @param ctx - * @return + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL + * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { case "GEOMETRY" => GeometryUDT() + case "BOX2D" => Box2DUDT case _ => super.visitPrimitiveDataType(ctx) } } diff --git a/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index b56ed11c875..d92f55b29fa 100644 --- a/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -20,19 +20,19 @@ package org.apache.sedona.sql.parser import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Override the method to handle the geometry data type - * @param ctx - * @return + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL + * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. */ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { case "GEOMETRY" => GeometryUDT() + case "BOX2D" => Box2DUDT case _ => super.visitPrimitiveDataType(ctx) } } diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala new file mode 100644 index 00000000000..512924f05d5 --- /dev/null +++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.sql + +import org.apache.sedona.common.geometryObjects.Box2D +import org.apache.spark.sql.functions.{col, expr} +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} +class Box2DCastSuite extends TestBaseScala { + + describe("Geometry ↔ Box2D Catalyst cast") { + + it("SQL CAST(geom AS box2d) returns the planar bbox") { + val row = sparkSession + .sql( + "SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") + .collect() + .head + val box = row.getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("SQL CAST(box AS geometry) returns the rectangular polygon") { + val wkt = sparkSession + .sql( + "SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 2 0, 2 4, 0 4, 0 0))") + } + + it("DataFrame API .cast(Box2DUDT) rewrites to ST_Box2D") { + import sparkSession.implicits._ + val df = Seq("POINT (3 7)").toDF("wkt") + val out = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).alias("b")) + .collect() + val box = out.head.getAs[Box2D]("b") + assert(box == new Box2D(3.0, 7.0, 3.0, 7.0)) + } + + it("DataFrame API .cast(GeometryUDT) rewrites to 
ST_GeomFromBox2D") { + import sparkSession.implicits._ + val df = sparkSession.sql( + "SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(1.0, 1.0)) AS b") + val out = df + .select(col("b").cast(GeometryUDT()).alias("g")) + .selectExpr("ST_AsText(g) AS wkt") + .collect() + assert(out.head.getString(0) == "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))") + } + + it("Round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + val wkt = sparkSession + .sql( + "SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 5 0, 5 10, 0 10, 0 0))") + } + + it("CAST(NULL geometry AS box2d) returns null") { + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == null) + } + } +} diff --git a/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index b56ed11c875..d92f55b29fa 100644 --- a/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -20,19 +20,19 @@ package org.apache.sedona.sql.parser import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Override the method to handle the geometry data type - * @param ctx - * @return + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL + * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { case "GEOMETRY" => GeometryUDT() + case "BOX2D" => Box2DUDT case _ => super.visitPrimitiveDataType(ctx) } } diff --git a/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index b56ed11c875..d92f55b29fa 100644 --- a/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -20,19 +20,19 @@ package org.apache.sedona.sql.parser import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Override the method to handle the geometry data type - * @param ctx - * @return + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL + * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { case "GEOMETRY" => GeometryUDT() + case "BOX2D" => Box2DUDT case _ => super.visitPrimitiveDataType(ctx) } } From 20857badad7ee98cad7b30dc0ffdd3441f9bb6b1 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 12 May 2026 23:53:54 -0700 Subject: [PATCH 2/4] Apply spotless formatting --- .../sedona/sql/parser/SedonaSqlAstBuilder.scala | 4 ++-- .../sedona/sql/parser/SedonaSqlAstBuilder.scala | 4 ++-- .../org/apache/sedona/sql/Box2DCastSuite.scala | 13 +++++-------- .../sedona/sql/parser/SedonaSqlAstBuilder.scala | 4 ++-- .../sedona/sql/parser/SedonaSqlAstBuilder.scala | 4 ++-- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index d92f55b29fa..eaabd227713 100644 --- a/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -26,8 +26,8 @@ import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL - * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL `CAST(... AS + * geometry)` / `CAST(... AS box2d)` parse to the matching UDT. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { diff --git a/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index d92f55b29fa..eaabd227713 100644 --- a/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -26,8 +26,8 @@ import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL - * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL `CAST(... AS + * geometry)` / `CAST(... AS box2d)` parse to the matching UDT. */ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala index 512924f05d5..4abaf7fa664 100644 --- a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala +++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -27,8 +27,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(geom AS box2d) returns the planar bbox") { val row = sparkSession - .sql( - "SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") + .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") .collect() .head val box = row.getAs[Box2D]("b") @@ -37,8 +36,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(box AS geometry) returns the rectangular polygon") { val wkt = sparkSession - .sql( - "SELECT 
ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") + .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") .collect() .head .getString(0) @@ -58,8 +56,8 @@ class Box2DCastSuite extends TestBaseScala { it("DataFrame API .cast(GeometryUDT) rewrites to ST_GeomFromBox2D") { import sparkSession.implicits._ - val df = sparkSession.sql( - "SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(1.0, 1.0)) AS b") + val df = + sparkSession.sql("SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(1.0, 1.0)) AS b") val out = df .select(col("b").cast(GeometryUDT()).alias("g")) .selectExpr("ST_AsText(g) AS wkt") @@ -69,8 +67,7 @@ class Box2DCastSuite extends TestBaseScala { it("Round-trip Geometry → Box2D → Geometry yields the envelope polygon") { val wkt = sparkSession - .sql( - "SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") + .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") .collect() .head .getString(0) diff --git a/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index d92f55b29fa..eaabd227713 100644 --- a/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-4.0/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -26,8 +26,8 @@ import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL - * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL `CAST(... AS + * geometry)` / `CAST(... AS box2d)` parse to the matching UDT. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { diff --git a/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala b/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala index d92f55b29fa..eaabd227713 100644 --- a/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala +++ b/spark/spark-4.1/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala @@ -26,8 +26,8 @@ import org.apache.spark.sql.types.DataType class SedonaSqlAstBuilder extends SparkSqlAstBuilder { /** - * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL - * `CAST(... AS geometry)` / `CAST(... AS box2d)` parse to the matching UDT. + * Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL `CAST(... AS + * geometry)` / `CAST(... AS box2d)` parse to the matching UDT. */ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = { ctx.getText.toUpperCase() match { From 0f40fcd0ebbe6e681c636d377d9778f0611d06df Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Wed, 13 May 2026 14:56:57 -0700 Subject: [PATCH 3/4] Replicate cast tests across Spark versions; gate SQL tests on parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups on the cast suite: - The SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` syntax requires Sedona's `SedonaSqlAstBuilder` to be the active parser. The test base randomizes `spark.sedona.enableParserExtension` across CI runs, so the SQL-level tests now `assume(parserExtensionEnabled)` and are skipped (rather than failed) when the stock Spark parser is in effect. The DataFrame `.cast(...)` tests run unconditionally because the resolution rule is always injected. 
- The polygon vertex order produced by `ST_GeomFromBox2D` walks the envelope as (xmin,ymin) → (xmin,ymax) → (xmax,ymax) → (xmax,ymin); the expected WKTs are corrected to match. - The suite is now replicated across spark-3.4 / 3.5 / 4.0 / 4.1 so each supported Spark version exercises the SQL + DataFrame surfaces of the geometry↔Box2D cast. --- .../apache/sedona/sql/Box2DCastSuite.scala | 134 ++++++++++++++++++ .../apache/sedona/sql/Box2DCastSuite.scala | 96 +++++++++---- .../apache/sedona/sql/Box2DCastSuite.scala | 134 ++++++++++++++++++ .../apache/sedona/sql/Box2DCastSuite.scala | 134 ++++++++++++++++++ 4 files changed, 474 insertions(+), 24 deletions(-) create mode 100644 spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala create mode 100644 spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala create mode 100644 spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala diff --git a/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala new file mode 100644 index 00000000000..c54134abc42 --- /dev/null +++ b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.sql + +import org.apache.sedona.common.geometryObjects.Box2D +import org.apache.spark.sql.functions.{col, expr} +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} + +class Box2DCastSuite extends TestBaseScala { + + /** + * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's + * `SedonaSqlAstBuilder` to be active. The test base randomizes + * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on + * that flag. DataFrame `.cast(...)` tests run unconditionally because the resolution rule is + * always injected. + */ + private def parserExtensionEnabled: Boolean = + sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + + describe("Geometry ↔ Box2D Catalyst cast") { + + it("DataFrame .cast(Box2DUDT) rewrites to ST_Box2D") { + import sparkSession.implicits._ + val df = Seq("LINESTRING (0 0, 10 20)").toDF("wkt") + val box = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).alias("b")) + .collect() + .head + .getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("DataFrame .cast(GeometryUDT) rewrites to ST_GeomFromBox2D") { + val df = + sparkSession.sql("SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS b") + val wkt = df + .select(col("b").cast(GeometryUDT()).alias("g")) + .selectExpr("ST_AsText(g) AS wkt") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") + } + + it("DataFrame round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + import sparkSession.implicits._ + val df = Seq("LINESTRING (0 0, 5 10)").toDF("wkt") + val wkt = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).cast(GeometryUDT()).alias("env")) + .selectExpr("ST_AsText(env) AS wkt") + .collect() + 
.head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") + } + + it("DataFrame .cast(Box2DUDT) on NULL geometry returns null") { + val box = sparkSession + .sql("SELECT ST_GeomFromText(NULL) AS g") + .select(col("g").cast(Box2DUDT).alias("b")) + .collect() + .head + .getAs[Box2D]("b") + assert(box == null) + } + + it("SQL CAST(geom AS box2d) returns the planar bbox") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("SQL CAST(box AS geometry) returns the rectangular polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") + val wkt = sparkSession + .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") + } + + it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS ...)` between UDTs") + val wkt = sparkSession + .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") + } + + it("SQL CAST(NULL geometry AS box2d) returns null") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... 
AS box2d)` syntax") + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == null) + } + } +} diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala index 4abaf7fa664..c54134abc42 100644 --- a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala +++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -21,60 +21,108 @@ package org.apache.sedona.sql import org.apache.sedona.common.geometryObjects.Box2D import org.apache.spark.sql.functions.{col, expr} import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} + class Box2DCastSuite extends TestBaseScala { + /** + * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's + * `SedonaSqlAstBuilder` to be active. The test base randomizes + * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on + * that flag. DataFrame `.cast(...)` tests run unconditionally because the resolution rule is + * always injected. 
+ */ + private def parserExtensionEnabled: Boolean = + sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + describe("Geometry ↔ Box2D Catalyst cast") { - it("SQL CAST(geom AS box2d) returns the planar bbox") { - val row = sparkSession - .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") + it("DataFrame .cast(Box2DUDT) rewrites to ST_Box2D") { + import sparkSession.implicits._ + val df = Seq("LINESTRING (0 0, 10 20)").toDF("wkt") + val box = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).alias("b")) .collect() .head - val box = row.getAs[Box2D]("b") + .getAs[Box2D]("b") assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) } - it("SQL CAST(box AS geometry) returns the rectangular polygon") { - val wkt = sparkSession - .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") + it("DataFrame .cast(GeometryUDT) rewrites to ST_GeomFromBox2D") { + val df = + sparkSession.sql("SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS b") + val wkt = df + .select(col("b").cast(GeometryUDT()).alias("g")) + .selectExpr("ST_AsText(g) AS wkt") .collect() .head .getString(0) - assert(wkt == "POLYGON ((0 0, 2 0, 2 4, 0 4, 0 0))") + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") } - it("DataFrame API .cast(Box2DUDT) rewrites to ST_Box2D") { + it("DataFrame round-trip Geometry → Box2D → Geometry yields the envelope polygon") { import sparkSession.implicits._ - val df = Seq("POINT (3 7)").toDF("wkt") - val out = df + val df = Seq("LINESTRING (0 0, 5 10)").toDF("wkt") + val wkt = df .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).cast(GeometryUDT()).alias("env")) + .selectExpr("ST_AsText(env) AS wkt") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") + } + + it("DataFrame .cast(Box2DUDT) on NULL geometry returns null") { + val box = sparkSession + .sql("SELECT 
ST_GeomFromText(NULL) AS g") .select(col("g").cast(Box2DUDT).alias("b")) .collect() - val box = out.head.getAs[Box2D]("b") - assert(box == new Box2D(3.0, 7.0, 3.0, 7.0)) + .head + .getAs[Box2D]("b") + assert(box == null) } - it("DataFrame API .cast(GeometryUDT) rewrites to ST_GeomFromBox2D") { - import sparkSession.implicits._ - val df = - sparkSession.sql("SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(1.0, 1.0)) AS b") - val out = df - .select(col("b").cast(GeometryUDT()).alias("g")) - .selectExpr("ST_AsText(g) AS wkt") + it("SQL CAST(geom AS box2d) returns the planar bbox") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("SQL CAST(box AS geometry) returns the rectangular polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") + val wkt = sparkSession + .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") .collect() - assert(out.head.getString(0) == "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))") + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") } - it("Round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... 
AS ...)` between UDTs") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") .collect() .head .getString(0) - assert(wkt == "POLYGON ((0 0, 5 0, 5 10, 0 10, 0 0))") + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") } - it("CAST(NULL geometry AS box2d) returns null") { + it("SQL CAST(NULL geometry AS box2d) returns null") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") .collect() diff --git a/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala new file mode 100644 index 00000000000..c54134abc42 --- /dev/null +++ b/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.sql + +import org.apache.sedona.common.geometryObjects.Box2D +import org.apache.spark.sql.functions.{col, expr} +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} + +class Box2DCastSuite extends TestBaseScala { + + /** + * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's + * `SedonaSqlAstBuilder` to be active. The test base randomizes + * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on + * that flag. DataFrame `.cast(...)` tests run unconditionally because the resolution rule is + * always injected. + */ + private def parserExtensionEnabled: Boolean = + sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + + describe("Geometry ↔ Box2D Catalyst cast") { + + it("DataFrame .cast(Box2DUDT) rewrites to ST_Box2D") { + import sparkSession.implicits._ + val df = Seq("LINESTRING (0 0, 10 20)").toDF("wkt") + val box = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).alias("b")) + .collect() + .head + .getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("DataFrame .cast(GeometryUDT) rewrites to ST_GeomFromBox2D") { + val df = + sparkSession.sql("SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS b") + val wkt = df + .select(col("b").cast(GeometryUDT()).alias("g")) + .selectExpr("ST_AsText(g) AS wkt") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") + } + + it("DataFrame round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + import sparkSession.implicits._ + val df = Seq("LINESTRING (0 0, 5 10)").toDF("wkt") + val wkt = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).cast(GeometryUDT()).alias("env")) + .selectExpr("ST_AsText(env) AS wkt") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") + } + + it("DataFrame 
.cast(Box2DUDT) on NULL geometry returns null") { + val box = sparkSession + .sql("SELECT ST_GeomFromText(NULL) AS g") + .select(col("g").cast(Box2DUDT).alias("b")) + .collect() + .head + .getAs[Box2D]("b") + assert(box == null) + } + + it("SQL CAST(geom AS box2d) returns the planar bbox") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("SQL CAST(box AS geometry) returns the rectangular polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") + val wkt = sparkSession + .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") + } + + it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS ...)` between UDTs") + val wkt = sparkSession + .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") + } + + it("SQL CAST(NULL geometry AS box2d) returns null") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... 
AS box2d)` syntax") + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == null) + } + } +} diff --git a/spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala new file mode 100644 index 00000000000..c54134abc42 --- /dev/null +++ b/spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.sql + +import org.apache.sedona.common.geometryObjects.Box2D +import org.apache.spark.sql.functions.{col, expr} +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT} + +class Box2DCastSuite extends TestBaseScala { + + /** + * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's + * `SedonaSqlAstBuilder` to be active. The test base randomizes + * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on + * that flag. DataFrame `.cast(...)` tests run unconditionally because the resolution rule is + * always injected. 
+ */ + private def parserExtensionEnabled: Boolean = + sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + + describe("Geometry ↔ Box2D Catalyst cast") { + + it("DataFrame .cast(Box2DUDT) rewrites to ST_Box2D") { + import sparkSession.implicits._ + val df = Seq("LINESTRING (0 0, 10 20)").toDF("wkt") + val box = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).alias("b")) + .collect() + .head + .getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("DataFrame .cast(GeometryUDT) rewrites to ST_GeomFromBox2D") { + val df = + sparkSession.sql("SELECT ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS b") + val wkt = df + .select(col("b").cast(GeometryUDT()).alias("g")) + .selectExpr("ST_AsText(g) AS wkt") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") + } + + it("DataFrame round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + import sparkSession.implicits._ + val df = Seq("LINESTRING (0 0, 5 10)").toDF("wkt") + val wkt = df + .select(expr("ST_GeomFromText(wkt)").alias("g")) + .select(col("g").cast(Box2DUDT).cast(GeometryUDT()).alias("env")) + .selectExpr("ST_AsText(env) AS wkt") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") + } + + it("DataFrame .cast(Box2DUDT) on NULL geometry returns null") { + val box = sparkSession + .sql("SELECT ST_GeomFromText(NULL) AS g") + .select(col("g").cast(Box2DUDT).alias("b")) + .collect() + .head + .getAs[Box2D]("b") + assert(box == null) + } + + it("SQL CAST(geom AS box2d) returns the planar bbox") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... 
AS box2d)` syntax") + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == new Box2D(0.0, 0.0, 10.0, 20.0)) + } + + it("SQL CAST(box AS geometry) returns the rectangular polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") + val wkt = sparkSession + .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 4, 2 4, 2 0, 0 0))") + } + + it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS ...)` between UDTs") + val wkt = sparkSession + .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") + .collect() + .head + .getString(0) + assert(wkt == "POLYGON ((0 0, 0 10, 5 10, 5 0, 0 0))") + } + + it("SQL CAST(NULL geometry AS box2d) returns null") { + assume( + parserExtensionEnabled, + "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") + val box = sparkSession + .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") + .collect() + .head + .getAs[Box2D]("b") + assert(box == null) + } + } +} From 27bf2812f1885065546f1faa7e82a1a8b99c5d38 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Wed, 13 May 2026 15:28:10 -0700 Subject: [PATCH 4/4] Probe parser support directly when gating SQL CAST tests The previous gate read `spark.sedona.enableParserExtension` from `sparkSession.conf`, but `SparkContext` is JVM-singleton, so the session-level config in this suite can differ from the value `SedonaSqlExtensions` actually saw when it decided whether to inject `SedonaSqlAstBuilder`. 
CI repeatedly hit a mismatch where the assume returned true while the active parser was Spark's stock one, causing the SQL CAST tests to fail with ParseException. Switch to a behavioral probe: try to parse a tiny `CAST(... AS box2d)` SELECT once and cache the outcome. This matches what the SQL tests actually depend on regardless of which config layer holds the truth. DataFrame `.cast(...)` tests remain unconditional. Verified locally for spark-3.4 / 3.5 across both parser states (succeeded 8, canceled 0) and (succeeded 4, canceled 4). --- .../apache/sedona/sql/Box2DCastSuite.scala | 28 +++++++++++++------ .../apache/sedona/sql/Box2DCastSuite.scala | 28 +++++++++++++------ .../apache/sedona/sql/Box2DCastSuite.scala | 28 +++++++++++++------ .../apache/sedona/sql/Box2DCastSuite.scala | 28 +++++++++++++------ 4 files changed, 76 insertions(+), 36 deletions(-) diff --git a/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala index c54134abc42..d9a62dabbe5 100644 --- a/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala +++ b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -27,12 +27,22 @@ class Box2DCastSuite extends TestBaseScala { /** * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's * `SedonaSqlAstBuilder` to be active. The test base randomizes - * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on - * that flag. DataFrame `.cast(...)` tests run unconditionally because the resolution rule is - * always injected. + * `spark.sedona.enableParserExtension` across CI runs, and `SparkContext` is JVM-singleton so + * the active value can differ from this suite's session-level config. Probe directly by parsing + * a tiny CAST: this matches the behavior the SQL tests actually depend on, and caches the + * answer for the rest of the suite. 
DataFrame `.cast(...)` tests run unconditionally because + * the resolution rule is always injected. */ - private def parserExtensionEnabled: Boolean = - sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + private lazy val sqlCastSupported: Boolean = { + try { + sparkSession + .sql("SELECT CAST(ST_GeomFromText('POINT (0 0)') AS box2d) AS b") + .collect() + true + } catch { + case _: org.apache.spark.sql.catalyst.parser.ParseException => false + } + } describe("Geometry ↔ Box2D Catalyst cast") { @@ -85,7 +95,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(geom AS box2d) returns the planar bbox") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") @@ -97,7 +107,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(box AS geometry) returns the rectangular polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") @@ -109,7 +119,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS ...)` between UDTs") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") @@ -121,7 +131,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(NULL geometry AS box2d) returns null") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... 
AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala index c54134abc42..d9a62dabbe5 100644 --- a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala +++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -27,12 +27,22 @@ class Box2DCastSuite extends TestBaseScala { /** * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's * `SedonaSqlAstBuilder` to be active. The test base randomizes - * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on - * that flag. DataFrame `.cast(...)` tests run unconditionally because the resolution rule is - * always injected. + * `spark.sedona.enableParserExtension` across CI runs, and `SparkContext` is JVM-singleton so + * the active value can differ from this suite's session-level config. Probe directly by parsing + * a tiny CAST: this matches the behavior the SQL tests actually depend on, and caches the + * answer for the rest of the suite. DataFrame `.cast(...)` tests run unconditionally because + * the resolution rule is always injected. */ - private def parserExtensionEnabled: Boolean = - sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + private lazy val sqlCastSupported: Boolean = { + try { + sparkSession + .sql("SELECT CAST(ST_GeomFromText('POINT (0 0)') AS box2d) AS b") + .collect() + true + } catch { + case _: org.apache.spark.sql.catalyst.parser.ParseException => false + } + } describe("Geometry ↔ Box2D Catalyst cast") { @@ -85,7 +95,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(geom AS box2d) returns the planar bbox") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... 
AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") @@ -97,7 +107,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(box AS geometry) returns the rectangular polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") @@ -109,7 +119,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS ...)` between UDTs") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") @@ -121,7 +131,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(NULL geometry AS box2d) returns null") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") diff --git a/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala index c54134abc42..d9a62dabbe5 100644 --- a/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala +++ b/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -27,12 +27,22 @@ class Box2DCastSuite extends TestBaseScala { /** * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's * `SedonaSqlAstBuilder` to be active. The test base randomizes - * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on - * that flag. 
DataFrame `.cast(...)` tests run unconditionally because the resolution rule is - * always injected. + * `spark.sedona.enableParserExtension` across CI runs, and `SparkContext` is JVM-singleton so + * the active value can differ from this suite's session-level config. Probe directly by parsing + * a tiny CAST: this matches the behavior the SQL tests actually depend on, and caches the + * answer for the rest of the suite. DataFrame `.cast(...)` tests run unconditionally because + * the resolution rule is always injected. */ - private def parserExtensionEnabled: Boolean = - sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + private lazy val sqlCastSupported: Boolean = { + try { + sparkSession + .sql("SELECT CAST(ST_GeomFromText('POINT (0 0)') AS box2d) AS b") + .collect() + true + } catch { + case _: org.apache.spark.sql.catalyst.parser.ParseException => false + } + } describe("Geometry ↔ Box2D Catalyst cast") { @@ -85,7 +95,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(geom AS box2d) returns the planar bbox") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") @@ -97,7 +107,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(box AS geometry) returns the rectangular polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") @@ -109,7 +119,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... 
AS ...)` between UDTs") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") @@ -121,7 +131,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(NULL geometry AS box2d) returns null") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b") diff --git a/spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala b/spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala index c54134abc42..d9a62dabbe5 100644 --- a/spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala +++ b/spark/spark-4.1/src/test/scala/org/apache/sedona/sql/Box2DCastSuite.scala @@ -27,12 +27,22 @@ class Box2DCastSuite extends TestBaseScala { /** * SQL `CAST(... AS box2d)` / `CAST(... AS geometry)` parsing requires Sedona's * `SedonaSqlAstBuilder` to be active. The test base randomizes - * `spark.sedona.enableParserExtension` across CI runs, so SQL-level CAST tests are gated on - * that flag. DataFrame `.cast(...)` tests run unconditionally because the resolution rule is - * always injected. + * `spark.sedona.enableParserExtension` across CI runs, and `SparkContext` is JVM-singleton so + * the active value can differ from this suite's session-level config. Probe directly by parsing + * a tiny CAST: this matches the behavior the SQL tests actually depend on, and caches the + * answer for the rest of the suite. DataFrame `.cast(...)` tests run unconditionally because + * the resolution rule is always injected. 
*/ - private def parserExtensionEnabled: Boolean = - sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean + private lazy val sqlCastSupported: Boolean = { + try { + sparkSession + .sql("SELECT CAST(ST_GeomFromText('POINT (0 0)') AS box2d) AS b") + .collect() + true + } catch { + case _: org.apache.spark.sql.catalyst.parser.ParseException => false + } + } describe("Geometry ↔ Box2D Catalyst cast") { @@ -85,7 +95,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(geom AS box2d) returns the planar bbox") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 10 20)') AS box2d) AS b") @@ -97,7 +107,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(box AS geometry) returns the rectangular polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS geometry)` syntax") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(ST_MakeBox2D(ST_Point(0.0, 0.0), ST_Point(2.0, 4.0)) AS geometry)) AS w") @@ -109,7 +119,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL round-trip Geometry → Box2D → Geometry yields the envelope polygon") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS ...)` between UDTs") val wkt = sparkSession .sql("SELECT ST_AsText(CAST(CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box2d) AS geometry)) AS w") @@ -121,7 +131,7 @@ class Box2DCastSuite extends TestBaseScala { it("SQL CAST(NULL geometry AS box2d) returns null") { assume( - parserExtensionEnabled, + sqlCastSupported, "Sedona SQL parser extension is required for `CAST(... AS box2d)` syntax") val box = sparkSession .sql("SELECT CAST(ST_GeomFromText(NULL) AS box2d) AS b")