diff --git a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/geoparquet/GeoParquetMetaData.scala b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/geoparquet/GeoParquetMetaData.scala
index 7517b8e5fc4..4068b3f69a8 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/geoparquet/GeoParquetMetaData.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/geoparquet/GeoParquetMetaData.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.geoparquet
 
 import scala.util.control.NonFatal
 
+import org.apache.spark.sql.sedona_sql.UDT.Box2DUDT
 import org.apache.spark.sql.types.{DoubleType, FloatType, StructType}
 import org.datasyslab.proj4sedona.core.Proj
 import org.datasyslab.proj4sedona.parser.CRSSerializer
@@ -236,6 +237,18 @@
     schema(coveringColumnIndex).dataType match {
       case coveringColumnType: StructType =>
         coveringColumnTypeToCovering(coveringColumnName, coveringColumnType)
+      case udt: Box2DUDT =>
+        // Box2DUDT exposes a struct sqlType, which is the exact
+        // shape required by GeoParquet 1.1 bbox covering columns. Treat the underlying struct as
+        // the covering struct so users can write a Box2D column and have it referenced as a
+        // covering column in GeoParquet metadata without any manual struct construction.
+        udt.sqlType match {
+          case structType: StructType =>
+            coveringColumnTypeToCovering(coveringColumnName, structType)
+          case other =>
+            throw new IllegalStateException(
+              s"Box2DUDT.sqlType is expected to be a StructType, got $other")
+        }
       case _ =>
         throw new IllegalArgumentException(
           s"Covering column $coveringColumnName is not a struct type")
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
index 5d2a13f4867..2924292cd5f 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
@@ -1021,6 +1021,53 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
     }
   }
 
+  it("GeoParquet supports writing covering metadata from a Box2D column") {
+    // User-provided Box2D column referenced via the geoparquet.covering option.
+    val df = sparkSession
+      .range(0, 100)
+      .toDF("id")
+      .withColumn("id", expr("CAST(id AS DOUBLE)"))
+      .withColumn("geometry", expr("ST_Point(id, id + 1)"))
+      .withColumn("test_cov", expr("ST_Box2D(geometry)"))
+    val geoParquetSavePath = geoparquetoutputlocation + "/gp_with_box2d_covering.parquet"
+    df.write
+      .format("geoparquet")
+      .option("geoparquet.covering.geometry", "test_cov")
+      .mode("overwrite")
+      .save(geoParquetSavePath)
+    validateGeoParquetMetadata(geoParquetSavePath) { geo =>
+      implicit val formats: org.json4s.Formats = org.json4s.DefaultFormats
+      val coveringJsValue = geo \ "columns" \ "geometry" \ "covering"
+      val covering = coveringJsValue.extract[Covering]
+      assert(covering.bbox.xmin == Seq("test_cov", "xmin"))
+      assert(covering.bbox.ymin == Seq("test_cov", "ymin"))
+      assert(covering.bbox.xmax == Seq("test_cov", "xmax"))
+      assert(covering.bbox.ymax == Seq("test_cov", "ymax"))
+    }
+  }
+
+  it("GeoParquet auto populates covering metadata for a Box2D _bbox column") {
+    // Auto-detect path: when a column named _bbox is a Box2D, reuse it as the
+    // covering column instead of synthesizing a separate float64 struct.
+    val df = sparkSession
+      .range(0, 100)
+      .toDF("id")
+      .withColumn("id", expr("CAST(id AS DOUBLE)"))
+      .withColumn("geometry", expr("ST_Point(id, id + 1)"))
+      .withColumn("geometry_bbox", expr("ST_Box2D(geometry)"))
+    val geoParquetSavePath = geoparquetoutputlocation + "/gp_box2d_auto_covering.parquet"
+    df.write.format("geoparquet").mode("overwrite").save(geoParquetSavePath)
+    validateGeoParquetMetadata(geoParquetSavePath) { geo =>
+      implicit val formats: org.json4s.Formats = org.json4s.DefaultFormats
+      val coveringJsValue = geo \ "columns" \ "geometry" \ "covering"
+      val covering = coveringJsValue.extract[Covering]
+      assert(covering.bbox.xmin == Seq("geometry_bbox", "xmin"))
+      assert(covering.bbox.ymin == Seq("geometry_bbox", "ymin"))
+      assert(covering.bbox.xmax == Seq("geometry_bbox", "xmax"))
+      assert(covering.bbox.ymax == Seq("geometry_bbox", "ymax"))
+    }
+  }
+
   it("GeoParquet auto populates covering metadata for single geometry column") {
     val df = sparkSession
       .range(0, 100)