diff --git a/packages/bigframes/bigframes/bigquery/_operations/geo.py b/packages/bigframes/bigframes/bigquery/_operations/geo.py index cd1d22b16489..e9ea711c9690 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/geo.py +++ b/packages/bigframes/bigframes/bigquery/_operations/geo.py @@ -99,7 +99,7 @@ def st_area( bigframes.pandas.Series: Series of float representing the areas. """ - series = series._apply_unary_op(ops.geo_area_op) + series = series._apply_nary_op(ops.googlesql.ST_AREA, []) series.name = None return series @@ -223,7 +223,7 @@ def st_centroid( bigframes.pandas.Series: A series of geography objects representing the centroids. """ - series = series._apply_unary_op(ops.geo_st_centroid_op) + series = series._apply_nary_op(ops.googlesql.ST_CENTROID, []) series.name = None return series @@ -753,6 +753,4 @@ def st_simplify( Returns: a Series containing the simplified GEOGRAPHY data. """ - return geography._apply_unary_op( - ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters) - ) + return geography._apply_nary_op(ops.googlesql.ST_SIMPLIFY, [tolerance_meters]) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 476d012bea4d..5e6a299f83f3 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -20,6 +20,7 @@ import bigframes.core.expression from bigframes import dtypes from bigframes import operations as ops +from bigframes.operations import googlesql def rand() -> bigframes.core.col.Expression: @@ -47,12 +48,9 @@ def rand() -> bigframes.core.col.Expression: :func:`~bigframes.pandas.DataFrame.assign` and other methods. See :func:`bigframes.pandas.col`. 
""" - op = ops.SqlScalarOp( - _output_type=dtypes.FLOAT_DTYPE, - sql_template="RAND()", - is_deterministic=False, + return bigframes.core.col.Expression( + bigframes.core.expression.OpExpression(googlesql.RAND, ()) ) - return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ())) def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py index d52a9e381b53..938759ae1814 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py @@ -30,6 +30,7 @@ import bigframes.core.nodes as nodes import bigframes.core.ordering as bf_ordering import bigframes.core.rewrite as rewrites +import bigframes.core.rewrite.schema_binding as schema_binding from bigframes import dtypes, operations from bigframes.core import bq_data, expression, pyarrow_utils from bigframes.core.logging import data_types as data_type_logger @@ -59,6 +60,11 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: if request.sort_rows: result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node)) encoded_type_refs = data_type_logger.encode_type_refs(result_node) + # Have to bind schema as the final step before compilation. 
+ # Probably, should defer even further + result_node = typing.cast( + nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node) + ) sql = compile_result_node(result_node) return configs.CompileResult( sql, @@ -72,6 +78,11 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node)) result_node = cast(nodes.ResultNode, rewrites.defer_selection(result_node)) encoded_type_refs = data_type_logger.encode_type_refs(result_node) + # Have to bind schema as the final step before compilation. + # Probably, should defer even further + result_node = typing.cast( + nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node) + ) sql = compile_result_node(result_node) # Return the ordering iff no extra columns are needed to define the row order if ordering is not None: diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py index 5cfc8237be2e..772752112a40 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py @@ -30,11 +30,6 @@ # Geo Ops -@register_unary_op(ops.geo_area_op) -def geo_area_op_impl(x: ibis_types.Value): - return cast(ibis_types.GeoSpatialValue, x).area() - - @register_unary_op(ops.geo_st_astext_op) def geo_st_astext_op_impl(x: ibis_types.Value): return cast(ibis_types.GeoSpatialValue, x).as_text() @@ -55,11 +50,6 @@ def geo_st_buffer_op_impl(x: ibis_types.Value, op: ops.GeoStBufferOp): ) -@register_unary_op(ops.geo_st_centroid_op, pass_op=False) -def geo_st_centroid_op_impl(x: ibis_types.Value): - return cast(ibis_types.GeoSpatialValue, x).centroid() - - @register_unary_op(ops.geo_st_convexhull_op, pass_op=False) def geo_st_convexhull_op_impl(x: ibis_types.Value): return st_convexhull(x) @@ -132,12 +122,6 @@ def 
geo_st_regionstats_op_impl( ).to_expr() -@register_unary_op(ops.GeoStSimplifyOp, pass_op=True) -def st_simplify_op_impl(x: ibis_types.Value, op: ops.GeoStSimplifyOp): - x = cast(ibis_types.GeoSpatialValue, x) - return st_simplify(x, op.tolerance_meters) - - @register_unary_op(ops.geo_x_op) def geo_x_op_impl(x: ibis_types.Value): return cast(ibis_types.GeoSpatialValue, x).x() diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py index 07e3110a968e..4108675bc11f 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py @@ -21,11 +21,13 @@ from typing import TYPE_CHECKING import bigframes_vendored.ibis +import bigframes_vendored.ibis.expr.operations.generic as ibis_generic import bigframes_vendored.ibis.expr.types as ibis_types import bigframes.core.compile.ibis_types import bigframes.core.expression as ex from bigframes.core import agg_expressions, ordering +from bigframes.operations import googlesql as gsql_ops from bigframes.operations import numeric_ops if TYPE_CHECKING: @@ -92,8 +94,20 @@ def _( self.compile_expression(sub_expr, bindings) for sub_expr in expression.inputs ] + if isinstance(expression.op, gsql_ops.GoogleSqlScalarOp): + return googlesql_scalar_op_impl( + *inputs, op=expression.op, output_type=expression.output_type + ) return self.compile_row_op(expression.op, inputs) + @compile_expression.register + def _( + self, + expression: ex.OmittedArg, + bindings: typing.Dict[str, ibis_types.Value], + ) -> ibis_types.Value: + return bigframes_vendored.ibis.omitted() + def compile_row_op( self, op: ops.RowOp, inputs: typing.Sequence[ibis_types.Value] ) -> ibis_types.Value: @@ -278,3 +292,39 @@ def isnanornull(arg): @scalar_op_compiler.register_unary_op(numeric_ops.isfinite_op) def isfinite(arg): return arg.isinf().negate() & 
arg.isnan().negate()
+
+
+def googlesql_scalar_op_impl(
+    *operands: ibis_types.Value, op: ops.GoogleSqlScalarOp, output_type
+):
+    final_operands: list[ibis_types.Value] = []
+    arg_templates = []
+    for i, operand in enumerate(operands):
+        if i < len(op.args):
+            arg_spec = op.args[i]
+        else:
+            assert op.args[-1].is_vararg, (
+                f"Too many arguments for {op.sql_name}, expected {len(op.args)}"
+            )
+            arg_spec = op.args[-1]
+        if isinstance(operand.op(), ibis_generic.OmittedArg):
+            assert arg_spec.optional, "Argument omitted, but not optional"
+            continue
+
+        target_idx = len(final_operands)
+        final_operands.append(operand)
+        if arg_spec.arg_name:
+            arg_templates.append(f"{arg_spec.arg_name} => {{{target_idx}}}")
+        else:
+            arg_templates.append(f"{{{target_idx}}}")
+    args_template = ", ".join(arg_templates)
+    sql_template = f"{op.sql_name}({args_template})"
+    return ibis_generic.SqlScalar(
+        sql_template,
+        values=tuple(
+            typing.cast(ibis_generic.Value, expr.op()) for expr in final_operands
+        ),
+        output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype(
+            output_type
+        ),
+    ).to_expr()
diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py
index e7b492c9fb92..d07cb2186fbc 100644
--- a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py
+++ b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py
@@ -90,9 +90,10 @@ def _(self, expr: agg_exprs.WindowExpression) -> sge.Expression:
 
     @compile_expression.register
     def _(self, expr: ex.OpExpression) -> sge.Expression:
-        # Non-recursively compiles the children scalar expressions.
         inputs = tuple(
             TypedExpr(self.compile_expression(sub_expr), sub_expr.output_type)
+            if not isinstance(sub_expr, ex.OmittedArg)
+            else TypedExpr(sge.Null(), None, is_omitted=True)
             for sub_expr in expr.inputs
         )
         return self.compile_row_op(expr.op, inputs)
diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py
index 637c18074eef..644e7bc365f7 100644
--- a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py
+++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py
@@ -82,6 +82,27 @@ def _(expr: TypedExpr) -> sge.Expression:
     return sge.BitwiseNot(this=sge.paren(expr.expr))
 
 
+@register_nary_op(ops.GoogleSqlScalarOp, pass_op=True)
+def _(*operands: TypedExpr, op: ops.GoogleSqlScalarOp) -> sge.Expression:
+    args: list[sge.Expression] = []
+    for i, operand in enumerate(operands):
+        if i < len(op.args):
+            arg_spec = op.args[i]
+        else:
+            assert op.args[-1].is_vararg, (
+                f"Too many arguments for {op.sql_name}, expected {len(op.args)}"
+            )
+            arg_spec = op.args[-1]
+        if operand.is_omitted:
+            assert arg_spec.optional, "Argument omitted, but not optional"
+            continue
+        elif arg_spec.arg_name:
+            args.append(sge.Kwarg(this=arg_spec.arg_name, expression=operand.expr))
+        else:
+            args.append(operand.expr)
+    return sg.func(op.sql_name, *args)
+
+
 @register_nary_op(ops.SqlScalarOp, pass_op=True)
 def _(*operands: TypedExpr, op: ops.SqlScalarOp) -> sge.Expression:
     return sg.parse_one(
diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py
index 1c9e559b8975..8c353988ae3e 100644
--- a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py
+++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py
@@ -24,11 +24,6 @@ register_binary_op =
expression_compiler.expression_compiler.register_binary_op -@register_unary_op(ops.geo_area_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_AREA", expr.expr) - - @register_unary_op(ops.geo_st_astext_op) def _(expr: TypedExpr) -> sge.Expression: return sge.func("ST_ASTEXT", expr.expr) @@ -50,11 +45,6 @@ def _(expr: TypedExpr, op: ops.GeoStBufferOp) -> sge.Expression: ) -@register_unary_op(ops.geo_st_centroid_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_CENTROID", expr.expr) - - @register_unary_op(ops.geo_st_convexhull_op) def _(expr: TypedExpr) -> sge.Expression: return sge.func("ST_CONVEXHULL", expr.expr) @@ -97,15 +87,6 @@ def _( return sge.func("ST_REGIONSTATS", *args) -@register_unary_op(ops.GeoStSimplifyOp, pass_op=True) -def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression: - return sge.func( - "ST_SIMPLIFY", - expr.expr, - sge.convert(op.tolerance_meters), - ) - - @register_unary_op(ops.geo_x_op) def _(expr: TypedExpr) -> sge.Expression: return sge.func("ST_X", expr.expr) diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py index 4623b8c9b43b..d8c38c2e7188 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py @@ -25,3 +25,6 @@ class TypedExpr: expr: sge.Expression dtype: dtypes.ExpressionType + + # kludge to support optional args in argument lists + is_omitted: bool = False diff --git a/packages/bigframes/bigframes/core/expression.py b/packages/bigframes/bigframes/core/expression.py index 3ad5a9308185..6c27dfc120b6 100644 --- a/packages/bigframes/bigframes/core/expression.py +++ b/packages/bigframes/bigframes/core/expression.py @@ -364,6 +364,56 @@ def output_type(self) -> dtypes.ExpressionType: return self.dtype +@dataclasses.dataclass(frozen=True) +class 
OmittedArg(Expression): + """Represents an omitted optional arg used calling a function.""" + + @property + def free_variables(self) -> typing.Tuple[Hashable, ...]: + return () + + @property + def is_const(self) -> bool: + return True + + @property + def column_references(self) -> typing.Tuple[ids.ColumnId, ...]: + return () + + @property + def is_resolved(self): + return True # vacuously + + @property + def output_type(self) -> dtypes.ExpressionType: + return None + + def bind_refs( + self, + bindings: Mapping[ids.ColumnId, Expression], + allow_partial_bindings: bool = False, + ) -> OmittedArg: + return self + + def bind_variables( + self, + bindings: Mapping[Hashable, Expression], + allow_partial_bindings: bool = False, + ) -> Expression: + return self + + @property + def is_bijective(self) -> bool: + return True + + @property + def is_identity(self) -> bool: + return True + + def transform_children(self, t: Callable[[Expression], Expression]) -> Expression: + return self + + @dataclasses.dataclass(frozen=True) class OpExpression(Expression): """An expression representing a scalar operation applied to 1 or more argument sub-expressions.""" diff --git a/packages/bigframes/bigframes/geopandas/geoseries.py b/packages/bigframes/bigframes/geopandas/geoseries.py index 1da9d00a6ce6..dc373216b650 100644 --- a/packages/bigframes/bigframes/geopandas/geoseries.py +++ b/packages/bigframes/bigframes/geopandas/geoseries.py @@ -107,7 +107,7 @@ def buffer(self: GeoSeries, distance: float) -> bigframes.series.Series: # type @property def centroid(self: GeoSeries) -> bigframes.series.Series: # type: ignore - return self._apply_unary_op(ops.geo_st_centroid_op) + return self._apply_nary_op(ops.googlesql.ST_CENTROID, []) @property def convex_hull(self: GeoSeries) -> bigframes.series.Series: # type: ignore diff --git a/packages/bigframes/bigframes/operations/__init__.py b/packages/bigframes/bigframes/operations/__init__.py index bcc21e18cce0..5e0a23005a24 100644 --- 
a/packages/bigframes/bigframes/operations/__init__.py +++ b/packages/bigframes/bigframes/operations/__init__.py @@ -115,10 +115,8 @@ GeoStLengthOp, GeoStRegionStatsOp, GeoStSimplifyOp, - geo_area_op, geo_st_astext_op, geo_st_boundary_op, - geo_st_centroid_op, geo_st_convexhull_op, geo_st_difference_op, geo_st_geogfromtext_op, @@ -231,6 +229,7 @@ timestamp_add_op, timestamp_sub_op, ) +from bigframes.operations.googlesql import GoogleSqlScalarOp __all__ = [ # Base ops @@ -413,9 +412,7 @@ "euclidean_distance_op", "manhattan_distance_op", # Geo ops - "geo_area_op", "geo_st_boundary_op", - "geo_st_centroid_op", "geo_st_convexhull_op", "geo_st_difference_op", "geo_st_astext_op", @@ -429,7 +426,6 @@ "GeoStDistanceOp", "GeoStLengthOp", "GeoStRegionStatsOp", - "GeoStSimplifyOp", # AI ops "AIClassify", "AIGenerate", @@ -446,4 +442,6 @@ "ToArrayOp", "ArrayReduceOp", "ArrayMapOp", + # GoogleSql + "GoogleSqlScalarOp", ] diff --git a/packages/bigframes/bigframes/operations/geo_ops.py b/packages/bigframes/bigframes/operations/geo_ops.py index 85344bc2fbe7..72f3ab48de5b 100644 --- a/packages/bigframes/bigframes/operations/geo_ops.py +++ b/packages/bigframes/bigframes/operations/geo_ops.py @@ -19,13 +19,6 @@ from bigframes import dtypes from bigframes.operations import base_ops -GeoAreaOp = base_ops.create_unary_op( - name="geo_area", - type_signature=op_typing.FixedOutputType( - dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" - ), -) -geo_area_op = GeoAreaOp() GeoStAstextOp = base_ops.create_unary_op( name="geo_st_astext", diff --git a/packages/bigframes/bigframes/operations/googlesql.py b/packages/bigframes/bigframes/operations/googlesql.py new file mode 100644 index 000000000000..0100784bda1d --- /dev/null +++ b/packages/bigframes/bigframes/operations/googlesql.py @@ -0,0 +1,109 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from __future__ import annotations
+
+import dataclasses
+import typing
+from enum import Enum, auto
+from typing import Callable, Iterable
+
+import bigframes.operations as ops
+import bigframes.operations.type as op_typing
+from bigframes import dtypes
+
+
+@dataclasses.dataclass(frozen=True)
+class ArgSpec:
+    arg_name: str | None = None
+    optional: bool = False
+    is_vararg: bool = False
+    const_only: bool = False
+
+
+@dataclasses.dataclass(frozen=True)
+class OpSignature:
+    # Detailed specs for each parameter (name, optionality, variadic-ness).
+    arg_specs: typing.Sequence[ArgSpec]
+    resolve_return_type: typing.Any
+    has_varargs: bool = False
+
+
+# Eventually we should migrate every op that can be emitted directly 1:1 as a SQL op over to this.
+# That will let us fully lower to pure SQL dialect expressions, making SQL text emission trivial.
+@dataclasses.dataclass(frozen=True)
+class GoogleSqlScalarOp(ops.NaryOp):
+    name: typing.ClassVar[str] = "googlesql_scalar"
+
+    # syntax
+    sql_name: str
+    args: tuple[ArgSpec, ...]
+ # typing + signature: typing.Callable[..., dtypes.ExpressionType] + + # semantics + is_deterministic: bool = True + + @property + def deterministic(self) -> bool: + return self.is_deterministic + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return self.signature(*input_types) + + +RAND = GoogleSqlScalarOp( + "RAND", args=(), is_deterministic=False, signature=lambda: dtypes.FLOAT_DTYPE +) + + +def _check_geo_input( + t: dtypes.ExpressionType, out: dtypes.ExpressionType +) -> dtypes.ExpressionType: + if t is not None and not dtypes.is_geo_like(t): + raise TypeError(f"Type {t} is not supported. Type must be geo-like") + return out + + +def _check_simplify_inputs( + geo: dtypes.ExpressionType, tol: dtypes.ExpressionType +) -> dtypes.ExpressionType: + if geo is not None and not dtypes.is_geo_like(geo): + raise TypeError(f"Type {geo} is not supported. Type must be geo-like") + if tol is not None and not dtypes.is_numeric(tol): + raise TypeError(f"Type {tol} is not supported. Type must be numeric") + return dtypes.GEO_DTYPE + + +ST_AREA = GoogleSqlScalarOp( + "ST_AREA", + args=(ArgSpec(),), + is_deterministic=True, + signature=lambda geo: _check_geo_input(geo, dtypes.FLOAT_DTYPE), +) + +ST_CENTROID = GoogleSqlScalarOp( + "ST_CENTROID", + args=(ArgSpec(),), + is_deterministic=True, + signature=lambda geo: _check_geo_input(geo, dtypes.GEO_DTYPE), +) + +ST_SIMPLIFY = GoogleSqlScalarOp( + "ST_SIMPLIFY", + args=(ArgSpec(), ArgSpec()), + is_deterministic=True, + signature=_check_simplify_inputs, +) diff --git a/packages/bigframes/bigframes/operations/type.py b/packages/bigframes/bigframes/operations/type.py index b53e6cd41ede..0ddf3a113fc0 100644 --- a/packages/bigframes/bigframes/operations/type.py +++ b/packages/bigframes/bigframes/operations/type.py @@ -34,6 +34,9 @@ def as_method(self): """Convert the signature into an object method. Convenience function for constructing ops that use the signature.""" ... 
+ def __call__(self, *args, **kwargs): + return self.as_method(*args, **kwargs) + class UnaryTypeSignature(TypeSignature): @abc.abstractmethod diff --git a/packages/bigframes/tests/system/small/engines/conftest.py b/packages/bigframes/tests/system/small/engines/conftest.py index cea505dd28a6..e930e98ef26c 100644 --- a/packages/bigframes/tests/system/small/engines/conftest.py +++ b/packages/bigframes/tests/system/small/engines/conftest.py @@ -44,21 +44,44 @@ def fake_session() -> Generator[bigframes.Session, None, None]: yield session +@pytest.fixture(scope="session") +def pyarrow_engine(): + return local_scan_executor.LocalScanExecutor() + + +@pytest.fixture(scope="session") +def polars_engine(): + return polars_executor.PolarsExecutor() + + +@pytest.fixture(scope="session") +def bq_engine(bigquery_client): + publisher = events.Publisher() + return direct_gbq_execution.DirectGbqExecutor( + bigquery_client, compiler="ibis", publisher=publisher + ) + + +@pytest.fixture(scope="session") +def sqlglot_engine(bigquery_client): + publisher = events.Publisher() + return direct_gbq_execution.DirectGbqExecutor( + bigquery_client, compiler="sqlglot", publisher=publisher + ) + + @pytest.fixture(scope="session", params=["pyarrow", "polars", "bq", "bq-sqlglot"]) -def engine(request, bigquery_client: bigquery.Client) -> semi_executor.SemiExecutor: +def engine( + request, pyarrow_engine, polars_engine, bq_engine, sqlglot_engine +) -> semi_executor.SemiExecutor: if request.param == "pyarrow": - return local_scan_executor.LocalScanExecutor() + return pyarrow_engine if request.param == "polars": - return polars_executor.PolarsExecutor() - publisher = events.Publisher() + return polars_engine if request.param == "bq": - return direct_gbq_execution.DirectGbqExecutor( - bigquery_client, publisher=publisher - ) + return bq_engine if request.param == "bq-sqlglot": - return direct_gbq_execution.DirectGbqExecutor( - bigquery_client, compiler="sqlglot", publisher=publisher - ) + return 
sqlglot_engine
     raise ValueError(f"Unrecognized param: {request.param}")
diff --git a/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py b/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py
new file mode 100644
index 000000000000..2ea7070bbd30
--- /dev/null
+++ b/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py
@@ -0,0 +1,38 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+import pytest
+
+import bigframes.dtypes
+import bigframes.operations.googlesql as gsql_ops
+from bigframes.core import array_value, expression
+from bigframes.session import polars_executor
+from bigframes.testing.engine_utils import assert_equivalence_execution
+
+polars = pytest.importorskip("polars")
+
+# Polars used as reference as it's fast and local. Generally though, prefer gbq engine where they disagree.
+REFERENCE_ENGINE = polars_executor.PolarsExecutor() + + +def test_engines_googlesql_st_area( + scalars_array_value: array_value.ArrayValue, bq_engine, sqlglot_engine +): + expr = gsql_ops.ST_AREA.as_expr("geography_col") + + arr, _ = scalars_array_value.compute_values([expr]) + + assert_equivalence_execution(arr.node, bq_engine, sqlglot_engine) diff --git a/packages/bigframes/tests/unit/bigquery/test_mathematical.py b/packages/bigframes/tests/unit/bigquery/test_mathematical.py index a39aeb103c04..f0cb16ae145f 100644 --- a/packages/bigframes/tests/unit/bigquery/test_mathematical.py +++ b/packages/bigframes/tests/unit/bigquery/test_mathematical.py @@ -26,8 +26,8 @@ def test_rand_returns_expression(): node = expr._value assert isinstance(node, ex.OpExpression) op = node.op - assert isinstance(op, ops.SqlScalarOp) - assert op.sql_template == "RAND()" - assert op._output_type == dtypes.FLOAT_DTYPE + assert isinstance(op, ops.GoogleSqlScalarOp) + assert op.sql_name == "RAND" + assert op.output_type() == dtypes.FLOAT_DTYPE assert not op.is_deterministic assert len(node.inputs) == 0 diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py index 2d9373967c52..85e374c76dbd 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py @@ -21,16 +21,6 @@ pytest.importorskip("pytest_snapshot") -def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] @@ -61,16 +51,6 @@ def test_geo_st_buffer(scalar_types_df: 
bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") -def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [ops.geo_st_centroid_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql index 1c146e1e1be5..177cb5292b3f 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql @@ -1,7 +1,7 @@ WITH `bfcte_0` AS ( SELECT * - FROM UNNEST(ARRAY>[STRUCT('POINT(1 1)', 0)]) + FROM UNNEST(ARRAY>[STRUCT(ST_GEOGFROMTEXT('LINESTRING(0 0, 1 1, 2 0)'), 0)]) ) SELECT ST_SIMPLIFY(`bfcol_0`, 123.125) AS `0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py index 50de1488e6c6..4aad2dfa3153 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py @@ -13,6 +13,7 @@ # limitations under the License. 
import pytest +from shapely.geometry import LineString # type: ignore import bigframes.bigquery as bbq import bigframes.geopandas as gpd @@ -44,7 +45,9 @@ def test_st_regionstats_without_optional_args(compiler_session, snapshot): def test_st_simplify(compiler_session, snapshot): - geos = gpd.GeoSeries(["POINT(1 1)"], session=compiler_session) + geos = gpd.GeoSeries( + [LineString([(0, 0), (1, 1), (2, 0)])], session=compiler_session + ) result = bbq.st_simplify( geos, tolerance_meters=123.125, diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py index af85e937d561..953ecb2979fa 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py @@ -2467,3 +2467,7 @@ def least(*args: Any) -> ir.Value: └────────────┘ """ return ops.Least(args).to_expr() + + +def omitted() -> ir.Value: + return ops.Omitted().to_expr() diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py index 5ad1c885a4a2..ad0bd095b6c7 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py @@ -136,6 +136,10 @@ def to_expr(self): return getattr(ir, typename)(self) + @property + def omitted(self) -> bool: + return False + # convenience aliases Scalar = Value[T, ds.Scalar] diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py index 3933a70a9630..cc0caf21b2e1 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py @@ -188,6 +188,11 @@ class 
Impure(Value): pass +@public +class OmittedArg(Value): + pass + + @public class TimestampNow(Constant): """Return the current timestamp."""