From 10baeafd02f3cdc70f1e2253f9f7b23d1e94d2d0 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Mon, 11 May 2026 22:30:46 +0000 Subject: [PATCH 01/11] refactor(bigframes): Introduce GoogleSqlScalarOp --- .../bigframes/bigquery/_operations/geo.py | 8 +- .../bigquery/_operations/mathematical.py | 8 +- .../ibis_compiler/scalar_op_compiler.py | 8 ++ .../ibis_compiler/scalar_op_registry.py | 34 ++++++ .../compile/sqlglot/expression_compiler.py | 3 +- .../sqlglot/expressions/generic_ops.py | 30 +++++ .../compile/sqlglot/expressions/typed_expr.py | 3 + .../bigframes/bigframes/core/expression.py | 49 ++++++++ .../bigframes/operations/__init__.py | 3 + .../bigframes/bigframes/operations/ai_ops.py | 21 ++-- .../bigframes/operations/googlesql.py | 110 ++++++++++++++++++ .../bigframes/bigframes/operations/type.py | 2 + .../tests/unit/bigquery/test_mathematical.py | 6 +- .../bigframes_vendored/ibis/expr/api.py | 4 + .../ibis/expr/operations/core.py | 4 + .../ibis/expr/operations/generic.py | 6 + 16 files changed, 274 insertions(+), 25 deletions(-) create mode 100644 packages/bigframes/bigframes/operations/googlesql.py diff --git a/packages/bigframes/bigframes/bigquery/_operations/geo.py b/packages/bigframes/bigframes/bigquery/_operations/geo.py index cd1d22b16489..e9ea711c9690 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/geo.py +++ b/packages/bigframes/bigframes/bigquery/_operations/geo.py @@ -99,7 +99,7 @@ def st_area( bigframes.pandas.Series: Series of float representing the areas. """ - series = series._apply_unary_op(ops.geo_area_op) + series = series._apply_nary_op(ops.googlesql.ST_AREA, []) series.name = None return series @@ -223,7 +223,7 @@ def st_centroid( bigframes.pandas.Series: A series of geography objects representing the centroids. 
""" - series = series._apply_unary_op(ops.geo_st_centroid_op) + series = series._apply_nary_op(ops.googlesql.ST_CENTROID, []) series.name = None return series @@ -753,6 +753,4 @@ def st_simplify( Returns: a Series containing the simplified GEOGRAPHY data. """ - return geography._apply_unary_op( - ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters) - ) + return geography._apply_nary_op(ops.googlesql.ST_SIMPLIFY, [tolerance_meters]) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 476d012bea4d..9d19f0b337e6 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -20,6 +20,7 @@ import bigframes.core.expression from bigframes import dtypes from bigframes import operations as ops +from bigframes.operations import googlesql def rand() -> bigframes.core.col.Expression: @@ -47,12 +48,7 @@ def rand() -> bigframes.core.col.Expression: :func:`~bigframes.pandas.DataFrame.assign` and other methods. See :func:`bigframes.pandas.col`. 
""" - op = ops.SqlScalarOp( - _output_type=dtypes.FLOAT_DTYPE, - sql_template="RAND()", - is_deterministic=False, - ) - return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ())) + return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(googlesql.RAND, ())) def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py index 07e3110a968e..0286b1913fda 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py @@ -94,6 +94,14 @@ def _( ] return self.compile_row_op(expression.op, inputs) + @compile_expression.register + def _( + self, + expression: ex.Omitted, + bindings: typing.Dict[str, ibis_types.Value], + ) -> ibis_types.Value: + return bigframes_vendored.ibis.omitted() + def compile_row_op( self, op: ops.RowOp, inputs: typing.Sequence[ibis_types.Value] ) -> ibis_types.Value: diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 03ec72f4e44b..2a8b59092412 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -31,6 +31,7 @@ import bigframes.core.compile.ibis_compiler.default_ordering import bigframes.core.compile.ibis_types import bigframes.operations as ops +from bigframes.operations.googlesql import CallingConvention from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS from bigframes.core.compile.ibis_compiler.scalar_op_compiler import ( scalar_op_compiler, # TODO(tswast): avoid import of variables @@ -1890,6 +1891,39 @@ def case_when_op(*cases_and_outputs: 
ibis_types.Value) -> ibis_types.Value: return case_val.end() # type: ignore +@scalar_op_compiler.register_nary_op(ops.GoogleSqlScalarOp, pass_op=True) +def googlesql_scalar_op_impl(*operands: ibis_types.Value, op: ops.GoogleSqlScalarOp): + arg_templates = [] + if op.calling_convention == CallingConvention.FUNCTION: + for i, operand in enumerate(operands): + if i < len(op.args): + arg_spec = op.args[i] + else: + assert op.args[-1].is_vararg, f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + arg_spec = op.args[-1] + if operand.omitted: + assert arg_spec.optional, f"Argument omitted, but not optional" + continue + elif arg_spec.arg_name: + arg_templates.add(f"{arg_spec.arg_name} => {{{i}}}") + else: + arg_templates.add(f"{{{i}}}") + args_template = ", ".join(arg_templates) + return f"{op.sql_name}({args_template})" + elif op.calling_convention == CallingConvention.PREFIX: + assert len(operands) == 1, "prefix op expects exactly 1 arg" + return f"{op.sql_name} {{0}}" + elif op.calling_convention == CallingConvention.INFIX: + assert len(operands) == 2, 'infix op expects exactly 2 args' + return f"{{0}} {op.sql_name} {{1}}" + return ibis_generic.SqlScalar( + op.sql_template, + values=tuple(typing.cast(ibis_generic.Value, expr.op()) for expr in operands if not expr.omitted), + output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + op.output_type() + ), + ).to_expr() + @scalar_op_compiler.register_nary_op(ops.SqlScalarOp, pass_op=True) def sql_scalar_op_impl(*operands: ibis_types.Value, op: ops.SqlScalarOp): return ibis_generic.SqlScalar( diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py index e7b492c9fb92..7bba5cbce2b1 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py @@ -90,9 +90,10 @@ def _(self, expr: 
agg_exprs.WindowExpression) -> sge.Expression: @compile_expression.register def _(self, expr: ex.OpExpression) -> sge.Expression: - # Non-recursively compiles the children scalar expressions. inputs = tuple( TypedExpr(self.compile_expression(sub_expr), sub_expr.output_type) + if not isinstance(sub_expr, ex.Omitted) + else TypedExpr(sge.Null, None, is_omitted=True) for sub_expr in expr.inputs ) return self.compile_row_op(expr.op, inputs) diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 637c18074eef..ac3c8951f81f 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -20,6 +20,7 @@ import bigframes.core.compile.sqlglot.expression_compiler as expression_compiler from bigframes import dtypes from bigframes import operations as ops +from bigframes.operations.googlesql import CallingConvention from bigframes.core.compile.sqlglot import sql, sqlglot_types from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr @@ -82,6 +83,35 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.BitwiseNot(this=sge.paren(expr.expr)) +@register_nary_op(ops.GoogleSqlScalarOp, pass_op=True) +def _(*operands: TypedExpr, op: ops.GoogleSqlScalarOp) -> sge.Expression: + arg_templates = [] + if op.calling_convention == CallingConvention.FUNCTION: + for i, operand in enumerate(operands): + if i < len(op.args): + arg_spec = op.args[i] + else: + assert op.args[-1].is_vararg, f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + arg_spec = op.args[-1] + if operand.is_omitted: + assert arg_spec.optional, f"Argument omitted, but not optional" + continue + elif arg_spec.arg_name: + arg_templates.append(f"{arg_spec.arg_name} => {operand.expr.sql(dialect='bigquery')}") + else: + 
arg_templates.append(operand.expr.sql(dialect='bigquery')) + args_template = ", ".join(arg_templates) + return sg.parse_one(f"{op.sql_name}({args_template})", dialect="bigquery") + elif op.calling_convention == CallingConvention.PREFIX: + assert len(operands) == 1, "prefix op expects exactly 1 arg" + return sg.parse_one(f"{op.sql_name} {operands[0].expr.sql(dialect='bigquery')}", dialect="bigquery") + elif op.calling_convention == CallingConvention.INFIX: + assert len(operands) == 2, 'infix op expects exactly 2 args' + return sg.parse_one(f"{operands[0].expr.sql(dialect='bigquery')} {op.sql_name} {operands[1].expr.sql(dialect='bigquery')}", dialect="bigquery") + + raise NotImplementedError(f"Calling convention {op.calling_convention} not supported for {op}") + + @register_nary_op(ops.SqlScalarOp, pass_op=True) def _(*operands: TypedExpr, op: ops.SqlScalarOp) -> sge.Expression: return sg.parse_one( diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py index 4623b8c9b43b..d8c38c2e7188 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/typed_expr.py @@ -25,3 +25,6 @@ class TypedExpr: expr: sge.Expression dtype: dtypes.ExpressionType + + # kludge to support optional args in argument lists + is_omitted: bool = False diff --git a/packages/bigframes/bigframes/core/expression.py b/packages/bigframes/bigframes/core/expression.py index 3ad5a9308185..f56fc21e5df5 100644 --- a/packages/bigframes/bigframes/core/expression.py +++ b/packages/bigframes/bigframes/core/expression.py @@ -363,6 +363,55 @@ def nullable(self) -> bool: def output_type(self) -> dtypes.ExpressionType: return self.dtype +@dataclasses.dataclass(frozen=True) +class Omitted(Expression): + """Represents an omitted optional arg used calling a function.""" + @property + def free_variables(self) -> 
typing.Tuple[Hashable, ...]: + return () + + @property + def is_const(self) -> bool: + return True + + @property + def column_references(self) -> typing.Tuple[ids.ColumnId, ...]: + return () + + @property + def is_resolved(self): + return True # vacuously + + @property + def output_type(self) -> dtypes.ExpressionType: + return None + + def bind_refs( + self, + bindings: Mapping[ids.ColumnId, Expression], + allow_partial_bindings: bool = False, + ) -> UnboundVariableExpression: + return self + + def bind_variables( + self, + bindings: Mapping[Hashable, Expression], + allow_partial_bindings: bool = False, + ) -> Expression: + return self + + @property + def is_bijective(self) -> bool: + return True + + @property + def is_identity(self) -> bool: + return True + + def transform_children(self, t: Callable[[Expression], Expression]) -> Expression: + return self + + @dataclasses.dataclass(frozen=True) class OpExpression(Expression): diff --git a/packages/bigframes/bigframes/operations/__init__.py b/packages/bigframes/bigframes/operations/__init__.py index bcc21e18cce0..164afb9a15cc 100644 --- a/packages/bigframes/bigframes/operations/__init__.py +++ b/packages/bigframes/bigframes/operations/__init__.py @@ -231,6 +231,7 @@ timestamp_add_op, timestamp_sub_op, ) +from bigframes.operations.googlesql import GoogleSqlScalarOp __all__ = [ # Base ops @@ -446,4 +447,6 @@ "ToArrayOp", "ArrayReduceOp", "ArrayMapOp", + # GoogleSql + "GoogleSqlScalarOp", ] diff --git a/packages/bigframes/bigframes/operations/ai_ops.py b/packages/bigframes/bigframes/operations/ai_ops.py index 0d9438741f46..b3b5f139112d 100644 --- a/packages/bigframes/bigframes/operations/ai_ops.py +++ b/packages/bigframes/bigframes/operations/ai_ops.py @@ -15,7 +15,8 @@ from __future__ import annotations import dataclasses -from typing import ClassVar, Literal, Tuple +import typing +from typing import Literal, Tuple import pandas as pd import pyarrow as pa @@ -26,7 +27,7 @@ @dataclasses.dataclass(frozen=True) class 
AIGenerate(base_ops.NaryOp): - name: ClassVar[str] = "ai_generate" + name: typing.ClassVar[str] = "ai_generate" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -54,7 +55,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIGenerateBool(base_ops.NaryOp): - name: ClassVar[str] = "ai_generate_bool" + name: typing.ClassVar[str] = "ai_generate_bool" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -76,7 +77,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIGenerateInt(base_ops.NaryOp): - name: ClassVar[str] = "ai_generate_int" + name: typing.ClassVar[str] = "ai_generate_int" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -98,7 +99,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIGenerateDouble(base_ops.NaryOp): - name: ClassVar[str] = "ai_generate_double" + name: typing.ClassVar[str] = "ai_generate_double" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -120,7 +121,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIEmbed(base_ops.UnaryOp): - name: ClassVar[str] = "ai_embed" + name: typing.ClassVar[str] = "ai_embed" endpoint: str | None model: str | None @@ -142,7 +143,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIIf(base_ops.NaryOp): - name: ClassVar[str] = "ai_if" + name: typing.ClassVar[str] = "ai_if" prompt_context: Tuple[str | None, ...] 
connection_id: str | None @@ -156,7 +157,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIClassify(base_ops.NaryOp): - name: ClassVar[str] = "ai_classify" + name: typing.ClassVar[str] = "ai_classify" prompt_context: Tuple[str | None, ...] categories: tuple[str, ...] @@ -172,7 +173,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIScore(base_ops.NaryOp): - name: ClassVar[str] = "ai_score" + name: typing.ClassVar[str] = "ai_score" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -185,7 +186,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AISimilarity(base_ops.BinaryOp): - name: ClassVar[str] = "ai_similarity" + name: typing.ClassVar[str] = "ai_similarity" endpoint: str | None model: str | None diff --git a/packages/bigframes/bigframes/operations/googlesql.py b/packages/bigframes/bigframes/operations/googlesql.py new file mode 100644 index 000000000000..97d2b6fde895 --- /dev/null +++ b/packages/bigframes/bigframes/operations/googlesql.py @@ -0,0 +1,110 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from __future__ import annotations + +import typing +from typing import Callable, Iterable + +from enum import Enum, auto + +from bigframes import dtypes +import bigframes.operations as ops +import bigframes.operations.type as op_typing + +import dataclasses + + +class CallingConvention(Enum): + FUNCTION = auto() # standard: name(arg1, arg2) + INFIX = auto() # operator: arg1 name arg2 (e.g., +) + PREFIX = auto() # operator: name arg1 (e.g., NOT) + SPECIAL = auto() # Custom compilation template (e.g., CAST, EXTRACT) + +@dataclasses.dataclass(frozen=True) +class ArgSpec: + arg_name: Optional[str] = None + optional: bool = False + is_vararg: bool = False + const_only: bool = False + +@dataclasses.dataclass(frozen=True) +class OpSignature: + # Detailed specs for each parameter. This is particularly relevant for ren + arg_specs: Sequence[ArgSpec] + resolve_return_type: TypeResolver + has_varargs: bool = False + + +# Eventually we should migrate every op over to this that can be directly emitted 1:1 as a sql op +# This will allow us to fully lower to pure SQL dialect expressions and emitting sql text is trivial. 
+@dataclasses.dataclass(frozen=True) +class GoogleSqlScalarOp(ops.NaryOp): + name: typing.ClassVar[str] = "googlesql_scalar" + + # syntax + sql_name: str # for function `sql_name`(a, b), for infix a `sql_name`` b, for prefix `sql_name` a + args: tuple[ArgSpec] + # typing + signature: Callable[[Iterable[dtypes.ExpressionType]], dtypes.ExpressionType] + # syntax again + calling_convention: CallingConvention = CallingConvention.FUNCTION + + # semantics + is_deterministic: bool = True + + + @property + def deterministic(self) -> bool: + return self.is_deterministic + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return self.signature(*input_types) + + +RAND = GoogleSqlScalarOp("RAND", args=(), is_deterministic=False, signature=lambda:dtypes.FLOAT_DTYPE) + +def _check_geo_input(t: dtypes.ExpressionType, out: dtypes.ExpressionType) -> dtypes.ExpressionType: + if t is not None and not dtypes.is_geo_like(t): + raise TypeError(f"Type {t} is not supported. Type must be geo-like") + return out + +def _check_simplify_inputs(geo: dtypes.ExpressionType, tol: dtypes.ExpressionType) -> dtypes.ExpressionType: + if geo is not None and not dtypes.is_geo_like(geo): + raise TypeError(f"Type {geo} is not supported. Type must be geo-like") + if tol is not None and not dtypes.is_numeric(tol): + raise TypeError(f"Type {tol} is not supported. 
Type must be numeric") + return dtypes.GEO_DTYPE + +ST_AREA = GoogleSqlScalarOp( + "ST_AREA", + args=(ArgSpec(),), + is_deterministic=True, + signature=lambda geo: _check_geo_input(geo, dtypes.FLOAT_DTYPE), +) + +ST_CENTROID = GoogleSqlScalarOp( + "ST_CENTROID", + args=(ArgSpec(),), + is_deterministic=True, + signature=lambda geo: _check_geo_input(geo, dtypes.GEO_DTYPE), +) + +ST_SIMPLIFY = GoogleSqlScalarOp( + "ST_SIMPLIFY", + args=(ArgSpec(), ArgSpec()), + is_deterministic=True, + signature=_check_simplify_inputs, +) diff --git a/packages/bigframes/bigframes/operations/type.py b/packages/bigframes/bigframes/operations/type.py index b53e6cd41ede..1c6bfc7be5d8 100644 --- a/packages/bigframes/bigframes/operations/type.py +++ b/packages/bigframes/bigframes/operations/type.py @@ -34,6 +34,8 @@ def as_method(self): """Convert the signature into an object method. Convenience function for constructing ops that use the signature.""" ... + def __call__(self, *args, **kwargs): + return self.as_method(*args, **kwargs) class UnaryTypeSignature(TypeSignature): @abc.abstractmethod diff --git a/packages/bigframes/tests/unit/bigquery/test_mathematical.py b/packages/bigframes/tests/unit/bigquery/test_mathematical.py index a39aeb103c04..f0cb16ae145f 100644 --- a/packages/bigframes/tests/unit/bigquery/test_mathematical.py +++ b/packages/bigframes/tests/unit/bigquery/test_mathematical.py @@ -26,8 +26,8 @@ def test_rand_returns_expression(): node = expr._value assert isinstance(node, ex.OpExpression) op = node.op - assert isinstance(op, ops.SqlScalarOp) - assert op.sql_template == "RAND()" - assert op._output_type == dtypes.FLOAT_DTYPE + assert isinstance(op, ops.GoogleSqlScalarOp) + assert op.sql_name == "RAND" + assert op.output_type() == dtypes.FLOAT_DTYPE assert not op.is_deterministic assert len(node.inputs) == 0 diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py index 
af85e937d561..953ecb2979fa 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/api.py @@ -2467,3 +2467,7 @@ def least(*args: Any) -> ir.Value: └────────────┘ """ return ops.Least(args).to_expr() + + +def omitted() -> ir.Value: + return ops.Omitted().to_expr() diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py index 5ad1c885a4a2..9594e9d4c43a 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py @@ -136,6 +136,10 @@ def to_expr(self): return getattr(ir, typename)(self) + @property + def ommitted(self) -> bool: + return False + # convenience aliases Scalar = Value[T, ds.Scalar] diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py index 3933a70a9630..d0303e7370d0 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py @@ -187,6 +187,12 @@ class Constant(Scalar, Singleton): class Impure(Value): pass +@public +class Omitted(Value): + @property + def ommitted(bool): + return True + @public class TimestampNow(Constant): From 52fe87ea6844aefaf5b3a62e41e11e3816722a9a Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 01:10:23 +0000 Subject: [PATCH 02/11] fix issues --- .../bigquery/_operations/mathematical.py | 4 +- .../ibis_compiler/scalar_op_registry.py | 62 ++++++++++++++----- .../bigframes/bigframes/core/expression.py | 5 +- .../bigframes/operations/googlesql.py | 46 ++++++++------ .../bigframes/bigframes/operations/type.py | 1 + .../ibis/expr/operations/core.py | 2 +- 
.../ibis/expr/operations/generic.py | 3 +- 7 files changed, 82 insertions(+), 41 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 9d19f0b337e6..5e6a299f83f3 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -48,7 +48,9 @@ def rand() -> bigframes.core.col.Expression: :func:`~bigframes.pandas.DataFrame.assign` and other methods. See :func:`bigframes.pandas.col`. """ - return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(googlesql.RAND, ())) + return bigframes.core.col.Expression( + bigframes.core.expression.OpExpression(googlesql.RAND, ()) + ) def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 2a8b59092412..abcc50555a31 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -31,11 +31,11 @@ import bigframes.core.compile.ibis_compiler.default_ordering import bigframes.core.compile.ibis_types import bigframes.operations as ops -from bigframes.operations.googlesql import CallingConvention from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS from bigframes.core.compile.ibis_compiler.scalar_op_compiler import ( scalar_op_compiler, # TODO(tswast): avoid import of variables ) +from bigframes.operations.googlesql import CallingConvention _ZERO = typing.cast(ibis_types.NumericValue, ibis_types.literal(0)) _NAN = typing.cast(ibis_types.NumericValue, ibis_types.literal(np.nan)) @@ -1893,36 +1893,64 @@ def case_when_op(*cases_and_outputs: ibis_types.Value) -> ibis_types.Value: 
@scalar_op_compiler.register_nary_op(ops.GoogleSqlScalarOp, pass_op=True) def googlesql_scalar_op_impl(*operands: ibis_types.Value, op: ops.GoogleSqlScalarOp): + final_operands = [] arg_templates = [] if op.calling_convention == CallingConvention.FUNCTION: for i, operand in enumerate(operands): if i < len(op.args): arg_spec = op.args[i] else: - assert op.args[-1].is_vararg, f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + assert op.args[-1].is_vararg, ( + f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + ) arg_spec = op.args[-1] - if operand.omitted: + if operand.op().omitted: assert arg_spec.optional, f"Argument omitted, but not optional" continue - elif arg_spec.arg_name: - arg_templates.add(f"{arg_spec.arg_name} => {{{i}}}") + + target_idx = len(final_operands) + final_operands.append(operand) + if arg_spec.arg_name: + arg_templates.append(f"{arg_spec.arg_name} => {{{target_idx}}}") else: - arg_templates.add(f"{{{i}}}") + arg_templates.append(f"{{{target_idx}}}") args_template = ", ".join(arg_templates) - return f"{op.sql_name}({args_template})" + sql_template = f"{op.sql_name}({args_template})" + return ibis_generic.SqlScalar( + sql_template, + values=tuple( + typing.cast(ibis_generic.Value, expr.op()) for expr in final_operands + ), + output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + op.output_type() + ), + ).to_expr() elif op.calling_convention == CallingConvention.PREFIX: assert len(operands) == 1, "prefix op expects exactly 1 arg" - return f"{op.sql_name} {{0}}" + return ibis_generic.SqlScalar( + f"{op.sql_name} {{0}}", + values=tuple( + typing.cast(ibis_generic.Value, expr.op()) for expr in operands + ), + output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + op.output_type() + ), + ).to_expr() elif op.calling_convention == CallingConvention.INFIX: - assert len(operands) == 2, 'infix op expects exactly 2 args' - return f"{{0}} {op.sql_name} {{1}}" - return 
ibis_generic.SqlScalar( - op.sql_template, - values=tuple(typing.cast(ibis_generic.Value, expr.op()) for expr in operands if not expr.omitted), - output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( - op.output_type() - ), - ).to_expr() + assert len(operands) == 2, "infix op expects exactly 2 args" + return ibis_generic.SqlScalar( + f"{{0}} {op.sql_name} {{1}}", + values=tuple( + typing.cast(ibis_generic.Value, expr.op()) for expr in operands + ), + output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + op.output_type() + ), + ).to_expr() + raise NotImplementedError( + f"Calling convention {op.calling_convention} not supported for {op}" + ) + @scalar_op_compiler.register_nary_op(ops.SqlScalarOp, pass_op=True) def sql_scalar_op_impl(*operands: ibis_types.Value, op: ops.SqlScalarOp): diff --git a/packages/bigframes/bigframes/core/expression.py b/packages/bigframes/bigframes/core/expression.py index f56fc21e5df5..ce2b9d9cebe7 100644 --- a/packages/bigframes/bigframes/core/expression.py +++ b/packages/bigframes/bigframes/core/expression.py @@ -363,9 +363,11 @@ def nullable(self) -> bool: def output_type(self) -> dtypes.ExpressionType: return self.dtype + @dataclasses.dataclass(frozen=True) class Omitted(Expression): """Represents an omitted optional arg used calling a function.""" + @property def free_variables(self) -> typing.Tuple[Hashable, ...]: return () @@ -380,7 +382,7 @@ def column_references(self) -> typing.Tuple[ids.ColumnId, ...]: @property def is_resolved(self): - return True # vacuously + return True # vacuously @property def output_type(self) -> dtypes.ExpressionType: @@ -412,7 +414,6 @@ def transform_children(self, t: Callable[[Expression], Expression]) -> Expressio return self - @dataclasses.dataclass(frozen=True) class OpExpression(Expression): """An expression representing a scalar operation applied to 1 or more argument sub-expressions.""" diff --git a/packages/bigframes/bigframes/operations/googlesql.py 
b/packages/bigframes/bigframes/operations/googlesql.py index 97d2b6fde895..3e1e3ea2bc81 100644 --- a/packages/bigframes/bigframes/operations/googlesql.py +++ b/packages/bigframes/bigframes/operations/googlesql.py @@ -15,36 +15,36 @@ from __future__ import annotations +import dataclasses import typing -from typing import Callable, Iterable - from enum import Enum, auto +from typing import Callable, Iterable -from bigframes import dtypes import bigframes.operations as ops import bigframes.operations.type as op_typing - -import dataclasses +from bigframes import dtypes class CallingConvention(Enum): - FUNCTION = auto() # standard: name(arg1, arg2) - INFIX = auto() # operator: arg1 name arg2 (e.g., +) - PREFIX = auto() # operator: name arg1 (e.g., NOT) - SPECIAL = auto() # Custom compilation template (e.g., CAST, EXTRACT) + FUNCTION = auto() # standard: name(arg1, arg2) + INFIX = auto() # operator: arg1 name arg2 (e.g., +) + PREFIX = auto() # operator: name arg1 (e.g., NOT) + SPECIAL = auto() # Custom compilation template (e.g., CAST, EXTRACT) + @dataclasses.dataclass(frozen=True) class ArgSpec: - arg_name: Optional[str] = None + arg_name: str | None = None optional: bool = False is_vararg: bool = False const_only: bool = False + @dataclasses.dataclass(frozen=True) class OpSignature: # Detailed specs for each parameter. This is particularly relevant for ren - arg_specs: Sequence[ArgSpec] - resolve_return_type: TypeResolver + arg_specs: typing.Sequence[ArgSpec] + resolve_return_type: typing.Any has_varargs: bool = False @@ -55,17 +55,16 @@ class GoogleSqlScalarOp(ops.NaryOp): name: typing.ClassVar[str] = "googlesql_scalar" # syntax - sql_name: str # for function `sql_name`(a, b), for infix a `sql_name`` b, for prefix `sql_name` a - args: tuple[ArgSpec] + sql_name: str # for function `sql_name`(a, b), for infix a `sql_name`` b, for prefix `sql_name` a + args: tuple[ArgSpec, ...] 
# typing - signature: Callable[[Iterable[dtypes.ExpressionType]], dtypes.ExpressionType] + signature: typing.Callable[..., dtypes.ExpressionType] # syntax again calling_convention: CallingConvention = CallingConvention.FUNCTION # semantics is_deterministic: bool = True - @property def deterministic(self) -> bool: return self.is_deterministic @@ -74,20 +73,29 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT return self.signature(*input_types) -RAND = GoogleSqlScalarOp("RAND", args=(), is_deterministic=False, signature=lambda:dtypes.FLOAT_DTYPE) +RAND = GoogleSqlScalarOp( + "RAND", args=(), is_deterministic=False, signature=lambda: dtypes.FLOAT_DTYPE +) -def _check_geo_input(t: dtypes.ExpressionType, out: dtypes.ExpressionType) -> dtypes.ExpressionType: + +def _check_geo_input( + t: dtypes.ExpressionType, out: dtypes.ExpressionType +) -> dtypes.ExpressionType: if t is not None and not dtypes.is_geo_like(t): raise TypeError(f"Type {t} is not supported. Type must be geo-like") return out -def _check_simplify_inputs(geo: dtypes.ExpressionType, tol: dtypes.ExpressionType) -> dtypes.ExpressionType: + +def _check_simplify_inputs( + geo: dtypes.ExpressionType, tol: dtypes.ExpressionType +) -> dtypes.ExpressionType: if geo is not None and not dtypes.is_geo_like(geo): raise TypeError(f"Type {geo} is not supported. Type must be geo-like") if tol is not None and not dtypes.is_numeric(tol): raise TypeError(f"Type {tol} is not supported. 
Type must be numeric") return dtypes.GEO_DTYPE + ST_AREA = GoogleSqlScalarOp( "ST_AREA", args=(ArgSpec(),), diff --git a/packages/bigframes/bigframes/operations/type.py b/packages/bigframes/bigframes/operations/type.py index 1c6bfc7be5d8..0ddf3a113fc0 100644 --- a/packages/bigframes/bigframes/operations/type.py +++ b/packages/bigframes/bigframes/operations/type.py @@ -37,6 +37,7 @@ def as_method(self): def __call__(self, *args, **kwargs): return self.as_method(*args, **kwargs) + class UnaryTypeSignature(TypeSignature): @abc.abstractmethod def output_type(self, input_type: ExpressionType) -> ExpressionType: ... diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py index 9594e9d4c43a..ad0bd095b6c7 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/core.py @@ -137,7 +137,7 @@ def to_expr(self): return getattr(ir, typename)(self) @property - def ommitted(self) -> bool: + def omitted(self) -> bool: return False diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py index d0303e7370d0..6406c55e3e3d 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py @@ -187,10 +187,11 @@ class Constant(Scalar, Singleton): class Impure(Value): pass + @public class Omitted(Value): @property - def ommitted(bool): + def omitted(self) -> bool: return True From c2a875b056740e5dce6cbe7b4c402e3f376bfa8d Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 01:17:33 +0000 Subject: [PATCH 03/11] fix unit test --- .../snapshots/test_compile_geo/test_st_simplify/out.sql | 2 +- 
.../tests/unit/core/compile/sqlglot/test_compile_geo.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql index 1c146e1e1be5..177cb5292b3f 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_geo/test_st_simplify/out.sql @@ -1,7 +1,7 @@ WITH `bfcte_0` AS ( SELECT * - FROM UNNEST(ARRAY>[STRUCT('POINT(1 1)', 0)]) + FROM UNNEST(ARRAY>[STRUCT(ST_GEOGFROMTEXT('LINESTRING(0 0, 1 1, 2 0)'), 0)]) ) SELECT ST_SIMPLIFY(`bfcol_0`, 123.125) AS `0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py index 50de1488e6c6..0f603978212e 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py @@ -16,6 +16,7 @@ import bigframes.bigquery as bbq import bigframes.geopandas as gpd +from shapely.geometry import LineString # type: ignore pytest.importorskip("pytest_snapshot") @@ -44,7 +45,7 @@ def test_st_regionstats_without_optional_args(compiler_session, snapshot): def test_st_simplify(compiler_session, snapshot): - geos = gpd.GeoSeries(["POINT(1 1)"], session=compiler_session) + geos = gpd.GeoSeries([LineString([(0, 0), (1, 1), (2, 0)])], session=compiler_session) result = bbq.st_simplify( geos, tolerance_meters=123.125, From 17a6315a548316353ca2bfadd7d77990b3347a31 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 16:58:40 +0000 Subject: [PATCH 04/11] cleanup geo ops --- .../ibis_compiler/operations/geo_ops.py | 16 ---------------- .../compile/sqlglot/expressions/geo_ops.py | 19 
------------------- .../bigframes/geopandas/geoseries.py | 2 +- .../bigframes/operations/__init__.py | 5 ----- .../bigframes/bigframes/operations/geo_ops.py | 7 ------- .../sqlglot/expressions/test_geo_ops.py | 19 ------------------- 6 files changed, 1 insertion(+), 67 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py index 5cfc8237be2e..772752112a40 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/operations/geo_ops.py @@ -30,11 +30,6 @@ # Geo Ops -@register_unary_op(ops.geo_area_op) -def geo_area_op_impl(x: ibis_types.Value): - return cast(ibis_types.GeoSpatialValue, x).area() - - @register_unary_op(ops.geo_st_astext_op) def geo_st_astext_op_impl(x: ibis_types.Value): return cast(ibis_types.GeoSpatialValue, x).as_text() @@ -55,11 +50,6 @@ def geo_st_buffer_op_impl(x: ibis_types.Value, op: ops.GeoStBufferOp): ) -@register_unary_op(ops.geo_st_centroid_op, pass_op=False) -def geo_st_centroid_op_impl(x: ibis_types.Value): - return cast(ibis_types.GeoSpatialValue, x).centroid() - - @register_unary_op(ops.geo_st_convexhull_op, pass_op=False) def geo_st_convexhull_op_impl(x: ibis_types.Value): return st_convexhull(x) @@ -132,12 +122,6 @@ def geo_st_regionstats_op_impl( ).to_expr() -@register_unary_op(ops.GeoStSimplifyOp, pass_op=True) -def st_simplify_op_impl(x: ibis_types.Value, op: ops.GeoStSimplifyOp): - x = cast(ibis_types.GeoSpatialValue, x) - return st_simplify(x, op.tolerance_meters) - - @register_unary_op(ops.geo_x_op) def geo_x_op_impl(x: ibis_types.Value): return cast(ibis_types.GeoSpatialValue, x).x() diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py index 1c9e559b8975..8c353988ae3e 100644 --- 
a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/geo_ops.py @@ -24,11 +24,6 @@ register_binary_op = expression_compiler.expression_compiler.register_binary_op -@register_unary_op(ops.geo_area_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_AREA", expr.expr) - - @register_unary_op(ops.geo_st_astext_op) def _(expr: TypedExpr) -> sge.Expression: return sge.func("ST_ASTEXT", expr.expr) @@ -50,11 +45,6 @@ def _(expr: TypedExpr, op: ops.GeoStBufferOp) -> sge.Expression: ) -@register_unary_op(ops.geo_st_centroid_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_CENTROID", expr.expr) - - @register_unary_op(ops.geo_st_convexhull_op) def _(expr: TypedExpr) -> sge.Expression: return sge.func("ST_CONVEXHULL", expr.expr) @@ -97,15 +87,6 @@ def _( return sge.func("ST_REGIONSTATS", *args) -@register_unary_op(ops.GeoStSimplifyOp, pass_op=True) -def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression: - return sge.func( - "ST_SIMPLIFY", - expr.expr, - sge.convert(op.tolerance_meters), - ) - - @register_unary_op(ops.geo_x_op) def _(expr: TypedExpr) -> sge.Expression: return sge.func("ST_X", expr.expr) diff --git a/packages/bigframes/bigframes/geopandas/geoseries.py b/packages/bigframes/bigframes/geopandas/geoseries.py index 1da9d00a6ce6..dc373216b650 100644 --- a/packages/bigframes/bigframes/geopandas/geoseries.py +++ b/packages/bigframes/bigframes/geopandas/geoseries.py @@ -107,7 +107,7 @@ def buffer(self: GeoSeries, distance: float) -> bigframes.series.Series: # type @property def centroid(self: GeoSeries) -> bigframes.series.Series: # type: ignore - return self._apply_unary_op(ops.geo_st_centroid_op) + return self._apply_nary_op(ops.googlesql.ST_CENTROID, []) @property def convex_hull(self: GeoSeries) -> bigframes.series.Series: # type: ignore diff --git a/packages/bigframes/bigframes/operations/__init__.py 
b/packages/bigframes/bigframes/operations/__init__.py index 164afb9a15cc..5e0a23005a24 100644 --- a/packages/bigframes/bigframes/operations/__init__.py +++ b/packages/bigframes/bigframes/operations/__init__.py @@ -115,10 +115,8 @@ GeoStLengthOp, GeoStRegionStatsOp, GeoStSimplifyOp, - geo_area_op, geo_st_astext_op, geo_st_boundary_op, - geo_st_centroid_op, geo_st_convexhull_op, geo_st_difference_op, geo_st_geogfromtext_op, @@ -414,9 +412,7 @@ "euclidean_distance_op", "manhattan_distance_op", # Geo ops - "geo_area_op", "geo_st_boundary_op", - "geo_st_centroid_op", "geo_st_convexhull_op", "geo_st_difference_op", "geo_st_astext_op", @@ -430,7 +426,6 @@ "GeoStDistanceOp", "GeoStLengthOp", "GeoStRegionStatsOp", - "GeoStSimplifyOp", # AI ops "AIClassify", "AIGenerate", diff --git a/packages/bigframes/bigframes/operations/geo_ops.py b/packages/bigframes/bigframes/operations/geo_ops.py index 85344bc2fbe7..72f3ab48de5b 100644 --- a/packages/bigframes/bigframes/operations/geo_ops.py +++ b/packages/bigframes/bigframes/operations/geo_ops.py @@ -19,13 +19,6 @@ from bigframes import dtypes from bigframes.operations import base_ops -GeoAreaOp = base_ops.create_unary_op( - name="geo_area", - type_signature=op_typing.FixedOutputType( - dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" - ), -) -geo_area_op = GeoAreaOp() GeoStAstextOp = base_ops.create_unary_op( name="geo_st_astext", diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py index 2d9373967c52..4dad2630d92e 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py @@ -21,15 +21,6 @@ pytest.importorskip("pytest_snapshot") -def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = 
utils._apply_ops_to_sql( - bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" @@ -61,16 +52,6 @@ def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") -def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [ops.geo_st_centroid_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] From 173bb3af59925e15cb62186e73094549e7686e55 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 17:14:01 +0000 Subject: [PATCH 05/11] drop calling convention feature --- .../ibis_compiler/scalar_op_registry.py | 84 +++++++------------ .../sqlglot/expressions/generic_ops.py | 43 ++++------ .../bigframes/operations/googlesql.py | 11 +-- 3 files changed, 47 insertions(+), 91 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index abcc50555a31..29df7b08bf1a 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -1895,61 +1895,35 @@ def case_when_op(*cases_and_outputs: ibis_types.Value) -> ibis_types.Value: def googlesql_scalar_op_impl(*operands: ibis_types.Value, op: ops.GoogleSqlScalarOp): final_operands = [] arg_templates = [] - if op.calling_convention == CallingConvention.FUNCTION: - for i, operand in enumerate(operands): - if i < len(op.args): - arg_spec = op.args[i] - else: - assert op.args[-1].is_vararg, ( - f"Too many arguments, for 
{op.sql_name}, expected {len(op.args)}" - ) - arg_spec = op.args[-1] - if operand.op().omitted: - assert arg_spec.optional, f"Argument omitted, but not optional" - continue - - target_idx = len(final_operands) - final_operands.append(operand) - if arg_spec.arg_name: - arg_templates.append(f"{arg_spec.arg_name} => {{{target_idx}}}") - else: - arg_templates.append(f"{{{target_idx}}}") - args_template = ", ".join(arg_templates) - sql_template = f"{op.sql_name}({args_template})" - return ibis_generic.SqlScalar( - sql_template, - values=tuple( - typing.cast(ibis_generic.Value, expr.op()) for expr in final_operands - ), - output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( - op.output_type() - ), - ).to_expr() - elif op.calling_convention == CallingConvention.PREFIX: - assert len(operands) == 1, "prefix op expects exactly 1 arg" - return ibis_generic.SqlScalar( - f"{op.sql_name} {{0}}", - values=tuple( - typing.cast(ibis_generic.Value, expr.op()) for expr in operands - ), - output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( - op.output_type() - ), - ).to_expr() - elif op.calling_convention == CallingConvention.INFIX: - assert len(operands) == 2, "infix op expects exactly 2 args" - return ibis_generic.SqlScalar( - f"{{0}} {op.sql_name} {{1}}", - values=tuple( - typing.cast(ibis_generic.Value, expr.op()) for expr in operands - ), - output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( - op.output_type() - ), - ).to_expr() - raise NotImplementedError( - f"Calling convention {op.calling_convention} not supported for {op}" - ) + for i, operand in enumerate(operands): + if i < len(op.args): + arg_spec = op.args[i] + else: + assert op.args[-1].is_vararg, ( + f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + ) + arg_spec = op.args[-1] + if operand.op().omitted: + assert arg_spec.optional, f"Argument omitted, but not optional" + continue + + target_idx = len(final_operands) + 
final_operands.append(operand) + if arg_spec.arg_name: + arg_templates.append(f"{arg_spec.arg_name} => {{{target_idx}}}") + else: + arg_templates.append(f"{{{target_idx}}}") + args_template = ", ".join(arg_templates) + sql_template = f"{op.sql_name}({args_template})" + return ibis_generic.SqlScalar( + sql_template, + values=tuple( + typing.cast(ibis_generic.Value, expr.op()) for expr in final_operands + ), + output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + op.output_type() + ), + ).to_expr() @scalar_op_compiler.register_nary_op(ops.SqlScalarOp, pass_op=True) diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py index ac3c8951f81f..3b7809c2c338 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -85,32 +85,23 @@ def _(expr: TypedExpr) -> sge.Expression: @register_nary_op(ops.GoogleSqlScalarOp, pass_op=True) def _(*operands: TypedExpr, op: ops.GoogleSqlScalarOp) -> sge.Expression: - arg_templates = [] - if op.calling_convention == CallingConvention.FUNCTION: - for i, operand in enumerate(operands): - if i < len(op.args): - arg_spec = op.args[i] - else: - assert op.args[-1].is_vararg, f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" - arg_spec = op.args[-1] - if operand.is_omitted: - assert arg_spec.optional, f"Argument omitted, but not optional" - continue - elif arg_spec.arg_name: - arg_templates.append(f"{arg_spec.arg_name} => {operand.expr.sql(dialect='bigquery')}") - else: - arg_templates.append(operand.expr.sql(dialect='bigquery')) - args_template = ", ".join(arg_templates) - return sg.parse_one(f"{op.sql_name}({args_template})", dialect="bigquery") - elif op.calling_convention == CallingConvention.PREFIX: - assert len(operands) == 1, "prefix op expects exactly 1 arg" - return 
sg.parse_one(f"{op.sql_name} {operands[0].expr.sql(dialect='bigquery')}", dialect="bigquery") - elif op.calling_convention == CallingConvention.INFIX: - assert len(operands) == 2, 'infix op expects exactly 2 args' - return sg.parse_one(f"{operands[0].expr.sql(dialect='bigquery')} {op.sql_name} {operands[1].expr.sql(dialect='bigquery')}", dialect="bigquery") - - raise NotImplementedError(f"Calling convention {op.calling_convention} not supported for {op}") - + args = [] + for i, operand in enumerate(operands): + if i < len(op.args): + arg_spec = op.args[i] + else: + assert op.args[-1].is_vararg, f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + arg_spec = op.args[-1] + if operand.is_omitted: + assert arg_spec.optional, f"Argument omitted, but not optional" + continue + elif arg_spec.arg_name: + args.append( + sge.Kwarg(this=arg_spec.arg_name, expression=operand.expr) + ) + else: + args.append(operand.expr) + return sg.func(op.sql_name, *args) @register_nary_op(ops.SqlScalarOp, pass_op=True) def _(*operands: TypedExpr, op: ops.SqlScalarOp) -> sge.Expression: diff --git a/packages/bigframes/bigframes/operations/googlesql.py b/packages/bigframes/bigframes/operations/googlesql.py index 3e1e3ea2bc81..0100784bda1d 100644 --- a/packages/bigframes/bigframes/operations/googlesql.py +++ b/packages/bigframes/bigframes/operations/googlesql.py @@ -25,13 +25,6 @@ from bigframes import dtypes -class CallingConvention(Enum): - FUNCTION = auto() # standard: name(arg1, arg2) - INFIX = auto() # operator: arg1 name arg2 (e.g., +) - PREFIX = auto() # operator: name arg1 (e.g., NOT) - SPECIAL = auto() # Custom compilation template (e.g., CAST, EXTRACT) - - @dataclasses.dataclass(frozen=True) class ArgSpec: arg_name: str | None = None @@ -55,12 +48,10 @@ class GoogleSqlScalarOp(ops.NaryOp): name: typing.ClassVar[str] = "googlesql_scalar" # syntax - sql_name: str # for function `sql_name`(a, b), for infix a `sql_name`` b, for prefix `sql_name` a + sql_name: str args: 
tuple[ArgSpec, ...] # typing signature: typing.Callable[..., dtypes.ExpressionType] - # syntax again - calling_convention: CallingConvention = CallingConvention.FUNCTION # semantics is_deterministic: bool = True From 209719cafc259cde09643c1ece87bbca86498a8c Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 17:16:11 +0000 Subject: [PATCH 06/11] revert ai_ops.py --- .../bigframes/bigframes/operations/ai_ops.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/packages/bigframes/bigframes/operations/ai_ops.py b/packages/bigframes/bigframes/operations/ai_ops.py index b3b5f139112d..0d9438741f46 100644 --- a/packages/bigframes/bigframes/operations/ai_ops.py +++ b/packages/bigframes/bigframes/operations/ai_ops.py @@ -15,8 +15,7 @@ from __future__ import annotations import dataclasses -import typing -from typing import Literal, Tuple +from typing import ClassVar, Literal, Tuple import pandas as pd import pyarrow as pa @@ -27,7 +26,7 @@ @dataclasses.dataclass(frozen=True) class AIGenerate(base_ops.NaryOp): - name: typing.ClassVar[str] = "ai_generate" + name: ClassVar[str] = "ai_generate" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -55,7 +54,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIGenerateBool(base_ops.NaryOp): - name: typing.ClassVar[str] = "ai_generate_bool" + name: ClassVar[str] = "ai_generate_bool" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -77,7 +76,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIGenerateInt(base_ops.NaryOp): - name: typing.ClassVar[str] = "ai_generate_int" + name: ClassVar[str] = "ai_generate_int" prompt_context: Tuple[str | None, ...] 
connection_id: str | None @@ -99,7 +98,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIGenerateDouble(base_ops.NaryOp): - name: typing.ClassVar[str] = "ai_generate_double" + name: ClassVar[str] = "ai_generate_double" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -121,7 +120,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIEmbed(base_ops.UnaryOp): - name: typing.ClassVar[str] = "ai_embed" + name: ClassVar[str] = "ai_embed" endpoint: str | None model: str | None @@ -143,7 +142,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIIf(base_ops.NaryOp): - name: typing.ClassVar[str] = "ai_if" + name: ClassVar[str] = "ai_if" prompt_context: Tuple[str | None, ...] connection_id: str | None @@ -157,7 +156,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIClassify(base_ops.NaryOp): - name: typing.ClassVar[str] = "ai_classify" + name: ClassVar[str] = "ai_classify" prompt_context: Tuple[str | None, ...] categories: tuple[str, ...] @@ -173,7 +172,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AIScore(base_ops.NaryOp): - name: typing.ClassVar[str] = "ai_score" + name: ClassVar[str] = "ai_score" prompt_context: Tuple[str | None, ...] 
connection_id: str | None @@ -186,7 +185,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT @dataclasses.dataclass(frozen=True) class AISimilarity(base_ops.BinaryOp): - name: typing.ClassVar[str] = "ai_similarity" + name: ClassVar[str] = "ai_similarity" endpoint: str | None model: str | None From db9c3a38bc410fc2e59f14cec0c6e9a00e9a4d0a Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 18:28:18 +0000 Subject: [PATCH 07/11] fix ibis comiple --- .../compile/ibis_compiler/ibis_compiler.py | 7 ++++ .../ibis_compiler/scalar_op_compiler.py | 38 +++++++++++++++++++ .../ibis_compiler/scalar_op_registry.py | 36 ------------------ .../compile/sqlglot/expression_compiler.py | 2 +- .../sqlglot/expressions/generic_ops.py | 10 ++--- .../tests/system/small/engines/conftest.py | 36 +++++++++++++----- .../small/engines/test_googlesql_ops.py | 36 ++++++++++++++++++ .../core/compile/sqlglot/test_compile_geo.py | 6 ++- .../ibis/expr/operations/generic.py | 4 +- 9 files changed, 118 insertions(+), 57 deletions(-) create mode 100644 packages/bigframes/tests/system/small/engines/test_googlesql_ops.py diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py index d52a9e381b53..a9e61a7de3d5 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py @@ -22,6 +22,7 @@ import bigframes_vendored.ibis.expr.api as ibis_api import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes import bigframes_vendored.ibis.expr.types as ibis_types +import bigframes.core.rewrite.schema_binding as schema_binding import bigframes.core.compile.compiled as compiled import bigframes.core.compile.concat as concat_impl @@ -59,6 +60,9 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: if request.sort_rows: result_node = 
cast(nodes.ResultNode, rewrites.column_pruning(result_node)) encoded_type_refs = data_type_logger.encode_type_refs(result_node) + # Have to bind schema as the final step before compilation. + # Probably, should defer even further + result_node = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node)) sql = compile_result_node(result_node) return configs.CompileResult( sql, @@ -72,6 +76,9 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node)) result_node = cast(nodes.ResultNode, rewrites.defer_selection(result_node)) encoded_type_refs = data_type_logger.encode_type_refs(result_node) + # Have to bind schema as the final step before compilation. + # Probably, should defer even further + result_node = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node)) sql = compile_result_node(result_node) # Return the ordering iff no extra columns are needed to define the row order if ordering is not None: diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py index 0286b1913fda..c27d578880a8 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py @@ -22,11 +22,13 @@ import bigframes_vendored.ibis import bigframes_vendored.ibis.expr.types as ibis_types +import bigframes_vendored.ibis.expr.operations.generic as ibis_generic import bigframes.core.compile.ibis_types import bigframes.core.expression as ex from bigframes.core import agg_expressions, ordering from bigframes.operations import numeric_ops +from bigframes.operations import googlesql as gsql_ops if TYPE_CHECKING: import bigframes.operations as ops @@ -92,6 +94,8 @@ def _( self.compile_expression(sub_expr, bindings) for sub_expr in expression.inputs ] 
+ if isinstance(expression.op, gsql_ops.GoogleSqlScalarOp): + return googlesql_scalar_op_impl(*inputs, op=expression.op, output_type=expression.output_type) return self.compile_row_op(expression.op, inputs) @compile_expression.register @@ -286,3 +290,37 @@ def isnanornull(arg): @scalar_op_compiler.register_unary_op(numeric_ops.isfinite_op) def isfinite(arg): return arg.isinf().negate() & arg.isnan().negate() + + +def googlesql_scalar_op_impl(*operands: ibis_types.Value, op: ops.GoogleSqlScalarOp, output_type): + final_operands = [] + arg_templates = [] + for i, operand in enumerate(operands): + if i < len(op.args): + arg_spec = op.args[i] + else: + assert op.args[-1].is_vararg, ( + f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + ) + arg_spec = op.args[-1] + if isinstance(operand.op(), ibis_generic.OmittedArg): + assert arg_spec.optional, f"Argument omitted, but not optional" + continue + + target_idx = len(final_operands) + final_operands.append(operand) + if arg_spec.arg_name: + arg_templates.append(f"{arg_spec.arg_name} => {{{target_idx}}}") + else: + arg_templates.append(f"{{{target_idx}}}") + args_template = ", ".join(arg_templates) + sql_template = f"{op.sql_name}({args_template})" + return ibis_generic.SqlScalar( + sql_template, + values=tuple( + typing.cast(ibis_generic.Value, expr.op()) for expr in final_operands + ), + output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( + output_type + ), + ).to_expr() diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 29df7b08bf1a..03ec72f4e44b 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -35,7 +35,6 @@ from bigframes.core.compile.ibis_compiler.scalar_op_compiler import ( scalar_op_compiler, # TODO(tswast): avoid import of 
variables ) -from bigframes.operations.googlesql import CallingConvention _ZERO = typing.cast(ibis_types.NumericValue, ibis_types.literal(0)) _NAN = typing.cast(ibis_types.NumericValue, ibis_types.literal(np.nan)) @@ -1891,41 +1890,6 @@ def case_when_op(*cases_and_outputs: ibis_types.Value) -> ibis_types.Value: return case_val.end() # type: ignore -@scalar_op_compiler.register_nary_op(ops.GoogleSqlScalarOp, pass_op=True) -def googlesql_scalar_op_impl(*operands: ibis_types.Value, op: ops.GoogleSqlScalarOp): - final_operands = [] - arg_templates = [] - for i, operand in enumerate(operands): - if i < len(op.args): - arg_spec = op.args[i] - else: - assert op.args[-1].is_vararg, ( - f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" - ) - arg_spec = op.args[-1] - if operand.op().omitted: - assert arg_spec.optional, f"Argument omitted, but not optional" - continue - - target_idx = len(final_operands) - final_operands.append(operand) - if arg_spec.arg_name: - arg_templates.append(f"{arg_spec.arg_name} => {{{target_idx}}}") - else: - arg_templates.append(f"{{{target_idx}}}") - args_template = ", ".join(arg_templates) - sql_template = f"{op.sql_name}({args_template})" - return ibis_generic.SqlScalar( - sql_template, - values=tuple( - typing.cast(ibis_generic.Value, expr.op()) for expr in final_operands - ), - output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype( - op.output_type() - ), - ).to_expr() - - @scalar_op_compiler.register_nary_op(ops.SqlScalarOp, pass_op=True) def sql_scalar_op_impl(*operands: ibis_types.Value, op: ops.SqlScalarOp): return ibis_generic.SqlScalar( diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py index 7bba5cbce2b1..fc245ab3c242 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py @@ -92,7 
+92,7 @@ def _(self, expr: agg_exprs.WindowExpression) -> sge.Expression: def _(self, expr: ex.OpExpression) -> sge.Expression: inputs = tuple( TypedExpr(self.compile_expression(sub_expr), sub_expr.output_type) - if not isinstance(sub_expr, ex.Omitted) + if not isinstance(sub_expr, ex.OmittedArg) else TypedExpr(sge.Null, None, is_omitted=True) for sub_expr in expr.inputs ) diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 3b7809c2c338..bc328dfc4529 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -20,7 +20,6 @@ import bigframes.core.compile.sqlglot.expression_compiler as expression_compiler from bigframes import dtypes from bigframes import operations as ops -from bigframes.operations.googlesql import CallingConvention from bigframes.core.compile.sqlglot import sql, sqlglot_types from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr @@ -90,19 +89,20 @@ def _(*operands: TypedExpr, op: ops.GoogleSqlScalarOp) -> sge.Expression: if i < len(op.args): arg_spec = op.args[i] else: - assert op.args[-1].is_vararg, f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + assert op.args[-1].is_vararg, ( + f"Too many arguments, for {op.sql_name}, expected {len(op.args)}" + ) arg_spec = op.args[-1] if operand.is_omitted: assert arg_spec.optional, f"Argument omitted, but not optional" continue elif arg_spec.arg_name: - args.append( - sge.Kwarg(this=arg_spec.arg_name, expression=operand.expr) - ) + args.append(sge.Kwarg(this=arg_spec.arg_name, expression=operand.expr)) else: args.append(operand.expr) return sg.func(op.sql_name, *args) + @register_nary_op(ops.SqlScalarOp, pass_op=True) def _(*operands: TypedExpr, op: ops.SqlScalarOp) -> sge.Expression: return sg.parse_one( diff --git 
a/packages/bigframes/tests/system/small/engines/conftest.py b/packages/bigframes/tests/system/small/engines/conftest.py index cea505dd28a6..f094edcc4159 100644 --- a/packages/bigframes/tests/system/small/engines/conftest.py +++ b/packages/bigframes/tests/system/small/engines/conftest.py @@ -43,22 +43,38 @@ def fake_session() -> Generator[bigframes.Session, None, None]: with bigframes.core.global_session._GlobalSessionContext(session): yield session +@pytest.fixture(scope="session") +def pyarrow_engine(): + return local_scan_executor.LocalScanExecutor() + +@pytest.fixture(scope="session") +def polars_engine(): + return polars_executor.PolarsExecutor() + +@pytest.fixture(scope="session") +def bq_engine(bigquery_client): + publisher = events.Publisher() + return direct_gbq_execution.DirectGbqExecutor( + bigquery_client, compiler="ibis", publisher=publisher + ) + +@pytest.fixture(scope="session") +def sqlglot_engine(bigquery_client): + publisher = events.Publisher() + return direct_gbq_execution.DirectGbqExecutor( + bigquery_client, compiler="sqlglot", publisher=publisher + ) @pytest.fixture(scope="session", params=["pyarrow", "polars", "bq", "bq-sqlglot"]) -def engine(request, bigquery_client: bigquery.Client) -> semi_executor.SemiExecutor: +def engine(request, pyarrow_engine, polars_engine, bq_engine, sqlglot_engine) -> semi_executor.SemiExecutor: if request.param == "pyarrow": - return local_scan_executor.LocalScanExecutor() + return pyarrow_engine if request.param == "polars": - return polars_executor.PolarsExecutor() - publisher = events.Publisher() + return polars_engine if request.param == "bq": - return direct_gbq_execution.DirectGbqExecutor( - bigquery_client, publisher=publisher - ) + return bq_engine if request.param == "bq-sqlglot": - return direct_gbq_execution.DirectGbqExecutor( - bigquery_client, compiler="sqlglot", publisher=publisher - ) + return sqlglot_engine raise ValueError(f"Unrecognized param: {request.param}") diff --git 
a/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py b/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py new file mode 100644 index 000000000000..803d73fe7e7e --- /dev/null +++ b/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import pytest + +import bigframes.dtypes +import bigframes.operations.googlesql as gsql_ops +from bigframes.core import array_value, expression +from bigframes.session import polars_executor +from bigframes.testing.engine_utils import assert_equivalence_execution + +polars = pytest.importorskip("polars") + +# Polars used as reference as it's fast and local. Generally though, prefer gbq engine where they disagree.
+REFERENCE_ENGINE = polars_executor.PolarsExecutor() + + +def test_engines_googlesql_st_area(scalars_array_value: array_value.ArrayValue, bq_engine, sqlglot_engine): + expr = gsql_ops.ST_AREA.as_expr("geography_col") + + arr, _ = scalars_array_value.compute_values([expr]) + + assert_equivalence_execution(arr.node, bq_engine, sqlglot_engine) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py index 0f603978212e..4aad2dfa3153 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_geo.py @@ -13,10 +13,10 @@ # limitations under the License. import pytest +from shapely.geometry import LineString # type: ignore import bigframes.bigquery as bbq import bigframes.geopandas as gpd -from shapely.geometry import LineString # type: ignore pytest.importorskip("pytest_snapshot") @@ -45,7 +45,9 @@ def test_st_regionstats_without_optional_args(compiler_session, snapshot): def test_st_simplify(compiler_session, snapshot): - geos = gpd.GeoSeries([LineString([(0, 0), (1, 1), (2, 0)])], session=compiler_session) + geos = gpd.GeoSeries( + [LineString([(0, 0), (1, 1), (2, 0)])], session=compiler_session + ) result = bbq.st_simplify( geos, tolerance_meters=123.125, diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py index 6406c55e3e3d..6960953171b9 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py @@ -190,9 +190,7 @@ class Impure(Value): @public class Omitted(Value): - @property - def omitted(self) -> bool: - return True + pass @public From 0469052542b54caafe82752da4aaac9d28436897 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: 
Tue, 12 May 2026 18:28:54 +0000 Subject: [PATCH 08/11] lint --- .../core/compile/ibis_compiler/ibis_compiler.py | 10 +++++++--- .../core/compile/ibis_compiler/scalar_op_compiler.py | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py index a9e61a7de3d5..938759ae1814 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py @@ -22,7 +22,6 @@ import bigframes_vendored.ibis.expr.api as ibis_api import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes import bigframes_vendored.ibis.expr.types as ibis_types -import bigframes.core.rewrite.schema_binding as schema_binding import bigframes.core.compile.compiled as compiled import bigframes.core.compile.concat as concat_impl @@ -31,6 +30,7 @@ import bigframes.core.nodes as nodes import bigframes.core.ordering as bf_ordering import bigframes.core.rewrite as rewrites +import bigframes.core.rewrite.schema_binding as schema_binding from bigframes import dtypes, operations from bigframes.core import bq_data, expression, pyarrow_utils from bigframes.core.logging import data_types as data_type_logger @@ -62,7 +62,9 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: encoded_type_refs = data_type_logger.encode_type_refs(result_node) # Have to bind schema as the final step before compilation. 
# Probably, should defer even further - result_node = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node)) + result_node = typing.cast( + nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node) + ) sql = compile_result_node(result_node) return configs.CompileResult( sql, @@ -78,7 +80,9 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: encoded_type_refs = data_type_logger.encode_type_refs(result_node) # Have to bind schema as the final step before compilation. # Probably, should defer even further - result_node = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node)) + result_node = typing.cast( + nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node) + ) sql = compile_result_node(result_node) # Return the ordering iff no extra columns are needed to define the row order if ordering is not None: diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py index c27d578880a8..bbf589b3a761 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py @@ -21,14 +21,14 @@ from typing import TYPE_CHECKING import bigframes_vendored.ibis -import bigframes_vendored.ibis.expr.types as ibis_types import bigframes_vendored.ibis.expr.operations.generic as ibis_generic +import bigframes_vendored.ibis.expr.types as ibis_types import bigframes.core.compile.ibis_types import bigframes.core.expression as ex from bigframes.core import agg_expressions, ordering -from bigframes.operations import numeric_ops from bigframes.operations import googlesql as gsql_ops +from bigframes.operations import numeric_ops if TYPE_CHECKING: import bigframes.operations as ops @@ -95,7 +95,9 @@ def _( for sub_expr in expression.inputs ] if isinstance(expression.op, 
gsql_ops.GoogleSqlScalarOp): - return googlesql_scalar_op_impl(*inputs, op=expression.op, output_type=expression.output_type) + return googlesql_scalar_op_impl( + *inputs, op=expression.op, output_type=expression.output_type + ) return self.compile_row_op(expression.op, inputs) @compile_expression.register @@ -292,7 +294,9 @@ def isfinite(arg): return arg.isinf().negate() & arg.isnan().negate() -def googlesql_scalar_op_impl(*operands: ibis_types.Value, op: ops.GoogleSqlScalarOp, output_type): +def googlesql_scalar_op_impl( + *operands: ibis_types.Value, op: ops.GoogleSqlScalarOp, output_type +): final_operands = [] arg_templates = [] for i, operand in enumerate(operands): From 386151ee7b8414f8b583f3e4ae6074fa856aee3d Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 18:38:59 +0000 Subject: [PATCH 09/11] fix --- packages/bigframes/bigframes/core/expression.py | 2 +- .../bigframes_vendored/ibis/expr/operations/generic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/bigframes/core/expression.py b/packages/bigframes/bigframes/core/expression.py index ce2b9d9cebe7..e6fa983e9085 100644 --- a/packages/bigframes/bigframes/core/expression.py +++ b/packages/bigframes/bigframes/core/expression.py @@ -365,7 +365,7 @@ def output_type(self) -> dtypes.ExpressionType: @dataclasses.dataclass(frozen=True) -class Omitted(Expression): +class OmittedArg(Expression): """Represents an omitted optional arg used calling a function.""" @property diff --git a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py index 6960953171b9..cc0caf21b2e1 100644 --- a/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py +++ b/packages/bigframes/third_party/bigframes_vendored/ibis/expr/operations/generic.py @@ -189,7 +189,7 @@ class Impure(Value): @public -class Omitted(Value): +class OmittedArg(Value): 
pass From 7f07131f7d86b3c6b0b81c4ab78ed3008be5a5c0 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 18:46:26 +0000 Subject: [PATCH 10/11] fixes --- .../core/compile/ibis_compiler/scalar_op_compiler.py | 2 +- .../bigframes/tests/system/small/engines/conftest.py | 9 ++++++++- .../tests/system/small/engines/test_googlesql_ops.py | 4 +++- .../core/compile/sqlglot/expressions/test_geo_ops.py | 1 - 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py index bbf589b3a761..66ff91ca6976 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py @@ -103,7 +103,7 @@ def _( @compile_expression.register def _( self, - expression: ex.Omitted, + expression: ex.OmittedArg, bindings: typing.Dict[str, ibis_types.Value], ) -> ibis_types.Value: return bigframes_vendored.ibis.omitted() diff --git a/packages/bigframes/tests/system/small/engines/conftest.py b/packages/bigframes/tests/system/small/engines/conftest.py index f094edcc4159..e930e98ef26c 100644 --- a/packages/bigframes/tests/system/small/engines/conftest.py +++ b/packages/bigframes/tests/system/small/engines/conftest.py @@ -43,14 +43,17 @@ def fake_session() -> Generator[bigframes.Session, None, None]: with bigframes.core.global_session._GlobalSessionContext(session): yield session + @pytest.fixture(scope="session") def pyarrow_engine(): return local_scan_executor.LocalScanExecutor() + @pytest.fixture(scope="session") def polars_engine(): return polars_executor.PolarsExecutor() + @pytest.fixture(scope="session") def bq_engine(bigquery_client): publisher = events.Publisher() @@ -58,6 +61,7 @@ def bq_engine(bigquery_client): bigquery_client, compiler="ibis", publisher=publisher ) + @pytest.fixture(scope="session") def 
sqlglot_engine(bigquery_client): publisher = events.Publisher() @@ -65,8 +69,11 @@ def sqlglot_engine(bigquery_client): bigquery_client, compiler="sqlglot", publisher=publisher ) + @pytest.fixture(scope="session", params=["pyarrow", "polars", "bq", "bq-sqlglot"]) -def engine(request, pyarrow_engine, polars_engine, bq_engine, sqlglot_engine) -> semi_executor.SemiExecutor: +def engine( + request, pyarrow_engine, polars_engine, bq_engine, sqlglot_engine +) -> semi_executor.SemiExecutor: if request.param == "pyarrow": return pyarrow_engine if request.param == "polars": diff --git a/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py b/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py index 803d73fe7e7e..2ea7070bbd30 100644 --- a/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py +++ b/packages/bigframes/tests/system/small/engines/test_googlesql_ops.py @@ -28,7 +28,9 @@ REFERENCE_ENGINE = polars_executor.PolarsExecutor() -def test_engines_googlesql_st_area(scalars_array_value: array_value.ArrayValue, bq_engine, sqlglot_engine): +def test_engines_googlesql_st_area( + scalars_array_value: array_value.ArrayValue, bq_engine, sqlglot_engine +): expr = gsql_ops.ST_AREA.as_expr("geography_col") arr, _ = scalars_array_value.compute_values([expr]) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py index 4dad2630d92e..85e374c76dbd 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py @@ -21,7 +21,6 @@ pytest.importorskip("pytest_snapshot") - def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] From d2f9a7e8720c12ea27b75882cf0fd7e2bb50f724 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 12 May 2026 
19:08:18 +0000 Subject: [PATCH 11/11] mypy fixes --- .../bigframes/core/compile/ibis_compiler/scalar_op_compiler.py | 2 +- .../bigframes/core/compile/sqlglot/expression_compiler.py | 2 +- .../bigframes/core/compile/sqlglot/expressions/generic_ops.py | 2 +- packages/bigframes/bigframes/core/expression.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py index 66ff91ca6976..4108675bc11f 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/scalar_op_compiler.py @@ -297,7 +297,7 @@ def isfinite(arg): def googlesql_scalar_op_impl( *operands: ibis_types.Value, op: ops.GoogleSqlScalarOp, output_type ): - final_operands = [] + final_operands: list[ibis_types.Value] = [] arg_templates = [] for i, operand in enumerate(operands): if i < len(op.args): diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py index fc245ab3c242..d07cb2186fbc 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expression_compiler.py @@ -93,7 +93,7 @@ def _(self, expr: ex.OpExpression) -> sge.Expression: inputs = tuple( TypedExpr(self.compile_expression(sub_expr), sub_expr.output_type) if not isinstance(sub_expr, ex.OmittedArg) - else TypedExpr(sge.Null, None, is_omitted=True) + else TypedExpr(sge.Null(), None, is_omitted=True) for sub_expr in expr.inputs ) return self.compile_row_op(expr.op, inputs) diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py index bc328dfc4529..644e7bc365f7 100644 --- 
a/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -84,7 +84,7 @@ def _(expr: TypedExpr) -> sge.Expression: @register_nary_op(ops.GoogleSqlScalarOp, pass_op=True) def _(*operands: TypedExpr, op: ops.GoogleSqlScalarOp) -> sge.Expression: - args = [] + args: list[sge.Expression] = [] for i, operand in enumerate(operands): if i < len(op.args): arg_spec = op.args[i] diff --git a/packages/bigframes/bigframes/core/expression.py b/packages/bigframes/bigframes/core/expression.py index e6fa983e9085..6c27dfc120b6 100644 --- a/packages/bigframes/bigframes/core/expression.py +++ b/packages/bigframes/bigframes/core/expression.py @@ -392,7 +392,7 @@ def bind_refs( self, bindings: Mapping[ids.ColumnId, Expression], allow_partial_bindings: bool = False, - ) -> UnboundVariableExpression: + ) -> OmittedArg: return self def bind_variables(