Skip to content
8 changes: 3 additions & 5 deletions packages/bigframes/bigframes/bigquery/_operations/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def st_area(
bigframes.pandas.Series:
Series of float representing the areas.
"""
series = series._apply_unary_op(ops.geo_area_op)
series = series._apply_nary_op(ops.googlesql.ST_AREA, [])
series.name = None
return series

Expand Down Expand Up @@ -223,7 +223,7 @@ def st_centroid(
bigframes.pandas.Series:
A series of geography objects representing the centroids.
"""
series = series._apply_unary_op(ops.geo_st_centroid_op)
series = series._apply_nary_op(ops.googlesql.ST_CENTROID, [])
series.name = None
return series

Expand Down Expand Up @@ -753,6 +753,4 @@ def st_simplify(
Returns:
a Series containing the simplified GEOGRAPHY data.
"""
return geography._apply_unary_op(
ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters)
)
return geography._apply_nary_op(ops.googlesql.ST_SIMPLIFY, [tolerance_meters])
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import bigframes.core.expression
from bigframes import dtypes
from bigframes import operations as ops
from bigframes.operations import googlesql


def rand() -> bigframes.core.col.Expression:
Expand Down Expand Up @@ -47,12 +48,9 @@ def rand() -> bigframes.core.col.Expression:
:func:`~bigframes.pandas.DataFrame.assign` and other methods. See
:func:`bigframes.pandas.col`.
"""
op = ops.SqlScalarOp(
_output_type=dtypes.FLOAT_DTYPE,
sql_template="RAND()",
is_deterministic=False,
return bigframes.core.col.Expression(
bigframes.core.expression.OpExpression(googlesql.RAND, ())
)
return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ()))


def hparam_range(min: float, max: float) -> bigframes.core.col.Expression:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import bigframes.core.nodes as nodes
import bigframes.core.ordering as bf_ordering
import bigframes.core.rewrite as rewrites
import bigframes.core.rewrite.schema_binding as schema_binding
from bigframes import dtypes, operations
from bigframes.core import bq_data, expression, pyarrow_utils
from bigframes.core.logging import data_types as data_type_logger
Expand Down Expand Up @@ -59,6 +60,11 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
if request.sort_rows:
result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node))
encoded_type_refs = data_type_logger.encode_type_refs(result_node)
# Have to bind schema as the final step before compilation.
# Probably, should defer even further
result_node = typing.cast(
nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node)
)
sql = compile_result_node(result_node)
return configs.CompileResult(
sql,
Expand All @@ -72,6 +78,11 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node))
result_node = cast(nodes.ResultNode, rewrites.defer_selection(result_node))
encoded_type_refs = data_type_logger.encode_type_refs(result_node)
# Have to bind schema as the final step before compilation.
# Probably, should defer even further
result_node = typing.cast(
nodes.ResultNode, schema_binding.bind_schema_to_tree(result_node)
)
sql = compile_result_node(result_node)
# Return the ordering iff no extra columns are needed to define the row order
if ordering is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@


# Geo Ops
@register_unary_op(ops.geo_area_op)
def geo_area_op_impl(x: ibis_types.Value):
return cast(ibis_types.GeoSpatialValue, x).area()


@register_unary_op(ops.geo_st_astext_op)
def geo_st_astext_op_impl(x: ibis_types.Value):
return cast(ibis_types.GeoSpatialValue, x).as_text()
Expand All @@ -55,11 +50,6 @@ def geo_st_buffer_op_impl(x: ibis_types.Value, op: ops.GeoStBufferOp):
)


@register_unary_op(ops.geo_st_centroid_op, pass_op=False)
def geo_st_centroid_op_impl(x: ibis_types.Value):
return cast(ibis_types.GeoSpatialValue, x).centroid()


@register_unary_op(ops.geo_st_convexhull_op, pass_op=False)
def geo_st_convexhull_op_impl(x: ibis_types.Value):
return st_convexhull(x)
Expand Down Expand Up @@ -132,12 +122,6 @@ def geo_st_regionstats_op_impl(
).to_expr()


@register_unary_op(ops.GeoStSimplifyOp, pass_op=True)
def st_simplify_op_impl(x: ibis_types.Value, op: ops.GeoStSimplifyOp):
x = cast(ibis_types.GeoSpatialValue, x)
return st_simplify(x, op.tolerance_meters)


@register_unary_op(ops.geo_x_op)
def geo_x_op_impl(x: ibis_types.Value):
return cast(ibis_types.GeoSpatialValue, x).x()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
from typing import TYPE_CHECKING

import bigframes_vendored.ibis
import bigframes_vendored.ibis.expr.operations.generic as ibis_generic
import bigframes_vendored.ibis.expr.types as ibis_types

import bigframes.core.compile.ibis_types
import bigframes.core.expression as ex
from bigframes.core import agg_expressions, ordering
from bigframes.operations import googlesql as gsql_ops
from bigframes.operations import numeric_ops

if TYPE_CHECKING:
Expand Down Expand Up @@ -92,8 +94,20 @@ def _(
self.compile_expression(sub_expr, bindings)
for sub_expr in expression.inputs
]
if isinstance(expression.op, gsql_ops.GoogleSqlScalarOp):
return googlesql_scalar_op_impl(
*inputs, op=expression.op, output_type=expression.output_type
)
return self.compile_row_op(expression.op, inputs)

@compile_expression.register
def _(
self,
expression: ex.OmittedArg,
bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
return bigframes_vendored.ibis.omitted()

def compile_row_op(
self, op: ops.RowOp, inputs: typing.Sequence[ibis_types.Value]
) -> ibis_types.Value:
Expand Down Expand Up @@ -278,3 +292,39 @@ def isnanornull(arg):
@scalar_op_compiler.register_unary_op(numeric_ops.isfinite_op)
def isfinite(arg):
return arg.isinf().negate() & arg.isnan().negate()


def googlesql_scalar_op_impl(
*operands: ibis_types.Value, op: ops.GoogleSqlScalarOp, output_type
):
final_operands: list[ibis_types.Value] = []
arg_templates = []
for i, operand in enumerate(operands):
if i < len(op.args):
arg_spec = op.args[i]
else:
assert op.args[-1].is_vararg, (
f"Too many arguments, for {op.sql_name}, expected {len(op.args)}"
)
arg_spec = op.args[-1]
if isinstance(operand.op(), ibis_generic.OmittedArg):
assert arg_spec.optional, f"Argument omitted, but not optional"
continue

target_idx = len(final_operands)
final_operands.append(operand)
if arg_spec.arg_name:
arg_templates.append(f"{arg_spec.arg_name} => {{{target_idx}}}")
else:
arg_templates.append(f"{{{target_idx}}}")
args_template = ", ".join(arg_templates)
sql_template = f"{op.sql_name}({args_template})"
return ibis_generic.SqlScalar(
sql_template,
values=tuple(
typing.cast(ibis_generic.Value, expr.op()) for expr in final_operands
),
output_type=bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype(
output_type
),
).to_expr()
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,10 @@ def _(self, expr: agg_exprs.WindowExpression) -> sge.Expression:

@compile_expression.register
def _(self, expr: ex.OpExpression) -> sge.Expression:
# Non-recursively compiles the children scalar expressions.
inputs = tuple(
TypedExpr(self.compile_expression(sub_expr), sub_expr.output_type)
if not isinstance(sub_expr, ex.OmittedArg)
else TypedExpr(sge.Null(), None, is_omitted=True)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing that this could cause problems at some point, but hopefully we'll catch that when we encounter such a case.

for sub_expr in expr.inputs
)
return self.compile_row_op(expr.op, inputs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,27 @@ def _(expr: TypedExpr) -> sge.Expression:
return sge.BitwiseNot(this=sge.paren(expr.expr))


@register_nary_op(ops.GoogleSqlScalarOp, pass_op=True)
def _(*operands: TypedExpr, op: ops.GoogleSqlScalarOp) -> sge.Expression:
args: list[sge.Expression] = []
for i, operand in enumerate(operands):
if i < len(op.args):
arg_spec = op.args[i]
else:
assert op.args[-1].is_vararg, (
f"Too many arguments, for {op.sql_name}, expected {len(op.args)}"
)
arg_spec = op.args[-1]
if operand.is_omitted:
assert arg_spec.optional, f"Argument omitted, but not optional"
continue
elif arg_spec.arg_name:
args.append(sge.Kwarg(this=arg_spec.arg_name, expression=operand.expr))
else:
args.append(operand.expr)
return sg.func(op.sql_name, *args)


@register_nary_op(ops.SqlScalarOp, pass_op=True)
def _(*operands: TypedExpr, op: ops.SqlScalarOp) -> sge.Expression:
return sg.parse_one(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@
register_binary_op = expression_compiler.expression_compiler.register_binary_op


@register_unary_op(ops.geo_area_op)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("ST_AREA", expr.expr)


@register_unary_op(ops.geo_st_astext_op)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("ST_ASTEXT", expr.expr)
Expand All @@ -50,11 +45,6 @@ def _(expr: TypedExpr, op: ops.GeoStBufferOp) -> sge.Expression:
)


@register_unary_op(ops.geo_st_centroid_op)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("ST_CENTROID", expr.expr)


@register_unary_op(ops.geo_st_convexhull_op)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("ST_CONVEXHULL", expr.expr)
Expand Down Expand Up @@ -97,15 +87,6 @@ def _(
return sge.func("ST_REGIONSTATS", *args)


@register_unary_op(ops.GeoStSimplifyOp, pass_op=True)
def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression:
return sge.func(
"ST_SIMPLIFY",
expr.expr,
sge.convert(op.tolerance_meters),
)


@register_unary_op(ops.geo_x_op)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("ST_X", expr.expr)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ class TypedExpr:

expr: sge.Expression
dtype: dtypes.ExpressionType

# kludge to support optional args in argument lists
is_omitted: bool = False
50 changes: 50 additions & 0 deletions packages/bigframes/bigframes/core/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,56 @@ def output_type(self) -> dtypes.ExpressionType:
return self.dtype


@dataclasses.dataclass(frozen=True)
class OmittedArg(Expression):
"""Represents an omitted optional arg used calling a function."""

@property
def free_variables(self) -> typing.Tuple[Hashable, ...]:
return ()

@property
def is_const(self) -> bool:
return True

@property
def column_references(self) -> typing.Tuple[ids.ColumnId, ...]:
return ()

@property
def is_resolved(self):
return True # vacuously

@property
def output_type(self) -> dtypes.ExpressionType:
return None

def bind_refs(
self,
bindings: Mapping[ids.ColumnId, Expression],
allow_partial_bindings: bool = False,
) -> OmittedArg:
return self

def bind_variables(
self,
bindings: Mapping[Hashable, Expression],
allow_partial_bindings: bool = False,
) -> Expression:
return self

@property
def is_bijective(self) -> bool:
return True

@property
def is_identity(self) -> bool:
return True

def transform_children(self, t: Callable[[Expression], Expression]) -> Expression:
return self


@dataclasses.dataclass(frozen=True)
class OpExpression(Expression):
"""An expression representing a scalar operation applied to 1 or more argument sub-expressions."""
Expand Down
2 changes: 1 addition & 1 deletion packages/bigframes/bigframes/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def buffer(self: GeoSeries, distance: float) -> bigframes.series.Series: # type

@property
def centroid(self: GeoSeries) -> bigframes.series.Series: # type: ignore
return self._apply_unary_op(ops.geo_st_centroid_op)
return self._apply_nary_op(ops.googlesql.ST_CENTROID, [])

@property
def convex_hull(self: GeoSeries) -> bigframes.series.Series: # type: ignore
Expand Down
8 changes: 3 additions & 5 deletions packages/bigframes/bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,8 @@
GeoStLengthOp,
GeoStRegionStatsOp,
GeoStSimplifyOp,
geo_area_op,
geo_st_astext_op,
geo_st_boundary_op,
geo_st_centroid_op,
geo_st_convexhull_op,
geo_st_difference_op,
geo_st_geogfromtext_op,
Expand Down Expand Up @@ -231,6 +229,7 @@
timestamp_add_op,
timestamp_sub_op,
)
from bigframes.operations.googlesql import GoogleSqlScalarOp

__all__ = [
# Base ops
Expand Down Expand Up @@ -413,9 +412,7 @@
"euclidean_distance_op",
"manhattan_distance_op",
# Geo ops
"geo_area_op",
"geo_st_boundary_op",
"geo_st_centroid_op",
"geo_st_convexhull_op",
"geo_st_difference_op",
"geo_st_astext_op",
Expand All @@ -429,7 +426,6 @@
"GeoStDistanceOp",
"GeoStLengthOp",
"GeoStRegionStatsOp",
"GeoStSimplifyOp",
# AI ops
"AIClassify",
"AIGenerate",
Expand All @@ -446,4 +442,6 @@
"ToArrayOp",
"ArrayReduceOp",
"ArrayMapOp",
# GoogleSql
"GoogleSqlScalarOp",
]
Loading
Loading