Skip to content
807 changes: 641 additions & 166 deletions packages/bigframes/bigframes/core/bytecode.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,25 @@ def numeric_to_datetime(
)


@scalar_op_compiler.register_unary_op(ops.coerce_to_bool_op)
def coerce_to_bool_op_impl(x: ibis_types.Value):
    """Compile ``coerce_to_bool_op`` into an ibis boolean expression.

    Booleans pass through unchanged, numerics are compared against zero, and
    strings, binary values and arrays are tested for a non-zero length. Every
    other type is reduced to a not-null check. Nulls in the resulting
    expression are filled with ``False``.
    """
    dtype = x.type()

    if dtype.is_boolean():
        truthy = x
    elif dtype.is_numeric():
        truthy = x != 0  # type: ignore
    elif (
        dtype.is_string()
        or dtype.is_binary()
        or isinstance(dtype, ibis_dtypes.Array)
    ):
        # Sized values count as true only when they are non-empty.
        truthy = x.length() > 0  # type: ignore
    else:
        truthy = x.notnull()

    # A null input is reported as False rather than propagated as null.
    return truthy.fill_null(False)  # type: ignore


@scalar_op_compiler.register_unary_op(ops.AsTypeOp, pass_op=True)
def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):
to_type = bigframes.core.compile.ibis_types.bigframes_dtype_to_ibis_dtype(
Expand Down
22 changes: 22 additions & 0 deletions packages/bigframes/bigframes/core/compile/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,28 @@ def _(
) -> pl.Expr:
return pl.when(condition).then(original).otherwise(otherwise)

@compile_op.register(gen_ops.CoerceToBoolOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``CoerceToBoolOp`` into a polars boolean expression.

    The source dtype is looked up in ``self._expr_types`` by ``id(input)``.
    When no dtype is recorded the input is simply cast to ``pl.Boolean``;
    otherwise the truthiness test is picked per dtype. Nulls in the result
    are always filled with ``False``.
    """
    assert isinstance(op, gen_ops.CoerceToBoolOp)
    source_type = self._expr_types.get(id(input))

    if source_type is None:
        # No dtype recorded for this expression: fall back to a plain cast.
        truthy = input.cast(pl.Boolean)
    elif source_type == bigframes.dtypes.BOOL_DTYPE:
        truthy = input
    elif bigframes.dtypes.is_numeric(source_type):
        truthy = input != 0
    elif source_type == bigframes.dtypes.BYTES_DTYPE:
        # NOTE(review): tested ahead of the string-like branch, presumably
        # because is_string_like also matches bytes -- confirm in dtypes.
        truthy = input.bin.size() > 0
    elif bigframes.dtypes.is_string_like(source_type):
        truthy = input.str.len_chars() > 0
    elif bigframes.dtypes.is_array_like(source_type):
        truthy = input.list.len() > 0
    else:
        truthy = input.is_not_null()

    return truthy.fill_null(False)

@compile_op.register(gen_ops.AsTypeOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
assert isinstance(op, gen_ops.AsTypeOp)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,28 @@ def _(expr: TypedExpr) -> sge.Expression:
)


@register_unary_op(ops.coerce_to_bool_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``coerce_to_bool_op`` into a sqlglot boolean expression.

    Booleans pass through, numerics are compared with ``<> 0``, string-like
    values use ``LENGTH(...) > 0`` and array-like values use
    ``ARRAY_LENGTH(...) > 0``. Any other dtype becomes an ``IS NOT NULL``
    check. The result is wrapped in ``COALESCE(..., FALSE)``.
    """
    dtype = expr.dtype
    operand = expr.expr

    def non_empty(length_fn: str) -> sge.Expression:
        # Build `<length_fn>(operand) > 0`.
        return sge.GT(
            this=sge.func(length_fn, operand),
            expression=sge.convert(0),
        )

    if dtype == dtypes.BOOL_DTYPE:
        truthy = operand
    elif dtypes.is_numeric(dtype):
        truthy = sge.NEQ(this=operand, expression=sge.convert(0))
    elif dtypes.is_string_like(dtype):
        truthy = non_empty("LENGTH")
    elif dtypes.is_array_like(dtype):
        truthy = non_empty("ARRAY_LENGTH")
    else:
        wrapped = sge.paren(operand, copy=False)
        not_null = sg.not_(sge.Null(), copy=False)
        truthy = sge.Is(this=wrapped, expression=not_null)

    # Replace a null result with FALSE.
    return sge.Coalesce(this=truthy, expressions=[sge.convert(False)])


@register_ternary_op(ops.where_op)
def _(
original: TypedExpr, condition: TypedExpr, replacement: TypedExpr
Expand Down
10 changes: 9 additions & 1 deletion packages/bigframes/bigframes/core/py_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,13 @@
const,
deref,
)
from bigframes.operations import NUMPY_TO_BINOP, NUMPY_TO_OP, generic_ops, numeric_ops
from bigframes.operations import (
NUMPY_TO_BINOP,
NUMPY_TO_OP,
ScalarOp,
generic_ops,
numeric_ops,
)

_CALLABLE_TO_OP = {
**NUMPY_TO_OP,
Expand Down Expand Up @@ -365,6 +371,8 @@ def resolve_call(call: Call) -> Expression:
op = _CALLABLE_TO_OP[fn]
return OpExpression(op, call.inputs)
elif isinstance(callable, PyObject):
if isinstance(callable.value, ScalarOp):
return OpExpression(callable.value, call.inputs)
if callable.value in python_op_maps.PYTHON_TO_BIGFRAMES:
op = python_op_maps.PYTHON_TO_BIGFRAMES[callable.value] # type: ignore
return OpExpression(op, call.inputs)
Expand Down
7 changes: 6 additions & 1 deletion packages/bigframes/bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,12 @@ def is_clusterable(type_: ExpressionType) -> bool:

def is_bool_coercable(type_: ExpressionType) -> bool:
    """Return whether ``type_`` is accepted as an input to boolean coercion.

    ``None`` is accepted, as is any type for which ``is_numeric``,
    ``is_string_like`` or ``is_array_like`` returns true.
    """
    # TODO: Implement more bool coercions
    return (
        type_ is None
        or is_numeric(type_)
        or is_string_like(type_)
        or is_array_like(type_)
    )


BIGFRAMES_STRING_TO_BIGFRAMES: Dict[DtypeString, Dtype] = {
Expand Down
4 changes: 4 additions & 0 deletions packages/bigframes/bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,15 @@
from bigframes.operations.generic_ops import (
AsTypeOp,
CaseWhenOp,
CoerceToBoolOp,
IsInOp,
MapOp,
RowKey,
SqlScalarOp,
case_when_op,
clip_op,
coalesce_op,
coerce_to_bool_op,
fillna_op,
hash_op,
invert_op,
Expand Down Expand Up @@ -255,6 +257,8 @@
"maximum_op",
"minimum_op",
"notnull_op",
"CoerceToBoolOp",
"coerce_to_bool_op",
"RowKey",
"SqlScalarOp",
"where_op",
Expand Down
15 changes: 15 additions & 0 deletions packages/bigframes/bigframes/operations/generic_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,21 @@
)
notnull_op = NotNullOp()


# Semantics match Python's truth value testing (truthy and falsey objects).
# See https://docs.python.org/3/library/stdtypes.html#truth-value-testing
#
# The type signature pairs the dtypes.is_bool_coercable predicate with a
# fixed BOOL_DTYPE output type.
CoerceToBoolOp = base_ops.create_unary_op(
    name="coerce_to_bool",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_bool_coercable, dtypes.BOOL_DTYPE, description="coercable to bool"
    ),
)
CoerceToBoolOp.__doc__ = (
    "Coerce a value to a boolean, matching Python's truth value testing semantics "
    "(truthy/falsey). See https://docs.python.org/3/library/stdtypes.html#truth-value-testing"
)
# Shared instance of the op, used by the compilers and tests.
coerce_to_bool_op = CoerceToBoolOp()

HashOp = base_ops.create_unary_op(
name="hash",
type_signature=op_typing.FixedOutputType(
Expand Down
3 changes: 3 additions & 0 deletions packages/bigframes/bigframes/operations/python_op_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
array_ops,
bool_ops,
comparison_ops,
generic_ops,
numeric_ops,
string_ops,
)
Expand All @@ -47,6 +48,8 @@
operator.and_: bool_ops.and_op,
operator.or_: bool_ops.or_op,
operator.xor: bool_ops.xor_op,
operator.invert: generic_ops.invert_op,
operator.not_: generic_ops.invert_op,
## math
math.log: numeric_ops.ln_op,
math.log10: numeric_ops.log10_op,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,39 @@ def test_engines_notnull_op(scalars_array_value: array_value.ArrayValue, engine)
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)


@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
def test_engines_coerce_to_bool_op_scalars(
    scalars_array_value: array_value.ArrayValue, engine
):
    """Check coerce_to_bool_op on scalar columns against the reference engine."""
    column_names = (
        "bool_col",
        "int64_col",
        "float64_col",
        "string_col",
        "bytes_col",
    )
    coerced = [
        ops.coerce_to_bool_op.as_expr(expression.deref(column))
        for column in column_names
    ]
    arr, _ = scalars_array_value.compute_values(coerced)

    assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)


@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
def test_engines_coerce_to_bool_op_arrays(
    arrays_array_value: array_value.ArrayValue, engine
):
    """Check coerce_to_bool_op on list columns against the reference engine."""
    column_names = (
        "int_list_col",
        "bool_list_col",
        "float_list_col",
        "string_list_col",
    )
    coerced = [
        ops.coerce_to_bool_op.as_expr(expression.deref(column))
        for column in column_names
    ]
    arr, _ = arrays_array_value.compute_values(coerced)

    assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)


@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
def test_engines_invert_op(scalars_array_value: array_value.ArrayValue, engine):
arr, _ = scalars_array_value.compute_values(
Expand Down
Loading
Loading