From ed10ff7053623b2a3124408517ef1e4ccd0a6175 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 11 May 2026 14:41:14 +0000
Subject: [PATCH 1/2] feat: Add Series-inspired methods to Expression

Added missing Series-inspired mathematical, aggregation, cumulative, and
utility methods to `leanframe.core.expression.Expression`. This aligns the
deferred execution capabilities of `col()` with concrete column operations
exposed by `Series`. Methods correctly return a new `Expression` object.

Added accompanying unit tests.

Co-authored-by: tswast <247555+tswast@users.noreply.github.com>
---
 leanframe/core/expression.py  | 106 ++++++++++++++++++++++++++++++++++
 tests/unit/test_expression.py |  65 +++++++++++++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 tests/unit/test_expression.py

diff --git a/leanframe/core/expression.py b/leanframe/core/expression.py
index a80c284..b0d794d 100644
--- a/leanframe/core/expression.py
+++ b/leanframe/core/expression.py
@@ -18,6 +18,8 @@
 
 import ibis
 import ibis.expr.types as ibis_types
+import pandas as pd
+from leanframe.core.dtypes import convert_pandas_to_ibis
 
 
 def col(name: str) -> Expression:
@@ -60,3 +62,107 @@ def __ne__(self, other) -> Expression:  # type: ignore[override]
 
     def __eq__(self, other) -> Expression:  # type: ignore[override]
         return Expression(self._data == getattr(other, "_data", other))
+
+    def lt(self, other) -> Expression:
+        """Return a boolean Expression showing whether each element is less than the other."""
+        return self < other
+
+    def gt(self, other) -> Expression:
+        """Return a boolean Expression showing whether each element is greater than the other."""
+        return self > other
+
+    def le(self, other) -> Expression:
+        """Return a boolean Expression showing whether each element is less than or equal to the other."""
+        return self <= other
+
+    def ge(self, other) -> Expression:
+        """Return a boolean Expression showing whether each element is greater than or equal to the other."""
+        return self >= other
+
+    def ne(self, other) -> Expression:
+        """Return a boolean Expression showing whether each element is not equal to the other."""
+        return self != other
+
+    def eq(self, other) -> Expression:
+        """Return a boolean Expression showing whether each element is equal to the other."""
+        return self == other
+
+    def __round__(self, n=0) -> Expression:
+        return Expression(self._data.round(n))
+
+    def abs(self) -> Expression:
+        """Return an Expression with the absolute value of each element."""
+        return Expression(self._data.abs())
+
+    def all(self) -> Expression:
+        """Return whether all elements are True."""
+        return Expression(self._data.all())
+
+    def any(self) -> Expression:
+        """Return whether any element is True."""
+        return Expression(self._data.any())
+
+    def sum(self) -> Expression:
+        """Return the sum of the Expression."""
+        return Expression(self._data.sum())
+
+    def mean(self) -> Expression:
+        """Return the mean of the Expression."""
+        return Expression(self._data.mean())
+
+    def min(self) -> Expression:
+        """Return the minimum of the Expression."""
+        return Expression(self._data.min())
+
+    def max(self) -> Expression:
+        """Return the maximum of the Expression."""
+        return Expression(self._data.max())
+
+    def std(self) -> Expression:
+        """Return the standard deviation of the Expression."""
+        return Expression(self._data.std())
+
+    def var(self) -> Expression:
+        """Return the variance of the Expression."""
+        return Expression(self._data.var())
+
+    def count(self) -> Expression:
+        """Return the number of non-null observations in the Expression."""
+        return Expression(self._data.count())
+
+    def cummax(self) -> Expression:
+        """Return an Expression with the cumulative maximum of each element."""
+        return Expression(self._data.cummax())
+
+    def cummin(self) -> Expression:
+        """Return an Expression with the cumulative minimum of each element."""
+        return Expression(self._data.cummin())
+
+    def cumprod(self) -> Expression:
+        """Return an Expression with the cumulative product of each element.
+
+        Note: This currently uses a `log().cumsum().exp()` workaround, which
+        may fail or return NaN if the data contains zeros or negative numbers.
+        """
+        return Expression(self._data.log().cumsum().exp().cast(self._data.type()))
+
+    def cumsum(self) -> Expression:
+        """Return an Expression with the cumulative sum of each element."""
+        return Expression(self._data.cumsum())
+
+    def diff(self) -> Expression:
+        """Return an Expression with the difference between each element and the previous one (null for the first element)."""
+        return Expression(self._data - self._data.lag())
+
+    def copy(self) -> Expression:
+        """Return a new Expression wrapping the same underlying data."""
+        return Expression(self._data)
+
+    def isin(self, values) -> Expression:
+        """Return a boolean Expression showing whether each element is contained in values."""
+        return Expression(self._data.isin(values))
+
+    def astype(self, dtype: pd.ArrowDtype) -> Expression:
+        """Cast the Expression to the specified pandas Arrow dtype."""
+        ibis_type = convert_pandas_to_ibis(dtype)
+        return Expression(self._data.cast(ibis_type))
diff --git a/tests/unit/test_expression.py b/tests/unit/test_expression.py
new file mode 100644
index 0000000..a48194e
--- /dev/null
+++ b/tests/unit/test_expression.py
@@ -0,0 +1,65 @@
+# Copyright 2025 Google LLC, LeanFrame Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for Expression methods."""
+
+import pandas as pd
+import pyarrow as pa
+from leanframe.core.expression import Expression, col
+
+def test_expression_comparison_methods():
+    a = col('a')
+    b = col('b')
+
+    assert isinstance(a.lt(b), Expression)
+    assert isinstance(a.gt(b), Expression)
+    assert isinstance(a.le(b), Expression)
+    assert isinstance(a.ge(b), Expression)
+    assert isinstance(a.ne(b), Expression)
+    assert isinstance(a.eq(b), Expression)
+
+def test_expression_math_methods():
+    a = col('a')
+
+    assert isinstance(round(a, 2), Expression)
+    assert isinstance(a.abs(), Expression)
+
+def test_expression_aggregation_methods():
+    a = col('a')
+
+    assert isinstance(a.all(), Expression)
+    assert isinstance(a.any(), Expression)
+    assert isinstance(a.sum(), Expression)
+    assert isinstance(a.mean(), Expression)
+    assert isinstance(a.min(), Expression)
+    assert isinstance(a.max(), Expression)
+    assert isinstance(a.std(), Expression)
+    assert isinstance(a.var(), Expression)
+    assert isinstance(a.count(), Expression)
+
+def test_expression_cumulative_methods():
+    a = col('a')
+
+    assert isinstance(a.cummax(), Expression)
+    assert isinstance(a.cummin(), Expression)
+    assert isinstance(a.cumprod(), Expression)
+    assert isinstance(a.cumsum(), Expression)
+    assert isinstance(a.diff(), Expression)
+
+def test_expression_utility_methods():
+    a = col('a')
+
+    assert isinstance(a.isin([1, 2, 3]), Expression)
+    assert isinstance(a.astype(pd.ArrowDtype(pa.int64())), Expression)
+    assert isinstance(a.copy(), Expression)

From 89b54ae19589711219ea1eb982e87df5a5c542f1 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 11 May 2026 15:40:32 +0000
Subject: [PATCH 2/2] feat: Add Series-inspired methods to Expression

Added missing Series-inspired mathematical, aggregation, cumulative, and
utility methods to `leanframe.core.expression.Expression`. This aligns the
deferred execution capabilities of `col()` with concrete column operations
exposed by `Series`. Methods correctly return a new `Expression` object.

Added robust unit tests that evaluate the deferred expressions over
an ephemeral duckdb backend using `assign` and `to_pandas`.

Co-authored-by: tswast <247555+tswast@users.noreply.github.com>
---
 tests/unit/test_expression.py | 164 ++++++++++++++++++++++++++--------
 1 file changed, 126 insertions(+), 38 deletions(-)

diff --git a/tests/unit/test_expression.py b/tests/unit/test_expression.py
index a48194e..cfb3f4a 100644
--- a/tests/unit/test_expression.py
+++ b/tests/unit/test_expression.py
@@ -12,54 +12,142 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Tests for Expression methods."""
+"""Tests that evaluate Expression methods against an in-memory DuckDB backend."""
 
+import ibis
 import pandas as pd
 import pyarrow as pa
-from leanframe.core.expression import Expression, col
+import pytest
+from leanframe import Session, col
 
-def test_expression_comparison_methods():
-    a = col('a')
-    b = col('b')
+@pytest.fixture
+def session():
+    """Return a Session connected to an in-memory duckdb."""
+    return Session(ibis.duckdb.connect())
 
-    assert isinstance(a.lt(b), Expression)
-    assert isinstance(a.gt(b), Expression)
-    assert isinstance(a.le(b), Expression)
-    assert isinstance(a.ge(b), Expression)
-    assert isinstance(a.ne(b), Expression)
-    assert isinstance(a.eq(b), Expression)
+@pytest.fixture
+def df(session):
+    """Return a test DataFrame."""
+    data = {
+        'a': [1, -2, 3, -4, 5],
+        'b': [5, 4, 3, 2, 1],
+    }
+    return session.read_ibis(ibis.memtable(data))
 
-def test_expression_math_methods():
-    a = col('a')
+def test_expression_comparison_methods(df):
+    result = df.assign(
+        lt=col('a').lt(col('b')),
+        gt=col('a').gt(col('b')),
+        le=col('a').le(col('b')),
+        ge=col('a').ge(col('b')),
+        ne=col('a').ne(col('b')),
+        eq=col('a').eq(col('b')),
+    ).to_pandas()
 
-    assert isinstance(round(a, 2), Expression)
-    assert isinstance(a.abs(), Expression)
+    pd.testing.assert_series_equal(
+        result['lt'], pd.Series([True, True, False, True, False], name='lt'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['gt'], pd.Series([False, False, False, False, True], name='gt'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['le'], pd.Series([True, True, True, True, False], name='le'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['ge'], pd.Series([False, False, True, False, True], name='ge'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['ne'], pd.Series([True, True, False, True, True], name='ne'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['eq'], pd.Series([False, False, True, False, False], name='eq'), check_dtype=False
+    )
 
-def test_expression_aggregation_methods():
-    a = col('a')
+def test_expression_math_methods(session):
+    df = session.read_ibis(ibis.memtable({'a': [1.5, -2.1, 3.8]}))
+    result = df.assign(
+        r=round(col('a')),
+        r1=round(col('a'), 1),
+        abs=col('a').abs()
+    ).to_pandas()
 
-    assert isinstance(a.all(), Expression)
-    assert isinstance(a.any(), Expression)
-    assert isinstance(a.sum(), Expression)
-    assert isinstance(a.mean(), Expression)
-    assert isinstance(a.min(), Expression)
-    assert isinstance(a.max(), Expression)
-    assert isinstance(a.std(), Expression)
-    assert isinstance(a.var(), Expression)
-    assert isinstance(a.count(), Expression)
+    pd.testing.assert_series_equal(
+        result['r'], pd.Series([2.0, -2.0, 4.0], name='r'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['r1'], pd.Series([1.5, -2.1, 3.8], name='r1'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['abs'], pd.Series([1.5, 2.1, 3.8], name='abs'), check_dtype=False
+    )
 
-def test_expression_cumulative_methods():
-    a = col('a')
+def test_expression_aggregation_methods(session):
+    df = session.read_ibis(ibis.memtable({'a': [1, 2, 3], 'b': [True, False, True]}))
+    result = df.assign(
+        all_b=col('b').all(),
+        any_b=col('b').any(),
+        sum_a=col('a').sum(),
+        mean_a=col('a').mean(),
+        min_a=col('a').min(),
+        max_a=col('a').max(),
+        count_a=col('a').count()
+    ).to_pandas()
 
-    assert isinstance(a.cummax(), Expression)
-    assert isinstance(a.cummin(), Expression)
-    assert isinstance(a.cumprod(), Expression)
-    assert isinstance(a.cumsum(), Expression)
-    assert isinstance(a.diff(), Expression)
+    pd.testing.assert_series_equal(
+        result['all_b'], pd.Series([False, False, False], name='all_b'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['any_b'], pd.Series([True, True, True], name='any_b'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['sum_a'], pd.Series([6, 6, 6], name='sum_a'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['mean_a'], pd.Series([2.0, 2.0, 2.0], name='mean_a'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['min_a'], pd.Series([1, 1, 1], name='min_a'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['max_a'], pd.Series([3, 3, 3], name='max_a'), check_dtype=False
+    )
 
-def test_expression_utility_methods():
-    a = col('a')
+def test_expression_cumulative_methods(session):
+    df = session.read_ibis(ibis.memtable({'a': [1, 2, 3]}))
+    result = df.assign(
+        cummax=col('a').cummax(),
+        cummin=col('a').cummin(),
+        cumsum=col('a').cumsum(),
+        cumprod=col('a').cumprod(),
+        diff=col('a').diff()
+    ).to_pandas()
 
-    assert isinstance(a.isin([1, 2, 3]), Expression)
-    assert isinstance(a.astype(pd.ArrowDtype(pa.int64())), Expression)
-    assert isinstance(a.copy(), Expression)
+    pd.testing.assert_series_equal(
+        result['cummax'], pd.Series([1, 2, 3], name='cummax'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['cummin'], pd.Series([1, 1, 1], name='cummin'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['cumsum'], pd.Series([1, 3, 6], name='cumsum'), check_dtype=False
+    )
+    # cumprod uses a log/exp workaround, so round before comparing: 1, 1*2, 1*2*3 = 1, 2, 6.
+    pd.testing.assert_series_equal(
+        result['cumprod'].round(), pd.Series([1.0, 2.0, 6.0], name='cumprod'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['diff'], pd.Series([float('nan'), 1.0, 1.0], name='diff'), check_dtype=False
+    )
+
+def test_expression_utility_methods(df):
+    result = df.assign(
+        isin=col('a').isin([1, 3]),
+        cast=col('a').astype(pd.ArrowDtype(pa.float64()))
+    ).to_pandas()
+
+    pd.testing.assert_series_equal(
+        result['isin'], pd.Series([True, False, True, False, False], name='isin'), check_dtype=False
+    )
+    pd.testing.assert_series_equal(
+        result['cast'], pd.Series([1.0, -2.0, 3.0, -4.0, 5.0], name='cast'), check_dtype=False
+    )