feat: add DataFrame.resample and Series.resample

tswast · tswast · commit d38e16be5024 · 2025-10-30T21:35:52.000Z
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -1985,6 +1985,11 @@ def _generate_resample_label(
             Literal["epoch", "start", "start_day", "end", "end_day"],
         ] = "start_day",
     ) -> Block:
+        if not isinstance(rule, str):
+            raise NotImplementedError(
+                f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
+            )
+
         # Validate and resolve the index or column to use for grouping
         if on is None:
             if len(self.index_columns) == 0:
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -4256,10 +4256,12 @@ def _split(
         return [DataFrame(block) for block in blocks]
 
     @validations.requires_ordering()
-    def _resample(
+    def resample(
         self,
         rule: str,
         *,
+        closed: Optional[Literal["right", "left"]] = None,
+        label: Optional[Literal["right", "left"]] = None,
         on: blocks.Label = None,
         level: Optional[LevelsType] = None,
         origin: Union[
@@ -4269,7 +4271,7 @@ def _resample(
             Literal["epoch", "start", "start_day", "end", "end_day"],
         ] = "start_day",
     ) -> bigframes.core.groupby.DataFrameGroupBy:
-        """Internal function to support resample. Resample time-series data.
+        """Resample time-series data.
 
         **Examples:**
 
@@ -4285,7 +4287,7 @@ def _resample(
         Resample on a DataFrame with index:
 
         >>> df = bpd.DataFrame(data).set_index("timestamp_col")
-        >>> df._resample(rule="7s").min()
+        >>> df.resample(rule="7s").min()
                              int64_col  int64_too
         2021-01-01 12:59:55          0         10
         2021-01-01 13:00:02          2         12
@@ -4298,7 +4300,7 @@ def _resample(
         Resample with column and origin set to 'start':
 
         >>> df = bpd.DataFrame(data)
-        >>> df._resample(rule="7s", on = "timestamp_col", origin="start").min()
+        >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min()
                              int64_col  int64_too
         2021-01-01 13:00:00          0         10
         2021-01-01 13:00:07          7         17
@@ -4311,6 +4313,14 @@ def _resample(
         Args:
             rule (str):
                 The offset string representing target conversion.
+            closed (Literal['right'] | Literal['left'] | None):
+                Which side of the bin interval is closed. The default is 'left' for
+                all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BA',
+                'BQE', and 'W' which all have a default of 'right'.
+            label (Literal['right'] | Literal['left'] | None):
+                Which bin edge to use as the bucket label. The default is 'left'
+                for all frequency offsets except for 'ME', 'YE', 'QE', 'BME',
+                'BA', 'BQE', and 'W' which all have a default of 'right'.
             on (str, default None):
                 For a DataFrame, column to use instead of index for resampling. Column
                 must be datetime-like.
@@ -4327,6 +4337,8 @@ def _resample(
         """
         block = self._block._generate_resample_label(
             rule=rule,
+            closed=closed,
+            label=label,
             on=on,
             level=level,
             origin=origin,
diff --git a/bigframes/series.py b/bigframes/series.py
@@ -2505,7 +2505,7 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series:
         )
 
     @validations.requires_ordering()
-    def _resample(
+    def resample(
         self,
         rule: str,
         *,
@@ -2531,7 +2531,7 @@ def _resample(
         ...     "int64_col": range(30),
         ... }
         >>> s = bpd.DataFrame(data).set_index("timestamp_col")
-        >>> s._resample(rule="7s", origin="epoch").min()
+        >>> s.resample(rule="7s", origin="epoch").min()
                              int64_col
         2021-01-01 12:59:56          0
         2021-01-01 13:00:03          3
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -5923,7 +5923,7 @@ def test_dataframe_explode_xfail(col_names):
         ),
     ],
 )
-def test__resample_with_column(
+def test_resample_with_column(
     scalars_df_index, scalars_pandas_df_index, on, rule, origin
 ):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
@@ -5943,30 +5943,51 @@ def test__resample_with_column(
     )
 
 
+@pytest.mark.parametrize("index_col", ["timestamp_col", "datetime_col"])
 @pytest.mark.parametrize(
-    ("append", "level", "col", "rule"),
+    ("index_append", "level"),
+    [(True, 1), (False, None), (False, 0)],
+)
+@pytest.mark.parametrize(
+    "rule",
     [
-        pytest.param(False, None, "timestamp_col", "100d"),
-        pytest.param(True, 1, "timestamp_col", "1200h"),
-        pytest.param(False, None, "datetime_col", "100d"),
+        # TODO(tswast): support Timedelta and DateOffset rules
+        "100d",
+        "1200h",
     ],
 )
-def test__resample_with_index(
-    scalars_df_index, scalars_pandas_df_index, append, level, col, rule
+@pytest.mark.parametrize("closed", ["left", "right", None])
+@pytest.mark.parametrize("label", ["left", "right", None])
+@pytest.mark.parametrize(
+    "origin",
+    ["epoch", "start", "start_day", "end", "end_day"],
+)
+def test_resample_with_index(
+    scalars_df_index,
+    scalars_pandas_df_index,
+    index_append,
+    level,
+    index_col,
+    rule,
+    closed,
+    origin,
+    label,
 ):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df_index = scalars_df_index.set_index(col, append=append)
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)
+    scalars_df_index = scalars_df_index.set_index(index_col, append=index_append)
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index(
+        index_col, append=index_append
+    )
     bf_result = (
         scalars_df_index[["int64_col", "int64_too"]]
-        ._resample(rule=rule, level=level)
+        .resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
         .min()
         .to_pandas()
     )
     pd_result = (
         scalars_pandas_df_index[["int64_col", "int64_too"]]
-        .resample(rule=rule, level=level)
+        .resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
         .min()
     )
     assert_pandas_df_equal(bf_result, pd_result)
@@ -6010,7 +6031,7 @@ def test__resample_with_index(
         ),
     ],
 )
-def test__resample_start_time(rule, origin, data):
+def test_resample_start_time(rule, origin, data):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     col = "timestamp_col"
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -4856,14 +4856,14 @@ def test_series_explode_null(data):
         pytest.param(True, "timestamp_col", "timestamp_col", "1YE"),
     ],
 )
-def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
+def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"]
     scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[
         "int64_col"
     ]
-    bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas()
+    bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas()
     pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min()
     pd.testing.assert_series_equal(bf_result, pd_result)
 
diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py
@@ -248,20 +248,24 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session):
         ),
     ],
 )
-def test__resample_with_index(unordered_session, rule, origin, data):
+def test_resample_with_index(unordered_session, rule, origin, data):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     col = "timestamp_col"
     scalars_df_index = bpd.DataFrame(data, session=unordered_session).set_index(col)
     scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
     scalars_pandas_df_index.index.name = None
 
-    bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
-
+    bf_result = scalars_df_index.resample(rule=rule, origin=origin).min()
     pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
 
+    assert isinstance(bf_result.index, bpd.DatetimeIndex)
+    assert isinstance(pd_result.index, pd.DatetimeIndex)
     pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
+        bf_result.to_pandas(),
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
     )
 
 
diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py
@@ -42,6 +42,17 @@ def test_dataframe_repr_with_uninitialized_object():
     assert "DataFrame" in got
 
 
+@pytest.mark.parametrize("rule", [pd.DateOffset(weeks=1), pd.Timedelta(hours=8)])
+def test_dataframe_rule_not_implememented(
+    monkeypatch: pytest.MonkeyPatch,
+    rule,
+):
+    dataframe = mocks.create_dataframe(monkeypatch)
+
+    with pytest.raises(NotImplementedError, match="rule"):
+        dataframe.resample(rule=rule)
+
+
 def test_dataframe_setattr_with_uninitialized_object():
     """Ensures DataFrame can be subclassed without trying to set attributes as columns."""
     # Avoid calling __init__ since it might be called later in a subclass.
diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py
@@ -5006,14 +5006,14 @@ def test_series_explode_null(data):
         pytest.param(True, "timestamp_col", "timestamp_col", "1YE"),
     ],
 )
-def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
+def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"]
     scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[
         "int64_col"
     ]
-    bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas()
+    bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas()
     pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min()
     pd.testing.assert_series_equal(bf_result, pd_result)