Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit d38e16b

Browse files
committed
feat: add DataFrame.resample and Series.resample
1 parent d97cafc commit d38e16b

8 files changed

Lines changed: 79 additions & 26 deletions

File tree

bigframes/core/blocks.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1985,6 +1985,11 @@ def _generate_resample_label(
19851985
Literal["epoch", "start", "start_day", "end", "end_day"],
19861986
] = "start_day",
19871987
) -> Block:
1988+
if not isinstance(rule, str):
1989+
raise NotImplementedError(
1990+
f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
1991+
)
1992+
19881993
# Validate and resolve the index or column to use for grouping
19891994
if on is None:
19901995
if len(self.index_columns) == 0:

bigframes/dataframe.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4256,10 +4256,12 @@ def _split(
42564256
return [DataFrame(block) for block in blocks]
42574257

42584258
@validations.requires_ordering()
4259-
def _resample(
4259+
def resample(
42604260
self,
42614261
rule: str,
42624262
*,
4263+
closed: Optional[Literal["right", "left"]] = None,
4264+
label: Optional[Literal["right", "left"]] = None,
42634265
on: blocks.Label = None,
42644266
level: Optional[LevelsType] = None,
42654267
origin: Union[
@@ -4269,7 +4271,7 @@ def _resample(
42694271
Literal["epoch", "start", "start_day", "end", "end_day"],
42704272
] = "start_day",
42714273
) -> bigframes.core.groupby.DataFrameGroupBy:
4272-
"""Internal function to support resample. Resample time-series data.
4274+
"""Resample time-series data.
42734275
42744276
**Examples:**
42754277
@@ -4285,7 +4287,7 @@ def _resample(
42854287
Resample on a DataFrame with index:
42864288
42874289
>>> df = bpd.DataFrame(data).set_index("timestamp_col")
4288-
>>> df._resample(rule="7s").min()
4290+
>>> df.resample(rule="7s").min()
42894291
int64_col int64_too
42904292
2021-01-01 12:59:55 0 10
42914293
2021-01-01 13:00:02 2 12
@@ -4298,7 +4300,7 @@ def _resample(
42984300
Resample with column and origin set to 'start':
42994301
43004302
>>> df = bpd.DataFrame(data)
4301-
>>> df._resample(rule="7s", on = "timestamp_col", origin="start").min()
4303+
>>> df.resample(rule="7s", on = "timestamp_col", origin="start").min()
43024304
int64_col int64_too
43034305
2021-01-01 13:00:00 0 10
43044306
2021-01-01 13:00:07 7 17
@@ -4311,6 +4313,14 @@ def _resample(
43114313
Args:
43124314
rule (str):
43134315
The offset string representing target conversion.
4316+
closed (Literal['right'] | Literal['left'] | None):
4317+
Which side of the bin interval is closed. The default is 'left' for
4318+
all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BA',
4319+
'BQE', and 'W' which all have a default of 'right'.
4320+
label (Literal['right'] | Literal['left'] | None):
4321+
Which bin edge label to label the bucket with. The default is 'left'
4322+
for all frequency offsets except for 'ME', 'YE', 'QE', 'BME',
4323+
'BA', 'BQE', and 'W' which all have a default of 'right'.
43144324
on (str, default None):
43154325
For a DataFrame, column to use instead of index for resampling. Column
43164326
must be datetime-like.
@@ -4327,6 +4337,8 @@ def _resample(
43274337
"""
43284338
block = self._block._generate_resample_label(
43294339
rule=rule,
4340+
closed=closed,
4341+
label=label,
43304342
on=on,
43314343
level=level,
43324344
origin=origin,

bigframes/series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2505,7 +2505,7 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series:
25052505
)
25062506

25072507
@validations.requires_ordering()
2508-
def _resample(
2508+
def resample(
25092509
self,
25102510
rule: str,
25112511
*,
@@ -2531,7 +2531,7 @@ def _resample(
25312531
... "int64_col": range(30),
25322532
... }
25332533
>>> s = bpd.DataFrame(data).set_index("timestamp_col")
2534-
>>> s._resample(rule="7s", origin="epoch").min()
2534+
>>> s.resample(rule="7s", origin="epoch").min()
25352535
int64_col
25362536
2021-01-01 12:59:56 0
25372537
2021-01-01 13:00:03 3

tests/system/small/test_dataframe.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5923,7 +5923,7 @@ def test_dataframe_explode_xfail(col_names):
59235923
),
59245924
],
59255925
)
5926-
def test__resample_with_column(
5926+
def test_resample_with_column(
59275927
scalars_df_index, scalars_pandas_df_index, on, rule, origin
59285928
):
59295929
# TODO: supply a reason why this isn't compatible with pandas 1.x
@@ -5943,30 +5943,51 @@ def test__resample_with_column(
59435943
)
59445944

59455945

5946+
@pytest.mark.parametrize("index_col", ["timestamp_col", "datetime_col"])
59465947
@pytest.mark.parametrize(
5947-
("append", "level", "col", "rule"),
5948+
("index_append", "level"),
5949+
[(True, 1), (False, None), (False, 0)],
5950+
)
5951+
@pytest.mark.parametrize(
5952+
"rule",
59485953
[
5949-
pytest.param(False, None, "timestamp_col", "100d"),
5950-
pytest.param(True, 1, "timestamp_col", "1200h"),
5951-
pytest.param(False, None, "datetime_col", "100d"),
5954+
# TODO(tswast): support timedeltas and dateoffsets
5955+
"100d",
5956+
"1200h",
59525957
],
59535958
)
5954-
def test__resample_with_index(
5955-
scalars_df_index, scalars_pandas_df_index, append, level, col, rule
5959+
@pytest.mark.parametrize("closed", ["left", "right", None])
5960+
@pytest.mark.parametrize("label", ["left", "right", None])
5961+
@pytest.mark.parametrize(
5962+
"origin",
5963+
["epoch", "start", "start_day", "end", "end_day"],
5964+
)
5965+
def test_resample_with_index(
5966+
scalars_df_index,
5967+
scalars_pandas_df_index,
5968+
index_append,
5969+
level,
5970+
index_col,
5971+
rule,
5972+
closed,
5973+
origin,
5974+
label,
59565975
):
59575976
# TODO: supply a reason why this isn't compatible with pandas 1.x
59585977
pytest.importorskip("pandas", minversion="2.0.0")
5959-
scalars_df_index = scalars_df_index.set_index(col, append=append)
5960-
scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)
5978+
scalars_df_index = scalars_df_index.set_index(index_col, append=index_append)
5979+
scalars_pandas_df_index = scalars_pandas_df_index.set_index(
5980+
index_col, append=index_append
5981+
)
59615982
bf_result = (
59625983
scalars_df_index[["int64_col", "int64_too"]]
5963-
._resample(rule=rule, level=level)
5984+
.resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
59645985
.min()
59655986
.to_pandas()
59665987
)
59675988
pd_result = (
59685989
scalars_pandas_df_index[["int64_col", "int64_too"]]
5969-
.resample(rule=rule, level=level)
5990+
.resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
59705991
.min()
59715992
)
59725993
assert_pandas_df_equal(bf_result, pd_result)
@@ -6010,7 +6031,7 @@ def test__resample_with_index(
60106031
),
60116032
],
60126033
)
6013-
def test__resample_start_time(rule, origin, data):
6034+
def test_resample_start_time(rule, origin, data):
60146035
# TODO: supply a reason why this isn't compatible with pandas 1.x
60156036
pytest.importorskip("pandas", minversion="2.0.0")
60166037
col = "timestamp_col"

tests/system/small/test_series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4856,14 +4856,14 @@ def test_series_explode_null(data):
48564856
pytest.param(True, "timestamp_col", "timestamp_col", "1YE"),
48574857
],
48584858
)
4859-
def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
4859+
def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
48604860
# TODO: supply a reason why this isn't compatible with pandas 1.x
48614861
pytest.importorskip("pandas", minversion="2.0.0")
48624862
scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"]
48634863
scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[
48644864
"int64_col"
48654865
]
4866-
bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas()
4866+
bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas()
48674867
pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min()
48684868
pd.testing.assert_series_equal(bf_result, pd_result)
48694869

tests/system/small/test_unordered.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -248,20 +248,24 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session):
248248
),
249249
],
250250
)
251-
def test__resample_with_index(unordered_session, rule, origin, data):
251+
def test_resample_with_index(unordered_session, rule, origin, data):
252252
# TODO: supply a reason why this isn't compatible with pandas 1.x
253253
pytest.importorskip("pandas", minversion="2.0.0")
254254
col = "timestamp_col"
255255
scalars_df_index = bpd.DataFrame(data, session=unordered_session).set_index(col)
256256
scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
257257
scalars_pandas_df_index.index.name = None
258258

259-
bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
260-
259+
bf_result = scalars_df_index.resample(rule=rule, origin=origin).min()
261260
pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
262261

262+
assert isinstance(bf_result.index, bpd.DatetimeIndex)
263+
assert isinstance(pd_result.index, pd.DatetimeIndex)
263264
pd.testing.assert_frame_equal(
264-
bf_result, pd_result, check_dtype=False, check_index_type=False
265+
bf_result.to_pandas(),
266+
pd_result,
267+
check_index_type=False,
268+
check_dtype=False,
265269
)
266270

267271

tests/unit/test_dataframe.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,17 @@ def test_dataframe_repr_with_uninitialized_object():
4242
assert "DataFrame" in got
4343

4444

45+
@pytest.mark.parametrize("rule", [pd.DateOffset(weeks=1), pd.Timedelta(hours=8)])
46+
def test_dataframe_rule_not_implememented(
47+
monkeypatch: pytest.MonkeyPatch,
48+
rule,
49+
):
50+
dataframe = mocks.create_dataframe(monkeypatch)
51+
52+
with pytest.raises(NotImplementedError, match="rule"):
53+
dataframe.resample(rule=rule)
54+
55+
4556
def test_dataframe_setattr_with_uninitialized_object():
4657
"""Ensures DataFrame can be subclassed without trying to set attributes as columns."""
4758
# Avoid calling __init__ since it might be called later in a subclass.

tests/unit/test_series_polars.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5006,14 +5006,14 @@ def test_series_explode_null(data):
50065006
pytest.param(True, "timestamp_col", "timestamp_col", "1YE"),
50075007
],
50085008
)
5009-
def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
5009+
def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
50105010
# TODO: supply a reason why this isn't compatible with pandas 1.x
50115011
pytest.importorskip("pandas", minversion="2.0.0")
50125012
scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"]
50135013
scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[
50145014
"int64_col"
50155015
]
5016-
bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas()
5016+
bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas()
50175017
pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min()
50185018
pd.testing.assert_series_equal(bf_result, pd_result)
50195019

0 commit comments

Comments
 (0)