Skip to content

Commit f6d9544

Browse files
stewjb and claude committed
Test: add temporal, empty, and multi-chunk edge-case coverage
New tests for the export/import paths in sbdf.pyx: - test_temporal_nulls_roundtrip: mixed-null datetime/date/time/timespan columns - test_negative_timespans: negative timedelta values including sub-ms precision - test_pre_epoch_dates: dates across full year-1..9999 range (regression for the pd.to_datetime → np.asarray fix that handles pre-Timestamp dates) - test_pre_epoch_datetimes: datetimes before Unix epoch - test_time_edge_cases: midnight, end-of-day, microsecond truncation - test_all_null_temporal_columns: all-NaT datetime64/timedelta64 columns - test_numpy_datetime_with_nulls: NaT at specific positions in datetime64[ms] - test_numpy_timedelta_with_nulls: NaT at specific positions in timedelta64[ms] - test_empty_dataframe: 0-row export for bool/int/float/datetime/timedelta/string - test_multichunk_export: 100,001-row export forces a second SBDF row slice Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0946c9e commit f6d9544

1 file changed

Lines changed: 191 additions & 0 deletions

File tree

spotfire/test/test_sbdf.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,197 @@ def test_export_import_unicode_path(self):
535535
self.assertEqual(dataframe["col"].dtype, "int64")
536536
self.assertEqual(dataframe["txt"].dtype, "object")
537537

538+
def test_temporal_nulls_roundtrip(self):
    """Verify that mixed-null temporal columns survive export/import with correct positions."""
    cases = {
        "datetime": [datetime.datetime(2020, 6, 15, 12, 0, 0), None,
                     datetime.datetime(1969, 7, 20, 20, 17, 0)],
        "date": [datetime.date(2020, 6, 15), None, datetime.date(1969, 7, 20)],
        "time": [datetime.time(12, 0, 0), None, datetime.time(20, 17, 0)],
        "timespan": [datetime.timedelta(days=1), None, datetime.timedelta(seconds=30)],
    }
    for col_name, values in cases.items():
        with self.subTest(type=col_name):
            # Null sits in the middle so both neighbors prove non-null values survive.
            result = self._roundtrip_dataframe(pd.DataFrame({"x": values}))
            self.assertFalse(pd.isnull(result.at[0, "x"]), "row 0 should not be null")
            self.assertTrue(pd.isnull(result.at[1, "x"]), "row 1 should be null")
            self.assertFalse(pd.isnull(result.at[2, "x"]), "row 2 should not be null")
558+
559+
def test_negative_timespans(self):
    """Verify that negative timedelta values round-trip correctly."""
    one_ms = datetime.timedelta(milliseconds=1)
    cases = [
        datetime.timedelta(seconds=-1),
        datetime.timedelta(days=-1),
        datetime.timedelta(days=-5, seconds=300),
        datetime.timedelta(milliseconds=-1),
        datetime.timedelta(days=-1, seconds=86399, microseconds=999000),  # -1 ms
    ]
    new_df = self._roundtrip_dataframe(pd.DataFrame({"x": cases}))
    for i, expected in enumerate(cases):
        with self.subTest(i=i, value=expected):
            # SBDF has millisecond resolution; floor the expected value to
            # whole milliseconds before comparing.
            self.assertEqual(new_df.at[i, "x"], one_ms * (expected // one_ms))
576+
577+
def test_pre_epoch_dates(self):
    """Verify that dates before the Unix epoch (1970-01-01) round-trip correctly."""
    cases = [
        datetime.date(1, 1, 1),        # SBDF epoch
        datetime.date(1582, 10, 4),    # day before Gregorian calendar
        datetime.date(1969, 12, 31),   # one day before Unix epoch
        datetime.date(1970, 1, 1),     # Unix epoch
        datetime.date(1970, 1, 2),     # one day after Unix epoch
        datetime.date(9999, 12, 31),   # max Python date
    ]
    result = self._roundtrip_dataframe(pd.DataFrame({"x": cases}))
    for row, expected in enumerate(cases):
        with self.subTest(date=expected):
            self.assertEqual(result.at[row, "x"], expected)
592+
593+
def test_pre_epoch_datetimes(self):
    """Verify that datetimes before the Unix epoch round-trip correctly."""
    cases = [
        datetime.datetime(1, 1, 1, 0, 0, 0),
        datetime.datetime(1969, 12, 31, 23, 59, 59),
        datetime.datetime(1969, 12, 31, 0, 0, 0),
    ]
    result = self._roundtrip_dataframe(pd.DataFrame({"x": cases}))
    for row, expected in enumerate(cases):
        with self.subTest(dt=expected):
            self.assertEqual(result.at[row, "x"], expected)
605+
606+
def test_time_edge_cases(self):
    """Verify midnight, end-of-day, and microsecond-precision time values."""
    cases = [
        (datetime.time(0, 0, 0), datetime.time(0, 0, 0)),
        (datetime.time(23, 59, 59, 999000), datetime.time(23, 59, 59, 999000)),
        (datetime.time(12, 30, 45, 500), datetime.time(12, 30, 45, 0)),  # sub-ms truncated
        (datetime.time(0, 0, 0, 1000), datetime.time(0, 0, 0, 1000)),  # 1 ms exactly
    ]
    for given, expected in cases:
        with self.subTest(time=given):
            # Each value gets its own single-row frame so truncation is
            # observed per-value, independent of the other cases.
            result = self._roundtrip_dataframe(pd.DataFrame({"x": [given]}))
            self.assertEqual(result.at[0, "x"], expected)
619+
620+
def test_all_null_temporal_columns(self):
    """Verify that all-null columns of each temporal type export and import without error."""
    dtypes = {"DateTime": "datetime64[ms]", "TimeSpan": "timedelta64[ms]"}
    for spotfire_type, dtype in dtypes.items():
        with self.subTest(type=spotfire_type):
            all_nat = pd.array([pd.NaT, pd.NaT, pd.NaT],  # type: ignore[call-overload]
                               dtype=dtype)
            result = self._roundtrip_dataframe(pd.DataFrame({"x": all_nat}))
            self.assertEqual(len(result), 3)
            self.assertTrue(result["x"].isna().all())
630+
631+
def test_numpy_datetime_with_nulls(self):
    """Verify that numpy datetime64 columns with NaT values export and import correctly."""
    source = pd.array([pd.NaT, pd.Timestamp("2020-01-01"), pd.NaT,
                       pd.Timestamp("1969-07-20"), pd.NaT], dtype="datetime64[ms]")
    result = self._roundtrip_dataframe(pd.DataFrame({"x": source}))
    # None marks the positions that must come back as null.
    expected = [None, datetime.datetime(2020, 1, 1), None,
                datetime.datetime(1969, 7, 20), None]
    for row, value in enumerate(expected):
        if value is None:
            self.assertTrue(pd.isnull(result.at[row, "x"]))
        else:
            self.assertEqual(result.at[row, "x"], value)
647+
648+
def test_numpy_timedelta_with_nulls(self):
    """Verify that numpy timedelta64 columns with NaT values export and import correctly."""
    source = pd.array([pd.NaT, pd.Timedelta(days=1), pd.NaT,  # type: ignore[call-overload]
                       pd.Timedelta(seconds=-30), pd.NaT], dtype="timedelta64[ms]")
    result = self._roundtrip_dataframe(pd.DataFrame({"x": source}))
    # None marks the positions that must come back as null.
    expected = [None, datetime.timedelta(days=1), None,
                datetime.timedelta(seconds=-30), None]
    for row, value in enumerate(expected):
        if value is None:
            self.assertTrue(pd.isnull(result.at[row, "x"]))
        else:
            self.assertEqual(result.at[row, "x"], value)
664+
665+
def test_empty_dataframe(self):
    """Verify 0-row DataFrames export and import correctly for all column types.

    Exercises the zero-size array code paths that boundscheck=False leaves unchecked,
    ensuring no off-by-one occurs at the loop boundary when row_count is 0.
    """
    for label in ("bool", "int64", "float64", "datetime64[ms]", "timedelta64[ms]"):
        with self.subTest(dtype=label):
            empty = pd.DataFrame({"x": pd.array([], dtype=label)})
            result = self._roundtrip_dataframe(empty)
            self.assertEqual(len(result), 0)
            self.assertIn("x", result.columns)
    # String requires an explicit type annotation when the column is empty (no values to infer from)
    str_df = pd.DataFrame({"x": pd.Series([], dtype=object)})
    spotfire.set_spotfire_types(str_df, {"x": "String"})
    with self.subTest(dtype="string"):
        result = self._roundtrip_dataframe(str_df)
        self.assertEqual(len(result), 0)
        self.assertIn("x", result.columns)
690+
691+
def test_multichunk_export(self):
    """Verify exports spanning multiple SBDF row slices produce correct values.

    The default slice size is ``100_000 // num_columns`` rows, so a 100_001-row
    single-column DataFrame forces a second slice (start=100_000, count=1).
    This exercises direct ``[start+i]`` indexing and pointer arithmetic for
    precomputed int64 paths, both unchecked under boundscheck=False.

    Each column family runs inside its own ``subTest`` so a failure in one
    (e.g. time) does not mask failures in the others, and the failure report
    names the offending column type.
    """
    n = 100_001
    with self.subTest(column="time"):
        # Last row differs from the rest, proving slice 2 carries real data.
        times = [datetime.time(0, 0, 0)] * n
        times[-1] = datetime.time(23, 59, 58)
        new_df = self._roundtrip_dataframe(pd.DataFrame({"t": times}))
        self.assertEqual(len(new_df), n)
        self.assertEqual(new_df.at[0, "t"], datetime.time(0, 0, 0))
        self.assertEqual(new_df.at[n - 1, "t"], datetime.time(23, 59, 58))
    with self.subTest(column="date"):
        dates = [datetime.date(2000, 1, 1)] * n
        dates[-1] = datetime.date(1969, 7, 20)
        new_df = self._roundtrip_dataframe(pd.DataFrame({"d": dates}))
        self.assertEqual(new_df.at[n - 1, "d"], datetime.date(1969, 7, 20))
    with self.subTest(column="datetime64[ms]"):
        dts = pd.array([pd.Timestamp("2000-01-01")] * n, dtype="datetime64[ms]")
        dts[n - 1] = pd.Timestamp("1969-07-20")
        new_df = self._roundtrip_dataframe(pd.DataFrame({"dt": dts}))
        self.assertEqual(new_df.at[n - 1, "dt"], datetime.datetime(1969, 7, 20))
    with self.subTest(column="timedelta64[ms]"):
        tds = pd.array([pd.Timedelta(0)] * n, dtype="timedelta64[ms]")
        tds[n - 1] = pd.Timedelta(seconds=-1)
        new_df = self._roundtrip_dataframe(pd.DataFrame({"td": tds}))
        self.assertEqual(len(new_df), n)
        self.assertEqual(new_df.at[0, "td"], datetime.timedelta(0))
        self.assertEqual(new_df.at[n - 1, "td"], datetime.timedelta(seconds=-1))
728+
538729
@staticmethod
539730
def _roundtrip_dataframe(dataframe: typing.Any) -> pd.DataFrame:
540731
"""Write out a dataframe to SBDF and immediately read it back in to a new one."""

0 commit comments

Comments
 (0)