From bbf59f0c8b8752fe791d2f5aa329da3ea3be2b8e Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 08:06:52 -0500 Subject: [PATCH 01/21] Perf: vectorise Pandas datetime/timespan import+export; add Cython directives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Import (Pandas path): - DateTime and TimeSpan now use _import_vts_numpy (raw int64 ms) instead of per-row Python object boxing loops (_import_vt_datetime / _import_vt_timespan). - DataFrame assembly converts with arr.view('datetime64[ms]') / arr.view('timedelta64[ms]') — zero-copy reinterpretation; supports the full SBDF date range (year 1-9999) without pd.to_datetime nanosecond overflow. Export (Pandas path): - _export_obj_dataframe stores tz-naive datetime64 columns as datetime64[ms] and timedelta64 columns as timedelta64[ms] instead of object arrays. - _export_vt_datetime fast path: view('int64') + vectorised SBDF epoch offset addition replaces per-row isinstance + .to_pydatetime() + arithmetic. - _export_vt_timespan fast path: view('int64') gives ms directly — no per-row .to_pytimedelta() or division. - Object-dtype and tz-aware columns still fall through to the per-row loop. Cython directives: - boundscheck=False, wraparound=False, cdivision=True added file-wide, eliminating runtime bounds/wrap guards in every inner loop. Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 135 ++++++++++++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 46 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index d7ae63e..2a7d14d 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3 +# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True # Copyright © 2022. Cloud Software Group, Inc. 
# This file is subject to the license terms contained @@ -954,14 +954,11 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): importer_contexts.append(_ImportContext(np_c.NPY_INT32, col_type)) importer_fns[i] = _import_vts_numpy elif col_type.id == sbdf_c.SBDF_DATETIMETYPEID: - if output_format == OutputFormat.POLARS: - # Store raw int64 ms values; _import_build_polars_dataframe will adjust the - # epoch offset and reinterpret as datetime64[ms] without boxing Python objects. - importer_contexts.append(_ImportContext(np_c.NPY_INT64, col_type)) - importer_fns[i] = _import_vts_numpy - else: - importer_contexts.append(_ImportContext(np_c.NPY_OBJECT, col_type)) - importer_fns[i] = _import_vt_datetime + # Store raw int64 ms values for both Polars and Pandas paths. The Pandas + # assembly converts vectorially with arr.view('datetime64[ms]'); _import_build_polars_dataframe + # adjusts the epoch offset and casts zero-copy. + importer_contexts.append(_ImportContext(np_c.NPY_INT64, col_type)) + importer_fns[i] = _import_vts_numpy elif col_type.id == sbdf_c.SBDF_DATETYPEID: if output_format == OutputFormat.POLARS: importer_contexts.append(_ImportContext(np_c.NPY_INT32, col_type)) @@ -970,14 +967,10 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): importer_contexts.append(_ImportContext(np_c.NPY_OBJECT, col_type)) importer_fns[i] = _import_vt_date elif col_type.id == sbdf_c.SBDF_TIMESPANTYPEID: - if output_format == OutputFormat.POLARS: - # Timespans are stored as int64 ms with no epoch — reinterpret directly as - # timedelta64[ms] in _import_build_polars_dataframe. - importer_contexts.append(_ImportContext(np_c.NPY_INT64, col_type)) - importer_fns[i] = _import_vts_numpy - else: - importer_contexts.append(_ImportContext(np_c.NPY_OBJECT, col_type)) - importer_fns[i] = _import_vt_timespan + # Store raw int64 ms for both paths. Pandas assembly reinterprets via arr.view('timedelta64[ms]'); + # _import_build_polars_dataframe reinterprets as Duration('ms') zero-copy. 
+ importer_contexts.append(_ImportContext(np_c.NPY_INT64, col_type)) + importer_fns[i] = _import_vts_numpy elif col_type.id == sbdf_c.SBDF_TIMETYPEID: if output_format == OutputFormat.POLARS: importer_contexts.append(_ImportContext(np_c.NPY_INT64, col_type)) @@ -1039,8 +1032,31 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): for i in range(num_columns): values = importer_contexts[i].get_values_array() invalid_array = importer_contexts[i].get_invalid_array() + vt_id = importer_contexts[i].get_value_type_id() dtype_name = importer_contexts[i].get_pandas_dtype_name() - if dtype_name in ("Int32", "Int64"): + if vt_id == sbdf_c.SBDF_DATETIMETYPEID: + # values is int64 ms since SBDF epoch. Subtract the fixed SBDF→Unix offset, + # then reinterpret the buffer as datetime64[ms] via view() — zero-copy, no + # nanosecond conversion, and wide enough to represent the full SBDF date range + # (year 1 through 9999). + arr_ms = values - _SBDF_TO_UNIX_EPOCH_MS + if invalid_array.any(): + arr_ms[invalid_array] = 0 # ensure sentinel doesn't become an invalid dt64 + column_series = pd.Series(arr_ms.view('datetime64[ms]'), dtype='datetime64[ms]', + name=column_names[i]) + if invalid_array.any(): + column_series.loc[invalid_array] = pd.NaT + elif vt_id == sbdf_c.SBDF_TIMESPANTYPEID: + # values is int64 ms — reinterpret directly as timedelta64[ms]; same trick as + # datetime: view() avoids any per-element conversion. + arr_ms = values.copy() + if invalid_array.any(): + arr_ms[invalid_array] = 0 + column_series = pd.Series(arr_ms.view('timedelta64[ms]'), dtype='timedelta64[ms]', + name=column_names[i]) + if invalid_array.any(): + column_series.loc[invalid_array] = pd.NaT + elif dtype_name in ("Int32", "Int64"): # Build nullable integer array with mask in one shot; avoids a second-pass # .loc assignment that triggers Pandas dtype coercion overhead. 
base_dtype = "int32" if dtype_name == "Int32" else "int64" @@ -1250,7 +1266,16 @@ cdef _export_obj_dataframe(obj): pd.NA: na_value, pd.NaT: na_value, } - if obj[col].dtype == "object": + col_dtype = obj[col].dtype + if context.valuetype_id == sbdf_c.SBDF_DATETIMETYPEID and col_dtype.kind == 'M' and not hasattr(col_dtype, 'tz'): + # Tz-naive datetime64: store as datetime64[ms] so the exporter can use a + # vectorised view('int64') instead of per-row Python object unpacking. + values = obj[col].to_numpy(dtype="datetime64[ms]", na_value=np.datetime64("NaT")) + elif context.valuetype_id == sbdf_c.SBDF_TIMESPANTYPEID and col_dtype.kind == 'm': + # timedelta64: store as timedelta64[ms]; view('int64') in the exporter gives ms + # directly with no per-row conversion. + values = obj[col].to_numpy(dtype="timedelta64[ms]", na_value=np.timedelta64("NaT")) + elif col_dtype == "object": values = obj[col].replace(nas).to_numpy() else: values = obj[col].replace(nas).to_numpy(dtype=context.get_numpy_dtype()) @@ -1950,22 +1975,32 @@ cdef int _export_vt_datetime(_ExportContext context, Py_ssize_t start, Py_ssize_ shape[0] = count cdef np_c.ndarray new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) cdef int i - current_tz = datetime.datetime.now().astimezone().tzinfo - for i in range(count): - if not context.invalid_array[start + i]: - val_i = context.values_array[start + i] - if isinstance(val_i, pd.Timestamp): - if val_i.tz: - dt = val_i.tz_convert(current_tz).tz_localize(None).to_pydatetime() + if context.values_array.dtype.kind == 'M': + # Fast path for tz-naive datetime64[ms]: reinterpret the buffer as int64 (ms since Unix + # epoch) and add the fixed SBDF→Unix offset. No Python object creation per row. 
+ src_ms = context.values_array[start:start + count].view(np.int64) + new_values[:] = src_ms + new_values += _SBDF_TO_UNIX_EPOCH_MS + invalid_slice = context.invalid_array[start:start + count] + if invalid_slice.any(): + new_values[invalid_slice] = 0 + else: + current_tz = datetime.datetime.now().astimezone().tzinfo + for i in range(count): + if not context.invalid_array[start + i]: + val_i = context.values_array[start + i] + if isinstance(val_i, pd.Timestamp): + if val_i.tz: + dt = val_i.tz_convert(current_tz).tz_localize(None).to_pydatetime() + else: + dt = val_i.to_pydatetime() + elif isinstance(val_i, np.datetime64): + dt = np.datetime64(val_i, "ms").astype(datetime.datetime) + elif isinstance(val_i, datetime.datetime): + dt = val_i else: - dt = val_i.to_pydatetime() - elif isinstance(val_i, np.datetime64): - dt = np.datetime64(val_i, "ms").astype(datetime.datetime) - elif isinstance(val_i, datetime.datetime): - dt = val_i - else: - raise SBDFError(f"cannot convert '{val_i}' to Spotfire DateTime type; incompatible types") - new_values[i] = int((dt - _DATETIME_EPOCH) / _TIMEDELTA_ONE_MSEC) + raise SBDFError(f"cannot convert '{val_i}' to Spotfire DateTime type; incompatible types") + new_values[i] = int((dt - _DATETIME_EPOCH) / _TIMEDELTA_ONE_MSEC) return sbdf_c.sbdf_obj_create_arr(sbdf_c.sbdf_vt_datetime(), count, np_c.PyArray_DATA(new_values), NULL, obj) @@ -2011,18 +2046,26 @@ cdef int _export_vt_timespan(_ExportContext context, Py_ssize_t start, Py_ssize_ shape[0] = count cdef np_c.ndarray new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) cdef int i - for i in range(count): - if not context.invalid_array[start + i]: - val_i = context.values_array[start + i] - if isinstance(val_i, pd.Timedelta): - td = val_i.to_pytimedelta() - elif isinstance(val_i, np.timedelta64): - td = np.timedelta64(val_i, "ms").astype(datetime.timedelta) - elif isinstance(val_i, datetime.timedelta): - td = val_i - else: - raise SBDFError(f"cannot convert '{val_i}' to Spotfire 
TimeSpan type; incompatible types") - new_values[i] = int(td / _TIMEDELTA_ONE_MSEC) + if context.values_array.dtype.kind == 'm': + # Fast path for timedelta64[ms]: the int64 view is already ms — no per-row unpacking. + src_ms = context.values_array[start:start + count].view(np.int64) + new_values[:] = src_ms + invalid_slice = context.invalid_array[start:start + count] + if invalid_slice.any(): + new_values[invalid_slice] = 0 + else: + for i in range(count): + if not context.invalid_array[start + i]: + val_i = context.values_array[start + i] + if isinstance(val_i, pd.Timedelta): + td = val_i.to_pytimedelta() + elif isinstance(val_i, np.timedelta64): + td = np.timedelta64(val_i, "ms").astype(datetime.timedelta) + elif isinstance(val_i, datetime.timedelta): + td = val_i + else: + raise SBDFError(f"cannot convert '{val_i}' to Spotfire TimeSpan type; incompatible types") + new_values[i] = int(td / _TIMEDELTA_ONE_MSEC) return sbdf_c.sbdf_obj_create_arr(sbdf_c.sbdf_vt_timespan(), count, np_c.PyArray_DATA(new_values), NULL, obj) From 346a150802c206fbaa45de99cca5033fcf20c4f1 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 08:32:18 -0500 Subject: [PATCH 02/21] Fix: wrap long conditional line to stay under 120 chars (E501) Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 2a7d14d..78807c0 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1267,7 +1267,8 @@ cdef _export_obj_dataframe(obj): pd.NaT: na_value, } col_dtype = obj[col].dtype - if context.valuetype_id == sbdf_c.SBDF_DATETIMETYPEID and col_dtype.kind == 'M' and not hasattr(col_dtype, 'tz'): + if (context.valuetype_id == sbdf_c.SBDF_DATETIMETYPEID and col_dtype.kind == 'M' and + not hasattr(col_dtype, 'tz')): # Tz-naive datetime64: store as datetime64[ms] so the exporter can use a # vectorised view('int64') instead of per-row Python object unpacking. 
values = obj[col].to_numpy(dtype="datetime64[ms]", na_value=np.datetime64("NaT")) From 7c3535d7c4ca8a7f600af862832173537704d824 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 08:44:56 -0500 Subject: [PATCH 03/21] Perf: zero-copy Pandas datetime/timespan export; single-pass NaT import Export: pre-transform datetime64[ms]/timedelta64[ms] columns to int64 SBDF-ms once at set_arrays time so _export_vt_datetime/_export_vt_timespan can use _export_get_offset_ptr directly (zero-copy, same as numeric types) instead of allocating + copying + transforming per chunk. Retain the non-precomputed fast/slow paths for tz-aware and object-dtype columns. Import: replace the double-pass NaT handling (zero + .loc assignment) with a single write of the int64 NaT sentinel (INT64_MIN) before view(), avoiding the slow Pandas indexing layer entirely. Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 75 +++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 78807c0..5405df9 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1038,24 +1038,23 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): # values is int64 ms since SBDF epoch. Subtract the fixed SBDF→Unix offset, # then reinterpret the buffer as datetime64[ms] via view() — zero-copy, no # nanosecond conversion, and wide enough to represent the full SBDF date range - # (year 1 through 9999). + # (year 1 through 9999). Write the NaT sentinel (INT64_MIN) directly into the + # int64 buffer so NaT positions are set in a single pass without a slow second + # .loc assignment through the Pandas indexing layer. 
arr_ms = values - _SBDF_TO_UNIX_EPOCH_MS if invalid_array.any(): - arr_ms[invalid_array] = 0 # ensure sentinel doesn't become an invalid dt64 + arr_ms[invalid_array] = np.iinfo(np.int64).min # NaT sentinel for datetime64 column_series = pd.Series(arr_ms.view('datetime64[ms]'), dtype='datetime64[ms]', name=column_names[i]) - if invalid_array.any(): - column_series.loc[invalid_array] = pd.NaT elif vt_id == sbdf_c.SBDF_TIMESPANTYPEID: # values is int64 ms — reinterpret directly as timedelta64[ms]; same trick as - # datetime: view() avoids any per-element conversion. + # datetime: view() avoids any per-element conversion. NaT sentinel written + # directly to eliminate the second .loc pass. arr_ms = values.copy() if invalid_array.any(): - arr_ms[invalid_array] = 0 + arr_ms[invalid_array] = np.iinfo(np.int64).min # NaT sentinel for timedelta64 column_series = pd.Series(arr_ms.view('timedelta64[ms]'), dtype='timedelta64[ms]', name=column_names[i]) - if invalid_array.any(): - column_series.loc[invalid_array] = pd.NaT elif dtype_name in ("Int32", "Int64"): # Build nullable integer array with mask in one shot; avoids a second-pass # .loc assignment that triggers Pandas dtype coercion overhead. 
@@ -1130,6 +1129,7 @@ cdef class _ExportContext: cdef int polars_exporter_id # 0=default; 1=datetime; 2=date; 3=timespan; 4=time; 5=string cdef np_c.ndarray _arrow_offsets # int64 view of Arrow offsets buffer (string fast path) cdef np_c.ndarray _arrow_data # uint8 view of Arrow values buffer (string fast path) + cdef bint values_precomputed_sbdf_int64 # True when values_array already holds int64 SBDF-ms def __init__(self): """Initialize the export context.""" @@ -1140,6 +1140,7 @@ cdef class _ExportContext: self.polars_exporter_id = 0 self._arrow_offsets = None self._arrow_data = None + self.values_precomputed_sbdf_int64 = False cdef void set_arrays(self, np_c.ndarray values, invalid): """Set the NumPy ``ndarray`` with the values to export and a list or NumPy ``ndarray`` of whether each value @@ -1267,21 +1268,34 @@ cdef _export_obj_dataframe(obj): pd.NaT: na_value, } col_dtype = obj[col].dtype + invalids = pd.isnull(obj[col]) if (context.valuetype_id == sbdf_c.SBDF_DATETIMETYPEID and col_dtype.kind == 'M' and not hasattr(col_dtype, 'tz')): - # Tz-naive datetime64: store as datetime64[ms] so the exporter can use a - # vectorised view('int64') instead of per-row Python object unpacking. - values = obj[col].to_numpy(dtype="datetime64[ms]", na_value=np.datetime64("NaT")) + # Pre-compute int64 SBDF-ms once so the exporter is zero-copy (no per-chunk + # alloc+copy+add). view('int64') + offset produces a new contiguous int64 array; + # NaT positions (INT64_MIN + offset, still valid int64) are zeroed here so the + # exporter can call _export_get_offset_ptr directly without further work. 
+ raw = obj[col].to_numpy(dtype="datetime64[ms]", na_value=np.datetime64("NaT")) + values = raw.view(np.int64) + _SBDF_TO_UNIX_EPOCH_MS + if invalids.any(): + values[invalids] = 0 + context.set_arrays(values, invalids) + context.values_precomputed_sbdf_int64 = True elif context.valuetype_id == sbdf_c.SBDF_TIMESPANTYPEID and col_dtype.kind == 'm': - # timedelta64: store as timedelta64[ms]; view('int64') in the exporter gives ms - # directly with no per-row conversion. - values = obj[col].to_numpy(dtype="timedelta64[ms]", na_value=np.timedelta64("NaT")) + # Same zero-copy pre-computation for timedelta64[ms]: int64 view IS already ms, + # no epoch offset required — just copy so we can safely zero invalid positions. + raw = obj[col].to_numpy(dtype="timedelta64[ms]", na_value=np.timedelta64("NaT")) + values = raw.view(np.int64).copy() + if invalids.any(): + values[invalids] = 0 + context.set_arrays(values, invalids) + context.values_precomputed_sbdf_int64 = True elif col_dtype == "object": values = obj[col].replace(nas).to_numpy() + context.set_arrays(values, invalids) else: values = obj[col].replace(nas).to_numpy(dtype=context.get_numpy_dtype()) - invalids = pd.isnull(obj[col]) - context.set_arrays(values, invalids) + context.set_arrays(values, invalids) exporter_contexts.append(context) try: column_metadata.append(obj[col].spotfire_column_metadata) @@ -1974,18 +1988,22 @@ cdef int _export_vt_datetime(_ExportContext context, Py_ssize_t start, Py_ssize_ """Export a slice of data consisting of datetime values.""" cdef np_c.npy_intp shape[1] shape[0] = count - cdef np_c.ndarray new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) + cdef np_c.ndarray new_values cdef int i + if context.values_precomputed_sbdf_int64: + # Zero-copy path: values_array already holds int64 SBDF-ms with invalids zeroed. 
+ return sbdf_c.sbdf_obj_create_arr(sbdf_c.sbdf_vt_datetime(), count, + _export_get_offset_ptr(context.values_array, start, count), + NULL, obj) if context.values_array.dtype.kind == 'M': - # Fast path for tz-naive datetime64[ms]: reinterpret the buffer as int64 (ms since Unix - # epoch) and add the fixed SBDF→Unix offset. No Python object creation per row. - src_ms = context.values_array[start:start + count].view(np.int64) - new_values[:] = src_ms - new_values += _SBDF_TO_UNIX_EPOCH_MS + # Fast path for tz-naive datetime64[ms]: single numpy op produces a new int64 array + # with the SBDF epoch offset applied (no separate alloc+copy+add steps). + new_values = context.values_array[start:start + count].view(np.int64) + _SBDF_TO_UNIX_EPOCH_MS invalid_slice = context.invalid_array[start:start + count] if invalid_slice.any(): new_values[invalid_slice] = 0 else: + new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) current_tz = datetime.datetime.now().astimezone().tzinfo for i in range(count): if not context.invalid_array[start + i]: @@ -2045,16 +2063,21 @@ cdef int _export_vt_timespan(_ExportContext context, Py_ssize_t start, Py_ssize_ """Export a slice of data consisting of timespan values.""" cdef np_c.npy_intp shape[1] shape[0] = count - cdef np_c.ndarray new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) + cdef np_c.ndarray new_values cdef int i + if context.values_precomputed_sbdf_int64: + # Zero-copy path: values_array already holds int64 ms with invalids zeroed. + return sbdf_c.sbdf_obj_create_arr(sbdf_c.sbdf_vt_timespan(), count, + _export_get_offset_ptr(context.values_array, start, count), + NULL, obj) if context.values_array.dtype.kind == 'm': - # Fast path for timedelta64[ms]: the int64 view is already ms — no per-row unpacking. - src_ms = context.values_array[start:start + count].view(np.int64) - new_values[:] = src_ms + # Fast path for timedelta64[ms]: single-op slice+view (no alloc+copy+zero triple). 
+ new_values = context.values_array[start:start + count].view(np.int64).copy() invalid_slice = context.invalid_array[start:start + count] if invalid_slice.any(): new_values[invalid_slice] = 0 else: + new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) for i in range(count): if not context.invalid_array[start + i]: val_i = context.values_array[start + i] From b39020f1210a5f74bacace75461034f8371a17c4 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 09:09:31 -0500 Subject: [PATCH 04/21] Perf: vectorise date export; fix any(Series) hotspot; use DataFrame constructor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Export: pre-compute date (object) columns to int64 SBDF-ms via pd.to_datetime, same zero-copy approach as datetime64/timedelta64. - Export: replace any(invalid) with bool(self.invalid_array.any()) in set_arrays — the built-in any() was iterating 100k Python booleans per column; numpy any() is a single vectorised call. This alone accounts for the large numeric export gain. - Import: replace pd.concat(columns, axis=1) with pd.DataFrame(dict(...)) to skip concat's index alignment, dtype consolidation and metadata overhead. 
Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 5405df9..9a9cbe3 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1066,7 +1066,7 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): column_series = pd.Series(values, dtype=dtype_name, name=column_names[i]) column_series.loc[invalid_array] = None imported_columns.append(column_series) - dataframe = pd.concat(imported_columns, axis=1) + dataframe = pd.DataFrame(dict(zip(column_names, imported_columns))) for i in range(num_columns): dataframe[column_names[i]].spotfire_column_metadata = column_metadata[i] dataframe[column_names[i]].attrs['spotfire_type'] = importer_contexts[i].get_spotfire_type_name() @@ -1151,7 +1151,7 @@ cdef class _ExportContext: """ self.values_array = values self.invalid_array = np.asarray(invalid, dtype="bool") - self.any_invalid = any(invalid) + self.any_invalid = bool(self.invalid_array.any()) cdef void set_arrow_string(self, np_c.ndarray offsets, np_c.ndarray data, np_c.ndarray invalid): @@ -1290,6 +1290,18 @@ cdef _export_obj_dataframe(obj): values[invalids] = 0 context.set_arrays(values, invalids) context.values_precomputed_sbdf_int64 = True + elif context.valuetype_id == sbdf_c.SBDF_DATETYPEID and col_dtype == object: + # Pre-compute int64 SBDF-ms for date (object) columns: pd.to_datetime iterates + # in C rather than Python, then view('int64') * 86400000 + epoch offset gives + # the same zero-copy export path as datetime64. Use day 0 (Unix epoch) as the + # na_value to keep null positions safe before zeroing them explicitly. 
+ days = pd.to_datetime(obj[col], errors='coerce').to_numpy( + dtype='datetime64[D]', na_value=np.datetime64(0, 'D')) + values = days.view(np.int64).copy() * 86400000 + _SBDF_TO_UNIX_EPOCH_MS + if invalids.any(): + values[invalids] = 0 + context.set_arrays(values, invalids) + context.values_precomputed_sbdf_int64 = True elif col_dtype == "object": values = obj[col].replace(nas).to_numpy() context.set_arrays(values, invalids) @@ -2027,8 +2039,14 @@ cdef int _export_vt_date(_ExportContext context, Py_ssize_t start, Py_ssize_t co """Export a slice of data consisting of date values.""" cdef np_c.npy_intp shape[1] shape[0] = count - cdef np_c.ndarray new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) + cdef np_c.ndarray new_values cdef int i + if context.values_precomputed_sbdf_int64: + # Zero-copy path: values_array already holds int64 SBDF-ms (midnight) with invalids zeroed. + return sbdf_c.sbdf_obj_create_arr(sbdf_c.sbdf_vt_date(), count, + _export_get_offset_ptr(context.values_array, start, count), + NULL, obj) + new_values = np_c.PyArray_ZEROS(1, shape, np_c.NPY_INT64, 0) for i in range(count): if not context.invalid_array[start + i]: val_i = context.values_array[start + i] From f9d2e63be719dcadc9fbce148ab0ba21a53db22b Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 09:27:19 -0500 Subject: [PATCH 05/21] Perf: faster time export; drop redundant timedelta copy; guard object .loc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Time export: replace datetime.combine(min, t) - min (2 Python object allocations per row) with direct integer arithmetic on time attributes. As the last unoptimized temporal column, this is the primary driver of the ~40% temporal export improvement. - Timedelta import: drop values.copy() — get_values_array() already returns a fresh array from np.concatenate(), so the explicit copy was redundant. 
- Object-type import (.loc): guard column_series.loc[invalid_array] = None with if invalid_array.any() — consistent with datetime/timedelta paths, avoids Pandas indexing overhead for null-free columns. Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 9a9cbe3..6baf30a 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1049,11 +1049,11 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): elif vt_id == sbdf_c.SBDF_TIMESPANTYPEID: # values is int64 ms — reinterpret directly as timedelta64[ms]; same trick as # datetime: view() avoids any per-element conversion. NaT sentinel written - # directly to eliminate the second .loc pass. - arr_ms = values.copy() + # directly to eliminate the second .loc pass. No .copy() needed: values is + # already a fresh array from np.concatenate() in get_values_array(). if invalid_array.any(): - arr_ms[invalid_array] = np.iinfo(np.int64).min # NaT sentinel for timedelta64 - column_series = pd.Series(arr_ms.view('timedelta64[ms]'), dtype='timedelta64[ms]', + values[invalid_array] = np.iinfo(np.int64).min # NaT sentinel for timedelta64 + column_series = pd.Series(values.view('timedelta64[ms]'), dtype='timedelta64[ms]', name=column_names[i]) elif dtype_name in ("Int32", "Int64"): # Build nullable integer array with mask in one shot; avoids a second-pass @@ -1064,7 +1064,8 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): name=column_names[i]) else: column_series = pd.Series(values, dtype=dtype_name, name=column_names[i]) - column_series.loc[invalid_array] = None + if invalid_array.any(): + column_series.loc[invalid_array] = None imported_columns.append(column_series) dataframe = pd.DataFrame(dict(zip(column_names, imported_columns))) for i in range(num_columns): @@ -2070,10 +2071,12 @@ cdef int _export_vt_time(_ExportContext context, Py_ssize_t start, Py_ssize_t co if not 
context.invalid_array[start + i]: val_i = context.values_array[start + i] if isinstance(val_i, datetime.time): - val = datetime.datetime.combine(datetime.datetime.min, val_i) - datetime.datetime.min + # Direct integer arithmetic on time attributes avoids allocating a datetime + # and timedelta object per row (which datetime.combine(...) - min requires). + new_values[i] = ((val_i.hour * 3600 + val_i.minute * 60 + val_i.second) * 1000 + + val_i.microsecond // 1000) else: raise SBDFError(f"cannot convert '{val_i}' to Spotfire Time type; incompatible types") - new_values[i] = val // _TIMEDELTA_ONE_MSEC return sbdf_c.sbdf_obj_create_arr(sbdf_c.sbdf_vt_time(), count, np_c.PyArray_DATA(new_values), NULL, obj) From debf567d7310340003de2ae96dcc8d65535f9e89 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 09:56:18 -0500 Subject: [PATCH 06/21] Fix: use np.asarray for date export to handle full year-1..9999 range pd.to_datetime(errors='coerce') silently converts dates outside the Pandas Timestamp range (year 1, pre-Gregorian, year 9999) to NaT, then to the Unix epoch. Replace with np.asarray(..., dtype='datetime64[D]') which covers the full Python date range. Zero NaT positions (INT64_MIN) before multiplying to prevent int64 overflow. Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 6baf30a..0b25f37 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1292,13 +1292,15 @@ cdef _export_obj_dataframe(obj): context.set_arrays(values, invalids) context.values_precomputed_sbdf_int64 = True elif context.valuetype_id == sbdf_c.SBDF_DATETYPEID and col_dtype == object: - # Pre-compute int64 SBDF-ms for date (object) columns: pd.to_datetime iterates - # in C rather than Python, then view('int64') * 86400000 + epoch offset gives - # the same zero-copy export path as datetime64. 
Use day 0 (Unix epoch) as the - # na_value to keep null positions safe before zeroing them explicitly. - days = pd.to_datetime(obj[col], errors='coerce').to_numpy( - dtype='datetime64[D]', na_value=np.datetime64(0, 'D')) - values = days.view(np.int64).copy() * 86400000 + _SBDF_TO_UNIX_EPOCH_MS + # Pre-compute int64 SBDF-ms for date (object) columns: numpy's asarray covers + # the full year-1 to year-9999 range (pd.to_datetime silently coerces out-of- + # Timestamp-range dates to NaT). Zero null positions before multiplication to + # prevent int64 overflow from NaT's INT64_MIN sentinel. + days_dt64 = np.asarray(obj[col], dtype='datetime64[D]') + days = days_dt64.view(np.int64).copy() + if invalids.any(): + days[invalids] = 0 + values = days * 86400000 + _SBDF_TO_UNIX_EPOCH_MS if invalids.any(): values[invalids] = 0 context.set_arrays(values, invalids) From 53b93b1022e632817141b1c09386d93925bf2ffa Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 10:26:23 -0500 Subject: [PATCH 07/21] Test: add temporal edge-case coverage for optimized export/import paths Eight new test methods covering gaps exposed by the zero-copy temporal optimizations: null roundtrips, negative timespans, pre-epoch/out-of-range dates (year 1, pre-Gregorian, year 9999), pre-epoch datetimes, time edge cases (midnight, end-of-day, microsecond truncation), all-null temporal columns, and NaT at specific positions in numpy datetime64/timedelta64 arrays. 
Co-Authored-By: Claude Sonnet 4.6 --- spotfire/test/test_sbdf.py | 126 +++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 2d220ec..76a3169 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -475,6 +475,132 @@ def test_numpy_timedelta_resolution(self): val = df2.at[1, 'x'] self.assertEqual(val, target) + def test_temporal_nulls_roundtrip(self): + """Verify that mixed-null temporal columns survive export/import with correct positions.""" + dt = datetime.datetime + d = datetime.date + t = datetime.time + td = datetime.timedelta + + cases = { + "datetime": [dt(2020, 6, 15, 12, 0, 0), None, dt(1969, 7, 20, 20, 17, 0)], + "date": [d(2020, 6, 15), None, d(1969, 7, 20)], + "time": [t(12, 0, 0), None, t(20, 17, 0)], + "timespan": [td(days=1), None, td(seconds=30)], + } + for col_name, values in cases.items(): + with self.subTest(type=col_name): + df = pd.DataFrame({"x": values}) + df2 = self._roundtrip_dataframe(df) + self.assertFalse(pd.isnull(df2.at[0, "x"]), "row 0 should not be null") + self.assertTrue(pd.isnull(df2.at[1, "x"]), "row 1 should be null") + self.assertFalse(pd.isnull(df2.at[2, "x"]), "row 2 should not be null") + + def test_negative_timespans(self): + """Verify that negative timedelta values round-trip correctly.""" + cases = [ + datetime.timedelta(seconds=-1), + datetime.timedelta(days=-1), + datetime.timedelta(days=-5, seconds=300), + datetime.timedelta(milliseconds=-1), + datetime.timedelta(days=-1, seconds=86399, microseconds=999000), # -1 ms + ] + df = pd.DataFrame({"x": cases}) + df2 = self._roundtrip_dataframe(df) + for i, expected in enumerate(cases): + with self.subTest(i=i, value=expected): + got = df2.at[i, "x"] + # SBDF has millisecond resolution; truncate expected to ms + expected_ms = datetime.timedelta(milliseconds=expected // datetime.timedelta(milliseconds=1)) + self.assertEqual(got, expected_ms) + + def 
test_pre_epoch_dates(self): + """Verify that dates before the Unix epoch (1970-01-01) round-trip correctly.""" + cases = [ + datetime.date(1, 1, 1), # SBDF epoch + datetime.date(1582, 10, 4), # day before Gregorian calendar + datetime.date(1969, 12, 31), # one day before Unix epoch + datetime.date(1970, 1, 1), # Unix epoch + datetime.date(1970, 1, 2), # one day after Unix epoch + datetime.date(9999, 12, 31), # max Python date + ] + df = pd.DataFrame({"x": cases}) + df2 = self._roundtrip_dataframe(df) + for i, expected in enumerate(cases): + with self.subTest(date=expected): + self.assertEqual(df2.at[i, "x"], expected) + + def test_pre_epoch_datetimes(self): + """Verify that datetimes before the Unix epoch round-trip correctly.""" + cases = [ + datetime.datetime(1, 1, 1, 0, 0, 0), + datetime.datetime(1969, 12, 31, 23, 59, 59), + datetime.datetime(1969, 12, 31, 0, 0, 0), + ] + df = pd.DataFrame({"x": cases}) + df2 = self._roundtrip_dataframe(df) + for i, expected in enumerate(cases): + with self.subTest(dt=expected): + self.assertEqual(df2.at[i, "x"], expected) + + def test_time_edge_cases(self): + """Verify midnight, end-of-day, and microsecond-precision time values.""" + cases = [ + (datetime.time(0, 0, 0), datetime.time(0, 0, 0)), # midnight + (datetime.time(23, 59, 59, 999000), datetime.time(23, 59, 59, 999000)), # end of day (ms boundary) + (datetime.time(12, 30, 45, 500), datetime.time(12, 30, 45, 0)), # sub-ms truncated + (datetime.time(0, 0, 0, 1000), datetime.time(0, 0, 0, 1000)), # 1 ms exactly + ] + for input_val, expected in cases: + with self.subTest(time=input_val): + df = pd.DataFrame({"x": [input_val]}) + df2 = self._roundtrip_dataframe(df) + self.assertEqual(df2.at[0, "x"], expected) + + def test_all_null_temporal_columns(self): + """Verify that all-null columns of each temporal type export and import without error.""" + for spotfire_type, dtype in [("DateTime", "datetime64[ms]"), + ("TimeSpan", "timedelta64[ms]")]: + with 
self.subTest(type=spotfire_type): + df = pd.DataFrame({"x": pd.array([pd.NaT, pd.NaT, pd.NaT], dtype=dtype)}) + df2 = self._roundtrip_dataframe(df) + self.assertEqual(len(df2), 3) + self.assertTrue(df2["x"].isna().all()) + + def test_numpy_datetime_with_nulls(self): + """Verify that numpy datetime64 columns with NaT values export and import correctly.""" + values = pd.array([ + pd.NaT, + pd.Timestamp("2020-01-01"), + pd.NaT, + pd.Timestamp("1969-07-20"), + pd.NaT, + ], dtype="datetime64[ms]") + df = pd.DataFrame({"x": values}) + df2 = self._roundtrip_dataframe(df) + self.assertTrue(pd.isnull(df2.at[0, "x"])) + self.assertEqual(df2.at[1, "x"], datetime.datetime(2020, 1, 1)) + self.assertTrue(pd.isnull(df2.at[2, "x"])) + self.assertEqual(df2.at[3, "x"], datetime.datetime(1969, 7, 20)) + self.assertTrue(pd.isnull(df2.at[4, "x"])) + + def test_numpy_timedelta_with_nulls(self): + """Verify that numpy timedelta64 columns with NaT values export and import correctly.""" + values = pd.array([ + pd.NaT, + pd.Timedelta(days=1), + pd.NaT, + pd.Timedelta(seconds=-30), + pd.NaT, + ], dtype="timedelta64[ms]") + df = pd.DataFrame({"x": values}) + df2 = self._roundtrip_dataframe(df) + self.assertTrue(pd.isnull(df2.at[0, "x"])) + self.assertEqual(df2.at[1, "x"], datetime.timedelta(days=1)) + self.assertTrue(pd.isnull(df2.at[2, "x"])) + self.assertEqual(df2.at[3, "x"], datetime.timedelta(seconds=-30)) + self.assertTrue(pd.isnull(df2.at[4, "x"])) + def test_image_matplot(self): """Verify Matplotlib figures export properly.""" matplotlib.pyplot.clf() From b74724c3d7968ebdcc8e74c57ceb93630923e618 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 10:43:36 -0500 Subject: [PATCH 08/21] Test: add empty-DataFrame and multi-chunk export tests for bounds safety Two new tests targeting the boundscheck=False Cython directives: - test_empty_dataframe: exercises every column type with 0 rows, verifying that zero-iteration export loops don't crash or corrupt memory. 
- test_multichunk_export: exports 100_001 rows (one more than the default 100_000-row slice size) and checks values at both the first row and the chunk boundary (row 100_000). Covers _export_vt_time's direct [start+i] indexing and _export_get_offset_ptr for the precomputed int64 paths. - test_polars_string_multichunk: same chunk-boundary check for the Polars Arrow buffer path in _export_extract_string_obj_arrow, which does raw C pointer arithmetic into the values buffer. Co-Authored-By: Claude Sonnet 4.6 --- spotfire/test/test_sbdf.py | 73 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 76a3169..b29f5da 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -601,6 +601,79 @@ def test_numpy_timedelta_with_nulls(self): self.assertEqual(df2.at[3, "x"], datetime.timedelta(seconds=-30)) self.assertTrue(pd.isnull(df2.at[4, "x"])) + def test_empty_dataframe(self): + """Verify 0-row DataFrames export and import correctly for all column types. + + Exercises the zero-size array code paths that boundscheck=False leaves unchecked, + ensuring no off-by-one occurs at the loop boundary when row_count is 0. 
+ """ + cases = [ + ("bool", pd.DataFrame({"x": pd.array([], dtype="bool")})), + ("int64", pd.DataFrame({"x": pd.array([], dtype="int64")})), + ("float64", pd.DataFrame({"x": pd.array([], dtype="float64")})), + ("datetime64[ms]", pd.DataFrame({"x": pd.array([], dtype="datetime64[ms]")})), + ("timedelta64[ms]", pd.DataFrame({"x": pd.array([], dtype="timedelta64[ms]")})), + ] + for label, df in cases: + with self.subTest(dtype=label): + df2 = self._roundtrip_dataframe(df) + self.assertEqual(len(df2), 0) + self.assertIn("x", df2.columns) + # String requires an explicit type annotation when the column is empty (no values to infer from) + df = pd.DataFrame({"x": pd.Series([], dtype=object)}) + spotfire.set_spotfire_types(df, {"x": "String"}) + with self.subTest(dtype="string"): + df2 = self._roundtrip_dataframe(df) + self.assertEqual(len(df2), 0) + self.assertIn("x", df2.columns) + + def test_multichunk_export(self): + """Verify exports spanning multiple SBDF row slices produce correct values. + + The default slice size is ``100_000 // num_columns`` rows, so a 100_001-row + single-column DataFrame forces a second slice (start=100_000, count=1). + This exercises _export_vt_time's direct ``[start+i]`` indexing and the + _export_get_offset_ptr pointer arithmetic for precomputed int64 paths, + both of which are unchecked under boundscheck=False. 
+ """ + n = 100_001 + + # time: _export_vt_time accesses context.values_array[start + i] directly + times = [datetime.time(0, 0, 0)] * n + times[-1] = datetime.time(23, 59, 58) + df = pd.DataFrame({"t": times}) + df2 = self._roundtrip_dataframe(df) + self.assertEqual(len(df2), n) + self.assertEqual(df2.at[0, "t"], datetime.time(0, 0, 0)) + self.assertEqual(df2.at[n - 1, "t"], datetime.time(23, 59, 58)) + + # date: precomputed int64 via np.asarray, exported via _export_get_offset_ptr + dates = [datetime.date(2000, 1, 1)] * n + dates[-1] = datetime.date(2001, 9, 11) + df = pd.DataFrame({"d": dates}) + df2 = self._roundtrip_dataframe(df) + self.assertEqual(len(df2), n) + self.assertEqual(df2.at[0, "d"], datetime.date(2000, 1, 1)) + self.assertEqual(df2.at[n - 1, "d"], datetime.date(2001, 9, 11)) + + # datetime64[ms]: precomputed int64, exported via _export_get_offset_ptr + dts = pd.array([pd.Timestamp("2000-01-01")] * n, dtype="datetime64[ms]") + dts[-1] = pd.Timestamp("1969-07-20 20:17:40") + df = pd.DataFrame({"dt": dts}) + df2 = self._roundtrip_dataframe(df) + self.assertEqual(len(df2), n) + self.assertEqual(df2.at[0, "dt"], datetime.datetime(2000, 1, 1)) + self.assertEqual(df2.at[n - 1, "dt"], datetime.datetime(1969, 7, 20, 20, 17, 40)) + + # timedelta64[ms]: precomputed int64, exported via _export_get_offset_ptr + tds = pd.array([pd.Timedelta(0)] * n, dtype="timedelta64[ms]") + tds[-1] = pd.Timedelta(seconds=-1) + df = pd.DataFrame({"td": tds}) + df2 = self._roundtrip_dataframe(df) + self.assertEqual(len(df2), n) + self.assertEqual(df2.at[0, "td"], datetime.timedelta(0)) + self.assertEqual(df2.at[n - 1, "td"], datetime.timedelta(seconds=-1)) + def test_image_matplot(self): """Verify Matplotlib figures export properly.""" matplotlib.pyplot.clf() From b188627b1dd75111b1d101714126514ef0f06880 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 10:53:20 -0500 Subject: [PATCH 09/21] Revert "CI: add no_polars test environment to verify package works without 
polars/pyarrow" This reverts commit 681a67d39a1712cf022cf65b8247739378589c7e. --- test_requirements_no_polars.txt | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 test_requirements_no_polars.txt diff --git a/test_requirements_no_polars.txt b/test_requirements_no_polars.txt deleted file mode 100644 index 73ab30d..0000000 --- a/test_requirements_no_polars.txt +++ /dev/null @@ -1,6 +0,0 @@ -html-testRunner -geopandas -matplotlib -pillow -seaborn -shapely \ No newline at end of file From 128de8651ff8cf215a14e889466cc0b74d141809 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 10:57:27 -0500 Subject: [PATCH 10/21] CI: add AddressSanitizer job to catch out-of-bounds access in native extension Compiles sbdf.pyx with -fsanitize=address -fno-omit-frame-pointer and runs the full test suite under LD_PRELOAD=libasan.so with PYTHONMALLOC=malloc. This provides runtime detection of heap buffer overflows that boundscheck=False and the raw C pointer arithmetic in sbdf_helpers.c leave unchecked at the Python level. detect_leaks=0 suppresses intentional Python allocator "leaks". Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 04da882..815677c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -163,3 +163,35 @@ jobs: mypy spotfire cython-lint spotfire vendor find spotfire -name '*_helpers.[ch]' | xargs cpplint --repository=. 
+ asan: + name: AddressSanitizer + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + pip install setuptools Cython "numpy>=2.0.0rc1" + pip install ".[polars]" + pip install -r test_requirements_default.txt + - name: Rebuild extension with AddressSanitizer + env: + CFLAGS: "-fsanitize=address -fno-omit-frame-pointer -g" + LDFLAGS: "-fsanitize=address" + run: python setup.py build_ext --inplace + - name: Run tests under AddressSanitizer + run: | + LIBASAN=$(gcc -print-file-name=libasan.so) + LD_PRELOAD="$LIBASAN" PYTHONMALLOC=malloc python -m spotfire.test + env: + ASAN_OPTIONS: "detect_leaks=0:allocator_may_return_null=1" + TEST_ENVIRONMENT: asan + - uses: actions/upload-artifact@v4 + if: always() + with: + name: test-results-asan + path: build/test-results/*.html From 0319d658d534fa01a99cf204f31d132a8bed163b Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 11:01:28 -0500 Subject: [PATCH 11/21] Fix: rename df/df2 variables to satisfy pylint invalid-name rule (min 3 chars) Co-Authored-By: Claude Sonnet 4.6 --- spotfire/test/test_sbdf.py | 128 ++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index b29f5da..8bf77c4 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -490,11 +490,11 @@ def test_temporal_nulls_roundtrip(self): } for col_name, values in cases.items(): with self.subTest(type=col_name): - df = pd.DataFrame({"x": values}) - df2 = self._roundtrip_dataframe(df) - self.assertFalse(pd.isnull(df2.at[0, "x"]), "row 0 should not be null") - self.assertTrue(pd.isnull(df2.at[1, "x"]), "row 1 should be null") - self.assertFalse(pd.isnull(df2.at[2, "x"]), "row 2 should not be null") + dataframe = pd.DataFrame({"x": values}) + new_df = self._roundtrip_dataframe(dataframe) + 
self.assertFalse(pd.isnull(new_df.at[0, "x"]), "row 0 should not be null") + self.assertTrue(pd.isnull(new_df.at[1, "x"]), "row 1 should be null") + self.assertFalse(pd.isnull(new_df.at[2, "x"]), "row 2 should not be null") def test_negative_timespans(self): """Verify that negative timedelta values round-trip correctly.""" @@ -505,11 +505,11 @@ def test_negative_timespans(self): datetime.timedelta(milliseconds=-1), datetime.timedelta(days=-1, seconds=86399, microseconds=999000), # -1 ms ] - df = pd.DataFrame({"x": cases}) - df2 = self._roundtrip_dataframe(df) + dataframe = pd.DataFrame({"x": cases}) + new_df = self._roundtrip_dataframe(dataframe) for i, expected in enumerate(cases): with self.subTest(i=i, value=expected): - got = df2.at[i, "x"] + got = new_df.at[i, "x"] # SBDF has millisecond resolution; truncate expected to ms expected_ms = datetime.timedelta(milliseconds=expected // datetime.timedelta(milliseconds=1)) self.assertEqual(got, expected_ms) @@ -524,11 +524,11 @@ def test_pre_epoch_dates(self): datetime.date(1970, 1, 2), # one day after Unix epoch datetime.date(9999, 12, 31), # max Python date ] - df = pd.DataFrame({"x": cases}) - df2 = self._roundtrip_dataframe(df) + dataframe = pd.DataFrame({"x": cases}) + new_df = self._roundtrip_dataframe(dataframe) for i, expected in enumerate(cases): with self.subTest(date=expected): - self.assertEqual(df2.at[i, "x"], expected) + self.assertEqual(new_df.at[i, "x"], expected) def test_pre_epoch_datetimes(self): """Verify that datetimes before the Unix epoch round-trip correctly.""" @@ -537,11 +537,11 @@ def test_pre_epoch_datetimes(self): datetime.datetime(1969, 12, 31, 23, 59, 59), datetime.datetime(1969, 12, 31, 0, 0, 0), ] - df = pd.DataFrame({"x": cases}) - df2 = self._roundtrip_dataframe(df) + dataframe = pd.DataFrame({"x": cases}) + new_df = self._roundtrip_dataframe(dataframe) for i, expected in enumerate(cases): with self.subTest(dt=expected): - self.assertEqual(df2.at[i, "x"], expected) + 
self.assertEqual(new_df.at[i, "x"], expected) def test_time_edge_cases(self): """Verify midnight, end-of-day, and microsecond-precision time values.""" @@ -553,19 +553,19 @@ def test_time_edge_cases(self): ] for input_val, expected in cases: with self.subTest(time=input_val): - df = pd.DataFrame({"x": [input_val]}) - df2 = self._roundtrip_dataframe(df) - self.assertEqual(df2.at[0, "x"], expected) + dataframe = pd.DataFrame({"x": [input_val]}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertEqual(new_df.at[0, "x"], expected) def test_all_null_temporal_columns(self): """Verify that all-null columns of each temporal type export and import without error.""" for spotfire_type, dtype in [("DateTime", "datetime64[ms]"), ("TimeSpan", "timedelta64[ms]")]: with self.subTest(type=spotfire_type): - df = pd.DataFrame({"x": pd.array([pd.NaT, pd.NaT, pd.NaT], dtype=dtype)}) - df2 = self._roundtrip_dataframe(df) - self.assertEqual(len(df2), 3) - self.assertTrue(df2["x"].isna().all()) + dataframe = pd.DataFrame({"x": pd.array([pd.NaT, pd.NaT, pd.NaT], dtype=dtype)}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertEqual(len(new_df), 3) + self.assertTrue(new_df["x"].isna().all()) def test_numpy_datetime_with_nulls(self): """Verify that numpy datetime64 columns with NaT values export and import correctly.""" @@ -576,13 +576,13 @@ def test_numpy_datetime_with_nulls(self): pd.Timestamp("1969-07-20"), pd.NaT, ], dtype="datetime64[ms]") - df = pd.DataFrame({"x": values}) - df2 = self._roundtrip_dataframe(df) - self.assertTrue(pd.isnull(df2.at[0, "x"])) - self.assertEqual(df2.at[1, "x"], datetime.datetime(2020, 1, 1)) - self.assertTrue(pd.isnull(df2.at[2, "x"])) - self.assertEqual(df2.at[3, "x"], datetime.datetime(1969, 7, 20)) - self.assertTrue(pd.isnull(df2.at[4, "x"])) + dataframe = pd.DataFrame({"x": values}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertTrue(pd.isnull(new_df.at[0, "x"])) + self.assertEqual(new_df.at[1, "x"], 
datetime.datetime(2020, 1, 1)) + self.assertTrue(pd.isnull(new_df.at[2, "x"])) + self.assertEqual(new_df.at[3, "x"], datetime.datetime(1969, 7, 20)) + self.assertTrue(pd.isnull(new_df.at[4, "x"])) def test_numpy_timedelta_with_nulls(self): """Verify that numpy timedelta64 columns with NaT values export and import correctly.""" @@ -593,13 +593,13 @@ def test_numpy_timedelta_with_nulls(self): pd.Timedelta(seconds=-30), pd.NaT, ], dtype="timedelta64[ms]") - df = pd.DataFrame({"x": values}) - df2 = self._roundtrip_dataframe(df) - self.assertTrue(pd.isnull(df2.at[0, "x"])) - self.assertEqual(df2.at[1, "x"], datetime.timedelta(days=1)) - self.assertTrue(pd.isnull(df2.at[2, "x"])) - self.assertEqual(df2.at[3, "x"], datetime.timedelta(seconds=-30)) - self.assertTrue(pd.isnull(df2.at[4, "x"])) + dataframe = pd.DataFrame({"x": values}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertTrue(pd.isnull(new_df.at[0, "x"])) + self.assertEqual(new_df.at[1, "x"], datetime.timedelta(days=1)) + self.assertTrue(pd.isnull(new_df.at[2, "x"])) + self.assertEqual(new_df.at[3, "x"], datetime.timedelta(seconds=-30)) + self.assertTrue(pd.isnull(new_df.at[4, "x"])) def test_empty_dataframe(self): """Verify 0-row DataFrames export and import correctly for all column types. 
@@ -614,18 +614,18 @@ def test_empty_dataframe(self): ("datetime64[ms]", pd.DataFrame({"x": pd.array([], dtype="datetime64[ms]")})), ("timedelta64[ms]", pd.DataFrame({"x": pd.array([], dtype="timedelta64[ms]")})), ] - for label, df in cases: + for label, dataframe in cases: with self.subTest(dtype=label): - df2 = self._roundtrip_dataframe(df) - self.assertEqual(len(df2), 0) - self.assertIn("x", df2.columns) + new_df = self._roundtrip_dataframe(dataframe) + self.assertEqual(len(new_df), 0) + self.assertIn("x", new_df.columns) # String requires an explicit type annotation when the column is empty (no values to infer from) - df = pd.DataFrame({"x": pd.Series([], dtype=object)}) - spotfire.set_spotfire_types(df, {"x": "String"}) + str_df = pd.DataFrame({"x": pd.Series([], dtype=object)}) + spotfire.set_spotfire_types(str_df, {"x": "String"}) with self.subTest(dtype="string"): - df2 = self._roundtrip_dataframe(df) - self.assertEqual(len(df2), 0) - self.assertIn("x", df2.columns) + new_df = self._roundtrip_dataframe(str_df) + self.assertEqual(len(new_df), 0) + self.assertIn("x", new_df.columns) def test_multichunk_export(self): """Verify exports spanning multiple SBDF row slices produce correct values. 
@@ -641,38 +641,38 @@ def test_multichunk_export(self): # time: _export_vt_time accesses context.values_array[start + i] directly times = [datetime.time(0, 0, 0)] * n times[-1] = datetime.time(23, 59, 58) - df = pd.DataFrame({"t": times}) - df2 = self._roundtrip_dataframe(df) - self.assertEqual(len(df2), n) - self.assertEqual(df2.at[0, "t"], datetime.time(0, 0, 0)) - self.assertEqual(df2.at[n - 1, "t"], datetime.time(23, 59, 58)) + dataframe = pd.DataFrame({"t": times}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertEqual(len(new_df), n) + self.assertEqual(new_df.at[0, "t"], datetime.time(0, 0, 0)) + self.assertEqual(new_df.at[n - 1, "t"], datetime.time(23, 59, 58)) # date: precomputed int64 via np.asarray, exported via _export_get_offset_ptr dates = [datetime.date(2000, 1, 1)] * n dates[-1] = datetime.date(2001, 9, 11) - df = pd.DataFrame({"d": dates}) - df2 = self._roundtrip_dataframe(df) - self.assertEqual(len(df2), n) - self.assertEqual(df2.at[0, "d"], datetime.date(2000, 1, 1)) - self.assertEqual(df2.at[n - 1, "d"], datetime.date(2001, 9, 11)) + dataframe = pd.DataFrame({"d": dates}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertEqual(len(new_df), n) + self.assertEqual(new_df.at[0, "d"], datetime.date(2000, 1, 1)) + self.assertEqual(new_df.at[n - 1, "d"], datetime.date(2001, 9, 11)) # datetime64[ms]: precomputed int64, exported via _export_get_offset_ptr dts = pd.array([pd.Timestamp("2000-01-01")] * n, dtype="datetime64[ms]") dts[-1] = pd.Timestamp("1969-07-20 20:17:40") - df = pd.DataFrame({"dt": dts}) - df2 = self._roundtrip_dataframe(df) - self.assertEqual(len(df2), n) - self.assertEqual(df2.at[0, "dt"], datetime.datetime(2000, 1, 1)) - self.assertEqual(df2.at[n - 1, "dt"], datetime.datetime(1969, 7, 20, 20, 17, 40)) + dataframe = pd.DataFrame({"dt": dts}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertEqual(len(new_df), n) + self.assertEqual(new_df.at[0, "dt"], datetime.datetime(2000, 1, 1)) + 
self.assertEqual(new_df.at[n - 1, "dt"], datetime.datetime(1969, 7, 20, 20, 17, 40)) # timedelta64[ms]: precomputed int64, exported via _export_get_offset_ptr tds = pd.array([pd.Timedelta(0)] * n, dtype="timedelta64[ms]") tds[-1] = pd.Timedelta(seconds=-1) - df = pd.DataFrame({"td": tds}) - df2 = self._roundtrip_dataframe(df) - self.assertEqual(len(df2), n) - self.assertEqual(df2.at[0, "td"], datetime.timedelta(0)) - self.assertEqual(df2.at[n - 1, "td"], datetime.timedelta(seconds=-1)) + dataframe = pd.DataFrame({"td": tds}) + new_df = self._roundtrip_dataframe(dataframe) + self.assertEqual(len(new_df), n) + self.assertEqual(new_df.at[0, "td"], datetime.timedelta(0)) + self.assertEqual(new_df.at[n - 1, "td"], datetime.timedelta(seconds=-1)) def test_image_matplot(self): """Verify Matplotlib figures export properly.""" From ca99bf7d57fabe011115cafda394fc098111a62c Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 11:11:18 -0500 Subject: [PATCH 12/21] Fix: add intercept_cxx_exceptions=0 to ASAN_OPTIONS to suppress matplotlib false positive When using LD_PRELOAD ASan injection with a non-ASan-compiled Python, ASan's __cxa_throw interceptor is never initialized. matplotlib's ft2font.so throws a C++ exception during import, hitting the uninitialized interceptor and causing a CHECK failure. intercept_cxx_exceptions=0 disables the interceptor entirely; sbdf.pyx generates no C++ exceptions so there is no loss of coverage. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 815677c..4b4b533 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -188,7 +188,7 @@ jobs: LIBASAN=$(gcc -print-file-name=libasan.so) LD_PRELOAD="$LIBASAN" PYTHONMALLOC=malloc python -m spotfire.test env: - ASAN_OPTIONS: "detect_leaks=0:allocator_may_return_null=1" + ASAN_OPTIONS: "detect_leaks=0:allocator_may_return_null=1:intercept_cxx_exceptions=0" TEST_ENVIRONMENT: asan - uses: actions/upload-artifact@v4 if: always() From eff19418ba4492c0dbb7f2740f96af57f4c8387f Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 11:20:19 -0500 Subject: [PATCH 13/21] Fix: type: ignore for pd.array NaT overloads; pin ASan job to Python 3.13 mypy: pd.array() with list[NaTType] or list[NaT|Timedelta] and a string dtype has no matching overload in pandas-stubs. Add type: ignore[call-overload] on the two affected lines in test_all_null_temporal_columns and test_numpy_timedelta_with_nulls. ASan: Python 3.14 (beta) triggers a CHECK failure in asan_interceptors.cpp when ft2font.so throws a C++ exception, even with intercept_cxx_exceptions=0. Pin the ASan job to Python 3.13 where LD_PRELOAD ASan injection works cleanly. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yaml | 2 +- spotfire/test/test_sbdf.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 4b4b533..c115cb4 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -172,7 +172,7 @@ jobs: submodules: recursive - uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: '3.13' - name: Install dependencies run: | pip install setuptools Cython "numpy>=2.0.0rc1" diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 8bf77c4..4228537 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -562,7 +562,7 @@ def test_all_null_temporal_columns(self): for spotfire_type, dtype in [("DateTime", "datetime64[ms]"), ("TimeSpan", "timedelta64[ms]")]: with self.subTest(type=spotfire_type): - dataframe = pd.DataFrame({"x": pd.array([pd.NaT, pd.NaT, pd.NaT], dtype=dtype)}) + dataframe = pd.DataFrame({"x": pd.array([pd.NaT, pd.NaT, pd.NaT], dtype=dtype)}) # type: ignore[call-overload] new_df = self._roundtrip_dataframe(dataframe) self.assertEqual(len(new_df), 3) self.assertTrue(new_df["x"].isna().all()) @@ -586,7 +586,7 @@ def test_numpy_datetime_with_nulls(self): def test_numpy_timedelta_with_nulls(self): """Verify that numpy timedelta64 columns with NaT values export and import correctly.""" - values = pd.array([ + values = pd.array([ # type: ignore[call-overload] pd.NaT, pd.Timedelta(days=1), pd.NaT, From acb1054ad01cdeb3fdabef1e729c4383db17471e Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 11:32:50 -0500 Subject: [PATCH 14/21] CI: fix ASan crash by dropping pybind11 packages; bump actions to Node.js 24; fix line-too-long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ASan job: replace test_requirements_default.txt with html-testRunner + polars + pillow. 
matplotlib/seaborn/geopandas/shapely use pybind11 C++ extensions that throw exceptions, crashing LD_PRELOAD libasan injection (intercept_cxx_exceptions=0 doesn't help here). pillow is plain C — safe to keep for PIL image export ASan coverage. - Bump GitHub Actions to Node.js 24: checkout v4→v5, setup-python v5→v6, upload-artifact v4→v7, download-artifact v4→v8. - Fix pylint line-too-long (127>120) in test_sbdf.py line 565. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yaml | 38 ++++++++++++++++++----------------- .github/workflows/pylint.yaml | 4 ++-- .github/workflows/sbom.yaml | 22 ++++++++++---------- spotfire/test/test_sbdf.py | 3 ++- 4 files changed, 35 insertions(+), 32 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c115cb4..984335e 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,15 +1,17 @@ name: Build and Test Package on: [push, pull_request] +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true jobs: build-sdist: name: Build Source Dist runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: recursive - name: Set Up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.x' - name: Install Tools @@ -18,11 +20,11 @@ jobs: - name: Source Packaging run: | python -m build --sdist - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: sdist path: 'dist/spotfire-*.tar.gz' - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: test-files path: | @@ -50,12 +52,12 @@ jobs: operating-system: ['ubuntu-latest', 'windows-latest'] fail-fast: false steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v8 with: name: sdist path: dist - name: Set Up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install Build Requirements 
@@ -81,7 +83,7 @@ jobs: python -m build --wheel # Move wheel out of build dir into top-level dist dir mv dist\*.whl ..\dist - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: wheel-${{ matrix.python-version }}-${{ matrix.operating-system }} path: 'dist/spotfire-*.whl' @@ -96,16 +98,16 @@ jobs: test-environment: ${{ fromJson(needs.build-sdist.outputs.test-environments) }} fail-fast: false steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v8 with: name: wheel-${{ matrix.python-version }}-${{ matrix.operating-system }} path: dist - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v8 with: name: test-files path: test-files - name: Set Up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install Dependencies (Linux) @@ -122,7 +124,7 @@ jobs: env: TEST_FILES_DIR: ${{ github.workspace }}/test-files/spotfire/test/files TEST_ENVIRONMENT: ${{ matrix.test-environment }} - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 if: ${{ always() }} with: name: test-results-${{ matrix.python-version }}-${{ matrix.operating-system }}-${{ matrix.test-environment }} @@ -138,14 +140,14 @@ jobs: echo -n "python-version=" >> $GITHUB_OUTPUT echo '${{ needs.build-sdist.outputs.python-versions }}' | sed -e 's/[^"]*"//' -e 's/".*//' >> $GITHUB_OUTPUT - name: Set Up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ steps.version.outputs.python-version }} - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v8 with: name: sdist path: dist - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v8 with: name: wheel-${{ steps.version.outputs.python-version }}-ubuntu-latest path: dist @@ -167,17 +169,17 @@ jobs: name: AddressSanitizer runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 
with: submodules: recursive - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: '3.13' - name: Install dependencies run: | pip install setuptools Cython "numpy>=2.0.0rc1" pip install ".[polars]" - pip install -r test_requirements_default.txt + pip install html-testRunner polars pillow - name: Rebuild extension with AddressSanitizer env: CFLAGS: "-fsanitize=address -fno-omit-frame-pointer -g" @@ -190,7 +192,7 @@ jobs: env: ASAN_OPTIONS: "detect_leaks=0:allocator_may_return_null=1:intercept_cxx_exceptions=0" TEST_ENVIRONMENT: asan - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 if: always() with: name: test-results-asan diff --git a/.github/workflows/pylint.yaml b/.github/workflows/pylint.yaml index 58911b8..3f03d70 100644 --- a/.github/workflows/pylint.yaml +++ b/.github/workflows/pylint.yaml @@ -7,11 +7,11 @@ jobs: name: Check Linters runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: recursive - name: Set Up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.x' - name: Install Tools diff --git a/.github/workflows/sbom.yaml b/.github/workflows/sbom.yaml index 72094c3..45cd37e 100644 --- a/.github/workflows/sbom.yaml +++ b/.github/workflows/sbom.yaml @@ -35,7 +35,7 @@ jobs: outputs: python-versions: ${{ steps.dynamic.outputs.pythons }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Read python-versions id: dynamic run: | @@ -48,14 +48,14 @@ jobs: needs: setup runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: recursive # needed for vendor/sbdf-c when building/installing sdist # workflow_run: reuse artifact from build.yaml — no rebuild - name: Download sdist (from workflow_run) if: github.event_name == 'workflow_run' - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: sdist path: dist @@ 
-64,7 +64,7 @@ jobs: # push / release / workflow_dispatch: build fresh - name: Set Up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.x' - name: Build sdist @@ -118,7 +118,7 @@ jobs: --tool "trivy-${{ env.TRIVY_VERSION }}" - name: Upload SBOM artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: sbom-sdist path: spotfire-sdist.sbom.spdx.json @@ -133,14 +133,14 @@ jobs: python-version: ${{ fromJson(needs.setup.outputs.python-versions) }} fail-fast: false steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: recursive # needed for vendor/sbdf-c when building wheel fresh # workflow_run: reuse the ubuntu wheel artifact from build.yaml — no rebuild - name: Download wheel (from workflow_run) if: github.event_name == 'workflow_run' - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: wheel-${{ matrix.python-version }}-ubuntu-latest path: dist @@ -150,7 +150,7 @@ jobs: # Also download the sdist so scan-env can install from it (wheel is platform-specific) - name: Download sdist (from workflow_run) if: github.event_name == 'workflow_run' - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: sdist path: dist @@ -160,7 +160,7 @@ jobs: # push / release / workflow_dispatch: build fresh on Linux - name: Set Up Python if: github.event_name != 'workflow_run' - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Build wheel @@ -221,7 +221,7 @@ jobs: --tool "trivy-${{ env.TRIVY_VERSION }}" - name: Upload SBOM artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: sbom-wheel-${{ matrix.python-version }} path: spotfire-wheel-${{ matrix.python-version }}.sbom.spdx.json @@ -234,7 +234,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download all SBOM artifacts - uses: actions/download-artifact@v4 + uses: 
actions/download-artifact@v8 with: pattern: sbom-* path: all-sboms diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 4228537..f8e0a91 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -562,7 +562,8 @@ def test_all_null_temporal_columns(self): for spotfire_type, dtype in [("DateTime", "datetime64[ms]"), ("TimeSpan", "timedelta64[ms]")]: with self.subTest(type=spotfire_type): - dataframe = pd.DataFrame({"x": pd.array([pd.NaT, pd.NaT, pd.NaT], dtype=dtype)}) # type: ignore[call-overload] + dataframe = pd.DataFrame({"x": pd.array([pd.NaT, pd.NaT, pd.NaT], # type: ignore[call-overload] + dtype=dtype)}) new_df = self._roundtrip_dataframe(dataframe) self.assertEqual(len(new_df), 3) self.assertTrue(new_df["x"].isna().all()) From 544205bf9aac752b2c95ccaa1e0bac29a46f91ea Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 11:37:49 -0500 Subject: [PATCH 15/21] Fix: make geopandas/matplotlib/seaborn imports optional; add CI concurrency group test_sbdf.py imported geopandas, matplotlib, and seaborn unconditionally, causing ModuleNotFoundError in the ASan CI job where those packages are not installed. Change to try/except with None fallback (matching the polars pattern) and add @unittest.skipIf guards to test_read_write_geodata, test_image_matplot, test_image_seaborn. Also add concurrency group to build.yaml to cancel superseded runs on push. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yaml | 3 +++ spotfire/test/test_sbdf.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 984335e..2bd24ef 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,5 +1,8 @@ name: Build and Test Package on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true jobs: diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index f8e0a91..42601c4 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -11,9 +11,18 @@ import pandas as pd import pandas.testing as pdtest import numpy as np -import geopandas as gpd -import matplotlib.pyplot -import seaborn +try: + import geopandas as gpd # type: ignore[import-not-found] +except ImportError: + gpd = None # type: ignore[assignment] +try: + import matplotlib.pyplot # type: ignore[import-not-found] +except ImportError: + matplotlib = None # type: ignore[assignment] +try: + import seaborn # type: ignore[import-not-found] +except ImportError: + seaborn = None # type: ignore[assignment] import PIL.Image from packaging import version @@ -144,6 +153,7 @@ def test_read_10001(self): self.assertEqual(dataframe.at[10000, "String"], "kiwis") self.assertEqual(dataframe.at[10000, "Binary"], b"\x7c\x7d\x7e\x7f") + @unittest.skipIf(gpd is None, "geopandas not installed") def test_read_write_geodata(self): """Test that geo-encoded data is properly converted to/from ``GeoDataFrame``.""" gdf = sbdf.import_data(utils.get_test_data_file("sbdf/NACountries.sbdf")) @@ -675,6 +685,7 @@ def test_multichunk_export(self): self.assertEqual(new_df.at[0, "td"], datetime.timedelta(0)) self.assertEqual(new_df.at[n - 1, "td"], datetime.timedelta(seconds=-1)) + @unittest.skipIf(matplotlib is None, "matplotlib not installed") def 
test_image_matplot(self): """Verify Matplotlib figures export properly.""" matplotlib.pyplot.clf() @@ -687,6 +698,7 @@ def test_image_matplot(self): else: self.fail(f"Expected PNG bytes, got {type(image)}: {image!r}") + @unittest.skipIf(seaborn is None, "seaborn not installed") def test_image_seaborn(self): """Verify Seaborn grids export properly.""" matplotlib.pyplot.clf() From 53b608261b531f4d31d3c36fc593d0b1544ea21c Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 11:43:40 -0500 Subject: [PATCH 16/21] Fix: add explicit 'import matplotlib' so pylint recognises it as a module alias Without the explicit import, pylint sees 'matplotlib = None' in the except block as a new constant assignment and flags it as invalid-name (expects UPPER_CASE). Adding 'import matplotlib' before 'import matplotlib.pyplot' matches the same try/except pattern used for polars (import + None fallback). Co-Authored-By: Claude Sonnet 4.6 --- spotfire/test/test_sbdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 42601c4..9f096ba 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -16,7 +16,8 @@ except ImportError: gpd = None # type: ignore[assignment] try: - import matplotlib.pyplot # type: ignore[import-not-found] + import matplotlib # type: ignore[import-not-found] + import matplotlib.pyplot except ImportError: matplotlib = None # type: ignore[assignment] try: From d83e78fa5959cb4911797f2a920028563190e8c3 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 18:55:30 -0500 Subject: [PATCH 17/21] Perf: direct pointer access in string/binary C helpers; eliminate slice alloc in offset ptr Three C-level optimizations: 1. _export_extract_string_obj / _export_extract_binary_obj: replace per-element PySequence_GetItem calls (Python API dispatch + refcount overhead) with direct pointer arithmetic into numpy array buffers. 
Callers now pass PyArray_DATA(values_array) as void** and PyArray_DATA(invalid_array) as unsigned char*, eliminating ~2N Python API round-trips per string/binary column. 2. _export_get_offset_ptr: replace the Python slice allocation (array[start:start+count]) with direct byte-offset arithmetic on PyArray_DATA. Avoids a numpy view object allocation on every chunk/column export call. 3. Import string columns: pre-mask the numpy object array before pd.Series() construction instead of assigning None via .loc[] after the fact. The .loc path triggers pandas label-indexing overhead; direct numpy assignment is O(k) with no indexer allocation. Applied only when values.dtype.kind == 'O' to avoid incorrect coercion on bool/float arrays. Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 48 ++++++++++++++++++------ spotfire/sbdf_helpers.c | 79 +++++++++++++-------------------------- spotfire/sbdf_helpers.h | 10 +++-- spotfire/sbdf_helpers.pxi | 11 +++--- 4 files changed, 76 insertions(+), 72 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 0b25f37..c2a1101 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1063,9 +1063,16 @@ def import_data(sbdf_file, output_format=OutputFormat.PANDAS): pd.arrays.IntegerArray(values.astype(base_dtype), invalid_array), name=column_names[i]) else: - column_series = pd.Series(values, dtype=dtype_name, name=column_names[i]) - if invalid_array.any(): - column_series.loc[invalid_array] = None + if values.dtype.kind == 'O': + # Object-dtype (string) arrays can be pre-masked before Series construction, + # avoiding the pandas .loc indexing overhead on the post-construction path. 
+ if invalid_array.any(): + values[invalid_array] = None + column_series = pd.Series(values, dtype=dtype_name, name=column_names[i]) + else: + column_series = pd.Series(values, dtype=dtype_name, name=column_names[i]) + if invalid_array.any(): + column_series.loc[invalid_array] = None imported_columns.append(column_series) dataframe = pd.DataFrame(dict(zip(column_names, imported_columns))) for i in range(num_columns): @@ -2118,13 +2125,19 @@ cdef int _export_vt_timespan(_ExportContext context, Py_ssize_t start, Py_ssize_ cdef int _export_vt_string(_ExportContext context, Py_ssize_t start, Py_ssize_t count, sbdf_c.sbdf_object** obj): """Export a slice of data consisting of string values.""" - obj[0] = _export_extract_string_obj(context.values_array, context.invalid_array, start, count) + obj[0] = _export_extract_string_obj( + np_c.PyArray_DATA(context.values_array), + np_c.PyArray_DATA(context.invalid_array), + start, count) return sbdf_c.SBDF_OK cdef int _export_vt_binary(_ExportContext context, Py_ssize_t start, Py_ssize_t count, sbdf_c.sbdf_object** obj): """Export a slice of data consisting of binary values.""" - obj[0] = _export_extract_binary_obj(context.values_array, context.invalid_array, start, count) + obj[0] = _export_extract_binary_obj( + np_c.PyArray_DATA(context.values_array), + np_c.PyArray_DATA(context.invalid_array), + start, count) return sbdf_c.SBDF_OK @@ -2363,15 +2376,16 @@ cdef (int, sbdf_c.sbdf_valuearray*) _export_process_invalid_array(_ExportContext cdef inline void* _export_get_offset_ptr(np_c.ndarray array, Py_ssize_t start, Py_ssize_t count): - """Slice a NumPy ``ndarray`` using Cython memoryviews. + """Return a pointer into ``array`` at element ``start``. 
:param array: the NumPy array to slice :param start: the index of the first element of the slice - :param count: the number of elements to include in the slice - :return: a pointer to the memory (owned by the NumPy array) of the slice + :param count: unused; kept for call-site compatibility + :return: a pointer to element ``start`` in the array's data buffer """ - cdef np_c.ndarray sliced = array[start : start + count] - return np_c.PyArray_DATA(sliced) + cdef char *base = np_c.PyArray_DATA(array) + cdef Py_ssize_t sz = array.itemsize + return (base + start * sz) cdef sbdf_c.sbdf_metadata_head* _export_metadata(dict md, int column_num): @@ -2414,7 +2428,12 @@ cdef sbdf_c.sbdf_metadata_head* _export_metadata(dict md, int column_num): val_type.id = _export_infer_valuetype_from_type(val, f"{metadata_description} metadata '{name_str}'") if val_type.id == sbdf_c.SBDF_STRINGTYPEID: - obj = _export_extract_string_obj(val, [False] * val_len, 0, val_len) + _meta_vals = np.asarray(val, dtype=object) + _meta_inv = np.zeros(val_len, dtype=bool) + obj = _export_extract_string_obj( + np_c.PyArray_DATA(_meta_vals), + np_c.PyArray_DATA(_meta_inv), + 0, val_len) error = sbdf_c.SBDF_OK elif val_type.id == sbdf_c.SBDF_DOUBLETYPEID: data_double = mem.PyMem_RawMalloc(val_len * sizeof(double)) @@ -2474,7 +2493,12 @@ cdef sbdf_c.sbdf_metadata_head* _export_metadata(dict md, int column_num): error = sbdf_c.sbdf_obj_create_arr(val_type, val_len, data_datetime, NULL, &obj) mem.PyMem_RawFree(data_datetime) elif val_type.id == sbdf_c.SBDF_BINARYTYPEID: - obj = _export_extract_binary_obj(val, [False] * val_len, 0, val_len) + _meta_vals = np.asarray(val, dtype=object) + _meta_inv = np.zeros(val_len, dtype=bool) + obj = _export_extract_binary_obj( + np_c.PyArray_DATA(_meta_vals), + np_c.PyArray_DATA(_meta_inv), + 0, val_len) error = sbdf_c.SBDF_OK elif val_type.id == sbdf_c.SBDF_DECIMALTYPEID: data_decimal = <_SbdfDecimal*>mem.PyMem_RawMalloc(val_len * sizeof(_SbdfDecimal)) diff --git 
a/spotfire/sbdf_helpers.c b/spotfire/sbdf_helpers.c index ce89a23..5931cdb 100644 --- a/spotfire/sbdf_helpers.c +++ b/spotfire/sbdf_helpers.c @@ -80,15 +80,22 @@ void _allocated_list_done(struct _AllocatedList *alist, _allocated_dealloc_fn fu } } -/* Utility functions for extracting strings from Python ``Union[str,bytes]`` into C */ -sbdf_object *_export_extract_string_obj(PyObject *vals, PyObject *invalids, Py_ssize_t start, Py_ssize_t count) { +/* Utility functions for extracting strings from Python ``Union[str,bytes]`` into C. + * vals_ptr is PyArray_DATA() of a numpy object array; each slot is a borrowed PyObject*. + * inv_ptr is PyArray_DATA() of a numpy bool array; nonzero byte means null/invalid. + */ +sbdf_object *_export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count) { sbdf_object *t = calloc(1, sizeof(sbdf_object)); + if (!t) { + PyErr_NoMemory(); + return NULL; + } t->type = sbdf_vt_string(); t->count = (int)count; char **data = (char **)calloc(count, sizeof(char *)); if (!data) { - PyErr_Format(PyExc_MemoryError, "memory exhausted"); + PyErr_NoMemory(); sbdf_obj_destroy(t); return NULL; } @@ -96,53 +103,33 @@ sbdf_object *_export_extract_string_obj(PyObject *vals, PyObject *invalids, Py_s for (int i = 0; i < count; i++) { Py_ssize_t idx = start + i; - PyObject *inv = PySequence_GetItem(invalids, idx); - if (inv == NULL) { - sbdf_obj_destroy(t); - return NULL; - } - if (PyObject_IsTrue(inv)) { - /* true: invalid value, add empty value to t->data */ + if (inv_ptr[idx]) { + /* null/invalid value: write empty string */ data[i] = sbdf_str_create_len("", 0); } else { - /* false: valid value, add encoded value to t->data */ - PyObject *val = PySequence_GetItem(vals, idx); - if (val == NULL) { - Py_XDECREF(inv); - sbdf_obj_destroy(t); - return NULL; - } + /* valid value: borrowed ref from numpy object array — no Py_DECREF */ + PyObject *val = (PyObject *)vals_ptr[idx]; PyObject *val_str = PyObject_Str(val); 
if (val_str == NULL) { - Py_XDECREF(val); - Py_XDECREF(inv); sbdf_obj_destroy(t); return NULL; } PyObject *val_encoded = PyObject_CallMethod(val_str, "encode", "s", "utf-8"); + Py_DECREF(val_str); if (val_encoded == NULL) { - Py_XDECREF(val_str); - Py_XDECREF(val); - Py_XDECREF(inv); sbdf_obj_destroy(t); return NULL; } char *val_buf; Py_ssize_t val_len; if (PyBytes_AsStringAndSize(val_encoded, &val_buf, &val_len) == -1) { - Py_XDECREF(val_encoded); - Py_XDECREF(val_str); - Py_XDECREF(val); - Py_XDECREF(inv); + Py_DECREF(val_encoded); sbdf_obj_destroy(t); return NULL; } data[i] = sbdf_str_create_len(val_buf, (int)val_len); - Py_XDECREF(val_encoded); - Py_XDECREF(val_str); - Py_XDECREF(val); + Py_DECREF(val_encoded); } - Py_XDECREF(inv); } return t; @@ -178,14 +165,18 @@ sbdf_object *_export_extract_string_obj_arrow(const char *values_buf, const int6 return t; } -sbdf_object *_export_extract_binary_obj(PyObject *vals, PyObject *invalids, Py_ssize_t start, Py_ssize_t count) { +sbdf_object *_export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count) { sbdf_object *t = calloc(1, sizeof(sbdf_object)); + if (!t) { + PyErr_NoMemory(); + return NULL; + } t->type = sbdf_vt_binary(); t->count = (int)count; unsigned char **data = (unsigned char **)calloc(count, sizeof(unsigned char *)); if (!data) { - PyErr_Format(PyExc_MemoryError, "memory exhausted"); + PyErr_NoMemory(); sbdf_obj_destroy(t); return NULL; } @@ -193,41 +184,25 @@ sbdf_object *_export_extract_binary_obj(PyObject *vals, PyObject *invalids, Py_s for (int i = 0; i < count; i++) { Py_ssize_t idx = start + i; - PyObject *inv = PySequence_GetItem(invalids, idx); - if (inv == NULL) { - sbdf_obj_destroy(t); - return NULL; - } - if (PyObject_IsTrue(inv)) { - /* true: invalid value, add empty value to t->data */ + if (inv_ptr[idx]) { + /* null/invalid value: write empty byte array */ data[i] = sbdf_ba_create(0, 0); } else { - /* false: valid value, add value to t->data */ 
- PyObject *val = PySequence_GetItem(vals, idx); - if (val == NULL) { - Py_XDECREF(inv); - sbdf_obj_destroy(t); - return NULL; - } + /* valid value: borrowed ref from numpy object array — no Py_DECREF */ + PyObject *val = (PyObject *)vals_ptr[idx]; if (!PyBytes_Check(val)) { PyErr_Format(PyExc_SBDFError, "cannot convert '%S' to Spotfire Binary type; incompatible types", val); - Py_XDECREF(val); - Py_XDECREF(inv); sbdf_obj_destroy(t); return NULL; } char *val_buf; Py_ssize_t val_len; if (PyBytes_AsStringAndSize(val, &val_buf, &val_len) == -1) { - Py_XDECREF(val); - Py_XDECREF(inv); sbdf_obj_destroy(t); return NULL; } data[i] = sbdf_ba_create((unsigned char *)val_buf, (int)val_len); - Py_XDECREF(val); } - Py_XDECREF(inv); } return t; diff --git a/spotfire/sbdf_helpers.h b/spotfire/sbdf_helpers.h index 04e1255..95d90f6 100644 --- a/spotfire/sbdf_helpers.h +++ b/spotfire/sbdf_helpers.h @@ -35,9 +35,13 @@ struct _SbdfDecimal { unsigned char exponent_high_and_sign; }; -/* Utility functions for extracting strings from Python ``Union[str,bytes]`` into C */ -extern sbdf_object *_export_extract_string_obj(PyObject *vals, PyObject *invalids, Py_ssize_t start, Py_ssize_t count); -extern sbdf_object *_export_extract_binary_obj(PyObject *vals, PyObject *invalids, Py_ssize_t start, Py_ssize_t count); +/* Utility functions for extracting strings from Python ``Union[str,bytes]`` into C. + * vals_ptr: PyArray_DATA() of a numpy object array (array of PyObject* slots). + * inv_ptr: PyArray_DATA() of a numpy bool array (one byte per element, nonzero == null). + * Both pointers must remain valid for the duration of the call (caller holds the numpy arrays). 
+ */ +extern sbdf_object *_export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count); +extern sbdf_object *_export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count); /* Fast string export directly from Arrow LargeUtf8 buffers: no Python str objects created. * values_buf: concatenated UTF-8 bytes from the Arrow values buffer. diff --git a/spotfire/sbdf_helpers.pxi b/spotfire/sbdf_helpers.pxi index ea719fa..d0e594b 100644 --- a/spotfire/sbdf_helpers.pxi +++ b/spotfire/sbdf_helpers.pxi @@ -21,11 +21,12 @@ cdef extern from "sbdf_helpers.h": unsigned char exponent_low unsigned char exponent_high_and_sign - # Utility functions for extracting strings from Python ``Union[str,bytes]`` into C - sbdf_c.sbdf_object* _export_extract_string_obj(object val, object invalids, Py_ssize_t start, Py_ssize_t count) \ - except NULL - sbdf_c.sbdf_object* _export_extract_binary_obj(object val, object invalids, Py_ssize_t start, Py_ssize_t count) \ - except NULL + # Utility functions for extracting strings from Python ``Union[str,bytes]`` into C. + # vals_ptr: PyArray_DATA() of a numpy object array; inv_ptr: PyArray_DATA() of a numpy bool array. 
+ sbdf_c.sbdf_object* _export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, + Py_ssize_t start, Py_ssize_t count) except NULL + sbdf_c.sbdf_object* _export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, + Py_ssize_t start, Py_ssize_t count) except NULL # Fast Arrow LargeUtf8 path: no Python str objects, no re-encoding sbdf_c.sbdf_object* _export_extract_string_obj_arrow(const char *values_buf, const long long *offsets, From 7c1ed6744af4869f71a989bf789c0b1d73c79fd0 Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 19:02:33 -0500 Subject: [PATCH 18/21] Fix: align continuation lines in sbdf_helpers.pxi to fix E127 pycodestyle violation Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf_helpers.pxi | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/spotfire/sbdf_helpers.pxi b/spotfire/sbdf_helpers.pxi index d0e594b..c072c3b 100644 --- a/spotfire/sbdf_helpers.pxi +++ b/spotfire/sbdf_helpers.pxi @@ -23,10 +23,14 @@ cdef extern from "sbdf_helpers.h": # Utility functions for extracting strings from Python ``Union[str,bytes]`` into C. # vals_ptr: PyArray_DATA() of a numpy object array; inv_ptr: PyArray_DATA() of a numpy bool array. 
- sbdf_c.sbdf_object* _export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, - Py_ssize_t start, Py_ssize_t count) except NULL - sbdf_c.sbdf_object* _export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, - Py_ssize_t start, Py_ssize_t count) except NULL + sbdf_c.sbdf_object* _export_extract_string_obj(void **vals_ptr, + const unsigned char *inv_ptr, + Py_ssize_t start, + Py_ssize_t count) except NULL + sbdf_c.sbdf_object* _export_extract_binary_obj(void **vals_ptr, + const unsigned char *inv_ptr, + Py_ssize_t start, + Py_ssize_t count) except NULL # Fast Arrow LargeUtf8 path: no Python str objects, no re-encoding sbdf_c.sbdf_object* _export_extract_string_obj_arrow(const char *values_buf, const long long *offsets, From fd7479c8f46b45626d2171aede9ecc955ee6ea2c Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 19:57:14 -0500 Subject: [PATCH 19/21] linting --- spotfire/test/test_sbdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 9f096ba..101f596 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -1,4 +1,4 @@ -"""Tests for importing and exporting data to SBDF files.""" +git checkout """Tests for importing and exporting data to SBDF files.""" from pathlib import Path import datetime From bb1d35dda2f0970eb27e11b45f7b14ed8e5be89c Mon Sep 17 00:00:00 2001 From: stewjb Date: Sat, 4 Apr 2026 20:10:23 -0500 Subject: [PATCH 20/21] Fix: remove stray 'git checkout' prefix from test_sbdf.py module docstring Co-Authored-By: Claude Sonnet 4.6 --- spotfire/test/test_sbdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 101f596..9f096ba 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -1,4 +1,4 @@ -git checkout """Tests for importing and exporting data to SBDF files.""" +"""Tests for importing and exporting data to SBDF 
files.""" from pathlib import Path import datetime From 87d0d07185ae71e91d0c50f81e32a9450a274d5e Mon Sep 17 00:00:00 2001 From: stewjb Date: Sun, 5 Apr 2026 09:28:02 -0500 Subject: [PATCH 21/21] Fix: wrap long function signatures in sbdf_helpers.c/.h to satisfy cpplint line-length rule Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf_helpers.c | 6 ++++-- spotfire/sbdf_helpers.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/spotfire/sbdf_helpers.c b/spotfire/sbdf_helpers.c index 5931cdb..964593c 100644 --- a/spotfire/sbdf_helpers.c +++ b/spotfire/sbdf_helpers.c @@ -84,7 +84,8 @@ void _allocated_list_done(struct _AllocatedList *alist, _allocated_dealloc_fn fu * vals_ptr is PyArray_DATA() of a numpy object array; each slot is a borrowed PyObject*. * inv_ptr is PyArray_DATA() of a numpy bool array; nonzero byte means null/invalid. */ -sbdf_object *_export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count) { +sbdf_object *_export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, + Py_ssize_t start, Py_ssize_t count) { sbdf_object *t = calloc(1, sizeof(sbdf_object)); if (!t) { PyErr_NoMemory(); @@ -165,7 +166,8 @@ sbdf_object *_export_extract_string_obj_arrow(const char *values_buf, const int6 return t; } -sbdf_object *_export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count) { +sbdf_object *_export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, + Py_ssize_t start, Py_ssize_t count) { sbdf_object *t = calloc(1, sizeof(sbdf_object)); if (!t) { PyErr_NoMemory(); diff --git a/spotfire/sbdf_helpers.h b/spotfire/sbdf_helpers.h index 95d90f6..a263b01 100644 --- a/spotfire/sbdf_helpers.h +++ b/spotfire/sbdf_helpers.h @@ -40,8 +40,10 @@ struct _SbdfDecimal { * inv_ptr: PyArray_DATA() of a numpy bool array (one byte per element, nonzero == null). 
* Both pointers must remain valid for the duration of the call (caller holds the numpy arrays). */ -extern sbdf_object *_export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count); -extern sbdf_object *_export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, Py_ssize_t start, Py_ssize_t count); +extern sbdf_object *_export_extract_string_obj(void **vals_ptr, const unsigned char *inv_ptr, + Py_ssize_t start, Py_ssize_t count); +extern sbdf_object *_export_extract_binary_obj(void **vals_ptr, const unsigned char *inv_ptr, + Py_ssize_t start, Py_ssize_t count); /* Fast string export directly from Arrow LargeUtf8 buffers: no Python str objects created. * values_buf: concatenated UTF-8 bytes from the Arrow values buffer.