From 6f8ea5173fdd1a6f562231f94c3b69330b781b12 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:00:44 +0000
Subject: [PATCH 01/10] vortex-row: crate scaffolding

Add an empty `vortex-row` crate with a minimal `initialize` stub so the
following commits can layer in the row-encoder, codec, scalar functions,
and per-encoding kernels without touching the workspace skeleton each
time. The crate is wired into the workspace members list and workspace
dependency table; `public-api.lock` is generated against the stub.

Signed-off-by: Claude <noreply@anthropic.com>
---
 Cargo.lock                 |  7 +++++++
 Cargo.toml                 |  2 ++
 vortex-row/Cargo.toml      | 20 ++++++++++++++++++++
 vortex-row/public-api.lock |  3 +++
 vortex-row/src/lib.rs      | 14 ++++++++++++++
 5 files changed, 46 insertions(+)
 create mode 100644 vortex-row/Cargo.toml
 create mode 100644 vortex-row/public-api.lock
 create mode 100644 vortex-row/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2819f2bacd0..63a608277d3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11034,6 +11034,13 @@ dependencies = [
  "vortex-tui",
 ]
 
+[[package]]
+name = "vortex-row"
+version = "0.1.0"
+dependencies = [
+ "vortex-session",
+]
+
 [[package]]
 name = "vortex-runend"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index fb87a953154..9fae5b564bf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,7 @@ members = [
     "vortex-mask",
     "vortex-utils",
     "vortex-session",
+    "vortex-row",
     "vortex-flatbuffers",
     "vortex-metrics",
     "vortex-io",
@@ -291,6 +292,7 @@ vortex-mask = { version = "0.1.0", path = "./vortex-mask", default-features = fa
 vortex-metrics = { version = "0.1.0", path = "./vortex-metrics", default-features = false }
 vortex-pco = { version = "0.1.0", path = "./encodings/pco", default-features = false }
 vortex-proto = { version = "0.1.0", path = "./vortex-proto", default-features = false }
+vortex-row = { version = "0.1.0", path = "./vortex-row", default-features = false }
 vortex-runend = { version = "0.1.0", path = "./encodings/runend", default-features = false }
 vortex-scan = { version = "0.1.0", path = "./vortex-scan", default-features = false }
 vortex-sequence = { version = "0.1.0", path = "encodings/sequence", default-features = false }
diff --git a/vortex-row/Cargo.toml b/vortex-row/Cargo.toml
new file mode 100644
index 00000000000..7515715392c
--- /dev/null
+++ b/vortex-row/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "vortex-row"
+authors = { workspace = true }
+categories = { workspace = true }
+description = "Row-oriented byte encoder for Vortex arrays, analogous to arrow-row."
+edition = { workspace = true }
+homepage = { workspace = true }
+include = { workspace = true }
+keywords = { workspace = true }
+license = { workspace = true }
+readme = { workspace = true }
+repository = { workspace = true }
+rust-version = { workspace = true }
+version = { workspace = true }
+
+[lints]
+workspace = true
+
+[dependencies]
+vortex-session = { workspace = true }
diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
new file mode 100644
index 00000000000..d507aa46a00
--- /dev/null
+++ b/vortex-row/public-api.lock
@@ -0,0 +1,3 @@
+pub mod vortex_row
+
+pub fn vortex_row::initialize(&vortex_session::VortexSession)
diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs
new file mode 100644
index 00000000000..f675ca12f4d
--- /dev/null
+++ b/vortex-row/src/lib.rs
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Row-oriented byte encoder, analogous to Apache Arrow's `arrow-row` crate.
+//!
+//! Subsequent commits add the encoder, decoder helpers, and per-encoding fast paths.
+//! This commit only establishes the crate skeleton and an `initialize` stub.
+
+use vortex_session::VortexSession;
+
+/// Register the row-encoding scalar functions on the given session.
+///
+/// Currently a stub: subsequent commits register `RowSize` and `RowEncode` here.
+pub fn initialize(_session: &VortexSession) {}

From 4f4aca5f32e93729f1cc6acf6767901e47fcf4aa Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:01:35 +0000
Subject: [PATCH 02/10] vortex-row: add SortField and RowEncodeOptions

Introduce the per-column sort-field options and the variadic-function
options struct used by the upcoming RowSize / RowEncode scalar functions.

`RowEncodeOptions::fields` uses a `SmallVec<[SortField; 4]>` so typical
1-4 column keys avoid a heap allocation. The commit also adds crate-private
serialize / deserialize helpers, used later by the scalar-function metadata
round-trip.

Signed-off-by: Claude <noreply@anthropic.com>
---
 Cargo.lock                 |   2 +
 vortex-row/Cargo.toml      |   2 +
 vortex-row/public-api.lock | 156 ++++++++++++++++++++++++++++++++++++
 vortex-row/src/lib.rs      |   4 +
 vortex-row/src/options.rs  | 157 +++++++++++++++++++++++++++++++++++++
 5 files changed, 321 insertions(+)
 create mode 100644 vortex-row/src/options.rs

diff --git a/Cargo.lock b/Cargo.lock
index 63a608277d3..ce2b4e6d41b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11038,6 +11038,8 @@ dependencies = [
 name = "vortex-row"
 version = "0.1.0"
 dependencies = [
+ "smallvec",
+ "vortex-error",
  "vortex-session",
 ]
 
diff --git a/vortex-row/Cargo.toml b/vortex-row/Cargo.toml
index 7515715392c..3e314fd7697 100644
--- a/vortex-row/Cargo.toml
+++ b/vortex-row/Cargo.toml
@@ -17,4 +17,6 @@ version = { workspace = true }
 workspace = true
 
 [dependencies]
+smallvec = { workspace = true }
+vortex-error = { workspace = true }
 vortex-session = { workspace = true }
diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
index d507aa46a00..998a7712f2d 100644
--- a/vortex-row/public-api.lock
+++ b/vortex-row/public-api.lock
@@ -1,3 +1,159 @@
 pub mod vortex_row
 
+pub mod vortex_row::options
+
+pub struct vortex_row::options::RowEncodeOptions
+
+pub vortex_row::options::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>
+
+impl vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::options::SortField>) -> Self
+
+impl core::clone::Clone for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::clone(&self) -> vortex_row::options::RowEncodeOptions
+
+impl core::cmp::Eq for vortex_row::options::RowEncodeOptions
+
+impl core::cmp::PartialEq for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::eq(&self, &vortex_row::options::RowEncodeOptions) -> bool
+
+impl core::fmt::Debug for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::fmt::Display for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::hash::Hash for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&self, &mut __H)
+
+impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions
+
+pub struct vortex_row::options::SortField
+
+pub vortex_row::options::SortField::descending: bool
+
+pub vortex_row::options::SortField::nulls_first: bool
+
+impl vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::new(bool, bool) -> Self
+
+pub fn vortex_row::options::SortField::non_null_sentinel(&self) -> u8
+
+pub fn vortex_row::options::SortField::null_sentinel(&self) -> u8
+
+impl core::clone::Clone for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::clone(&self) -> vortex_row::options::SortField
+
+impl core::cmp::Eq for vortex_row::options::SortField
+
+impl core::cmp::PartialEq for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::eq(&self, &vortex_row::options::SortField) -> bool
+
+impl core::default::Default for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::default() -> Self
+
+impl core::fmt::Debug for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::fmt::Display for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::hash::Hash for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::hash<__H: core::hash::Hasher>(&self, &mut __H)
+
+impl core::marker::Copy for vortex_row::options::SortField
+
+impl core::marker::StructuralPartialEq for vortex_row::options::SortField
+
+pub const vortex_row::options::FIELDS_INLINE: usize
+
+pub struct vortex_row::RowEncodeOptions
+
+pub vortex_row::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>
+
+impl vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::options::SortField>) -> Self
+
+impl core::clone::Clone for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::clone(&self) -> vortex_row::options::RowEncodeOptions
+
+impl core::cmp::Eq for vortex_row::options::RowEncodeOptions
+
+impl core::cmp::PartialEq for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::eq(&self, &vortex_row::options::RowEncodeOptions) -> bool
+
+impl core::fmt::Debug for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::fmt::Display for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::hash::Hash for vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&self, &mut __H)
+
+impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions
+
+pub struct vortex_row::SortField
+
+pub vortex_row::SortField::descending: bool
+
+pub vortex_row::SortField::nulls_first: bool
+
+impl vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::new(bool, bool) -> Self
+
+pub fn vortex_row::options::SortField::non_null_sentinel(&self) -> u8
+
+pub fn vortex_row::options::SortField::null_sentinel(&self) -> u8
+
+impl core::clone::Clone for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::clone(&self) -> vortex_row::options::SortField
+
+impl core::cmp::Eq for vortex_row::options::SortField
+
+impl core::cmp::PartialEq for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::eq(&self, &vortex_row::options::SortField) -> bool
+
+impl core::default::Default for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::default() -> Self
+
+impl core::fmt::Debug for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::fmt::Display for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::hash::Hash for vortex_row::options::SortField
+
+pub fn vortex_row::options::SortField::hash<__H: core::hash::Hasher>(&self, &mut __H)
+
+impl core::marker::Copy for vortex_row::options::SortField
+
+impl core::marker::StructuralPartialEq for vortex_row::options::SortField
+
 pub fn vortex_row::initialize(&vortex_session::VortexSession)
diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs
index f675ca12f4d..9e62f25caf2 100644
--- a/vortex-row/src/lib.rs
+++ b/vortex-row/src/lib.rs
@@ -6,6 +6,10 @@
 //! Subsequent commits add the encoder, decoder helpers, and per-encoding fast paths.
 //! This commit only establishes the crate skeleton and an `initialize` stub.
 
+pub mod options;
+
+pub use options::RowEncodeOptions;
+pub use options::SortField;
 use vortex_session::VortexSession;
 
 /// Register the row-encoding scalar functions on the given session.
diff --git a/vortex-row/src/options.rs b/vortex-row/src/options.rs
new file mode 100644
index 00000000000..a9e5e2b18ab
--- /dev/null
+++ b/vortex-row/src/options.rs
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::fmt::Display;
+use std::fmt::Formatter;
+
+use smallvec::SmallVec;
+
+/// Per-column options for the row-oriented byte encoder.
+///
+/// These options control how a single column is encoded into row bytes:
+/// - `descending`: if true, the encoded value bytes are bit-inverted so that
+///   lexicographic byte comparison reflects the reverse of the natural ordering.
+///   The null sentinel byte is NOT inverted, so nulls keep their requested
+///   position relative to non-nulls.
+/// - `nulls_first`: if true, nulls sort before non-nulls. If false, nulls sort
+///   after non-nulls. Implemented via the sentinel byte that precedes every
+///   value's encoded bytes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct SortField {
+    /// If true, encoded value bytes are bit-inverted so lexicographic byte
+    /// comparison reflects the reverse of the natural ordering.
+    pub descending: bool,
+    /// If true, nulls sort before non-null values; otherwise nulls sort after.
+    pub nulls_first: bool,
+}
+
+impl Default for SortField {
+    fn default() -> Self {
+        Self {
+            descending: false,
+            nulls_first: true,
+        }
+    }
+}
+
+impl Display for SortField {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "descending={}, nulls_first={}",
+            self.descending, self.nulls_first
+        )
+    }
+}
+
+impl SortField {
+    /// Construct a new `SortField` with explicit options.
+    pub fn new(descending: bool, nulls_first: bool) -> Self {
+        Self {
+            descending,
+            nulls_first,
+        }
+    }
+
+    /// Returns the sentinel byte to write for a non-null value.
+    #[inline]
+    pub fn non_null_sentinel(&self) -> u8 {
+        // Always 0x01: it sits between the two null sentinels, 0x00 (nulls first) and 0x02.
+        0x01
+    }
+
+    /// Returns the sentinel byte to write for a null value.
+    #[inline]
+    pub fn null_sentinel(&self) -> u8 {
+        if self.nulls_first {
+            // Nulls before non-nulls (smaller byte sorts first).
+            0x00
+        } else {
+            // Nulls after non-nulls (larger byte sorts later).
+            0x02
+        }
+    }
+}
+
+/// Inline capacity for [`RowEncodeOptions::fields`]. Up to this many [`SortField`]s
+/// are held inline without a heap allocation; beyond, the storage spills.
+pub const FIELDS_INLINE: usize = 4;
+
+/// Options for the variadic [`RowSize`] and [`RowEncode`] scalar functions:
+/// one [`SortField`] per input column.
+///
+/// Stored in a [`SmallVec`] so that typical 1–4 column keys avoid a heap
+/// allocation; longer field lists spill to the heap transparently.
+///
+/// [`RowSize`]: super::size::RowSize
+/// [`RowEncode`]: super::encode::RowEncode
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct RowEncodeOptions {
+    /// Per-column sort fields, in left-to-right column order.
+    pub fields: SmallVec<[SortField; FIELDS_INLINE]>,
+}
+
+impl RowEncodeOptions {
+    /// Construct a new `RowEncodeOptions` from any iterator of [`SortField`]s.
+    pub fn new(fields: impl IntoIterator<Item = SortField>) -> Self {
+        Self {
+            fields: fields.into_iter().collect(),
+        }
+    }
+}
+
+impl Display for RowEncodeOptions {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "[")?;
+        for (i, field) in self.fields.iter().enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{}", field)?;
+        }
+        write!(f, "]")
+    }
+}
+
+/// Serialize a [`RowEncodeOptions`] to a compact byte vector: 4-byte LE length followed by
+/// `2 * len` bytes (descending + nulls_first booleans for each field).
+pub(crate) fn serialize_row_encode_options(opts: &RowEncodeOptions) -> Vec<u8> {
+    use vortex_error::VortexExpect;
+    let n =
+        u32::try_from(opts.fields.len()).vortex_expect("RowEncodeOptions length must fit in u32");
+    let mut out = Vec::with_capacity(4 + 2 * opts.fields.len());
+    out.extend_from_slice(&n.to_le_bytes());
+    for f in &opts.fields {
+        out.push(u8::from(f.descending));
+        out.push(u8::from(f.nulls_first));
+    }
+    out
+}
+
+/// Deserialize a [`RowEncodeOptions`] produced by [`serialize_row_encode_options`].
+pub(crate) fn deserialize_row_encode_options(
+    bytes: &[u8],
+) -> vortex_error::VortexResult<RowEncodeOptions> {
+    if bytes.len() < 4 {
+        vortex_error::vortex_bail!("RowEncodeOptions metadata must contain a 4-byte length prefix");
+    }
+    let n = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
+    // Compare in u64: `4 + 2 * n` would overflow `usize` on 32-bit targets for large `n`.
+    let expected = 4 + 2 * u64::from(n);
+    if bytes.len() as u64 != expected {
+        vortex_error::vortex_bail!(
+            "RowEncodeOptions metadata wrong size: got {}, expected {}",
+            bytes.len(),
+            expected
+        );
+    }
+    // Length is validated above, so the payload is exactly `n` (descending, nulls_first) pairs.
+    let fields = bytes[4..]
+        .chunks_exact(2)
+        .map(|pair| SortField {
+            descending: pair[0] != 0,
+            nulls_first: pair[1] != 0,
+        })
+        .collect();
+    Ok(RowEncodeOptions { fields })
+}

From 1b7af91e7c0b1fa4269b1768809f20c7ff2329b9 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:04:03 +0000
Subject: [PATCH 03/10] vortex-row: codec for fixed-width canonical types

Add the byte-encoding kernels for the fixed-width portion of the row
encoder: Null, Bool, Primitive (12 PTypes), and Decimal (i8..i128). Each
encoder writes a 1-byte sentinel followed by the value's row-comparable
bytes (sign-flipped big-endian for signed ints, sign-aware mask for
floats, etc.).

The size pass is a constant `width-per-row` add for these types; the
encode pass walks rows and writes into the shared output buffer at
`offsets[i] + cursors[i]`. `row_width_for_dtype` classifies the column
based purely on its DType.

Scalar-level encoders (`encode_scalar_primitive` / `encode_scalar_bool`
/ `encode_scalar_null` / `encode_scalar` / `encoded_size_for_scalar`)
are included for the same fixed-width subset; varlen and nested
canonical variants bail with a clear "not yet supported" error and
land in follow-up commits.

The implementation is deliberately the simplest correct version:
bounds-checked array indexing, no `copy_nonoverlapping`, no validity
fast-path helper. Subsequent PRs evolve this toward the optimized form.

Signed-off-by: Claude <noreply@anthropic.com>
---
 Cargo.lock                 |   4 +
 vortex-row/Cargo.toml      |   4 +
 vortex-row/public-api.lock | 100 ++++++
 vortex-row/src/codec.rs    | 667 +++++++++++++++++++++++++++++++++++++
 vortex-row/src/lib.rs      |   1 +
 5 files changed, 776 insertions(+)
 create mode 100644 vortex-row/src/codec.rs

diff --git a/Cargo.lock b/Cargo.lock
index ce2b4e6d41b..0b84f6dd260 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11038,8 +11038,12 @@ dependencies = [
 name = "vortex-row"
 version = "0.1.0"
 dependencies = [
+ "bytes",
  "smallvec",
+ "vortex-array",
+ "vortex-buffer",
  "vortex-error",
+ "vortex-mask",
  "vortex-session",
 ]
 
diff --git a/vortex-row/Cargo.toml b/vortex-row/Cargo.toml
index 3e314fd7697..aaed9a55f51 100644
--- a/vortex-row/Cargo.toml
+++ b/vortex-row/Cargo.toml
@@ -17,6 +17,10 @@ version = { workspace = true }
 workspace = true
 
 [dependencies]
+bytes = { workspace = true }
 smallvec = { workspace = true }
+vortex-array = { workspace = true }
+vortex-buffer = { workspace = true }
 vortex-error = { workspace = true }
+vortex-mask = { workspace = true }
 vortex-session = { workspace = true }
diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
index 998a7712f2d..4990e30ba16 100644
--- a/vortex-row/public-api.lock
+++ b/vortex-row/public-api.lock
@@ -1,5 +1,105 @@
 pub mod vortex_row
 
+pub mod vortex_row::codec
+
+pub enum vortex_row::codec::RowWidth
+
+pub vortex_row::codec::RowWidth::Fixed(u32)
+
+pub vortex_row::codec::RowWidth::Variable
+
+impl core::clone::Clone for vortex_row::codec::RowWidth
+
+pub fn vortex_row::codec::RowWidth::clone(&self) -> vortex_row::codec::RowWidth
+
+impl core::cmp::Eq for vortex_row::codec::RowWidth
+
+impl core::cmp::PartialEq for vortex_row::codec::RowWidth
+
+pub fn vortex_row::codec::RowWidth::eq(&self, &vortex_row::codec::RowWidth) -> bool
+
+impl core::fmt::Debug for vortex_row::codec::RowWidth
+
+pub fn vortex_row::codec::RowWidth::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::marker::Copy for vortex_row::codec::RowWidth
+
+impl core::marker::StructuralPartialEq for vortex_row::codec::RowWidth
+
+pub const vortex_row::codec::BOOL_ENCODED_SIZE: u32
+
+pub const vortex_row::codec::VARLEN_BLOCK_SIZE: usize
+
+pub const vortex_row::codec::VARLEN_BLOCK_TOTAL: usize
+
+pub trait vortex_row::codec::RowEncode: core::marker::Copy
+
+pub fn vortex_row::codec::RowEncode::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for f32
+
+pub fn f32::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for f64
+
+pub fn f64::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for half::binary16::f16
+
+pub fn half::binary16::f16::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for i128
+
+pub fn i128::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for i16
+
+pub fn i16::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for i32
+
+pub fn i32::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for i64
+
+pub fn i64::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for i8
+
+pub fn i8::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for u16
+
+pub fn u16::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for u32
+
+pub fn u32::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for u64
+
+pub fn u64::encode_to(self, &mut [u8], bool)
+
+impl vortex_row::codec::RowEncode for u8
+
+pub fn u8::encode_to(self, &mut [u8], bool)
+
+pub fn vortex_row::codec::encode_scalar(&vortex_array::scalar::Scalar, vortex_row::options::SortField) -> vortex_error::VortexResult<bytes::bytes::Bytes>
+
+pub fn vortex_row::codec::encode_scalar_bool(core::option::Option<bool>, vortex_row::options::SortField, &mut vortex_buffer::ByteBufferMut)
+
+pub fn vortex_row::codec::encode_scalar_null(vortex_row::options::SortField, bool, &mut vortex_buffer::ByteBufferMut)
+
+pub fn vortex_row::codec::encode_scalar_primitive(vortex_array::dtype::ptype::PType, vortex_array::scalar::typed_view::primitive::pvalue::PValue, vortex_row::options::SortField, bool, &mut vortex_buffer::ByteBufferMut) -> vortex_error::VortexResult<()>
+
+pub fn vortex_row::codec::encoded_size_for_scalar(&vortex_array::scalar::Scalar, vortex_row::options::SortField) -> vortex_error::VortexResult<u32>
+
+pub fn vortex_row::codec::field_encode(&vortex_array::canonical::Canonical, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
+
+pub fn vortex_row::codec::field_size(&vortex_array::canonical::Canonical, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
+
+pub fn vortex_row::codec::row_width_for_dtype(&vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_row::codec::RowWidth>
+
 pub mod vortex_row::options
 
 pub struct vortex_row::options::RowEncodeOptions
diff --git a/vortex-row/src/codec.rs b/vortex-row/src/codec.rs
new file mode 100644
index 00000000000..73aa7a37db4
--- /dev/null
+++ b/vortex-row/src/codec.rs
@@ -0,0 +1,667 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+#![allow(
+    clippy::cast_possible_truncation,
+    clippy::expect_used,
+    reason = "row encoding indexes into u32-sized buffers; lengths are validated to fit in u32 elsewhere"
+)]
+
+//! Pure byte-encoding kernels for row-oriented output, operating on `Canonical` variants.
+//!
+//! The encoded byte format produces a lexicographically byte-comparable representation:
+//! comparing the byte slices of two encoded rows yields the same ordering as the
+//! original logical (tuple) comparison of their values, modulo nulls placement and
+//! descending-ness as configured by [`SortField`].
+//!
+//! Conventions:
+//! - Every value is preceded by a 1-byte sentinel that orders nulls relative to non-nulls.
+//! - For `descending`, only the **value** bytes are bit-inverted (XOR with 0xFF), not the
+//!   sentinel.
+//! - Fixed-width integers are big-endian, with the sign bit flipped for signed types.
+//! - Floats are bit-pattern big-endian with sign-aware mask: non-negative flips the top
+//!   bit; negative flips all bits.
+//!
+//! This commit covers only the fixed-width canonical variants (Null, Bool, Primitive,
+//! Decimal); variable-length and nested canonical variants land in later commits.
+
+use vortex_array::Canonical;
+use vortex_array::ExecutionCtx;
+use vortex_array::arrays::BoolArray;
+use vortex_array::arrays::DecimalArray;
+use vortex_array::arrays::NullArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::dtype::DType;
+use vortex_array::dtype::DecimalType;
+use vortex_array::dtype::NativePType;
+use vortex_array::dtype::PType;
+use vortex_array::dtype::half::f16;
+use vortex_array::match_each_native_ptype;
+use vortex_buffer::ByteBufferMut;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+
+use crate::options::SortField;
+
+/// Size in bytes of the encoded form of a single bool value (sentinel + 1 content byte).
+pub const BOOL_ENCODED_SIZE: u32 = 2;
+
+/// Block size used in the variable-length encoding.
+pub const VARLEN_BLOCK_SIZE: usize = 32;
+/// Total bytes per varlen block including the trailing continuation marker.
+pub const VARLEN_BLOCK_TOTAL: usize = VARLEN_BLOCK_SIZE + 1;
+
+/// Returns the size in bytes of the encoded form of a variable-length value of the given length.
+#[inline]
+#[allow(
+    dead_code,
+    reason = "used once varlen support lands in a follow-up commit"
+)]
+fn encoded_size_for_varlen(len: usize) -> u32 {
+    // 1 sentinel + ceil(len/32)*33 content bytes (or 1 zero terminator if empty)
+    if len == 0 {
+        1 + 1
+    } else {
+        let blocks = len.div_ceil(VARLEN_BLOCK_SIZE);
+        1 + (blocks as u32) * (VARLEN_BLOCK_TOTAL as u32)
+    }
+}
+
+/// Constant per-row size in bytes for fixed-width encodings (including 1-byte sentinel).
+#[inline]
+const fn encoded_size_for_fixed(value_bytes: u32) -> u32 {
+    1 + value_bytes
+}
+
+/// Per-row width classification for a column.
+///
+/// `Fixed(w)` means every row encodes to exactly `w` bytes (sentinel + value), regardless
+/// of null-ness or value. `Variable` means per-row sizes depend on the data (Utf8/Binary,
+/// List, or any composite that recurses through a variable-width field).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum RowWidth {
+    /// Per-row width is the same constant for every row in the column.
+    Fixed(u32),
+    /// Per-row width is data-dependent.
+    Variable,
+}
+
+/// Classify a column's per-row encoded width by inspecting only its [`DType`].
+///
+/// Returns `Fixed(w)` when every row encodes to exactly `w` bytes (sentinel + value),
+/// regardless of null-ness or value. Returns `Variable` when per-row sizes depend on the
+/// data.
+///
+/// Classification does not depend on the [`SortField`]: null-vs-non-null encoding width is
+/// the same for fixed-width types (the sentinel byte plus zero-fill for nulls).
+///
+/// # Errors
+///
+/// Returns an error for dtypes the row encoder cannot handle. Utf8/Binary and the nested
+/// dtypes (Struct/FixedSizeList/List/Extension) are not yet supported in this commit;
+/// Variant and Union are rejected as unsupported.
+pub fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
+    match dtype {
+        DType::Null => Ok(RowWidth::Fixed(1)),
+        DType::Bool(_) => Ok(RowWidth::Fixed(BOOL_ENCODED_SIZE)),
+        DType::Primitive(ptype, _) => Ok(RowWidth::Fixed(encoded_size_for_fixed(
+            ptype.byte_width() as u32,
+        ))),
+        DType::Decimal(dt, _) => {
+            let vt = DecimalType::smallest_decimal_value_type(dt);
+            Ok(RowWidth::Fixed(encoded_size_for_fixed(
+                vt.byte_width() as u32
+            )))
+        }
+        DType::Utf8(_) | DType::Binary(_) => {
+            vortex_bail!("row encoding for {} is not yet supported", dtype)
+        }
+        DType::Struct(..) | DType::FixedSizeList(..) | DType::List(..) | DType::Extension(..) => {
+            vortex_bail!("row encoding for {} is not yet supported", dtype)
+        }
+        DType::Variant(_) => {
+            vortex_bail!("row encoding does not support Variant arrays (no well-defined ordering)")
+        }
+        DType::Union(_) => vortex_bail!("row encoding does not support Union arrays"),
+    }
+}
+
+/// Compute the per-row size in bytes for the given canonical view, adding into `sizes`.
+///
+/// `sizes` is expected to be initialized (typically zeroed). This function *adds* the
+/// per-row size to each entry so multiple columns can accumulate into the same buffer.
+///
+/// # Errors
+///
+/// Returns an error for unsupported canonical variants. Variable-length and nested
+/// variants land in later commits.
+pub fn field_size(
+    canonical: &Canonical,
+    _field: SortField,
+    sizes: &mut [u32],
+    _ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    match canonical {
+        Canonical::Null(arr) => add_size_null(arr, sizes),
+        Canonical::Bool(_) => add_size_const(sizes, encoded_size_for_fixed(1)),
+        Canonical::Primitive(arr) => add_size_primitive(arr, sizes),
+        Canonical::Decimal(arr) => add_size_decimal(arr, sizes),
+        Canonical::VarBinView(_)
+        | Canonical::Struct(_)
+        | Canonical::FixedSizeList(_)
+        | Canonical::Extension(_)
+        | Canonical::List(_) => vortex_bail!(
+            "row encoding does not yet support canonical type {:?}",
+            canonical.dtype()
+        ),
+        Canonical::Variant(_) => {
+            vortex_bail!("row encoding does not support Variant arrays (no well-defined ordering)")
+        }
+    }
+    Ok(())
+}
+
+/// Encode each row's bytes for the given canonical view into `out`, writing starting at
+/// `offsets[i] + cursors[i]` for row `i` and advancing `cursors[i]` by the number of
+/// bytes written.
+///
+/// After this call returns successfully, `cursors[i]` will have advanced by exactly the
+/// per-row contribution previously computed by [`field_size`] for the same column.
+pub fn field_encode(
+    canonical: &Canonical,
+    field: SortField,
+    offsets: &[u32],
+    cursors: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    match canonical {
+        Canonical::Null(arr) => encode_null(arr, field, offsets, cursors, out),
+        Canonical::Bool(arr) => encode_bool(arr, field, offsets, cursors, out, ctx)?,
+        Canonical::Primitive(arr) => encode_primitive(arr, field, offsets, cursors, out, ctx)?,
+        Canonical::Decimal(arr) => encode_decimal(arr, field, offsets, cursors, out, ctx)?,
+        Canonical::VarBinView(_)
+        | Canonical::Struct(_)
+        | Canonical::FixedSizeList(_)
+        | Canonical::Extension(_)
+        | Canonical::List(_) => vortex_bail!(
+            "row encoding does not yet support canonical type {:?}",
+            canonical.dtype()
+        ),
+        Canonical::Variant(_) => {
+            vortex_bail!("row encoding does not support Variant arrays (no well-defined ordering)")
+        }
+    }
+    Ok(())
+}
+
+fn add_size_const(sizes: &mut [u32], add: u32) {
+    for s in sizes.iter_mut() {
+        *s += add;
+    }
+}
+
+fn add_size_null(arr: &NullArray, sizes: &mut [u32]) {
+    debug_assert_eq!(arr.len(), sizes.len());
+    // Just a sentinel byte per row.
+    for s in sizes.iter_mut() {
+        *s += 1;
+    }
+}
+
+fn add_size_primitive(arr: &PrimitiveArray, sizes: &mut [u32]) {
+    let width = arr.ptype().byte_width() as u32;
+    add_size_const(sizes, encoded_size_for_fixed(width));
+}
+
+fn add_size_decimal(arr: &DecimalArray, sizes: &mut [u32]) {
+    let width = arr.values_type().byte_width() as u32;
+    add_size_const(sizes, encoded_size_for_fixed(width));
+}
+
+fn encode_null(
+    arr: &NullArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+) {
+    let sentinel = field.null_sentinel();
+    for i in 0..arr.len() {
+        let pos = (row_offsets[i] + col_offset[i]) as usize;
+        out[pos] = sentinel;
+        col_offset[i] += 1;
+    }
+}
+
+fn encode_bool(
+    arr: &BoolArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let mask = arr.as_ref().validity()?.execute_mask(arr.len(), ctx)?;
+    let bits = arr.clone().into_bit_buffer();
+    let non_null = field.non_null_sentinel();
+    let null = field.null_sentinel();
+    let xor = if field.descending { 0xFF } else { 0x00 };
+    for i in 0..bits.len() {
+        let pos = (row_offsets[i] + col_offset[i]) as usize;
+        if mask.value(i) {
+            out[pos] = non_null;
+            // false=0x01, true=0x02 so false < true; XOR for descending
+            let raw = if bits.value(i) { 0x02u8 } else { 0x01u8 };
+            out[pos + 1] = raw ^ xor;
+        } else {
+            out[pos] = null;
+            out[pos + 1] = 0;
+        }
+        col_offset[i] += BOOL_ENCODED_SIZE;
+    }
+    Ok(())
+}
+
+fn encode_primitive(
+    arr: &PrimitiveArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    match_each_native_ptype!(arr.ptype(), |T| {
+        encode_primitive_typed::<T>(arr, field, row_offsets, col_offset, out, ctx)?;
+    });
+    Ok(())
+}
+
+fn encode_primitive_typed<T: NativePType + RowEncode>(
+    arr: &PrimitiveArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let mask = arr.as_ref().validity()?.execute_mask(arr.len(), ctx)?;
+    let slice: &[T] = arr.as_slice();
+    let non_null = field.non_null_sentinel();
+    let null = field.null_sentinel();
+    let value_bytes = size_of::<T>();
+    for (i, &v) in slice.iter().enumerate() {
+        let pos = (row_offsets[i] + col_offset[i]) as usize;
+        if mask.value(i) {
+            out[pos] = non_null;
+            v.encode_to(&mut out[pos + 1..pos + 1 + value_bytes], field.descending);
+        } else {
+            out[pos] = null;
+            // Zero-fill the value bytes.
+            for b in &mut out[pos + 1..pos + 1 + value_bytes] {
+                *b = 0;
+            }
+        }
+        col_offset[i] += encoded_size_for_fixed(value_bytes as u32);
+    }
+    Ok(())
+}
+
+fn encode_decimal(
+    arr: &DecimalArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let mask = arr.as_ref().validity()?.execute_mask(arr.len(), ctx)?;
+    match arr.values_type() {
+        DecimalType::I8 => {
+            encode_decimal_typed::<i8>(arr, &mask, field, row_offsets, col_offset, out)
+        }
+        DecimalType::I16 => {
+            encode_decimal_typed::<i16>(arr, &mask, field, row_offsets, col_offset, out)
+        }
+        DecimalType::I32 => {
+            encode_decimal_typed::<i32>(arr, &mask, field, row_offsets, col_offset, out)
+        }
+        DecimalType::I64 => {
+            encode_decimal_typed::<i64>(arr, &mask, field, row_offsets, col_offset, out)
+        }
+        DecimalType::I128 => {
+            encode_decimal_typed::<i128>(arr, &mask, field, row_offsets, col_offset, out)
+        }
+        DecimalType::I256 => {
+            vortex_bail!("row encoding for Decimal256 is not yet implemented")
+        }
+    }
+    Ok(())
+}
+
+fn encode_decimal_typed<T>(
+    arr: &DecimalArray,
+    mask: &vortex_mask::Mask,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+) where
+    T: vortex_array::dtype::NativeDecimalType + RowEncode,
+{
+    let non_null = field.non_null_sentinel();
+    let null = field.null_sentinel();
+    let value_bytes = size_of::<T>();
+    let total = encoded_size_for_fixed(value_bytes as u32);
+    let slice = arr.buffer::<T>();
+    for i in 0..slice.len() {
+        let pos = (row_offsets[i] + col_offset[i]) as usize;
+        if mask.value(i) {
+            out[pos] = non_null;
+            slice[i].encode_to(&mut out[pos + 1..pos + 1 + value_bytes], field.descending);
+        } else {
+            out[pos] = null;
+            for b in &mut out[pos + 1..pos + 1 + value_bytes] {
+                *b = 0;
+            }
+        }
+        col_offset[i] += total;
+    }
+}
+
+/// Internal trait for encoding a fixed-width native value into byte slots.
+///
+/// Implementations must produce a sequence of `size_of::<Self>()` bytes that is
+/// lexicographically byte-comparable according to the natural ordering of the type.
+pub trait RowEncode: Copy {
+    /// Encode this value into `out`, inverting the bytes for descending order.
+    fn encode_to(self, out: &mut [u8], descending: bool);
+}
+
+macro_rules! impl_row_encode_unsigned {
+    ($t:ty) => {
+        impl RowEncode for $t {
+            #[inline]
+            fn encode_to(self, out: &mut [u8], descending: bool) {
+                let bytes = self.to_be_bytes();
+                if descending {
+                    for (i, b) in bytes.iter().enumerate() {
+                        out[i] = b ^ 0xFF;
+                    }
+                } else {
+                    out.copy_from_slice(&bytes);
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_row_encode_signed {
+    ($t:ty) => {
+        impl RowEncode for $t {
+            #[inline]
+            fn encode_to(self, out: &mut [u8], descending: bool) {
+                let mut bytes = self.to_be_bytes();
+                // Flip sign bit so negatives < non-negatives lexicographically.
+                bytes[0] ^= 0x80;
+                if descending {
+                    for (i, b) in bytes.iter().enumerate() {
+                        out[i] = b ^ 0xFF;
+                    }
+                } else {
+                    out.copy_from_slice(&bytes);
+                }
+            }
+        }
+    };
+}
+
+impl_row_encode_unsigned!(u8);
+impl_row_encode_unsigned!(u16);
+impl_row_encode_unsigned!(u32);
+impl_row_encode_unsigned!(u64);
+impl_row_encode_signed!(i8);
+impl_row_encode_signed!(i16);
+impl_row_encode_signed!(i32);
+impl_row_encode_signed!(i64);
+impl_row_encode_signed!(i128);
+
+impl RowEncode for f32 {
+    fn encode_to(self, out: &mut [u8], descending: bool) {
+        let bits = self.to_bits();
+        let mask: u32 = if (bits >> 31) == 0 {
+            0x8000_0000
+        } else {
+            0xFFFF_FFFF
+        };
+        let mut bytes = (bits ^ mask).to_be_bytes();
+        if descending {
+            for b in bytes.iter_mut() {
+                *b ^= 0xFF;
+            }
+        }
+        out.copy_from_slice(&bytes);
+    }
+}
+
+impl RowEncode for f64 {
+    fn encode_to(self, out: &mut [u8], descending: bool) {
+        let bits = self.to_bits();
+        let mask: u64 = if (bits >> 63) == 0 {
+            0x8000_0000_0000_0000
+        } else {
+            0xFFFF_FFFF_FFFF_FFFF
+        };
+        let mut bytes = (bits ^ mask).to_be_bytes();
+        if descending {
+            for b in bytes.iter_mut() {
+                *b ^= 0xFF;
+            }
+        }
+        out.copy_from_slice(&bytes);
+    }
+}
+
+impl RowEncode for f16 {
+    fn encode_to(self, out: &mut [u8], descending: bool) {
+        let bits = self.to_bits();
+        let mask: u16 = if (bits >> 15) == 0 { 0x8000 } else { 0xFFFF };
+        let mut bytes = (bits ^ mask).to_be_bytes();
+        if descending {
+            for b in bytes.iter_mut() {
+                *b ^= 0xFF;
+            }
+        }
+        out.copy_from_slice(&bytes);
+    }
+}
+
+/// Encode a single scalar primitive value of a known [`PType`] into `out`.
+///
+/// Appends one sentinel byte followed by `ptype.byte_width()` value bytes. A null
+/// scalar gets the null sentinel plus zero-filled value bytes, so its width matches
+/// [`encoded_size_for_scalar`] and the null rows written by the column encoders.
+///
+/// Returns an error if `value` cannot be converted to the native type of `ptype`.
+pub fn encode_scalar_primitive(
+    ptype: PType,
+    value: vortex_array::scalar::PValue,
+    field: SortField,
+    is_null: bool,
+    out: &mut ByteBufferMut,
+) -> VortexResult<()> {
+    let width = ptype.byte_width();
+    let mut tmp = [0u8; 16];
+    let buf = &mut tmp[..width];
+    if is_null {
+        out.push(field.null_sentinel());
+        out.extend_from_slice(buf);
+        return Ok(());
+    }
+    out.push(field.non_null_sentinel());
+    match_each_native_ptype!(ptype, |T| {
+        let v: T = T::try_from(value)?;
+        v.encode_to(buf, field.descending);
+    });
+    out.extend_from_slice(buf);
+    Ok(())
+}
+
+/// Encode a single boolean value.
+pub fn encode_scalar_bool(value: Option<bool>, field: SortField, out: &mut ByteBufferMut) {
+    match value {
+        None => {
+            out.push(field.null_sentinel());
+            out.push(0);
+        }
+        Some(b) => {
+            out.push(field.non_null_sentinel());
+            let raw = if b { 0x02u8 } else { 0x01u8 };
+            let xor = if field.descending { 0xFFu8 } else { 0 };
+            out.push(raw ^ xor);
+        }
+    }
+}
+
+/// Encode a single null-type value (only the sentinel).
+pub fn encode_scalar_null(field: SortField, is_null: bool, out: &mut ByteBufferMut) {
+    if is_null {
+        out.push(field.null_sentinel());
+    } else {
+        out.push(field.non_null_sentinel());
+    }
+}
+
+/// Returns the per-row encoded size for a scalar value (used for the Constant fast path).
+pub fn encoded_size_for_scalar(
+    scalar: &vortex_array::scalar::Scalar,
+    _field: SortField,
+) -> VortexResult<u32> {
+    if scalar.is_null() {
+        match scalar.dtype() {
+            DType::Null => Ok(1),
+            DType::Bool(_) => Ok(BOOL_ENCODED_SIZE),
+            DType::Primitive(ptype, _) => Ok(encoded_size_for_fixed(ptype.byte_width() as u32)),
+            DType::Decimal(dt, _) => {
+                let vt = DecimalType::smallest_decimal_value_type(dt);
+                Ok(encoded_size_for_fixed(vt.byte_width() as u32))
+            }
+            _ => vortex_bail!(
+                "unsupported scalar dtype for row encoding: {}",
+                scalar.dtype()
+            ),
+        }
+    } else {
+        match scalar.dtype() {
+            DType::Null => Ok(1),
+            DType::Bool(_) => Ok(BOOL_ENCODED_SIZE),
+            DType::Primitive(ptype, _) => Ok(encoded_size_for_fixed(ptype.byte_width() as u32)),
+            DType::Decimal(..) => {
+                let dec = scalar.as_decimal();
+                let vt = dec
+                    .decimal_value()
+                    .map(|v| v.decimal_type())
+                    .unwrap_or(DecimalType::I128);
+                Ok(encoded_size_for_fixed(vt.byte_width() as u32))
+            }
+            _ => vortex_bail!(
+                "unsupported scalar dtype for row encoding: {}",
+                scalar.dtype()
+            ),
+        }
+    }
+}
+
+/// Encode a single scalar value into a fresh `Bytes` buffer.
+pub fn encode_scalar(
+    scalar: &vortex_array::scalar::Scalar,
+    field: SortField,
+) -> VortexResult<bytes::Bytes> {
+    use vortex_array::scalar::PValue;
+    let size = encoded_size_for_scalar(scalar, field)? as usize;
+    let mut out = ByteBufferMut::with_capacity(size);
+    if scalar.is_null() {
+        match scalar.dtype() {
+            DType::Null => out.push(field.null_sentinel()),
+            DType::Bool(_) => {
+                out.push(field.null_sentinel());
+                out.push(0);
+            }
+            DType::Primitive(ptype, _) => {
+                out.push(field.null_sentinel());
+                let width = ptype.byte_width();
+                for _ in 0..width {
+                    out.push(0);
+                }
+            }
+            DType::Decimal(dt, _) => {
+                out.push(field.null_sentinel());
+                let vt = DecimalType::smallest_decimal_value_type(dt);
+                for _ in 0..vt.byte_width() {
+                    out.push(0);
+                }
+            }
+            _ => vortex_bail!(
+                "unsupported scalar dtype for row encoding: {}",
+                scalar.dtype()
+            ),
+        }
+    } else {
+        match scalar.dtype() {
+            DType::Null => out.push(field.non_null_sentinel()),
+            DType::Bool(_) => {
+                let v = scalar.as_bool().value().unwrap_or(false);
+                encode_scalar_bool(Some(v), field, &mut out);
+            }
+            DType::Primitive(ptype, _) => {
+                let v: PValue = scalar
+                    .as_primitive()
+                    .pvalue()
+                    .ok_or_else(|| vortex_error::vortex_err!("missing primitive value"))?;
+                encode_scalar_primitive(*ptype, v, field, false, &mut out)?;
+            }
+            DType::Decimal(..) => {
+                let dec = scalar.as_decimal();
+                out.push(field.non_null_sentinel());
+                let value = dec
+                    .decimal_value()
+                    .ok_or_else(|| vortex_error::vortex_err!("missing decimal value"))?;
+                match value {
+                    vortex_array::scalar::DecimalValue::I8(v) => {
+                        let mut tmp = [0u8; 1];
+                        v.encode_to(&mut tmp, field.descending);
+                        out.extend_from_slice(&tmp);
+                    }
+                    vortex_array::scalar::DecimalValue::I16(v) => {
+                        let mut tmp = [0u8; 2];
+                        v.encode_to(&mut tmp, field.descending);
+                        out.extend_from_slice(&tmp);
+                    }
+                    vortex_array::scalar::DecimalValue::I32(v) => {
+                        let mut tmp = [0u8; 4];
+                        v.encode_to(&mut tmp, field.descending);
+                        out.extend_from_slice(&tmp);
+                    }
+                    vortex_array::scalar::DecimalValue::I64(v) => {
+                        let mut tmp = [0u8; 8];
+                        v.encode_to(&mut tmp, field.descending);
+                        out.extend_from_slice(&tmp);
+                    }
+                    vortex_array::scalar::DecimalValue::I128(v) => {
+                        let mut tmp = [0u8; 16];
+                        v.encode_to(&mut tmp, field.descending);
+                        out.extend_from_slice(&tmp);
+                    }
+                    vortex_array::scalar::DecimalValue::I256(_) => {
+                        vortex_bail!("row encoding for Decimal256 is not yet implemented")
+                    }
+                }
+            }
+            _ => vortex_bail!(
+                "unsupported scalar dtype for row encoding: {}",
+                scalar.dtype()
+            ),
+        }
+    }
+    Ok(out.freeze().into_inner())
+}
diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs
index 9e62f25caf2..bdac4c8f48e 100644
--- a/vortex-row/src/lib.rs
+++ b/vortex-row/src/lib.rs
@@ -6,6 +6,7 @@
 //! Subsequent commits add the encoder, decoder helpers, and per-encoding fast paths.
 //! This commit only establishes the crate skeleton and an `initialize` stub.
 
+pub mod codec;
 pub mod options;
 
 pub use options::RowEncodeOptions;

From d3f3da4b2bf165549876b7848f6aa8c1fddb40fe Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:06:19 +0000
Subject: [PATCH 04/10] vortex-row: codec for varlen canonical types

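Extend the codec to handle Utf8/Binary via VarBinView arrays. A null value
encodes as the null sentinel alone. A non-null value encodes as a 1-byte
sentinel followed by either a single zero byte (empty value) or a run of
33-byte blocks: 32 data bytes plus a trailing marker byte. Every block
except the last carries the 0xFF continuation marker; the last block pads
its data with zeros and stores its data length (1..=32) as the marker.
For descending fields every byte after the sentinel is XORed with 0xFF.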

`encode_varlen_value` uses the simple byte-at-a-time XOR loop here; a
faster `copy_nonoverlapping` + stamped continuation version replaces it
in PR 2. `encode_varbinview` uses `arr.with_iterator(...)` for both the
nullable and non-nullable branches; a direct view walk for the no-nulls
branch lands in PR 2 too.

`row_width_for_dtype` now returns `Variable` for Utf8/Binary; the size
pass and encode dispatchers route through `add_size_varbinview` /
`encode_varbinview` correspondingly. The scalar encoder gains
`encode_scalar_varlen` and the matching Utf8/Binary arms.

Signed-off-by: Claude <noreply@anthropic.com>
---
 vortex-row/public-api.lock |   2 +
 vortex-row/src/codec.rs    | 147 ++++++++++++++++++++++++++++++++++---
 2 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
index 4990e30ba16..1afc1f05442 100644
--- a/vortex-row/public-api.lock
+++ b/vortex-row/public-api.lock
@@ -92,6 +92,8 @@ pub fn vortex_row::codec::encode_scalar_null(vortex_row::options::SortField, boo
 
 pub fn vortex_row::codec::encode_scalar_primitive(vortex_array::dtype::ptype::PType, vortex_array::scalar::typed_view::primitive::pvalue::PValue, vortex_row::options::SortField, bool, &mut vortex_buffer::ByteBufferMut) -> vortex_error::VortexResult<()>
 
+pub fn vortex_row::codec::encode_scalar_varlen(core::option::Option<&[u8]>, vortex_row::options::SortField, &mut vortex_buffer::ByteBufferMut)
+
 pub fn vortex_row::codec::encoded_size_for_scalar(&vortex_array::scalar::Scalar, vortex_row::options::SortField) -> vortex_error::VortexResult<u32>
 
 pub fn vortex_row::codec::field_encode(&vortex_array::canonical::Canonical, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
diff --git a/vortex-row/src/codec.rs b/vortex-row/src/codec.rs
index 73aa7a37db4..4f70d80e5ae 100644
--- a/vortex-row/src/codec.rs
+++ b/vortex-row/src/codec.rs
@@ -27,10 +27,12 @@
 
 use vortex_array::Canonical;
 use vortex_array::ExecutionCtx;
+use vortex_array::accessor::ArrayAccessor;
 use vortex_array::arrays::BoolArray;
 use vortex_array::arrays::DecimalArray;
 use vortex_array::arrays::NullArray;
 use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::VarBinViewArray;
 use vortex_array::dtype::DType;
 use vortex_array::dtype::DecimalType;
 use vortex_array::dtype::NativePType;
@@ -53,10 +55,6 @@ pub const VARLEN_BLOCK_TOTAL: usize = VARLEN_BLOCK_SIZE + 1;
 
 /// Returns the size in bytes of the encoded form of a variable-length value of the given length.
 #[inline]
-#[allow(
-    dead_code,
-    reason = "used once varlen support lands in a follow-up commit"
-)]
 fn encoded_size_for_varlen(len: usize) -> u32 {
     // 1 sentinel + ceil(len/32)*33 content bytes (or 1 zero terminator if empty)
     if len == 0 {
@@ -113,9 +111,7 @@ pub fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
                 vt.byte_width() as u32
             )))
         }
-        DType::Utf8(_) | DType::Binary(_) => {
-            vortex_bail!("row encoding for {} is not yet supported", dtype)
-        }
+        DType::Utf8(_) | DType::Binary(_) => Ok(RowWidth::Variable),
         DType::Struct(..) | DType::FixedSizeList(..) | DType::List(..) | DType::Extension(..) => {
             vortex_bail!("row encoding for {} is not yet supported", dtype)
         }
@@ -139,15 +135,15 @@ pub fn field_size(
     canonical: &Canonical,
     _field: SortField,
     sizes: &mut [u32],
-    _ctx: &mut ExecutionCtx,
+    ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
     match canonical {
         Canonical::Null(arr) => add_size_null(arr, sizes),
         Canonical::Bool(_) => add_size_const(sizes, encoded_size_for_fixed(1)),
         Canonical::Primitive(arr) => add_size_primitive(arr, sizes),
         Canonical::Decimal(arr) => add_size_decimal(arr, sizes),
-        Canonical::VarBinView(_)
-        | Canonical::Struct(_)
+        Canonical::VarBinView(arr) => add_size_varbinview(arr, sizes, ctx)?,
+        Canonical::Struct(_)
         | Canonical::FixedSizeList(_)
         | Canonical::Extension(_)
         | Canonical::List(_) => vortex_bail!(
@@ -180,8 +176,8 @@ pub fn field_encode(
         Canonical::Bool(arr) => encode_bool(arr, field, offsets, cursors, out, ctx)?,
         Canonical::Primitive(arr) => encode_primitive(arr, field, offsets, cursors, out, ctx)?,
         Canonical::Decimal(arr) => encode_decimal(arr, field, offsets, cursors, out, ctx)?,
-        Canonical::VarBinView(_)
-        | Canonical::Struct(_)
+        Canonical::VarBinView(arr) => encode_varbinview(arr, field, offsets, cursors, out, ctx)?,
+        Canonical::Struct(_)
         | Canonical::FixedSizeList(_)
         | Canonical::Extension(_)
         | Canonical::List(_) => vortex_bail!(
@@ -219,6 +215,25 @@ fn add_size_decimal(arr: &DecimalArray, sizes: &mut [u32]) {
     add_size_const(sizes, encoded_size_for_fixed(width));
 }
 
+fn add_size_varbinview(
+    arr: &VarBinViewArray,
+    sizes: &mut [u32],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let mask = arr.as_ref().validity()?.execute_mask(arr.len(), ctx)?;
+    let views = arr.views();
+    for (i, view) in views.iter().enumerate() {
+        let valid = mask.value(i);
+        if !valid {
+            sizes[i] += 1; // sentinel only
+        } else {
+            let len = view.len() as usize;
+            sizes[i] += encoded_size_for_varlen(len);
+        }
+    }
+    Ok(())
+}
+
 fn encode_null(
     arr: &NullArray,
     field: SortField,
@@ -369,6 +384,69 @@ fn encode_decimal_typed<T>(
     }
 }
 
+fn encode_varbinview(
+    arr: &VarBinViewArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let mask = arr.as_ref().validity()?.execute_mask(arr.len(), ctx)?;
+    let non_null = field.non_null_sentinel();
+    let null = field.null_sentinel();
+
+    arr.with_iterator(|iter| {
+        for (i, maybe) in iter.enumerate() {
+            let pos = (row_offsets[i] + col_offset[i]) as usize;
+            if !mask.value(i) {
+                out[pos] = null;
+                col_offset[i] += 1;
+                continue;
+            }
+            let bytes: &[u8] = maybe.unwrap_or(&[]);
+            out[pos] = non_null;
+            let written = encode_varlen_value(bytes, &mut out[pos + 1..], field.descending);
+            col_offset[i] += 1 + written;
+        }
+    });
+    Ok(())
+}
+
+/// Encode a variable-length byte slice into `out` in 32-byte blocks with
+/// continuation markers. Returns the number of bytes written.
+fn encode_varlen_value(bytes: &[u8], out: &mut [u8], descending: bool) -> u32 {
+    let xor = if descending { 0xFFu8 } else { 0x00 };
+    if bytes.is_empty() {
+        // Single zero terminator.
+        out[0] = xor;
+        return 1;
+    }
+    let mut written = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() > VARLEN_BLOCK_SIZE {
+        // Full block, continuation marker 0xFF (then XORed if descending).
+        let block = &remaining[..VARLEN_BLOCK_SIZE];
+        for (i, &b) in block.iter().enumerate() {
+            out[written + i] = b ^ xor;
+        }
+        out[written + VARLEN_BLOCK_SIZE] = 0xFF ^ xor;
+        written += VARLEN_BLOCK_TOTAL;
+        remaining = &remaining[VARLEN_BLOCK_SIZE..];
+    }
+    // Final partial block: pad with zeros, last byte = remaining.len() (1..=32).
+    let n = remaining.len();
+    for (i, &b) in remaining.iter().enumerate() {
+        out[written + i] = b ^ xor;
+    }
+    for j in n..VARLEN_BLOCK_SIZE {
+        out[written + j] = xor;
+    }
+    out[written + VARLEN_BLOCK_SIZE] = (n as u8) ^ xor;
+    written += VARLEN_BLOCK_TOTAL;
+    written as u32
+}
+
 /// Internal trait for encoding a fixed-width native value into byte slots.
 ///
 /// Implementations must produce a sequence of `size_of::<Self>()` bytes that is
@@ -507,6 +585,27 @@ pub fn encode_scalar_primitive(
     Ok(())
 }
 
+/// Encode a single varlen value into a buffer.
+pub fn encode_scalar_varlen(value: Option<&[u8]>, field: SortField, out: &mut ByteBufferMut) {
+    match value {
+        None => out.push(field.null_sentinel()),
+        Some(bytes) => {
+            out.push(field.non_null_sentinel());
+            let needed = if bytes.is_empty() {
+                1
+            } else {
+                bytes.len().div_ceil(VARLEN_BLOCK_SIZE) * VARLEN_BLOCK_TOTAL
+            };
+            let start = out.len();
+            for _ in 0..needed {
+                out.push(0);
+            }
+            let written = encode_varlen_value(bytes, &mut out[start..], field.descending);
+            debug_assert_eq!(written as usize, needed);
+        }
+    }
+}
+
 /// Encode a single boolean value.
 pub fn encode_scalar_bool(value: Option<bool>, field: SortField, out: &mut ByteBufferMut) {
     match value {
@@ -546,6 +645,7 @@ pub fn encoded_size_for_scalar(
                 let vt = DecimalType::smallest_decimal_value_type(dt);
                 Ok(encoded_size_for_fixed(vt.byte_width() as u32))
             }
+            DType::Utf8(_) | DType::Binary(_) => Ok(1),
             _ => vortex_bail!(
                 "unsupported scalar dtype for row encoding: {}",
                 scalar.dtype()
@@ -564,6 +664,18 @@ pub fn encoded_size_for_scalar(
                     .unwrap_or(DecimalType::I128);
                 Ok(encoded_size_for_fixed(vt.byte_width() as u32))
             }
+            DType::Utf8(_) => {
+                let bs = scalar
+                    .as_utf8()
+                    .value()
+                    .map(|s| s.as_str().len())
+                    .unwrap_or(0);
+                Ok(encoded_size_for_varlen(bs))
+            }
+            DType::Binary(_) => {
+                let bs = scalar.as_binary().value().map(|b| b.len()).unwrap_or(0);
+                Ok(encoded_size_for_varlen(bs))
+            }
             _ => vortex_bail!(
                 "unsupported scalar dtype for row encoding: {}",
                 scalar.dtype()
@@ -601,6 +713,7 @@ pub fn encode_scalar(
                     out.push(0);
                 }
             }
+            DType::Utf8(_) | DType::Binary(_) => out.push(field.null_sentinel()),
             _ => vortex_bail!(
                 "unsupported scalar dtype for row encoding: {}",
                 scalar.dtype()
@@ -657,6 +770,16 @@ pub fn encode_scalar(
                     }
                 }
             }
+            DType::Utf8(_) => {
+                let v = scalar.as_utf8();
+                let bytes = v.value().map(|s| s.as_str().as_bytes()).unwrap_or(&[]);
+                encode_scalar_varlen(Some(bytes), field, &mut out);
+            }
+            DType::Binary(_) => {
+                let v = scalar.as_binary();
+                let bytes = v.value().map(|b| b.as_slice()).unwrap_or(&[]);
+                encode_scalar_varlen(Some(bytes), field, &mut out);
+            }
             _ => vortex_bail!(
                 "unsupported scalar dtype for row encoding: {}",
                 scalar.dtype()

From 570d358939f4d8e7919ea377773c0cfdd6637fe8 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:08:07 +0000
Subject: [PATCH 05/10] vortex-row: codec for nested canonical types

Extend the codec to handle Struct, FixedSizeList, and Extension
canonical variants. Each nested row encodes as `outer_sentinel | child
bytes...`; for null rows the child bytes are zero-filled after the
recursive encoders run so two null rows compare equal regardless of
which non-null values would have been written by the children.

`row_width_for_dtype` recurses through Struct fields and FSL elements
to return `Fixed(w)` when every leaf is fixed; otherwise `Variable`.
Extension delegates to its storage dtype. `row_width_for_dtype` now
reports List as `Variable` instead of bailing, but the size and encode
passes still bail on `Canonical::List`, i.e. ListView (the row encoder's
output is itself a ListView, so nested ListView isn't a near-term use
case). Variant and Union bail explicitly.

Signed-off-by: Claude <noreply@anthropic.com>
---
 vortex-row/src/codec.rs | 227 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 216 insertions(+), 11 deletions(-)

diff --git a/vortex-row/src/codec.rs b/vortex-row/src/codec.rs
index 4f70d80e5ae..8468301e5b3 100644
--- a/vortex-row/src/codec.rs
+++ b/vortex-row/src/codec.rs
@@ -30,9 +30,15 @@ use vortex_array::ExecutionCtx;
 use vortex_array::accessor::ArrayAccessor;
 use vortex_array::arrays::BoolArray;
 use vortex_array::arrays::DecimalArray;
+use vortex_array::arrays::ExtensionArray;
+use vortex_array::arrays::FixedSizeListArray;
 use vortex_array::arrays::NullArray;
 use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::StructArray;
 use vortex_array::arrays::VarBinViewArray;
+use vortex_array::arrays::extension::ExtensionArrayExt;
+use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt;
+use vortex_array::arrays::struct_::StructArrayExt;
 use vortex_array::dtype::DType;
 use vortex_array::dtype::DecimalType;
 use vortex_array::dtype::NativePType;
@@ -112,9 +118,28 @@ pub fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
             )))
         }
         DType::Utf8(_) | DType::Binary(_) => Ok(RowWidth::Variable),
-        DType::Struct(..) | DType::FixedSizeList(..) | DType::List(..) | DType::Extension(..) => {
-            vortex_bail!("row encoding for {} is not yet supported", dtype)
+        DType::FixedSizeList(elem, n, _) => match row_width_for_dtype(elem)? {
+            // FSL is fixed iff its element type is fixed. Add a sentinel byte for the FSL
+            // itself, then `n` copies of the element width.
+            RowWidth::Fixed(w) => {
+                let body = w.saturating_mul(*n);
+                Ok(RowWidth::Fixed(body.saturating_add(1)))
+            }
+            RowWidth::Variable => Ok(RowWidth::Variable),
+        },
+        DType::Struct(fields, _) => {
+            // Struct is fixed iff all its fields are fixed; sum their widths plus a sentinel.
+            let mut total: u32 = 1; // outer sentinel
+            for field_dtype in fields.fields() {
+                match row_width_for_dtype(&field_dtype)? {
+                    RowWidth::Fixed(w) => total = total.saturating_add(w),
+                    RowWidth::Variable => return Ok(RowWidth::Variable),
+                }
+            }
+            Ok(RowWidth::Fixed(total))
         }
+        DType::List(..) => Ok(RowWidth::Variable),
+        DType::Extension(ext) => row_width_for_dtype(ext.storage_dtype()),
         DType::Variant(_) => {
             vortex_bail!("row encoding does not support Variant arrays (no well-defined ordering)")
         }
@@ -133,7 +158,7 @@ pub fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
 /// variants land in later commits.
 pub fn field_size(
     canonical: &Canonical,
-    _field: SortField,
+    field: SortField,
     sizes: &mut [u32],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
@@ -143,10 +168,10 @@ pub fn field_size(
         Canonical::Primitive(arr) => add_size_primitive(arr, sizes),
         Canonical::Decimal(arr) => add_size_decimal(arr, sizes),
         Canonical::VarBinView(arr) => add_size_varbinview(arr, sizes, ctx)?,
-        Canonical::Struct(_)
-        | Canonical::FixedSizeList(_)
-        | Canonical::Extension(_)
-        | Canonical::List(_) => vortex_bail!(
+        Canonical::Struct(arr) => add_size_struct(arr, field, sizes, ctx)?,
+        Canonical::FixedSizeList(arr) => add_size_fsl(arr, field, sizes, ctx)?,
+        Canonical::Extension(arr) => add_size_extension(arr, field, sizes, ctx)?,
+        Canonical::List(_) => vortex_bail!(
             "row encoding does not yet support canonical type {:?}",
             canonical.dtype()
         ),
@@ -177,10 +202,10 @@ pub fn field_encode(
         Canonical::Primitive(arr) => encode_primitive(arr, field, offsets, cursors, out, ctx)?,
         Canonical::Decimal(arr) => encode_decimal(arr, field, offsets, cursors, out, ctx)?,
         Canonical::VarBinView(arr) => encode_varbinview(arr, field, offsets, cursors, out, ctx)?,
-        Canonical::Struct(_)
-        | Canonical::FixedSizeList(_)
-        | Canonical::Extension(_)
-        | Canonical::List(_) => vortex_bail!(
+        Canonical::Struct(arr) => encode_struct(arr, field, offsets, cursors, out, ctx)?,
+        Canonical::FixedSizeList(arr) => encode_fsl(arr, field, offsets, cursors, out, ctx)?,
+        Canonical::Extension(arr) => encode_extension(arr, field, offsets, cursors, out, ctx)?,
+        Canonical::List(_) => vortex_bail!(
             "row encoding does not yet support canonical type {:?}",
             canonical.dtype()
         ),
@@ -234,6 +259,60 @@ fn add_size_varbinview(
     Ok(())
 }
 
+fn add_size_struct(
+    arr: &StructArray,
+    field: SortField,
+    sizes: &mut [u32],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    // Struct-level null sentinel: one byte per row, mirroring the byte `encode_struct` writes.
+    for s in sizes.iter_mut() {
+        *s += 1;
+    }
+    // Each child is canonicalized and adds its own per-row size, reusing the parent `field`.
+    for child in arr.iter_unmasked_fields() {
+        let canonical = child.clone().execute::<Canonical>(ctx)?;
+        field_size(&canonical, field, sizes, ctx)?;
+    }
+    Ok(())
+}
+
+fn add_size_fsl(
+    arr: &FixedSizeListArray,
+    field: SortField,
+    sizes: &mut [u32],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let n = arr.len();
+    debug_assert_eq!(n, sizes.len());
+    let list_size = arr.list_size() as usize;
+    let elements = arr.elements().clone().execute::<Canonical>(ctx)?;
+    debug_assert_eq!(elements.len(), n * list_size);
+    // Per row: 1 sentinel byte + the sizes of its `list_size` elements. Element sizes land
+    // in a flat scratch buffer (row `i` owns `i * list_size..(i + 1) * list_size`), then fold.
+    let mut elem_sizes = vec![0u32; n * list_size];
+    field_size(&elements, field, &mut elem_sizes, ctx)?;
+    for i in 0..n {
+        let mut sum: u32 = 1; // sentinel
+        let base = i * list_size;
+        for j in 0..list_size {
+            sum = sum.saturating_add(elem_sizes[base + j]);
+        }
+        sizes[i] += sum;
+    }
+    Ok(())
+}
+
+fn add_size_extension(
+    arr: &ExtensionArray,
+    field: SortField,
+    sizes: &mut [u32],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let storage = arr.storage_array().clone().execute::<Canonical>(ctx)?;
+    field_size(&storage, field, sizes, ctx) // sized exactly like the canonical storage array
+}
+
 fn encode_null(
     arr: &NullArray,
     field: SortField,
@@ -413,6 +492,132 @@ fn encode_varbinview(
     Ok(())
 }
 
+fn encode_struct(
+    arr: &StructArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let n = arr.len();
+    let mask = arr.as_ref().validity()?.execute_mask(n, ctx)?;
+    let non_null = field.non_null_sentinel();
+    let null = field.null_sentinel();
+
+    // Layout per row: one sentinel byte followed by the encoded bytes of every field.
+    // `body_start[i]` snapshots the within-row cursor just past row `i`'s sentinel, so
+    // the null-row pass below can find where that row's body begins.
+    //
+    // Steps:
+    //   1) write the sentinel and advance the cursor past it,
+    //   2) recursively encode each field's bytes (the field encoders ignore nullness of
+    //      the struct, but use their own per-field nullness),
+    //   3) for null struct rows, overwrite `body_start[i]..col_offset[i]` with zeros.
+    //
+    // NOTE(review): with variable-width fields the zeroed body length still varies per
+    // row, so null rows are not byte-identical — confirm this is acceptable for ordering.
+    let body_start: Vec<u32> = (0..n).map(|i| col_offset[i] + 1).collect();
+    for i in 0..n {
+        let pos = (row_offsets[i] + col_offset[i]) as usize;
+        out[pos] = if mask.value(i) { non_null } else { null };
+        col_offset[i] += 1;
+    }
+
+    for child in arr.iter_unmasked_fields() {
+        let canonical = child.clone().execute::<Canonical>(ctx)?;
+        field_encode(&canonical, field, row_offsets, col_offset, out, ctx)?;
+    }
+
+    // Zero-fill body bytes of null rows (the field encoders may have written values).
+    for i in 0..n {
+        if !mask.value(i) {
+            let start = (row_offsets[i] + body_start[i]) as usize;
+            let end = (row_offsets[i] + col_offset[i]) as usize;
+            for b in &mut out[start..end] {
+                *b = 0;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn encode_fsl(
+    arr: &FixedSizeListArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let n = arr.len();
+    let list_size = arr.list_size() as usize;
+    let mask = arr.as_ref().validity()?.execute_mask(n, ctx)?;
+    let non_null = field.non_null_sentinel();
+    let null = field.null_sentinel();
+    let elements = arr.elements().clone().execute::<Canonical>(ctx)?;
+    debug_assert_eq!(elements.len(), n * list_size);
+
+    // Write one sentinel per row; `body_start` keeps the post-sentinel cursor for zero-fill.
+    let body_start: Vec<u32> = (0..n).map(|i| col_offset[i] + 1).collect();
+    for i in 0..n {
+        let pos = (row_offsets[i] + col_offset[i]) as usize;
+        out[pos] = if mask.value(i) { non_null } else { null };
+        col_offset[i] += 1;
+    }
+
+    // Encode all `n * list_size` elements into the body, treating each element as its own
+    // "row": `elem_offsets` holds its absolute start in `out`, `elem_cursors` starts at
+    // zero. The per-row byte totals are then added back into the parent `col_offset`.
+    let mut elem_sizes = vec![0u32; n * list_size];
+    field_size(&elements, field, &mut elem_sizes, ctx)?;
+    // Elements of row `i` are laid out back to back from that row's current cursor position.
+    let mut elem_offsets = vec![0u32; n * list_size];
+    for i in 0..n {
+        let mut acc = row_offsets[i] + col_offset[i];
+        for j in 0..list_size {
+            elem_offsets[i * list_size + j] = acc;
+            acc = acc.saturating_add(elem_sizes[i * list_size + j]);
+        }
+    }
+    let mut elem_cursors = vec![0u32; n * list_size];
+    field_encode(&elements, field, &elem_offsets, &mut elem_cursors, out, ctx)?;
+    // Advance the parent cursors by the total per-row element bytes.
+    for i in 0..n {
+        let mut sum: u32 = 0;
+        for j in 0..list_size {
+            sum = sum.saturating_add(elem_sizes[i * list_size + j]);
+        }
+        col_offset[i] = col_offset[i].saturating_add(sum);
+    }
+
+    // Zero-fill null bodies: the element encoder ran for every row, null lists included.
+    for i in 0..n {
+        if !mask.value(i) {
+            let start = (row_offsets[i] + body_start[i]) as usize;
+            let end = (row_offsets[i] + col_offset[i]) as usize;
+            for b in &mut out[start..end] {
+                *b = 0;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn encode_extension(
+    arr: &ExtensionArray,
+    field: SortField,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let storage = arr.storage_array().clone().execute::<Canonical>(ctx)?;
+    field_encode(&storage, field, row_offsets, col_offset, out, ctx) // encodes the storage
+}
+
 /// Encode a variable-length byte slice into `out` in 32-byte blocks with
 /// continuation markers. Returns the number of bytes written.
 fn encode_varlen_value(bytes: &[u8], out: &mut [u8], descending: bool) -> u32 {

From 5374f3b65c42c2c7bb7a646e61a956d12f1aef0c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:09:31 +0000
Subject: [PATCH 06/10] vortex-row: compute_sizes helper and RowSize ScalarFn

Add the size-pass machinery used by both RowSize and the upcoming
RowEncode pipeline. `compute_sizes` walks the N input columns once,
classifying each via `row_width_for_dtype` and accumulating
fixed-width-prefix sums in `fixed_per_row` while pushing per-row sums
of variable-length columns into a lazily allocated `var_lengths` vec.

The classification result (`ColKind` + `SizePassResult`) is private to
the crate; RowEncode consumes it in a later commit to choose between
the arithmetic and cursor encode paths.

`RowSize` returns a `Struct { fixed: U32, var: U32 }` so callers can
read the per-row width without materializing the constant `fixed` slot
as a per-row buffer (it's a `ConstantArray`); the `var` slot is a
`ConstantArray(0)` when no varlen column is present.

`dispatch_size` is the fallback-only path for PR 1 (canonicalize, then
`codec::field_size`). The `RowSizeKernel` trait exists but is unused;
per-encoding fast paths and the inventory registry arrive in PR 3.

`initialize()` does NOT register RowSize yet - that lands once
RowEncode is in place, so the session-registered pair appears together.

Signed-off-by: Claude <noreply@anthropic.com>
---
 vortex-row/public-api.lock |  76 ++++++++++
 vortex-row/src/lib.rs      |   3 +
 vortex-row/src/size.rs     | 288 +++++++++++++++++++++++++++++++++++++
 3 files changed, 367 insertions(+)
 create mode 100644 vortex-row/src/size.rs

diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
index 1afc1f05442..85985bf7521 100644
--- a/vortex-row/public-api.lock
+++ b/vortex-row/public-api.lock
@@ -182,6 +182,46 @@ impl core::marker::StructuralPartialEq for vortex_row::options::SortField
 
 pub const vortex_row::options::FIELDS_INLINE: usize
 
+pub mod vortex_row::size
+
+pub struct vortex_row::size::RowSize
+
+impl core::clone::Clone for vortex_row::size::RowSize
+
+pub fn vortex_row::size::RowSize::clone(&self) -> vortex_row::size::RowSize
+
+impl core::fmt::Debug for vortex_row::size::RowSize
+
+pub fn vortex_row::size::RowSize::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::size::RowSize
+
+pub type vortex_row::size::RowSize::Options = vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::size::RowSize::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
+
+pub fn vortex_row::size::RowSize::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
+
+pub fn vortex_row::size::RowSize::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
+
+pub fn vortex_row::size::RowSize::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+
+pub fn vortex_row::size::RowSize::id(&self) -> vortex_array::scalar_fn::ScalarFnId
+
+pub fn vortex_row::size::RowSize::is_fallible(&self, &Self::Options) -> bool
+
+pub fn vortex_row::size::RowSize::is_null_sensitive(&self, &Self::Options) -> bool
+
+pub fn vortex_row::size::RowSize::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
+
+pub fn vortex_row::size::RowSize::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
+
+pub trait vortex_row::size::RowSizeKernel: vortex_array::array::vtable::VTable
+
+pub fn vortex_row::size::RowSizeKernel::row_size_contribution(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
+
+pub fn vortex_row::size::dispatch_size(&vortex_array::array::erased::ArrayRef, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
+
 pub struct vortex_row::RowEncodeOptions
 
 pub vortex_row::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>
@@ -214,6 +254,38 @@ pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&sel
 
 impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions
 
+pub struct vortex_row::RowSize
+
+impl core::clone::Clone for vortex_row::size::RowSize
+
+pub fn vortex_row::size::RowSize::clone(&self) -> vortex_row::size::RowSize
+
+impl core::fmt::Debug for vortex_row::size::RowSize
+
+pub fn vortex_row::size::RowSize::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::size::RowSize
+
+pub type vortex_row::size::RowSize::Options = vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::size::RowSize::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
+
+pub fn vortex_row::size::RowSize::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
+
+pub fn vortex_row::size::RowSize::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
+
+pub fn vortex_row::size::RowSize::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+
+pub fn vortex_row::size::RowSize::id(&self) -> vortex_array::scalar_fn::ScalarFnId
+
+pub fn vortex_row::size::RowSize::is_fallible(&self, &Self::Options) -> bool
+
+pub fn vortex_row::size::RowSize::is_null_sensitive(&self, &Self::Options) -> bool
+
+pub fn vortex_row::size::RowSize::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
+
+pub fn vortex_row::size::RowSize::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
+
 pub struct vortex_row::SortField
 
 pub vortex_row::SortField::descending: bool
@@ -258,4 +330,8 @@ impl core::marker::Copy for vortex_row::options::SortField
 
 impl core::marker::StructuralPartialEq for vortex_row::options::SortField
 
+pub trait vortex_row::RowSizeKernel: vortex_array::array::vtable::VTable
+
+pub fn vortex_row::RowSizeKernel::row_size_contribution(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
+
 pub fn vortex_row::initialize(&vortex_session::VortexSession)
diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs
index bdac4c8f48e..6f1d8fbeab3 100644
--- a/vortex-row/src/lib.rs
+++ b/vortex-row/src/lib.rs
@@ -8,9 +8,12 @@
 
 pub mod codec;
 pub mod options;
+pub mod size;
 
 pub use options::RowEncodeOptions;
 pub use options::SortField;
+pub use size::RowSize;
+pub use size::RowSizeKernel;
 use vortex_session::VortexSession;
 
 /// Register the row-encoding scalar functions on the given session.
diff --git a/vortex-row/src/size.rs b/vortex-row/src/size.rs
new file mode 100644
index 00000000000..fbde52e1863
--- /dev/null
+++ b/vortex-row/src/size.rs
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! `RowSize` variadic scalar function: aggregate per-row byte sizes for N input columns.
+
+use std::sync::Arc;
+
+use vortex_array::ArrayRef;
+use vortex_array::ArrayView;
+use vortex_array::Canonical;
+use vortex_array::ExecutionCtx;
+use vortex_array::IntoArray;
+use vortex_array::VTable;
+use vortex_array::arrays::ConstantArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::StructArray;
+use vortex_array::dtype::DType;
+use vortex_array::dtype::FieldName;
+use vortex_array::dtype::FieldNames;
+use vortex_array::dtype::Nullability;
+use vortex_array::dtype::PType;
+use vortex_array::dtype::StructFields;
+use vortex_array::scalar::Scalar;
+use vortex_array::scalar_fn::Arity;
+use vortex_array::scalar_fn::ChildName;
+use vortex_array::scalar_fn::ExecutionArgs;
+use vortex_array::scalar_fn::ScalarFnId;
+use vortex_array::scalar_fn::ScalarFnVTable;
+use vortex_array::validity::Validity;
+use vortex_buffer::Buffer;
+use vortex_error::VortexExpect;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+use vortex_session::VortexSession;
+
+use crate::codec;
+use crate::codec::RowWidth;
+use crate::options::RowEncodeOptions;
+use crate::options::SortField;
+use crate::options::deserialize_row_encode_options;
+use crate::options::serialize_row_encode_options;
+
+/// Classification of a single input column for the size pass.
+///
+/// Tracks each column's fixed prefix (the summed width of all preceding fixed-width
+/// columns) and, for fixed columns, whether any variable-length column has
+/// appeared yet — the encode pass uses this to choose between the arithmetic-write fast
+/// path (no varlen before this column, so the within-row position is constant) and the
+/// cursor-write path.
+#[derive(Clone, Copy, Debug)]
+#[allow(
+    dead_code,
+    reason = "fields read by the RowEncode pipeline in a later commit"
+)]
+pub(crate) enum ColKind {
+    /// Column has fixed width `width`. `prefix` is the summed width of all preceding fixed
+    /// columns. If `before_varlen` is true, no variable-length column precedes this one,
+    /// so `prefix` is this column's exact within-row byte offset for every row.
+    Fixed {
+        width: u32,
+        prefix: u32,
+        before_varlen: bool,
+    },
+    /// Column has variable per-row width. `fixed_prefix` is the sum of widths of all
+    /// preceding fixed columns; the varlen contribution from earlier varlen columns is
+    /// added per row.
+    Variable { fixed_prefix: u32 },
+}
+
+/// Result of the size pass: enough information for both [`RowSize::execute`] and the
+/// downstream [`RowEncode`](super::encode::RowEncode) pipeline.
+pub(crate) struct SizePassResult {
+    pub fixed_per_row: u32, // summed width of every fixed-width column
+    pub var_lengths: Option<Vec<u32>>, // per-row variable bytes; `None` if no varlen column
+    pub col_kinds: Vec<ColKind>, // one classification per input column, in input order
+    pub first_varlen_idx: Option<usize>, // index of the first variable-length column, if any
+    pub columns: Vec<ArrayRef>, // the input columns, in input order
+}
+
+/// Walk the N input columns once, classifying each as fixed-width or variable-length.
+///
+/// Fixed-width columns only bump the scalar `fixed_per_row`; variable-length columns add
+/// per-row contributions into the lazily-allocated `var_lengths` via [`dispatch_size`].
+/// Shared by [`RowSize::execute`] (wraps the result into a `Struct { fixed, var }`) and the
+/// [`RowEncode`](super::encode::RowEncode) pipeline (uses it all, `col_kinds` included).
+///
+/// Errors on zero inputs, a `fields`/input count mismatch, or a column whose length is not
+/// `args.row_count()` (`op_name` prefixes those messages), and propagates callee errors.
+/// Summing fixed widths past `u32::MAX` trips `vortex_expect("row width overflow")`.
+pub(crate) fn compute_sizes(
+    options: &RowEncodeOptions,
+    args: &dyn ExecutionArgs,
+    ctx: &mut ExecutionCtx,
+    op_name: &'static str,
+) -> VortexResult<SizePassResult> {
+    let n_inputs = args.num_inputs();
+    if n_inputs == 0 {
+        vortex_bail!("{} requires at least one input column", op_name);
+    }
+    if options.fields.len() != n_inputs {
+        vortex_bail!(
+            "{} options.fields.len()={} does not match num_inputs={}",
+            op_name,
+            options.fields.len(),
+            n_inputs
+        );
+    }
+    let nrows = args.row_count();
+
+    let mut columns: Vec<ArrayRef> = Vec::with_capacity(n_inputs);
+    let mut col_kinds: Vec<ColKind> = Vec::with_capacity(n_inputs);
+    let mut fixed_per_row: u32 = 0;
+    let mut var_lengths: Option<Vec<u32>> = None;
+    let mut first_varlen_idx: Option<usize> = None;
+    let mut running_fixed_prefix: u32 = 0; // mirrors `fixed_per_row`; both grow only on Fixed
+
+    for i in 0..n_inputs {
+        let col = args.get(i)?;
+        if col.len() != nrows {
+            vortex_bail!(
+                "{}: column {} has length {} but expected {}",
+                op_name,
+                i,
+                col.len(),
+                nrows
+            );
+        }
+        match codec::row_width_for_dtype(col.dtype())? {
+            RowWidth::Fixed(w) => {
+                col_kinds.push(ColKind::Fixed {
+                    width: w,
+                    prefix: running_fixed_prefix,
+                    before_varlen: first_varlen_idx.is_none(),
+                });
+                fixed_per_row = fixed_per_row
+                    .checked_add(w)
+                    .vortex_expect("row width overflow");
+                running_fixed_prefix = running_fixed_prefix
+                    .checked_add(w)
+                    .vortex_expect("row width overflow");
+            }
+            RowWidth::Variable => {
+                if first_varlen_idx.is_none() {
+                    first_varlen_idx = Some(i);
+                }
+                let v = var_lengths.get_or_insert_with(|| vec![0u32; nrows]);
+                dispatch_size(&col, options.fields[i], v, ctx)?;
+                col_kinds.push(ColKind::Variable {
+                    fixed_prefix: running_fixed_prefix,
+                });
+            }
+        }
+        columns.push(col);
+    }
+
+    Ok(SizePassResult {
+        fixed_per_row,
+        var_lengths,
+        col_kinds,
+        first_varlen_idx,
+        columns,
+    })
+}
+
+/// Variadic scalar function that, given N input columns and per-column [`SortField`]s,
+/// returns a `Struct { fixed: U32, var: U32 }` array of per-row byte sizes for the
+/// row-oriented encoding produced by [`RowEncode`](super::encode::RowEncode).
+///
+/// The `fixed` field is always a [`ConstantArray`] holding the sum of the per-column
+/// constant widths of fixed-width inputs (sentinel + value bytes). The `var` field is a
+/// `ConstantArray(0)` when there are no variable-length input columns, and a
+/// [`PrimitiveArray<u32>`] of per-row varlen-byte sums otherwise.
+///
+/// The total per-row byte size is `fixed + var`.
+#[derive(Clone, Debug)]
+pub struct RowSize;
+
+/// Returns the [`FieldNames`] used by the [`RowSize`] output struct.
+pub(crate) fn row_size_field_names() -> FieldNames {
+    FieldNames::from([FieldName::from("fixed"), FieldName::from("var")])
+}
+
+/// Returns the output [`DType`] of [`RowSize`].
+pub(crate) fn row_size_struct_dtype() -> DType {
+    DType::Struct(
+        StructFields::new(
+            row_size_field_names(),
+            vec![
+                DType::Primitive(PType::U32, Nullability::NonNullable),
+                DType::Primitive(PType::U32, Nullability::NonNullable),
+            ],
+        ),
+        Nullability::NonNullable,
+    )
+}
+
+impl ScalarFnVTable for RowSize {
+    type Options = RowEncodeOptions;
+
+    fn id(&self) -> ScalarFnId {
+        ScalarFnId::from("vortex.row_size")
+    }
+
+    fn serialize(&self, options: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
+        Ok(Some(serialize_row_encode_options(options)))
+    }
+
+    fn deserialize(
+        &self,
+        metadata: &[u8],
+        _session: &VortexSession,
+    ) -> VortexResult<Self::Options> {
+        deserialize_row_encode_options(metadata)
+    }
+
+    fn arity(&self, _options: &Self::Options) -> Arity {
+        Arity::Variadic { min: 1, max: None }
+    }
+
+    fn child_name(&self, _options: &Self::Options, child_idx: usize) -> ChildName {
+        ChildName::from(Arc::from(format!("col_{}", child_idx)))
+    }
+
+    fn return_dtype(&self, _options: &Self::Options, _args: &[DType]) -> VortexResult<DType> {
+        Ok(row_size_struct_dtype()) // fixed output shape; the input dtypes are not checked here
+    }
+
+    fn execute(
+        &self,
+        options: &Self::Options,
+        args: &dyn ExecutionArgs,
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<ArrayRef> {
+        let nrows = args.row_count();
+        let result = compute_sizes(options, args, ctx, "RowSize")?;
+        let fixed_array =
+            ConstantArray::new(Scalar::from(result.fixed_per_row), nrows).into_array();
+        let var_array = match result.var_lengths {
+            Some(v) => PrimitiveArray::new(Buffer::<u32>::copy_from(&v), Validity::NonNullable)
+                .into_array(),
+            None => ConstantArray::new(Scalar::from(0u32), nrows).into_array(),
+        };
+        Ok(StructArray::try_new(
+            row_size_field_names(),
+            vec![fixed_array, var_array],
+            nrows,
+            Validity::NonNullable,
+        )?
+        .into_array())
+    }
+
+    fn is_null_sensitive(&self, _options: &Self::Options) -> bool {
+        true
+    }
+
+    fn is_fallible(&self, _options: &Self::Options) -> bool {
+        true // `execute` can fail: `compute_sizes` bails on mismatches and unsupported dtypes
+    }
+}
+
+/// Dispatch a single column's per-row size contribution.
+///
+/// For PR 1 this is just the canonicalize-then-`codec::field_size` fallback path. In-crate
+/// fast paths for `Constant`/`Dict`/`Patched` and the inventory-based registry for
+/// downstream encodings are added in PR 3.
+pub fn dispatch_size(
+    col: &ArrayRef,
+    field: SortField,
+    sizes: &mut [u32],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let canonical = col.clone().execute::<Canonical>(ctx)?;
+    codec::field_size(&canonical, field, sizes, ctx)
+}
+
+/// Mutate-buffer kernel: add this column's per-row byte contribution into the shared
+/// `sizes` slice. Return `Ok(None)` to decline and fall back to the canonical path.
+///
+/// Trait is defined now; per-encoding impls and dispatch wiring land in PR 3.
+pub trait RowSizeKernel: VTable {
+    /// Add this column's per-row byte contribution into `sizes`.
+    fn row_size_contribution(
+        column: ArrayView<'_, Self>,
+        field: SortField,
+        sizes: &mut [u32],
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<Option<()>>;
+}

From 40783a62bcfdc5b5d92782144c101cd577801d11 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:11:06 +0000
Subject: [PATCH 07/10] vortex-row: RowEncode ScalarFn

Add the RowEncode variadic scalar function: encode N input columns into
a single ListView<u8> in a five-phase pipeline.

  Phase 1: size pass via `compute_sizes`.
  Phase 2: allocate a zero-initialized output buffer sized to fit every
           row's encoded bytes; bail if the total exceeds u32::MAX.
  Phase 3: build per-row `listview_offsets`: i * fixed_per_row for the
           pure-fixed case, or i * fixed_per_row + exclusive cumsum of
           varlen lengths otherwise. Uses the simple `Vec::push` +
           `checked_add` loop.
  Phase 4: walk columns left-to-right and call `dispatch_encode` for
           every column (cursor path for all). Each call writes its
           per-row bytes at `offsets[i] + cursors[i]` and advances the
           cursor.
  Phase 5: build the ListView<u8> via the validating `try_new`
           constructor.

`dispatch_encode` is the canonicalize-then-`codec::field_encode`
fallback; in-crate kernel arms and the inventory registry land in PR 3.
The `RowEncodeKernel` trait is defined but unused. PR 2 will iterate
on this pipeline (skip zero-init, skip ListView validation,
auto-vectorize the offsets loop, etc.).

Signed-off-by: Claude <noreply@anthropic.com>
---
 vortex-row/public-api.lock |  76 ++++++++++++
 vortex-row/src/encode.rs   | 238 +++++++++++++++++++++++++++++++++++++
 vortex-row/src/lib.rs      |   3 +
 vortex-row/src/size.rs     |   8 ++
 4 files changed, 325 insertions(+)
 create mode 100644 vortex-row/src/encode.rs

diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
index 85985bf7521..f999303948d 100644
--- a/vortex-row/public-api.lock
+++ b/vortex-row/public-api.lock
@@ -102,6 +102,46 @@ pub fn vortex_row::codec::field_size(&vortex_array::canonical::Canonical, vortex
 
 pub fn vortex_row::codec::row_width_for_dtype(&vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_row::codec::RowWidth>
 
+pub mod vortex_row::encode
+
+pub struct vortex_row::encode::RowEncode
+
+impl core::clone::Clone for vortex_row::encode::RowEncode
+
+pub fn vortex_row::encode::RowEncode::clone(&self) -> vortex_row::encode::RowEncode
+
+impl core::fmt::Debug for vortex_row::encode::RowEncode
+
+pub fn vortex_row::encode::RowEncode::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::encode::RowEncode
+
+pub type vortex_row::encode::RowEncode::Options = vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::encode::RowEncode::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
+
+pub fn vortex_row::encode::RowEncode::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
+
+pub fn vortex_row::encode::RowEncode::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
+
+pub fn vortex_row::encode::RowEncode::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+
+pub fn vortex_row::encode::RowEncode::id(&self) -> vortex_array::scalar_fn::ScalarFnId
+
+pub fn vortex_row::encode::RowEncode::is_fallible(&self, &Self::Options) -> bool
+
+pub fn vortex_row::encode::RowEncode::is_null_sensitive(&self, &Self::Options) -> bool
+
+pub fn vortex_row::encode::RowEncode::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
+
+pub fn vortex_row::encode::RowEncode::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
+
+pub trait vortex_row::encode::RowEncodeKernel: vortex_array::array::vtable::VTable
+
+pub fn vortex_row::encode::RowEncodeKernel::row_encode_into(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
+
+pub fn vortex_row::encode::dispatch_encode(&vortex_array::array::erased::ArrayRef, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
+
 pub mod vortex_row::options
 
 pub struct vortex_row::options::RowEncodeOptions
@@ -222,6 +262,38 @@ pub fn vortex_row::size::RowSizeKernel::row_size_contribution(vortex_array::arra
 
 pub fn vortex_row::size::dispatch_size(&vortex_array::array::erased::ArrayRef, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
 
+pub struct vortex_row::RowEncode
+
+impl core::clone::Clone for vortex_row::encode::RowEncode
+
+pub fn vortex_row::encode::RowEncode::clone(&self) -> vortex_row::encode::RowEncode
+
+impl core::fmt::Debug for vortex_row::encode::RowEncode
+
+pub fn vortex_row::encode::RowEncode::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::encode::RowEncode
+
+pub type vortex_row::encode::RowEncode::Options = vortex_row::options::RowEncodeOptions
+
+pub fn vortex_row::encode::RowEncode::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
+
+pub fn vortex_row::encode::RowEncode::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
+
+pub fn vortex_row::encode::RowEncode::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
+
+pub fn vortex_row::encode::RowEncode::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+
+pub fn vortex_row::encode::RowEncode::id(&self) -> vortex_array::scalar_fn::ScalarFnId
+
+pub fn vortex_row::encode::RowEncode::is_fallible(&self, &Self::Options) -> bool
+
+pub fn vortex_row::encode::RowEncode::is_null_sensitive(&self, &Self::Options) -> bool
+
+pub fn vortex_row::encode::RowEncode::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
+
+pub fn vortex_row::encode::RowEncode::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
+
 pub struct vortex_row::RowEncodeOptions
 
 pub vortex_row::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>
@@ -330,6 +402,10 @@ impl core::marker::Copy for vortex_row::options::SortField
 
 impl core::marker::StructuralPartialEq for vortex_row::options::SortField
 
+pub trait vortex_row::RowEncodeKernel: vortex_array::array::vtable::VTable
+
+pub fn vortex_row::RowEncodeKernel::row_encode_into(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
+
 pub trait vortex_row::RowSizeKernel: vortex_array::array::vtable::VTable
 
 pub fn vortex_row::RowSizeKernel::row_size_contribution(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
diff --git a/vortex-row/src/encode.rs b/vortex-row/src/encode.rs
new file mode 100644
index 00000000000..1b77d955964
--- /dev/null
+++ b/vortex-row/src/encode.rs
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+#![allow(
+    clippy::cast_possible_truncation,
+    reason = "row encoding indexes into u32-sized buffers; lengths are validated to fit in u32"
+)]
+
+//! `RowEncode` variadic scalar function: encode N input columns into a single `ListView<u8>`.
+//!
+//! The output's `(elements, offsets, sizes)` triple is built up in a single left-to-right
+//! pass over the input columns. The `sizes` array doubles as the per-row write cursor, so
+//! when the last column finishes encoding, the accumulator is the final array - no separate
+//! conversion step is needed.
+
+use std::sync::Arc;
+
+use vortex_array::ArrayRef;
+use vortex_array::ArrayView;
+use vortex_array::Canonical;
+use vortex_array::ExecutionCtx;
+use vortex_array::IntoArray;
+use vortex_array::VTable;
+use vortex_array::arrays::ListViewArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::dtype::DType;
+use vortex_array::dtype::Nullability;
+use vortex_array::dtype::PType;
+use vortex_array::scalar_fn::Arity;
+use vortex_array::scalar_fn::ChildName;
+use vortex_array::scalar_fn::ExecutionArgs;
+use vortex_array::scalar_fn::ScalarFnId;
+use vortex_array::scalar_fn::ScalarFnVTable;
+use vortex_array::validity::Validity;
+use vortex_buffer::Buffer;
+use vortex_buffer::BufferMut;
+use vortex_error::VortexExpect;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+use vortex_session::VortexSession;
+
+use crate::codec;
+use crate::options::RowEncodeOptions;
+use crate::options::SortField;
+use crate::options::deserialize_row_encode_options;
+use crate::options::serialize_row_encode_options;
+use crate::size::compute_sizes;
+
+/// Variadic scalar function that encodes N input columns into a single `List<u8>`
+/// [`ListViewArray`] where row `i` contains the row-encoded bytes for column values
+/// `cols[0][i], cols[1][i], ...` concatenated left-to-right.
+#[derive(Clone, Debug)]
+pub struct RowEncode;
+
+impl ScalarFnVTable for RowEncode {
+    type Options = RowEncodeOptions;
+
+    fn id(&self) -> ScalarFnId {
+        ScalarFnId::from("vortex.row_encode")
+    }
+
+    fn serialize(&self, options: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
+        Ok(Some(serialize_row_encode_options(options)))
+    }
+
+    fn deserialize(
+        &self,
+        metadata: &[u8],
+        _session: &VortexSession,
+    ) -> VortexResult<Self::Options> {
+        deserialize_row_encode_options(metadata)
+    }
+
+    fn arity(&self, _options: &Self::Options) -> Arity {
+        Arity::Variadic { min: 1, max: None }
+    }
+
+    fn child_name(&self, _options: &Self::Options, child_idx: usize) -> ChildName {
+        ChildName::from(Arc::from(format!("col_{}", child_idx)))
+    }
+
+    fn return_dtype(&self, _options: &Self::Options, _args: &[DType]) -> VortexResult<DType> {
+        Ok(DType::List(
+            Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable)),
+            Nullability::NonNullable,
+        ))
+    }
+
+    fn execute(
+        &self,
+        options: &Self::Options,
+        args: &dyn ExecutionArgs,
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<ArrayRef> {
+        execute_row_encode(options, args, ctx)
+    }
+
+    fn is_null_sensitive(&self, _options: &Self::Options) -> bool {
+        true
+    }
+
+    fn is_fallible(&self, _options: &Self::Options) -> bool {
+        true // `execute` bails when the encoded output exceeds u32::MAX bytes
+    }
+}
+
+fn execute_row_encode(
+    options: &RowEncodeOptions,
+    args: &dyn ExecutionArgs,
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<ArrayRef> {
+    let nrows = args.row_count();
+
+    // ===== Phase 1: classify + size pass =====
+    let crate::size::SizePassResult {
+        fixed_per_row,
+        var_lengths,
+        col_kinds: _,
+        first_varlen_idx: _,
+        columns,
+    } = compute_sizes(options, args, ctx, "RowEncode")?;
+
+    // ===== Phase 2: totals + buffer =====
+    let var_total: u64 = var_lengths
+        .as_ref()
+        .map_or(0, |v| v.iter().map(|&x| u64::from(x)).sum());
+    let total: u64 = (nrows as u64)
+        .checked_mul(u64::from(fixed_per_row))
+        .and_then(|t| t.checked_add(var_total))
+        .vortex_expect("row-encoded total bytes overflow");
+    if total > u32::MAX as u64 {
+        vortex_bail!("row-encoded output size {} bytes exceeds u32::MAX", total);
+    }
+    let total_len = total as usize;
+
+    // Allocate the elements buffer (zero-initialized). The zero-init lets every encoder
+    // assume previously-untouched bytes are zero, simplifying the null-row fill paths.
+    // PR 2 skips this memset because every byte in the output range is written by some
+    // encoder.
+    let mut out_buf: BufferMut<u8> = BufferMut::with_capacity(total_len);
+    out_buf.push_n(0u8, total_len);
+
+    // ===== Phase 3: per-row offsets =====
+    // listview_offsets[i] is the absolute byte offset where row `i` begins.
+    // For pure-fixed: i * fixed_per_row.
+    // For mixed: i * fixed_per_row + exclusive prefix sum of var_lengths.
+    let mut listview_offsets: Vec<u32> = Vec::with_capacity(nrows);
+    match var_lengths.as_ref() {
+        None => {
+            for i in 0..nrows {
+                listview_offsets.push(
+                    (i as u32)
+                        .checked_mul(fixed_per_row)
+                        .vortex_expect("row offset overflow (already validated total fits in u32)"),
+                );
+            }
+        }
+        Some(v) => {
+            let mut acc: u32 = 0;
+            for (i, &l) in v.iter().enumerate() {
+                let off = (i as u32)
+                    .checked_mul(fixed_per_row)
+                    .and_then(|t| t.checked_add(acc))
+                    .vortex_expect("row offset overflow");
+                listview_offsets.push(off);
+                acc = acc.checked_add(l).vortex_expect("varlen prefix overflow");
+            }
+        }
+    }
+
+    // Per-row write cursor (also doubles as the ListView `sizes` slot when done).
+    let mut row_cursors = vec![0u32; nrows];
+
+    // ===== Phase 4: encode columns via the cursor path =====
+    for (i, col) in columns.iter().enumerate() {
+        dispatch_encode(
+            col,
+            options.fields[i],
+            &listview_offsets,
+            &mut row_cursors,
+            &mut out_buf,
+            ctx,
+        )?;
+    }
+
+    // ===== Phase 5: build ListView output =====
+    let elements = PrimitiveArray::new(out_buf.freeze(), Validity::NonNullable).into_array();
+    let offsets_arr = PrimitiveArray::new(
+        Buffer::<u32>::copy_from(&listview_offsets),
+        Validity::NonNullable,
+    )
+    .into_array();
+    let sizes_arr = PrimitiveArray::new(
+        Buffer::<u32>::copy_from(&row_cursors),
+        Validity::NonNullable,
+    )
+    .into_array();
+    Ok(
+        ListViewArray::try_new(elements, offsets_arr, sizes_arr, Validity::NonNullable)?
+            .into_array(),
+    )
+}
+
+/// Dispatch a single column's encoding into the shared `out` buffer.
+///
+/// For PR 1 this is just the canonicalize-then-`codec::field_encode` fallback path.
+/// In-crate fast paths for `Constant`/`Dict`/`Patched` and the inventory-based registry
+/// for downstream encodings are added in PR 3.
+pub fn dispatch_encode(
+    col: &ArrayRef,
+    field: SortField,
+    offsets: &[u32],
+    cursors: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let canonical = col.clone().execute::<Canonical>(ctx)?;
+    codec::field_encode(&canonical, field, offsets, cursors, out, ctx)
+}
+
+/// Mutate-buffer kernel: write this column's per-row bytes into `out` at
+/// `offsets[i] + cursors[i]`, advancing `cursors[i]` by the bytes written.
+///
+/// Return `Ok(None)` to decline and fall back to the canonical path.
+///
+/// Trait is defined now; per-encoding impls and dispatch wiring land in PR 3.
+pub trait RowEncodeKernel: VTable {
+    /// Write this column's per-row bytes into `out` at `offsets[i] + cursors[i]`, advancing
+    /// `cursors[i]` by the bytes written.
+    fn row_encode_into(
+        column: ArrayView<'_, Self>,
+        field: SortField,
+        offsets: &[u32],
+        cursors: &mut [u32],
+        out: &mut [u8],
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<Option<()>>;
+}
diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs
index 6f1d8fbeab3..ef0209f3d9c 100644
--- a/vortex-row/src/lib.rs
+++ b/vortex-row/src/lib.rs
@@ -7,9 +7,12 @@
 //! This commit only establishes the crate skeleton and an `initialize` stub.
 
 pub mod codec;
+pub mod encode;
 pub mod options;
 pub mod size;
 
+pub use encode::RowEncode;
+pub use encode::RowEncodeKernel;
 pub use options::RowEncodeOptions;
 pub use options::SortField;
 pub use size::RowSize;
diff --git a/vortex-row/src/size.rs b/vortex-row/src/size.rs
index fbde52e1863..7148a2a21d8 100644
--- a/vortex-row/src/size.rs
+++ b/vortex-row/src/size.rs
@@ -72,7 +72,15 @@ pub(crate) enum ColKind {
 pub(crate) struct SizePassResult {
     pub fixed_per_row: u32,
     pub var_lengths: Option<Vec<u32>>,
+    #[allow(
+        dead_code,
+        reason = "consumed by the arithmetic-write fast path added in PR 2"
+    )]
     pub col_kinds: Vec<ColKind>,
+    #[allow(
+        dead_code,
+        reason = "consumed by the arithmetic-write fast path added in PR 2"
+    )]
     pub first_varlen_idx: Option<usize>,
     pub columns: Vec<ArrayRef>,
 }

From 87febfe4a71de78b52c0dd15917ca78390524889 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 17 May 2026 22:16:19 +0000
Subject: [PATCH 08/10] vortex-row: convert_columns + tests + bench scaffolding

Wire the RowSize/RowEncode scalar functions to the user-facing API:

- `convert_columns` accepts a slice of input arrays and per-column
  SortFields, constructs `RowEncodeOptions` + `VecExecutionArgs`, and
  returns the encoded `ListViewArray<u8>`.
- `compute_row_sizes` returns just the per-row sizes (the `Struct
  { fixed: u32, var: u32 }` output of `RowSize`).
- `initialize()` now registers `RowSize` and `RowEncode` on the given
  session so they are reachable via the expression layer.

Tests cover sort-order round-trips for bool, primitive (i64 asc/desc,
u32, f64), utf8, multi-column, nulls_first/last, struct sort-order, the
single-buffer invariant of the ListView output, and the structural
shape of `RowSize`. Tests that exercise per-encoding fast paths
(`constant_path_matches_canonical`, `dict_path_matches_canonical`) land
together with their respective kernels in PR 3.

The bench file uses divan + mimalloc and reports throughput in GB/s of
encoded output bytes for primitive_i64, utf8, and struct_mixed. Each
has an `arrow_row` baseline and a `vortex` measurement. Per-encoding
fast-path scenarios (constant/dict/patched/bitpacked/for/delta) gain
their triplets in PR 3.

Baseline measurements at this commit (sample-count=10):
  primitive_i64_vortex  ~1.97 GB/s  (vs arrow-row 4.12 GB/s)
  utf8_vortex           ~0.87 GB/s  (vs arrow-row 1.56 GB/s)
  struct_mixed_vortex   ~0.95 GB/s  (vs arrow-row 1.19 GB/s)

PR 2 closes most of the gap by replacing the validating
`ListViewArray::try_new` with `new_unchecked`, skipping the buffer
zero-init, auto-vectorizing the offsets and varlen-block paths, etc.

Signed-off-by: Claude <noreply@anthropic.com>
---
 Cargo.lock                       |   7 +
 Cargo.toml                       |   3 +-
 vortex-row/Cargo.toml            |  14 ++
 vortex-row/benches/row_encode.rs | 177 +++++++++++++++++
 vortex-row/public-api.lock       |  10 +
 vortex-row/src/convert.rs        |  75 +++++++
 vortex-row/src/lib.rs            |  41 +++-
 vortex-row/src/tests.rs          | 324 +++++++++++++++++++++++++++++++
 8 files changed, 645 insertions(+), 6 deletions(-)
 create mode 100644 vortex-row/benches/row_encode.rs
 create mode 100644 vortex-row/src/convert.rs
 create mode 100644 vortex-row/src/tests.rs

diff --git a/Cargo.lock b/Cargo.lock
index 0b84f6dd260..23ca027d592 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11038,7 +11038,14 @@ dependencies = [
 name = "vortex-row"
 version = "0.1.0"
 dependencies = [
+ "arrow-array 58.2.0",
+ "arrow-row 58.2.0",
+ "arrow-schema 58.2.0",
  "bytes",
+ "codspeed-divan-compat",
+ "mimalloc",
+ "rand 0.10.1",
+ "rstest",
  "smallvec",
  "vortex-array",
  "vortex-buffer",
diff --git a/Cargo.toml b/Cargo.toml
index 9fae5b564bf..cdf28137563 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,12 +7,12 @@ members = [
     "vortex-mask",
     "vortex-utils",
     "vortex-session",
-    "vortex-row",
     "vortex-flatbuffers",
     "vortex-metrics",
     "vortex-io",
     "vortex-proto",
     "vortex-array",
+    "vortex-row",
     "vortex-tensor",
     "vortex-turboquant",
     "vortex-compressor",
@@ -103,6 +103,7 @@ arrow-cast = "58"
 arrow-data = "58"
 arrow-ipc = "58"
 arrow-ord = "58"
+arrow-row = "58"
 arrow-schema = "58"
 arrow-select = "58"
 arrow-string = "58"
diff --git a/vortex-row/Cargo.toml b/vortex-row/Cargo.toml
index aaed9a55f51..50d6547474a 100644
--- a/vortex-row/Cargo.toml
+++ b/vortex-row/Cargo.toml
@@ -24,3 +24,17 @@ vortex-buffer = { workspace = true }
 vortex-error = { workspace = true }
 vortex-mask = { workspace = true }
 vortex-session = { workspace = true }
+
+[dev-dependencies]
+arrow-array = { workspace = true }
+arrow-row = { workspace = true }
+arrow-schema = { workspace = true }
+divan = { workspace = true }
+mimalloc = { workspace = true }
+rand = { workspace = true }
+rstest = { workspace = true }
+vortex-array = { workspace = true, features = ["_test-harness"] }
+
+[[bench]]
+name = "row_encode"
+harness = false
diff --git a/vortex-row/benches/row_encode.rs b/vortex-row/benches/row_encode.rs
new file mode 100644
index 00000000000..8d631d785da
--- /dev/null
+++ b/vortex-row/benches/row_encode.rs
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+#![expect(
+    clippy::unwrap_used,
+    clippy::clone_on_ref_ptr,
+    clippy::cloned_ref_to_slice_refs,
+    clippy::cast_possible_truncation,
+    clippy::cast_possible_wrap,
+    clippy::redundant_clone
+)]
+
+//! Row-encode throughput benchmarks comparing `arrow-row` against vortex's `convert_columns`
+//! for the canonical scenarios shipped in PR 1: a primitive i64 column, a Utf8 column,
+//! and a mixed-field struct. Per-encoding fast paths (Constant, Dict, Patched, BitPacked,
+//! FoR, Delta) gain their own triplets in PR 3.
+
+use std::sync::Arc;
+
+use arrow_array::Int64Array;
+use arrow_array::StringArray;
+use arrow_array::StructArray as ArrowStructArray;
+use arrow_row::RowConverter;
+use arrow_row::SortField as ArrowSortField;
+use arrow_schema::DataType;
+use arrow_schema::Field;
+use divan::counter::BytesCount;
+use mimalloc::MiMalloc;
+use rand::RngExt;
+use rand::SeedableRng;
+use rand::distr::Alphanumeric;
+use rand::rngs::StdRng;
+use vortex_array::IntoArray;
+use vortex_array::LEGACY_SESSION;
+use vortex_array::VortexSessionExecute;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::StructArray;
+use vortex_array::arrays::VarBinViewArray;
+use vortex_row::SortField;
+use vortex_row::convert_columns;
+
+#[global_allocator]
+static GLOBAL: MiMalloc = MiMalloc;
+
+const N: usize = 100_000;
+
+fn main() {
+    divan::main();
+}
+
+fn gen_i64(n: usize, seed: u64) -> Vec<i64> {
+    let mut rng = StdRng::seed_from_u64(seed);
+    (0..n)
+        .map(|_| rng.random_range(i64::MIN..i64::MAX))
+        .collect()
+}
+
+fn gen_words(n: usize, mean_len: usize, seed: u64) -> Vec<String> {
+    let rng = &mut StdRng::seed_from_u64(seed);
+    (0..n)
+        .map(|_| {
+            let len = rng.random_range(mean_len.saturating_sub(4)..=mean_len + 4);
+            rng.sample_iter(&Alphanumeric)
+                .take(len)
+                .map(char::from)
+                .collect::<String>()
+        })
+        .collect()
+}
+
+// ---------- primitive_i64 ----------
+
+#[divan::bench]
+fn primitive_i64_arrow_row(bencher: divan::Bencher) {
+    let v = gen_i64(N, 0);
+    let arr = Arc::new(Int64Array::from(v.clone())) as arrow_array::ArrayRef;
+    let conv = RowConverter::new(vec![ArrowSortField::new(DataType::Int64)]).unwrap();
+    let bytes = (N * (1 + 8)) as u64;
+    bencher
+        .counter(BytesCount::new(bytes))
+        .bench_local(|| conv.convert_columns(&[arr.clone()]).unwrap())
+}
+
+#[divan::bench]
+fn primitive_i64_vortex(bencher: divan::Bencher) {
+    let v = gen_i64(N, 0);
+    let col = PrimitiveArray::from_iter(v.clone()).into_array();
+    let bytes = (N * (1 + 8)) as u64;
+    bencher.counter(BytesCount::new(bytes)).bench_local(|| {
+        let mut ctx = LEGACY_SESSION.create_execution_ctx();
+        convert_columns(&[col.clone()], &[SortField::default()], &mut ctx).unwrap()
+    })
+}
+
+// ---------- utf8 ----------
+
+#[divan::bench]
+fn utf8_arrow_row(bencher: divan::Bencher) {
+    let words = gen_words(N, 16, 7);
+    let total: u64 = words
+        .iter()
+        .map(|w| 1 + (w.len().div_ceil(32) * 33) as u64)
+        .sum();
+    let arr = Arc::new(StringArray::from(words.clone())) as arrow_array::ArrayRef;
+    let conv = RowConverter::new(vec![ArrowSortField::new(DataType::Utf8)]).unwrap();
+    bencher
+        .counter(BytesCount::new(total))
+        .bench_local(|| conv.convert_columns(&[arr.clone()]).unwrap())
+}
+
+#[divan::bench]
+fn utf8_vortex(bencher: divan::Bencher) {
+    let words = gen_words(N, 16, 7);
+    let total: u64 = words
+        .iter()
+        .map(|w| 1 + (w.len().div_ceil(32) * 33) as u64)
+        .sum();
+    let col = VarBinViewArray::from_iter_str(words.iter().map(String::as_str)).into_array();
+    bencher.counter(BytesCount::new(total)).bench_local(|| {
+        let mut ctx = LEGACY_SESSION.create_execution_ctx();
+        convert_columns(&[col.clone()], &[SortField::default()], &mut ctx).unwrap()
+    })
+}
+
+// ---------- struct_mixed ----------
+
+fn struct_mixed_inputs() -> (Vec<i64>, Vec<String>, u64) {
+    let ids = gen_i64(N, 1);
+    let names = gen_words(N, 16, 2);
+    // sentinel (1) + i64 (1+8=9) + utf8-name (1 + ceil(len/32)*33)
+    let total: u64 = (0..N)
+        .map(|i| {
+            let name_bytes = 1 + (names[i].len().div_ceil(32) * 33) as u64;
+            1u64 + 9u64 + name_bytes
+        })
+        .sum();
+    (ids, names, total)
+}
+
+#[divan::bench]
+fn struct_mixed_arrow_row(bencher: divan::Bencher) {
+    let (ids, names, total) = struct_mixed_inputs();
+    let id_arr = Arc::new(Int64Array::from(ids)) as arrow_array::ArrayRef;
+    let name_arr = Arc::new(StringArray::from(names)) as arrow_array::ArrayRef;
+    let arrow_struct = Arc::new(ArrowStructArray::from(vec![
+        (Arc::new(Field::new("id", DataType::Int64, false)), id_arr),
+        (
+            Arc::new(Field::new("name", DataType::Utf8, false)),
+            name_arr,
+        ),
+    ])) as arrow_array::ArrayRef;
+    let struct_fields = vec![
+        Arc::new(Field::new("id", DataType::Int64, false)),
+        Arc::new(Field::new("name", DataType::Utf8, false)),
+    ];
+    let conv = RowConverter::new(vec![ArrowSortField::new(DataType::Struct(
+        struct_fields.into(),
+    ))])
+    .unwrap();
+    bencher
+        .counter(BytesCount::new(total))
+        .bench_local(|| conv.convert_columns(&[arrow_struct.clone()]).unwrap())
+}
+
+#[divan::bench]
+fn struct_mixed_vortex(bencher: divan::Bencher) {
+    let (ids, names, total) = struct_mixed_inputs();
+    let id_arr = PrimitiveArray::from_iter(ids).into_array();
+    let name_arr = VarBinViewArray::from_iter_str(names.iter().map(String::as_str)).into_array();
+    let struct_arr = StructArray::from_fields(&[("id", id_arr), ("name", name_arr)])
+        .unwrap()
+        .into_array();
+    bencher.counter(BytesCount::new(total)).bench_local(|| {
+        let mut ctx = LEGACY_SESSION.create_execution_ctx();
+        convert_columns(&[struct_arr.clone()], &[SortField::default()], &mut ctx).unwrap()
+    })
+}
diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
index f999303948d..ed231a1e556 100644
--- a/vortex-row/public-api.lock
+++ b/vortex-row/public-api.lock
@@ -102,6 +102,12 @@ pub fn vortex_row::codec::field_size(&vortex_array::canonical::Canonical, vortex
 
 pub fn vortex_row::codec::row_width_for_dtype(&vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_row::codec::RowWidth>
 
+pub mod vortex_row::convert
+
+pub fn vortex_row::convert::compute_row_sizes(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+
+pub fn vortex_row::convert::convert_columns(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::listview::vtable::ListViewArray>
+
 pub mod vortex_row::encode
 
 pub struct vortex_row::encode::RowEncode
@@ -410,4 +416,8 @@ pub trait vortex_row::RowSizeKernel: vortex_array::array::vtable::VTable
 
 pub fn vortex_row::RowSizeKernel::row_size_contribution(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
 
+pub fn vortex_row::compute_row_sizes(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+
+pub fn vortex_row::convert_columns(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::listview::vtable::ListViewArray>
+
 pub fn vortex_row::initialize(&vortex_session::VortexSession)
diff --git a/vortex-row/src/convert.rs b/vortex-row/src/convert.rs
new file mode 100644
index 00000000000..c3b06d92748
--- /dev/null
+++ b/vortex-row/src/convert.rs
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! User-facing entry point: turn N columnar arrays into one row-encoded `ListView<u8>`.
+
+use vortex_array::ArrayRef;
+use vortex_array::ExecutionCtx;
+use vortex_array::arrays::ListViewArray;
+use vortex_array::scalar_fn::ScalarFnVTable;
+use vortex_array::scalar_fn::VecExecutionArgs;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+
+use crate::encode::RowEncode;
+use crate::options::RowEncodeOptions;
+use crate::options::SortField;
+use crate::size::RowSize;
+
+/// Convert N columnar arrays into a single row-oriented [`ListViewArray`] of `u8` whose
+/// bytes are lexicographically comparable in the same order as a tuple comparison of the
+/// input values according to `fields`. Errors if `cols` is empty, if `cols` and `fields`
+/// differ in length, or if the columns do not all have the same number of rows.
+pub fn convert_columns(
+    cols: &[ArrayRef],
+    fields: &[SortField],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<ListViewArray> {
+    let nrows = shared_row_count("convert_columns", cols, fields)?;
+    let options = RowEncodeOptions::new(fields.iter().copied());
+    let args = VecExecutionArgs::new(cols.to_vec(), nrows);
+    let result = RowEncode.execute(&options, &args, ctx)?;
+    result.execute::<ListViewArray>(ctx)
+}
+
+/// Compute only the per-row sizes (in bytes) of the row-encoded form for N columns by
+/// running [`RowSize`]. Inputs are validated exactly as in [`convert_columns`]: non-empty,
+/// one `SortField` per column, and equal-length columns.
+pub fn compute_row_sizes(
+    cols: &[ArrayRef],
+    fields: &[SortField],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<ArrayRef> {
+    let nrows = shared_row_count("compute_row_sizes", cols, fields)?;
+    let options = RowEncodeOptions::new(fields.iter().copied());
+    let args = VecExecutionArgs::new(cols.to_vec(), nrows);
+    RowSize.execute(&options, &args, ctx)
+}
+
+/// Check that `cols` is non-empty, has one entry in `fields` per column, and that every
+/// column has the same length; returns that shared row count. `caller` names the public
+/// entry point and prefixes each error message.
+fn shared_row_count(caller: &str, cols: &[ArrayRef], fields: &[SortField]) -> VortexResult<usize> {
+    if cols.len() != fields.len() {
+        vortex_bail!(
+            "{}: cols.len() ({}) does not match fields.len() ({})",
+            caller,
+            cols.len(),
+            fields.len()
+        );
+    }
+    let Some(first) = cols.first() else {
+        vortex_bail!("{}: at least one column is required", caller);
+    };
+    let nrows = first.len();
+    if let Some((i, col)) = cols.iter().enumerate().find(|(_, col)| col.len() != nrows) {
+        vortex_bail!(
+            "{}: column {} has length {} but expected {}",
+            caller,
+            i,
+            col.len(),
+            nrows
+        );
+    }
+    Ok(nrows)
+}
diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs
index ef0209f3d9c..fddcca665c1 100644
--- a/vortex-row/src/lib.rs
+++ b/vortex-row/src/lib.rs
@@ -3,23 +3,54 @@
 
 //! Row-oriented byte encoder, analogous to Apache Arrow's `arrow-row` crate.
 //!
-//! Subsequent commits add the encoder, decoder helpers, and per-encoding fast paths.
-//! This commit only establishes the crate skeleton and an `initialize` stub.
+//! The encoder converts N columnar arrays into a single `List<u8>` array where each row's
+//! bytes are lexicographically comparable in the same order as a tuple comparison of the
+//! original values. This is useful for sorting, hashing into row containers, and other
+//! operations that benefit from a sort-friendly opaque byte representation of a multi-column
+//! key.
+//!
+//! Two variadic scalar functions drive the implementation:
+//! - [`RowSize`] computes per-row byte sizes across all N input columns.
+//! - [`RowEncode`] writes the row-encoded bytes into a single `ListView<u8>` accumulator
+//!   in one left-to-right pass.
+//!
+//! Each scalar function exposes a per-encoding fast-path trait
+//! ([`RowSizeKernel`] / [`RowEncodeKernel`]) for downstream encodings to plug into; PR 3
+//! adds in-crate impls for `Constant`, `Dict`, and `Patched` and an inventory-based
+//! registry for external encodings.
+//!
+//! The user-facing entry point is [`convert_columns`].
+//!
+//! Row-encoding scalar functions are not registered in the default
+//! [`VortexSession`]. Call [`initialize`] on a session to make `RowSize` and `RowEncode`
+//! available via the expression layer.
 
 pub mod codec;
+pub mod convert;
 pub mod encode;
 pub mod options;
 pub mod size;
 
+#[cfg(test)]
+mod tests;
+
+pub use convert::compute_row_sizes;
+pub use convert::convert_columns;
 pub use encode::RowEncode;
 pub use encode::RowEncodeKernel;
 pub use options::RowEncodeOptions;
 pub use options::SortField;
 pub use size::RowSize;
 pub use size::RowSizeKernel;
+use vortex_array::scalar_fn::session::ScalarFnSessionExt;
 use vortex_session::VortexSession;
 
-/// Register the row-encoding scalar functions on the given session.
+/// Register the row-encoding scalar functions ([`RowSize`] and [`RowEncode`]) on the given
+/// session.
 ///
-/// Currently a stub: subsequent commits register `RowSize` and `RowEncode` here.
-pub fn initialize(_session: &VortexSession) {}
+/// Call once on session construction if you want row encoding available via the expression
+/// layer; [`convert_columns`] invokes [`RowEncode`] directly and works without registration.
+pub fn initialize(session: &VortexSession) {
+    session.scalar_fns().register(RowSize);
+    session.scalar_fns().register(RowEncode);
+}
diff --git a/vortex-row/src/tests.rs b/vortex-row/src/tests.rs
new file mode 100644
index 00000000000..ff7d8fb274a
--- /dev/null
+++ b/vortex-row/src/tests.rs
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+#![allow(
+    clippy::approx_constant,
+    clippy::cloned_ref_to_slice_refs,
+    clippy::redundant_clone,
+    reason = "tests value clarity over micro-optimization"
+)]
+
+//! Tests for the row encoder.
+
+use rstest::rstest;
+use vortex_array::IntoArray;
+use vortex_array::LEGACY_SESSION;
+use vortex_array::VortexSessionExecute;
+use vortex_array::arrays::BoolArray;
+use vortex_array::arrays::ListViewArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::VarBinViewArray;
+use vortex_array::arrays::listview::ListViewArrayExt;
+use vortex_error::VortexResult;
+
+use crate::SortField;
+use crate::convert_columns;
+
+fn collect_row_bytes(array: &ListViewArray) -> Vec<Vec<u8>> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let nrows = array.len();
+    (0..nrows)
+        .map(|i| {
+            let slice = array.list_elements_at(i).unwrap();
+            let p = slice.execute::<PrimitiveArray>(&mut ctx).unwrap();
+            p.as_slice::<u8>().to_vec()
+        })
+        .collect()
+}
+
+/// Row-encode `values` as a single `i64` column, sort the encoded rows bytewise, and check the
+/// result matches the natural order of `values` (reversed when `descending`).
+fn assert_sort_order_i64(values: Vec<i64>, descending: bool) -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let col = PrimitiveArray::from_iter(values.clone()).into_array();
+    let field = SortField {
+        descending,
+        nulls_first: true,
+    };
+    let encoded = convert_columns(&[col], &[field], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+
+    // Build expected permutation: sort values naturally then compare to bytes-sorted order.
+    let mut idx: Vec<usize> = (0..values.len()).collect();
+    if descending {
+        idx.sort_by(|a, b| values[*b].cmp(&values[*a]));
+    } else {
+        idx.sort_by(|a, b| values[*a].cmp(&values[*b]));
+    }
+    let expected_order: Vec<Vec<u8>> = idx.iter().map(|&i| rows[i].clone()).collect();
+
+    let mut sorted = rows.clone();
+    sorted.sort();
+    assert_eq!(
+        sorted, expected_order,
+        "Row-encoded bytes do not match natural sort order"
+    );
+    Ok(())
+}
+
+#[rstest]
+#[case::ascending(false)]
+#[case::descending(true)]
+fn primitive_i64_roundtrip(#[case] descending: bool) -> VortexResult<()> {
+    let values: Vec<i64> = vec![-5, 0, 5, i64::MIN, i64::MAX, 7, -7, 1];
+    assert_sort_order_i64(values, descending)
+}
+
+#[test]
+fn primitive_u32_sort_order() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let values: Vec<u32> = vec![0, 1, 100, u32::MAX, 42, 17];
+    let col = PrimitiveArray::from_iter(values.clone()).into_array();
+    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+
+    let mut sorted_rows = rows.clone();
+    sorted_rows.sort();
+
+    let mut sorted_idx: Vec<usize> = (0..values.len()).collect();
+    sorted_idx.sort_by(|a, b| values[*a].cmp(&values[*b]));
+    let expected: Vec<Vec<u8>> = sorted_idx.iter().map(|&i| rows[i].clone()).collect();
+    assert_eq!(sorted_rows, expected);
+    Ok(())
+}
+
+#[test]
+fn primitive_f64_sort_order() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    // We use IEEE total-ordering semantics: -0.0 < +0.0 in the byte encoding (matches
+    // `arrow-row`). Avoid -0.0 in the natural-order baseline since partial_cmp says
+    // -0.0 == 0.0.
+    let values: Vec<f64> = vec![-1.5, 0.0, 1.5, f64::INFINITY, f64::NEG_INFINITY, 3.14];
+    let col = PrimitiveArray::from_iter(values.clone()).into_array();
+    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+
+    let mut sorted_rows = rows.clone();
+    sorted_rows.sort();
+
+    let mut sorted_idx: Vec<usize> = (0..values.len()).collect();
+    sorted_idx.sort_by(|a, b| values[*a].partial_cmp(&values[*b]).unwrap());
+    let expected: Vec<Vec<u8>> = sorted_idx.iter().map(|&i| rows[i].clone()).collect();
+    assert_eq!(sorted_rows, expected);
+    Ok(())
+}
+
+#[test]
+fn bool_sort_order() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let col = BoolArray::from_iter([true, false, true, false]).into_array();
+    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+
+    let mut sorted = rows.clone();
+    sorted.sort();
+    // false rows come first (2x), true rows after (2x)
+    assert_eq!(sorted[0], rows[1]);
+    assert_eq!(sorted[1], rows[3]);
+    assert_eq!(sorted[2], rows[0]);
+    assert_eq!(sorted[3], rows[2]);
+    Ok(())
+}
+
+#[test]
+fn utf8_sort_order() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let values = vec![
+        "banana",
+        "apple",
+        "",
+        "cherry",
+        "ban",
+        "banana_loaf_for_test",
+    ];
+    let col = VarBinViewArray::from_iter_str(values.clone()).into_array();
+    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+
+    let mut sorted = rows.clone();
+    sorted.sort();
+
+    let mut sorted_idx: Vec<usize> = (0..values.len()).collect();
+    sorted_idx.sort_by(|a, b| values[*a].cmp(values[*b]));
+    let expected: Vec<Vec<u8>> = sorted_idx.iter().map(|&i| rows[i].clone()).collect();
+    assert_eq!(sorted, expected);
+    Ok(())
+}
+
+#[test]
+fn multi_column_sort() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let ints: Vec<i32> = vec![1, 2, 1, 2, 1, 3];
+    let strs = vec!["b", "a", "a", "b", "c", "z"];
+    let col0 = PrimitiveArray::from_iter(ints.clone()).into_array();
+    let col1 = VarBinViewArray::from_iter_str(strs.clone()).into_array();
+    let encoded = convert_columns(
+        &[col0, col1],
+        &[SortField::default(), SortField::default()],
+        &mut ctx,
+    )?;
+    let rows = collect_row_bytes(&encoded);
+
+    let mut sorted = rows.clone();
+    sorted.sort();
+    let mut idx: Vec<usize> = (0..ints.len()).collect();
+    idx.sort_by(|a, b| ints[*a].cmp(&ints[*b]).then_with(|| strs[*a].cmp(strs[*b])));
+    let expected: Vec<Vec<u8>> = idx.iter().map(|&i| rows[i].clone()).collect();
+    assert_eq!(sorted, expected);
+    Ok(())
+}
+
+#[test]
+fn nulls_first_and_last() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let values: Vec<Option<i32>> = vec![Some(5), None, Some(1), None, Some(3)];
+    let col = PrimitiveArray::from_option_iter(values.clone()).into_array();
+
+    // nulls_first=true
+    let encoded = convert_columns(
+        &[col.clone()],
+        &[SortField {
+            descending: false,
+            nulls_first: true,
+        }],
+        &mut ctx,
+    )?;
+    let rows = collect_row_bytes(&encoded);
+    let mut sorted = rows.clone();
+    sorted.sort();
+    // The first two sorted entries should be nulls
+    let null_count = values.iter().filter(|v| v.is_none()).count();
+    for i in 0..null_count {
+        // a null encoded row begins with 0x00
+        assert_eq!(sorted[i][0], 0x00);
+    }
+    // nulls_first=false
+    let encoded = convert_columns(
+        &[col],
+        &[SortField {
+            descending: false,
+            nulls_first: false,
+        }],
+        &mut ctx,
+    )?;
+    let rows = collect_row_bytes(&encoded);
+    let mut sorted = rows.clone();
+    sorted.sort();
+    // The last two sorted entries should be nulls
+    for i in 0..null_count {
+        let pos = sorted.len() - 1 - i;
+        assert_eq!(sorted[pos][0], 0x02);
+    }
+    Ok(())
+}
+
+#[test]
+fn struct_sort_order() -> VortexResult<()> {
+    use vortex_array::arrays::StructArray;
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let ids: Vec<i64> = vec![3, 1, 3, 1, 2];
+    let names = vec!["b", "a", "a", "b", "z"];
+    let id_arr = PrimitiveArray::from_iter(ids.clone()).into_array();
+    let name_arr = VarBinViewArray::from_iter_str(names.clone()).into_array();
+    let struct_arr = StructArray::from_fields(&[("id", id_arr), ("name", name_arr)])?.into_array();
+
+    let encoded = convert_columns(&[struct_arr], &[SortField::default()], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+
+    let mut sorted = rows.clone();
+    sorted.sort();
+    let mut idx: Vec<usize> = (0..ids.len()).collect();
+    idx.sort_by(|a, b| ids[*a].cmp(&ids[*b]).then_with(|| names[*a].cmp(names[*b])));
+    let expected: Vec<Vec<u8>> = idx.iter().map(|&i| rows[i].clone()).collect();
+    assert_eq!(sorted, expected);
+    Ok(())
+}
+
+#[test]
+fn row_size_struct_shape() -> VortexResult<()> {
+    use vortex_array::arrays::Constant;
+    use vortex_array::arrays::StructArray;
+    use vortex_array::arrays::struct_::StructArrayExt;
+
+    use crate::compute_row_sizes;
+
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let ints: Vec<i32> = vec![1, 2, 3, 4, 5];
+    let strs = vec!["a", "bb", "ccc", "", "eeeee"];
+    let col0 = PrimitiveArray::from_iter(ints).into_array();
+    let col1 = VarBinViewArray::from_iter_str(strs).into_array();
+
+    let sizes = compute_row_sizes(
+        &[col0, col1],
+        &[SortField::default(), SortField::default()],
+        &mut ctx,
+    )?;
+    // Shape must be Struct { fixed, var }
+    let struct_arr = sizes.execute::<StructArray>(&mut ctx)?;
+    assert_eq!(struct_arr.struct_fields().nfields(), 2);
+    let fixed = struct_arr.unmasked_field(0);
+    let var = struct_arr.unmasked_field(1);
+
+    // `fixed` must be ConstantArray with value = encoded i32 width = 1 + 4 = 5.
+    let fixed_const = fixed
+        .as_opt::<Constant>()
+        .expect("fixed field should be a ConstantArray");
+    assert_eq!(
+        fixed_const.scalar(),
+        &vortex_array::scalar::Scalar::from(5u32),
+        "fixed scalar should be encoded primitive i32 width"
+    );
+
+    // `var` must be a PrimitiveArray<u32>, since we have a varlen column.
+    let var_prim = var.clone().execute::<PrimitiveArray>(&mut ctx)?;
+    let v: &[u32] = var_prim.as_slice();
+    assert_eq!(v.len(), 5);
+    // empty string: sentinel(1) + 1 byte; non-empty: sentinel(1) + 33 bytes (single block).
+    let expected: Vec<u32> = vec![34, 34, 34, 2, 34];
+    assert_eq!(v, expected.as_slice());
+    Ok(())
+}
+
+#[test]
+fn single_buffer_invariant() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    // Encoded rows here are all > 12 bytes, forcing the Ref-view path that points back into
+    // the shared data buffer.
+    let nrows = 64usize;
+    let primitives: Vec<i64> = (0..nrows as i64).collect();
+    let strings: Vec<String> = (0..nrows)
+        .map(|i| format!("row_{}_with_padding", i))
+        .collect();
+    let col0 = PrimitiveArray::from_iter(primitives.clone()).into_array();
+    let col1 = VarBinViewArray::from_iter_str(strings.iter().map(String::as_str)).into_array();
+    let encoded = convert_columns(
+        &[col0, col1],
+        &[SortField::default(), SortField::default()],
+        &mut ctx,
+    )?;
+
+    let rows = collect_row_bytes(&encoded);
+    let expected_total: usize = rows.iter().map(|r| r.len()).sum();
+
+    // The shared data buffer holds the contiguous concatenation of every row's encoded bytes;
+    // per-row allocations would produce many small buffers instead of one shared buffer.
+    // ListView's elements array is a single contiguous primitive (u8) array; its length
+    // equals the sum of all per-row sizes. A per-row allocation strategy would instead
+    // produce N separate elements arrays or a sparse one.
+    let elements_len = encoded.elements().len();
+    assert_eq!(
+        elements_len, expected_total,
+        "elements buffer size mismatch"
+    );
+    Ok(())
+}

From 74d89f1f07cc5074832d12e7b17549979c529197 Mon Sep 17 00:00:00 2001
From: Joe Isaacs <joe.isaacs@live.co.uk>
Date: Fri, 22 May 2026 12:49:36 +0100
Subject: [PATCH 09/10] t

Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
---
 vortex-row/benches/row_encode.rs |  21 +-
 vortex-row/public-api.lock       | 420 ++++++++-----------------------
 vortex-row/src/codec.rs          | 345 ++++---------------------
 vortex-row/src/convert.rs        |  75 ------
 vortex-row/src/encode.rs         |  76 ++----
 vortex-row/src/lib.rs            |  68 ++---
 vortex-row/src/options.rs        | 140 +++++++----
 vortex-row/src/size.rs           | 122 ++-------
 vortex-row/src/tests.rs          | 116 ++++++---
 9 files changed, 418 insertions(+), 965 deletions(-)
 delete mode 100644 vortex-row/src/convert.rs

diff --git a/vortex-row/benches/row_encode.rs b/vortex-row/benches/row_encode.rs
index 8d631d785da..07493d6ad48 100644
--- a/vortex-row/benches/row_encode.rs
+++ b/vortex-row/benches/row_encode.rs
@@ -5,15 +5,12 @@
     clippy::unwrap_used,
     clippy::clone_on_ref_ptr,
     clippy::cloned_ref_to_slice_refs,
-    clippy::cast_possible_truncation,
-    clippy::cast_possible_wrap,
     clippy::redundant_clone
 )]
 
-//! Row-encode throughput benchmarks comparing `arrow-row` against vortex's `convert_columns`
-//! for the canonical scenarios shipped in PR 1: a primitive i64 column, a Utf8 column,
-//! and a mixed-field struct. Per-encoding fast paths (Constant, Dict, Patched, BitPacked,
-//! FoR, Delta) gain their own triplets in PR 3.
+//! Row-encode throughput benchmarks comparing `arrow-row` against Vortex's [`RowEncoder`]
+//! for the core canonical scenarios: a primitive i64 column, a Utf8 column, and a
+//! mixed-field struct.
 
 use std::sync::Arc;
 
@@ -36,8 +33,7 @@ use vortex_array::VortexSessionExecute;
 use vortex_array::arrays::PrimitiveArray;
 use vortex_array::arrays::StructArray;
 use vortex_array::arrays::VarBinViewArray;
-use vortex_row::SortField;
-use vortex_row::convert_columns;
+use vortex_row::RowEncoder;
 
 #[global_allocator]
 static GLOBAL: MiMalloc = MiMalloc;
@@ -86,9 +82,10 @@ fn primitive_i64_vortex(bencher: divan::Bencher) {
     let v = gen_i64(N, 0);
     let col = PrimitiveArray::from_iter(v.clone()).into_array();
     let bytes = (N * (1 + 8)) as u64;
+    let encoder = RowEncoder::default();
     bencher.counter(BytesCount::new(bytes)).bench_local(|| {
         let mut ctx = LEGACY_SESSION.create_execution_ctx();
-        convert_columns(&[col.clone()], &[SortField::default()], &mut ctx).unwrap()
+        encoder.encode(&[col.clone()], &mut ctx).unwrap()
     })
 }
 
@@ -116,9 +113,10 @@ fn utf8_vortex(bencher: divan::Bencher) {
         .map(|w| 1 + (w.len().div_ceil(32) * 33) as u64)
         .sum();
     let col = VarBinViewArray::from_iter_str(words.iter().map(String::as_str)).into_array();
+    let encoder = RowEncoder::default();
     bencher.counter(BytesCount::new(total)).bench_local(|| {
         let mut ctx = LEGACY_SESSION.create_execution_ctx();
-        convert_columns(&[col.clone()], &[SortField::default()], &mut ctx).unwrap()
+        encoder.encode(&[col.clone()], &mut ctx).unwrap()
     })
 }
 
@@ -170,8 +168,9 @@ fn struct_mixed_vortex(bencher: divan::Bencher) {
     let struct_arr = StructArray::from_fields(&[("id", id_arr), ("name", name_arr)])
         .unwrap()
         .into_array();
+    let encoder = RowEncoder::default();
     bencher.counter(BytesCount::new(total)).bench_local(|| {
         let mut ctx = LEGACY_SESSION.create_execution_ctx();
-        convert_columns(&[struct_arr.clone()], &[SortField::default()], &mut ctx).unwrap()
+        encoder.encode(&[struct_arr.clone()], &mut ctx).unwrap()
     })
 }
diff --git a/vortex-row/public-api.lock b/vortex-row/public-api.lock
index ed231a1e556..83c40788349 100644
--- a/vortex-row/public-api.lock
+++ b/vortex-row/public-api.lock
@@ -1,423 +1,207 @@
 pub mod vortex_row
 
-pub mod vortex_row::codec
-
-pub enum vortex_row::codec::RowWidth
-
-pub vortex_row::codec::RowWidth::Fixed(u32)
-
-pub vortex_row::codec::RowWidth::Variable
-
-impl core::clone::Clone for vortex_row::codec::RowWidth
-
-pub fn vortex_row::codec::RowWidth::clone(&self) -> vortex_row::codec::RowWidth
-
-impl core::cmp::Eq for vortex_row::codec::RowWidth
-
-impl core::cmp::PartialEq for vortex_row::codec::RowWidth
-
-pub fn vortex_row::codec::RowWidth::eq(&self, &vortex_row::codec::RowWidth) -> bool
-
-impl core::fmt::Debug for vortex_row::codec::RowWidth
-
-pub fn vortex_row::codec::RowWidth::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
-
-impl core::marker::Copy for vortex_row::codec::RowWidth
-
-impl core::marker::StructuralPartialEq for vortex_row::codec::RowWidth
-
-pub const vortex_row::codec::BOOL_ENCODED_SIZE: u32
-
-pub const vortex_row::codec::VARLEN_BLOCK_SIZE: usize
-
-pub const vortex_row::codec::VARLEN_BLOCK_TOTAL: usize
-
-pub trait vortex_row::codec::RowEncode: core::marker::Copy
-
-pub fn vortex_row::codec::RowEncode::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for f32
-
-pub fn f32::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for f64
-
-pub fn f64::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for half::binary16::f16
-
-pub fn half::binary16::f16::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for i128
-
-pub fn i128::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for i16
-
-pub fn i16::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for i32
-
-pub fn i32::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for i64
-
-pub fn i64::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for i8
-
-pub fn i8::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for u16
-
-pub fn u16::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for u32
-
-pub fn u32::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for u64
-
-pub fn u64::encode_to(self, &mut [u8], bool)
-
-impl vortex_row::codec::RowEncode for u8
-
-pub fn u8::encode_to(self, &mut [u8], bool)
-
-pub fn vortex_row::codec::encode_scalar(&vortex_array::scalar::Scalar, vortex_row::options::SortField) -> vortex_error::VortexResult<bytes::bytes::Bytes>
-
-pub fn vortex_row::codec::encode_scalar_bool(core::option::Option<bool>, vortex_row::options::SortField, &mut vortex_buffer::ByteBufferMut)
-
-pub fn vortex_row::codec::encode_scalar_null(vortex_row::options::SortField, bool, &mut vortex_buffer::ByteBufferMut)
-
-pub fn vortex_row::codec::encode_scalar_primitive(vortex_array::dtype::ptype::PType, vortex_array::scalar::typed_view::primitive::pvalue::PValue, vortex_row::options::SortField, bool, &mut vortex_buffer::ByteBufferMut) -> vortex_error::VortexResult<()>
-
-pub fn vortex_row::codec::encode_scalar_varlen(core::option::Option<&[u8]>, vortex_row::options::SortField, &mut vortex_buffer::ByteBufferMut)
-
-pub fn vortex_row::codec::encoded_size_for_scalar(&vortex_array::scalar::Scalar, vortex_row::options::SortField) -> vortex_error::VortexResult<u32>
-
-pub fn vortex_row::codec::field_encode(&vortex_array::canonical::Canonical, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
-
-pub fn vortex_row::codec::field_size(&vortex_array::canonical::Canonical, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
-
-pub fn vortex_row::codec::row_width_for_dtype(&vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_row::codec::RowWidth>
-
-pub mod vortex_row::convert
-
-pub fn vortex_row::convert::compute_row_sizes(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
-
-pub fn vortex_row::convert::convert_columns(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::listview::vtable::ListViewArray>
-
-pub mod vortex_row::encode
-
-pub struct vortex_row::encode::RowEncode
-
-impl core::clone::Clone for vortex_row::encode::RowEncode
-
-pub fn vortex_row::encode::RowEncode::clone(&self) -> vortex_row::encode::RowEncode
-
-impl core::fmt::Debug for vortex_row::encode::RowEncode
-
-pub fn vortex_row::encode::RowEncode::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
-
-impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::encode::RowEncode
-
-pub type vortex_row::encode::RowEncode::Options = vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::encode::RowEncode::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
-
-pub fn vortex_row::encode::RowEncode::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
-
-pub fn vortex_row::encode::RowEncode::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
-
-pub fn vortex_row::encode::RowEncode::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
-
-pub fn vortex_row::encode::RowEncode::id(&self) -> vortex_array::scalar_fn::ScalarFnId
-
-pub fn vortex_row::encode::RowEncode::is_fallible(&self, &Self::Options) -> bool
-
-pub fn vortex_row::encode::RowEncode::is_null_sensitive(&self, &Self::Options) -> bool
-
-pub fn vortex_row::encode::RowEncode::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
-
-pub fn vortex_row::encode::RowEncode::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
-
-pub trait vortex_row::encode::RowEncodeKernel: vortex_array::array::vtable::VTable
-
-pub fn vortex_row::encode::RowEncodeKernel::row_encode_into(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
-
-pub fn vortex_row::encode::dispatch_encode(&vortex_array::array::erased::ArrayRef, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
-
-pub mod vortex_row::options
-
-pub struct vortex_row::options::RowEncodeOptions
-
-pub vortex_row::options::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>
-
-impl vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::options::RowEncodeOptions::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::options::SortField>) -> Self
-
-impl core::clone::Clone for vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::options::RowEncodeOptions::clone(&self) -> vortex_row::options::RowEncodeOptions
-
-impl core::cmp::Eq for vortex_row::options::RowEncodeOptions
-
-impl core::cmp::PartialEq for vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::options::RowEncodeOptions::eq(&self, &vortex_row::options::RowEncodeOptions) -> bool
-
-impl core::fmt::Debug for vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
-
-impl core::fmt::Display for vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
-
-impl core::hash::Hash for vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&self, &mut __H)
-
-impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions
-
-pub struct vortex_row::options::SortField
-
-pub vortex_row::options::SortField::descending: bool
-
-pub vortex_row::options::SortField::nulls_first: bool
-
-impl vortex_row::options::SortField
-
-pub fn vortex_row::options::SortField::new(bool, bool) -> Self
-
-pub fn vortex_row::options::SortField::non_null_sentinel(&self) -> u8
-
-pub fn vortex_row::options::SortField::null_sentinel(&self) -> u8
-
-impl core::clone::Clone for vortex_row::options::SortField
-
-pub fn vortex_row::options::SortField::clone(&self) -> vortex_row::options::SortField
-
-impl core::cmp::Eq for vortex_row::options::SortField
-
-impl core::cmp::PartialEq for vortex_row::options::SortField
-
-pub fn vortex_row::options::SortField::eq(&self, &vortex_row::options::SortField) -> bool
-
-impl core::default::Default for vortex_row::options::SortField
-
-pub fn vortex_row::options::SortField::default() -> Self
-
-impl core::fmt::Debug for vortex_row::options::SortField
-
-pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
-
-impl core::fmt::Display for vortex_row::options::SortField
+pub struct vortex_row::RowEncode
 
-pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+impl core::clone::Clone for vortex_row::RowEncode
 
-impl core::hash::Hash for vortex_row::options::SortField
+pub fn vortex_row::RowEncode::clone(&self) -> vortex_row::RowEncode
 
-pub fn vortex_row::options::SortField::hash<__H: core::hash::Hasher>(&self, &mut __H)
+impl core::fmt::Debug for vortex_row::RowEncode
 
-impl core::marker::Copy for vortex_row::options::SortField
+pub fn vortex_row::RowEncode::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-impl core::marker::StructuralPartialEq for vortex_row::options::SortField
+impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::RowEncode
 
-pub const vortex_row::options::FIELDS_INLINE: usize
+pub type vortex_row::RowEncode::Options = vortex_row::RowEncodingOptions
 
-pub mod vortex_row::size
+pub fn vortex_row::RowEncode::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
 
-pub struct vortex_row::size::RowSize
+pub fn vortex_row::RowEncode::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
 
-impl core::clone::Clone for vortex_row::size::RowSize
+pub fn vortex_row::RowEncode::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
 
-pub fn vortex_row::size::RowSize::clone(&self) -> vortex_row::size::RowSize
+pub fn vortex_row::RowEncode::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
 
-impl core::fmt::Debug for vortex_row::size::RowSize
+pub fn vortex_row::RowEncode::id(&self) -> vortex_array::scalar_fn::ScalarFnId
 
-pub fn vortex_row::size::RowSize::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+pub fn vortex_row::RowEncode::is_fallible(&self, &Self::Options) -> bool
 
-impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::size::RowSize
+pub fn vortex_row::RowEncode::is_null_sensitive(&self, &Self::Options) -> bool
 
-pub type vortex_row::size::RowSize::Options = vortex_row::options::RowEncodeOptions
+pub fn vortex_row::RowEncode::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
 
-pub fn vortex_row::size::RowSize::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
+pub fn vortex_row::RowEncode::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
 
-pub fn vortex_row::size::RowSize::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
+pub struct vortex_row::RowEncoder
 
-pub fn vortex_row::size::RowSize::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
+impl vortex_row::RowEncoder
 
-pub fn vortex_row::size::RowSize::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+pub fn vortex_row::RowEncoder::encode(&self, &[vortex_array::array::erased::ArrayRef], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::listview::vtable::ListViewArray>
 
-pub fn vortex_row::size::RowSize::id(&self) -> vortex_array::scalar_fn::ScalarFnId
+pub fn vortex_row::RowEncoder::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::RowSortField>) -> Self
 
-pub fn vortex_row::size::RowSize::is_fallible(&self, &Self::Options) -> bool
+pub fn vortex_row::RowEncoder::options(&self) -> core::option::Option<&vortex_row::RowEncodingOptions>
 
-pub fn vortex_row::size::RowSize::is_null_sensitive(&self, &Self::Options) -> bool
+pub fn vortex_row::RowEncoder::row_sizes(&self, &[vortex_array::array::erased::ArrayRef], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
 
-pub fn vortex_row::size::RowSize::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
+pub fn vortex_row::RowEncoder::with_options(vortex_row::RowEncodingOptions) -> Self
 
-pub fn vortex_row::size::RowSize::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
+impl core::clone::Clone for vortex_row::RowEncoder
 
-pub trait vortex_row::size::RowSizeKernel: vortex_array::array::vtable::VTable
+pub fn vortex_row::RowEncoder::clone(&self) -> vortex_row::RowEncoder
 
-pub fn vortex_row::size::RowSizeKernel::row_size_contribution(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
+impl core::cmp::Eq for vortex_row::RowEncoder
 
-pub fn vortex_row::size::dispatch_size(&vortex_array::array::erased::ArrayRef, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()>
+impl core::cmp::PartialEq for vortex_row::RowEncoder
 
-pub struct vortex_row::RowEncode
+pub fn vortex_row::RowEncoder::eq(&self, &vortex_row::RowEncoder) -> bool
 
-impl core::clone::Clone for vortex_row::encode::RowEncode
+impl core::default::Default for vortex_row::RowEncoder
 
-pub fn vortex_row::encode::RowEncode::clone(&self) -> vortex_row::encode::RowEncode
+pub fn vortex_row::RowEncoder::default() -> vortex_row::RowEncoder
 
-impl core::fmt::Debug for vortex_row::encode::RowEncode
+impl core::fmt::Debug for vortex_row::RowEncoder
 
-pub fn vortex_row::encode::RowEncode::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+pub fn vortex_row::RowEncoder::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::encode::RowEncode
+impl core::hash::Hash for vortex_row::RowEncoder
 
-pub type vortex_row::encode::RowEncode::Options = vortex_row::options::RowEncodeOptions
+pub fn vortex_row::RowEncoder::hash<__H: core::hash::Hasher>(&self, &mut __H)
 
-pub fn vortex_row::encode::RowEncode::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
+impl core::marker::StructuralPartialEq for vortex_row::RowEncoder
 
-pub fn vortex_row::encode::RowEncode::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
+pub struct vortex_row::RowEncodingOptions
 
-pub fn vortex_row::encode::RowEncode::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
+impl vortex_row::RowEncodingOptions
 
-pub fn vortex_row::encode::RowEncode::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+pub fn vortex_row::RowEncodingOptions::default_for_columns(usize) -> Self
 
-pub fn vortex_row::encode::RowEncode::id(&self) -> vortex_array::scalar_fn::ScalarFnId
+pub fn vortex_row::RowEncodingOptions::fields(&self) -> &[vortex_row::RowSortField]
 
-pub fn vortex_row::encode::RowEncode::is_fallible(&self, &Self::Options) -> bool
+pub fn vortex_row::RowEncodingOptions::is_empty(&self) -> bool
 
-pub fn vortex_row::encode::RowEncode::is_null_sensitive(&self, &Self::Options) -> bool
+pub fn vortex_row::RowEncodingOptions::len(&self) -> usize
 
-pub fn vortex_row::encode::RowEncode::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
+pub fn vortex_row::RowEncodingOptions::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::RowSortField>) -> Self
 
-pub fn vortex_row::encode::RowEncode::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
+impl core::clone::Clone for vortex_row::RowEncodingOptions
 
-pub struct vortex_row::RowEncodeOptions
+pub fn vortex_row::RowEncodingOptions::clone(&self) -> vortex_row::RowEncodingOptions
 
-pub vortex_row::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>
+impl core::cmp::Eq for vortex_row::RowEncodingOptions
 
-impl vortex_row::options::RowEncodeOptions
+impl core::cmp::PartialEq for vortex_row::RowEncodingOptions
 
-pub fn vortex_row::options::RowEncodeOptions::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::options::SortField>) -> Self
+pub fn vortex_row::RowEncodingOptions::eq(&self, &vortex_row::RowEncodingOptions) -> bool
 
-impl core::clone::Clone for vortex_row::options::RowEncodeOptions
+impl core::fmt::Debug for vortex_row::RowEncodingOptions
 
-pub fn vortex_row::options::RowEncodeOptions::clone(&self) -> vortex_row::options::RowEncodeOptions
+pub fn vortex_row::RowEncodingOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-impl core::cmp::Eq for vortex_row::options::RowEncodeOptions
+impl core::fmt::Display for vortex_row::RowEncodingOptions
 
-impl core::cmp::PartialEq for vortex_row::options::RowEncodeOptions
+pub fn vortex_row::RowEncodingOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-pub fn vortex_row::options::RowEncodeOptions::eq(&self, &vortex_row::options::RowEncodeOptions) -> bool
+impl core::hash::Hash for vortex_row::RowEncodingOptions
 
-impl core::fmt::Debug for vortex_row::options::RowEncodeOptions
+pub fn vortex_row::RowEncodingOptions::hash<__H: core::hash::Hasher>(&self, &mut __H)
 
-pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+impl core::iter::traits::collect::FromIterator<vortex_row::RowSortField> for vortex_row::RowEncodingOptions
 
-impl core::fmt::Display for vortex_row::options::RowEncodeOptions
+pub fn vortex_row::RowEncodingOptions::from_iter<T: core::iter::traits::collect::IntoIterator<Item = vortex_row::RowSortField>>(T) -> Self
 
-pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+impl core::marker::StructuralPartialEq for vortex_row::RowEncodingOptions
 
-impl core::hash::Hash for vortex_row::options::RowEncodeOptions
-
-pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&self, &mut __H)
+pub struct vortex_row::RowSize
 
-impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions
+impl core::clone::Clone for vortex_row::RowSize
 
-pub struct vortex_row::RowSize
+pub fn vortex_row::RowSize::clone(&self) -> vortex_row::RowSize
 
-impl core::clone::Clone for vortex_row::size::RowSize
+impl core::fmt::Debug for vortex_row::RowSize
 
-pub fn vortex_row::size::RowSize::clone(&self) -> vortex_row::size::RowSize
+pub fn vortex_row::RowSize::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-impl core::fmt::Debug for vortex_row::size::RowSize
+impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::RowSize
 
-pub fn vortex_row::size::RowSize::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+pub type vortex_row::RowSize::Options = vortex_row::RowEncodingOptions
 
-impl vortex_array::scalar_fn::vtable::ScalarFnVTable for vortex_row::size::RowSize
+pub fn vortex_row::RowSize::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
 
-pub type vortex_row::size::RowSize::Options = vortex_row::options::RowEncodeOptions
+pub fn vortex_row::RowSize::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
 
-pub fn vortex_row::size::RowSize::arity(&self, &Self::Options) -> vortex_array::scalar_fn::vtable::Arity
+pub fn vortex_row::RowSize::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
 
-pub fn vortex_row::size::RowSize::child_name(&self, &Self::Options, usize) -> vortex_array::scalar_fn::vtable::ChildName
+pub fn vortex_row::RowSize::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
 
-pub fn vortex_row::size::RowSize::deserialize(&self, &[u8], &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
+pub fn vortex_row::RowSize::id(&self) -> vortex_array::scalar_fn::ScalarFnId
 
-pub fn vortex_row::size::RowSize::execute(&self, &Self::Options, &dyn vortex_array::scalar_fn::vtable::ExecutionArgs, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+pub fn vortex_row::RowSize::is_fallible(&self, &Self::Options) -> bool
 
-pub fn vortex_row::size::RowSize::id(&self) -> vortex_array::scalar_fn::ScalarFnId
+pub fn vortex_row::RowSize::is_null_sensitive(&self, &Self::Options) -> bool
 
-pub fn vortex_row::size::RowSize::is_fallible(&self, &Self::Options) -> bool
+pub fn vortex_row::RowSize::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
 
-pub fn vortex_row::size::RowSize::is_null_sensitive(&self, &Self::Options) -> bool
+pub fn vortex_row::RowSize::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
 
-pub fn vortex_row::size::RowSize::return_dtype(&self, &Self::Options, &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
+pub struct vortex_row::RowSortField
 
-pub fn vortex_row::size::RowSize::serialize(&self, &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
+pub vortex_row::RowSortField::descending: bool
 
-pub struct vortex_row::SortField
+pub vortex_row::RowSortField::nulls_first: bool
 
-pub vortex_row::SortField::descending: bool
+impl vortex_row::RowSortField
 
-pub vortex_row::SortField::nulls_first: bool
+pub const fn vortex_row::RowSortField::ascending() -> Self
 
-impl vortex_row::options::SortField
+pub const fn vortex_row::RowSortField::descending() -> Self
 
-pub fn vortex_row::options::SortField::new(bool, bool) -> Self
+pub const fn vortex_row::RowSortField::new(bool, bool) -> Self
 
-pub fn vortex_row::options::SortField::non_null_sentinel(&self) -> u8
+pub const fn vortex_row::RowSortField::nulls_first(self) -> Self
 
-pub fn vortex_row::options::SortField::null_sentinel(&self) -> u8
+pub const fn vortex_row::RowSortField::nulls_last(self) -> Self
 
-impl core::clone::Clone for vortex_row::options::SortField
+impl core::clone::Clone for vortex_row::RowSortField
 
-pub fn vortex_row::options::SortField::clone(&self) -> vortex_row::options::SortField
+pub fn vortex_row::RowSortField::clone(&self) -> vortex_row::RowSortField
 
-impl core::cmp::Eq for vortex_row::options::SortField
+impl core::cmp::Eq for vortex_row::RowSortField
 
-impl core::cmp::PartialEq for vortex_row::options::SortField
+impl core::cmp::PartialEq for vortex_row::RowSortField
 
-pub fn vortex_row::options::SortField::eq(&self, &vortex_row::options::SortField) -> bool
+pub fn vortex_row::RowSortField::eq(&self, &vortex_row::RowSortField) -> bool
 
-impl core::default::Default for vortex_row::options::SortField
+impl core::default::Default for vortex_row::RowSortField
 
-pub fn vortex_row::options::SortField::default() -> Self
+pub fn vortex_row::RowSortField::default() -> Self
 
-impl core::fmt::Debug for vortex_row::options::SortField
+impl core::fmt::Debug for vortex_row::RowSortField
 
-pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+pub fn vortex_row::RowSortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-impl core::fmt::Display for vortex_row::options::SortField
+impl core::fmt::Display for vortex_row::RowSortField
 
-pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+pub fn vortex_row::RowSortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-impl core::hash::Hash for vortex_row::options::SortField
+impl core::hash::Hash for vortex_row::RowSortField
 
-pub fn vortex_row::options::SortField::hash<__H: core::hash::Hasher>(&self, &mut __H)
+pub fn vortex_row::RowSortField::hash<__H: core::hash::Hasher>(&self, &mut __H)
 
-impl core::marker::Copy for vortex_row::options::SortField
+impl core::iter::traits::collect::FromIterator<vortex_row::RowSortField> for vortex_row::RowEncodingOptions
 
-impl core::marker::StructuralPartialEq for vortex_row::options::SortField
+pub fn vortex_row::RowEncodingOptions::from_iter<T: core::iter::traits::collect::IntoIterator<Item = vortex_row::RowSortField>>(T) -> Self
 
-pub trait vortex_row::RowEncodeKernel: vortex_array::array::vtable::VTable
+impl core::marker::Copy for vortex_row::RowSortField
 
-pub fn vortex_row::RowEncodeKernel::row_encode_into(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &[u32], &mut [u32], &mut [u8], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
+impl core::marker::StructuralPartialEq for vortex_row::RowSortField
 
-pub trait vortex_row::RowSizeKernel: vortex_array::array::vtable::VTable
+pub fn vortex_row::compute_row_sizes(&[vortex_array::array::erased::ArrayRef], &[vortex_row::RowSortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
 
-pub fn vortex_row::RowSizeKernel::row_size_contribution(vortex_array::array::view::ArrayView<'_, Self>, vortex_row::options::SortField, &mut [u32], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<()>>
+pub fn vortex_row::compute_row_sizes_with_options(&[vortex_array::array::erased::ArrayRef], &vortex_row::RowEncodingOptions, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
 
-pub fn vortex_row::compute_row_sizes(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+pub fn vortex_row::convert_columns(&[vortex_array::array::erased::ArrayRef], &[vortex_row::RowSortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::listview::vtable::ListViewArray>
 
-pub fn vortex_row::convert_columns(&[vortex_array::array::erased::ArrayRef], &[vortex_row::options::SortField], &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::listview::vtable::ListViewArray>
+pub fn vortex_row::convert_columns_with_options(&[vortex_array::array::erased::ArrayRef], &vortex_row::RowEncodingOptions, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::listview::vtable::ListViewArray>
 
 pub fn vortex_row::initialize(&vortex_session::VortexSession)
diff --git a/vortex-row/src/codec.rs b/vortex-row/src/codec.rs
index 8468301e5b3..33270b0ad43 100644
--- a/vortex-row/src/codec.rs
+++ b/vortex-row/src/codec.rs
@@ -1,18 +1,12 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-#![allow(
-    clippy::cast_possible_truncation,
-    clippy::expect_used,
-    reason = "row encoding indexes into u32-sized buffers; lengths are validated to fit in u32 elsewhere"
-)]
-
 //! Pure byte-encoding kernels for row-oriented output, operating on `Canonical` variants.
 //!
 //! The encoded byte format produces a lexicographically byte-comparable representation:
 //! comparing the byte slices of two encoded rows yields the same ordering as the
 //! original logical (tuple) comparison of their values, modulo nulls placement and
-//! descending-ness as configured by [`SortField`].
+//! descending-ness as configured by [`RowSortField`].
 //!
 //! Conventions:
 //! - Every value is preceded by a 1-byte sentinel that orders nulls relative to non-nulls.
@@ -21,9 +15,6 @@
 //! - Fixed-width integers are big-endian, with the sign bit flipped for signed types.
 //! - Floats are bit-pattern big-endian with sign-aware mask: non-negative flips the top
 //!   bit; negative flips all bits.
-//!
-//! This commit covers only the fixed-width canonical variants (Null, Bool, Primitive,
-//! Decimal); variable-length and nested canonical variants land in later commits.
 
 use vortex_array::Canonical;
 use vortex_array::ExecutionCtx;
@@ -42,22 +33,22 @@ use vortex_array::arrays::struct_::StructArrayExt;
 use vortex_array::dtype::DType;
 use vortex_array::dtype::DecimalType;
 use vortex_array::dtype::NativePType;
-use vortex_array::dtype::PType;
 use vortex_array::dtype::half::f16;
 use vortex_array::match_each_native_ptype;
-use vortex_buffer::ByteBufferMut;
+use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
 use vortex_error::vortex_bail;
 
-use crate::options::SortField;
+use crate::options::RowSortField;
 
 /// Size in bytes of the encoded form of a single bool value (sentinel + 1 content byte).
-pub const BOOL_ENCODED_SIZE: u32 = 2;
+pub(crate) const BOOL_ENCODED_SIZE: u32 = 2;
 
 /// Block size used in the variable-length encoding.
-pub const VARLEN_BLOCK_SIZE: usize = 32;
+pub(crate) const VARLEN_BLOCK_SIZE: usize = 32;
 /// Total bytes per varlen block including the trailing continuation marker.
-pub const VARLEN_BLOCK_TOTAL: usize = VARLEN_BLOCK_SIZE + 1;
+pub(crate) const VARLEN_BLOCK_TOTAL: usize = VARLEN_BLOCK_SIZE + 1;
+const VARLEN_BLOCK_TOTAL_U32: u32 = 33;
 
 /// Returns the size in bytes of the encoded form of a variable-length value of the given length.
 #[inline]
@@ -66,8 +57,9 @@ fn encoded_size_for_varlen(len: usize) -> u32 {
     if len == 0 {
         1 + 1
     } else {
-        let blocks = len.div_ceil(VARLEN_BLOCK_SIZE);
-        1 + (blocks as u32) * (VARLEN_BLOCK_TOTAL as u32)
+        let blocks = u32::try_from(len.div_ceil(VARLEN_BLOCK_SIZE))
+            .vortex_expect("varlen block count must fit in u32");
+        1 + blocks * VARLEN_BLOCK_TOTAL_U32
     }
 }
 
@@ -77,13 +69,17 @@ const fn encoded_size_for_fixed(value_bytes: u32) -> u32 {
     1 + value_bytes
 }
 
+fn byte_width_u32(width: usize) -> u32 {
+    u32::try_from(width).vortex_expect("native byte width must fit in u32")
+}
+
 /// Per-row width classification for a column.
 ///
 /// `Fixed(w)` means every row encodes to exactly `w` bytes (sentinel + value), regardless
 /// of null-ness or value. `Variable` means per-row sizes depend on the data (Utf8/Binary,
 /// List, or any composite that recurses through a variable-width field).
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub enum RowWidth {
+pub(crate) enum RowWidth {
     /// Per-row width is the same constant for every row in the column.
     Fixed(u32),
     /// Per-row width is data-dependent.
@@ -96,26 +92,24 @@ pub enum RowWidth {
 /// regardless of null-ness or value. Returns `Variable` when per-row sizes depend on the
 /// data.
 ///
-/// Classification does not depend on the [`SortField`]: null-vs-non-null encoding width is
+/// Classification does not depend on the [`RowSortField`]: null-vs-non-null encoding width is
 /// the same for fixed-width types (the sentinel byte plus zero-fill for nulls).
 ///
 /// # Errors
 ///
-/// Returns an error for dtypes that the row encoder does not yet support. Variable-length
-/// dtypes (Utf8/Binary), nested dtypes (Struct/FixedSizeList/Extension), and
-/// Variant/Union/List arrive in later commits.
-pub fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
+/// Returns an error for dtypes that the row encoder does not support.
+pub(crate) fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
     match dtype {
         DType::Null => Ok(RowWidth::Fixed(1)),
         DType::Bool(_) => Ok(RowWidth::Fixed(BOOL_ENCODED_SIZE)),
-        DType::Primitive(ptype, _) => Ok(RowWidth::Fixed(encoded_size_for_fixed(
-            ptype.byte_width() as u32,
-        ))),
+        DType::Primitive(ptype, _) => Ok(RowWidth::Fixed(encoded_size_for_fixed(byte_width_u32(
+            ptype.byte_width(),
+        )))),
         DType::Decimal(dt, _) => {
             let vt = DecimalType::smallest_decimal_value_type(dt);
-            Ok(RowWidth::Fixed(encoded_size_for_fixed(
-                vt.byte_width() as u32
-            )))
+            Ok(RowWidth::Fixed(encoded_size_for_fixed(byte_width_u32(
+                vt.byte_width(),
+            ))))
         }
         DType::Utf8(_) | DType::Binary(_) => Ok(RowWidth::Variable),
         DType::FixedSizeList(elem, n, _) => match row_width_for_dtype(elem)? {
@@ -154,11 +148,10 @@ pub fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
 ///
 /// # Errors
 ///
-/// Returns an error for unsupported canonical variants. Variable-length and nested
-/// variants land in later commits.
-pub fn field_size(
+/// Returns an error for unsupported canonical variants.
+pub(crate) fn field_size(
     canonical: &Canonical,
-    field: SortField,
+    field: RowSortField,
     sizes: &mut [u32],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
@@ -172,7 +165,7 @@ pub fn field_size(
         Canonical::FixedSizeList(arr) => add_size_fsl(arr, field, sizes, ctx)?,
         Canonical::Extension(arr) => add_size_extension(arr, field, sizes, ctx)?,
         Canonical::List(_) => vortex_bail!(
-            "row encoding does not yet support canonical type {:?}",
+            "row encoding does not support canonical List arrays: {:?}",
             canonical.dtype()
         ),
         Canonical::Variant(_) => {
@@ -188,9 +181,9 @@ pub fn field_size(
 ///
 /// After this call returns successfully, `cursors[i]` will have advanced by exactly the
 /// per-row contribution previously computed by [`field_size`] for the same column.
-pub fn field_encode(
+pub(crate) fn field_encode(
     canonical: &Canonical,
-    field: SortField,
+    field: RowSortField,
     offsets: &[u32],
     cursors: &mut [u32],
     out: &mut [u8],
@@ -206,7 +199,7 @@ pub fn field_encode(
         Canonical::FixedSizeList(arr) => encode_fsl(arr, field, offsets, cursors, out, ctx)?,
         Canonical::Extension(arr) => encode_extension(arr, field, offsets, cursors, out, ctx)?,
         Canonical::List(_) => vortex_bail!(
-            "row encoding does not yet support canonical type {:?}",
+            "row encoding does not support canonical List arrays: {:?}",
             canonical.dtype()
         ),
         Canonical::Variant(_) => {
@@ -231,12 +224,12 @@ fn add_size_null(arr: &NullArray, sizes: &mut [u32]) {
 }
 
 fn add_size_primitive(arr: &PrimitiveArray, sizes: &mut [u32]) {
-    let width = arr.ptype().byte_width() as u32;
+    let width = byte_width_u32(arr.ptype().byte_width());
     add_size_const(sizes, encoded_size_for_fixed(width));
 }
 
 fn add_size_decimal(arr: &DecimalArray, sizes: &mut [u32]) {
-    let width = arr.values_type().byte_width() as u32;
+    let width = byte_width_u32(arr.values_type().byte_width());
     add_size_const(sizes, encoded_size_for_fixed(width));
 }
 
@@ -261,7 +254,7 @@ fn add_size_varbinview(
 
 fn add_size_struct(
     arr: &StructArray,
-    field: SortField,
+    field: RowSortField,
     sizes: &mut [u32],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
@@ -279,7 +272,7 @@ fn add_size_struct(
 
 fn add_size_fsl(
     arr: &FixedSizeListArray,
-    field: SortField,
+    field: RowSortField,
     sizes: &mut [u32],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
@@ -305,7 +298,7 @@ fn add_size_fsl(
 
 fn add_size_extension(
     arr: &ExtensionArray,
-    field: SortField,
+    field: RowSortField,
     sizes: &mut [u32],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
@@ -315,7 +308,7 @@ fn add_size_extension(
 
 fn encode_null(
     arr: &NullArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -330,7 +323,7 @@ fn encode_null(
 
 fn encode_bool(
     arr: &BoolArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -359,7 +352,7 @@ fn encode_bool(
 
 fn encode_primitive(
     arr: &PrimitiveArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -373,7 +366,7 @@ fn encode_primitive(
 
 fn encode_primitive_typed<T: NativePType + RowEncode>(
     arr: &PrimitiveArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -396,14 +389,14 @@ fn encode_primitive_typed<T: NativePType + RowEncode>(
                 *b = 0;
             }
         }
-        col_offset[i] += encoded_size_for_fixed(value_bytes as u32);
+        col_offset[i] += encoded_size_for_fixed(byte_width_u32(value_bytes));
     }
     Ok(())
 }
 
 fn encode_decimal(
     arr: &DecimalArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -436,7 +429,7 @@ fn encode_decimal(
 fn encode_decimal_typed<T>(
     arr: &DecimalArray,
     mask: &vortex_mask::Mask,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -446,7 +439,7 @@ fn encode_decimal_typed<T>(
     let non_null = field.non_null_sentinel();
     let null = field.null_sentinel();
     let value_bytes = size_of::<T>();
-    let total = encoded_size_for_fixed(value_bytes as u32);
+    let total = encoded_size_for_fixed(byte_width_u32(value_bytes));
     let slice = arr.buffer::<T>();
     for i in 0..slice.len() {
         let pos = (row_offsets[i] + col_offset[i]) as usize;
@@ -465,7 +458,7 @@ fn encode_decimal_typed<T>(
 
 fn encode_varbinview(
     arr: &VarBinViewArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -494,7 +487,7 @@ fn encode_varbinview(
 
 fn encode_struct(
     arr: &StructArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -545,7 +538,7 @@ fn encode_struct(
 
 fn encode_fsl(
     arr: &FixedSizeListArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -608,7 +601,7 @@ fn encode_fsl(
 
 fn encode_extension(
     arr: &ExtensionArray,
-    field: SortField,
+    field: RowSortField,
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
@@ -647,16 +640,17 @@ fn encode_varlen_value(bytes: &[u8], out: &mut [u8], descending: bool) -> u32 {
     for j in n..VARLEN_BLOCK_SIZE {
         out[written + j] = xor;
     }
-    out[written + VARLEN_BLOCK_SIZE] = (n as u8) ^ xor;
+    out[written + VARLEN_BLOCK_SIZE] =
+        u8::try_from(n).vortex_expect("final varlen block length must fit in u8") ^ xor;
     written += VARLEN_BLOCK_TOTAL;
-    written as u32
+    u32::try_from(written).vortex_expect("encoded varlen byte length must fit in u32")
 }
 
 /// Internal trait for encoding a fixed-width native value into byte slots.
 ///
 /// Implementations must produce a sequence of `size_of::<Self>()` bytes that is
 /// lexicographically byte-comparable according to the natural ordering of the type.
-pub trait RowEncode: Copy {
+pub(crate) trait RowEncode: Copy {
     /// Encode this value into `out`, inverting the bytes for descending order.
     fn encode_to(self, out: &mut [u8], descending: bool);
 }
@@ -758,238 +752,3 @@ impl RowEncode for f16 {
         out.copy_from_slice(&bytes);
     }
 }
-
-/// Encode a single scalar primitive value of a known PType into a buffer slot.
-pub fn encode_scalar_primitive(
-    ptype: PType,
-    value: vortex_array::scalar::PValue,
-    field: SortField,
-    is_null: bool,
-    out: &mut ByteBufferMut,
-) -> VortexResult<()> {
-    if is_null {
-        out.push(field.null_sentinel());
-        return Ok(());
-    }
-    out.push(field.non_null_sentinel());
-    let width = ptype.byte_width();
-    let mut tmp = [0u8; 16];
-    let buf = &mut tmp[..width];
-    match_each_native_ptype!(
-        ptype,
-        integral: |T| {
-            let v: T = T::try_from(value)?;
-            v.encode_to(buf, field.descending);
-        },
-        floating: |T| {
-            let v: T = T::try_from(value)?;
-            v.encode_to(buf, field.descending);
-        }
-    );
-    out.extend_from_slice(buf);
-    Ok(())
-}
-
-/// Encode a single varlen value into a buffer.
-pub fn encode_scalar_varlen(value: Option<&[u8]>, field: SortField, out: &mut ByteBufferMut) {
-    match value {
-        None => out.push(field.null_sentinel()),
-        Some(bytes) => {
-            out.push(field.non_null_sentinel());
-            let needed = if bytes.is_empty() {
-                1
-            } else {
-                bytes.len().div_ceil(VARLEN_BLOCK_SIZE) * VARLEN_BLOCK_TOTAL
-            };
-            let start = out.len();
-            for _ in 0..needed {
-                out.push(0);
-            }
-            let written = encode_varlen_value(bytes, &mut out[start..], field.descending);
-            debug_assert_eq!(written as usize, needed);
-        }
-    }
-}
-
-/// Encode a single boolean value.
-pub fn encode_scalar_bool(value: Option<bool>, field: SortField, out: &mut ByteBufferMut) {
-    match value {
-        None => {
-            out.push(field.null_sentinel());
-            out.push(0);
-        }
-        Some(b) => {
-            out.push(field.non_null_sentinel());
-            let raw = if b { 0x02u8 } else { 0x01u8 };
-            let xor = if field.descending { 0xFFu8 } else { 0 };
-            out.push(raw ^ xor);
-        }
-    }
-}
-
-/// Encode a single null-type value (only the sentinel).
-pub fn encode_scalar_null(field: SortField, is_null: bool, out: &mut ByteBufferMut) {
-    if is_null {
-        out.push(field.null_sentinel());
-    } else {
-        out.push(field.non_null_sentinel());
-    }
-}
-
-/// Returns the per-row encoded size for a scalar value (used for the Constant fast path).
-pub fn encoded_size_for_scalar(
-    scalar: &vortex_array::scalar::Scalar,
-    _field: SortField,
-) -> VortexResult<u32> {
-    if scalar.is_null() {
-        match scalar.dtype() {
-            DType::Null => Ok(1),
-            DType::Bool(_) => Ok(BOOL_ENCODED_SIZE),
-            DType::Primitive(ptype, _) => Ok(encoded_size_for_fixed(ptype.byte_width() as u32)),
-            DType::Decimal(dt, _) => {
-                let vt = DecimalType::smallest_decimal_value_type(dt);
-                Ok(encoded_size_for_fixed(vt.byte_width() as u32))
-            }
-            DType::Utf8(_) | DType::Binary(_) => Ok(1),
-            _ => vortex_bail!(
-                "unsupported scalar dtype for row encoding: {}",
-                scalar.dtype()
-            ),
-        }
-    } else {
-        match scalar.dtype() {
-            DType::Null => Ok(1),
-            DType::Bool(_) => Ok(BOOL_ENCODED_SIZE),
-            DType::Primitive(ptype, _) => Ok(encoded_size_for_fixed(ptype.byte_width() as u32)),
-            DType::Decimal(..) => {
-                let dec = scalar.as_decimal();
-                let vt = dec
-                    .decimal_value()
-                    .map(|v| v.decimal_type())
-                    .unwrap_or(DecimalType::I128);
-                Ok(encoded_size_for_fixed(vt.byte_width() as u32))
-            }
-            DType::Utf8(_) => {
-                let bs = scalar
-                    .as_utf8()
-                    .value()
-                    .map(|s| s.as_str().len())
-                    .unwrap_or(0);
-                Ok(encoded_size_for_varlen(bs))
-            }
-            DType::Binary(_) => {
-                let bs = scalar.as_binary().value().map(|b| b.len()).unwrap_or(0);
-                Ok(encoded_size_for_varlen(bs))
-            }
-            _ => vortex_bail!(
-                "unsupported scalar dtype for row encoding: {}",
-                scalar.dtype()
-            ),
-        }
-    }
-}
-
-/// Encode a single scalar value into a fresh `Bytes` buffer.
-pub fn encode_scalar(
-    scalar: &vortex_array::scalar::Scalar,
-    field: SortField,
-) -> VortexResult<bytes::Bytes> {
-    use vortex_array::scalar::PValue;
-    let size = encoded_size_for_scalar(scalar, field)? as usize;
-    let mut out = ByteBufferMut::with_capacity(size);
-    if scalar.is_null() {
-        match scalar.dtype() {
-            DType::Null => out.push(field.null_sentinel()),
-            DType::Bool(_) => {
-                out.push(field.null_sentinel());
-                out.push(0);
-            }
-            DType::Primitive(ptype, _) => {
-                out.push(field.null_sentinel());
-                let width = ptype.byte_width();
-                for _ in 0..width {
-                    out.push(0);
-                }
-            }
-            DType::Decimal(dt, _) => {
-                out.push(field.null_sentinel());
-                let vt = DecimalType::smallest_decimal_value_type(dt);
-                for _ in 0..vt.byte_width() {
-                    out.push(0);
-                }
-            }
-            DType::Utf8(_) | DType::Binary(_) => out.push(field.null_sentinel()),
-            _ => vortex_bail!(
-                "unsupported scalar dtype for row encoding: {}",
-                scalar.dtype()
-            ),
-        }
-    } else {
-        match scalar.dtype() {
-            DType::Null => out.push(field.non_null_sentinel()),
-            DType::Bool(_) => {
-                let v = scalar.as_bool().value().unwrap_or(false);
-                encode_scalar_bool(Some(v), field, &mut out);
-            }
-            DType::Primitive(ptype, _) => {
-                let v: PValue = scalar
-                    .as_primitive()
-                    .pvalue()
-                    .ok_or_else(|| vortex_error::vortex_err!("missing primitive value"))?;
-                encode_scalar_primitive(*ptype, v, field, false, &mut out)?;
-            }
-            DType::Decimal(..) => {
-                let dec = scalar.as_decimal();
-                out.push(field.non_null_sentinel());
-                let value = dec
-                    .decimal_value()
-                    .ok_or_else(|| vortex_error::vortex_err!("missing decimal value"))?;
-                match value {
-                    vortex_array::scalar::DecimalValue::I8(v) => {
-                        let mut tmp = [0u8; 1];
-                        v.encode_to(&mut tmp, field.descending);
-                        out.extend_from_slice(&tmp);
-                    }
-                    vortex_array::scalar::DecimalValue::I16(v) => {
-                        let mut tmp = [0u8; 2];
-                        v.encode_to(&mut tmp, field.descending);
-                        out.extend_from_slice(&tmp);
-                    }
-                    vortex_array::scalar::DecimalValue::I32(v) => {
-                        let mut tmp = [0u8; 4];
-                        v.encode_to(&mut tmp, field.descending);
-                        out.extend_from_slice(&tmp);
-                    }
-                    vortex_array::scalar::DecimalValue::I64(v) => {
-                        let mut tmp = [0u8; 8];
-                        v.encode_to(&mut tmp, field.descending);
-                        out.extend_from_slice(&tmp);
-                    }
-                    vortex_array::scalar::DecimalValue::I128(v) => {
-                        let mut tmp = [0u8; 16];
-                        v.encode_to(&mut tmp, field.descending);
-                        out.extend_from_slice(&tmp);
-                    }
-                    vortex_array::scalar::DecimalValue::I256(_) => {
-                        vortex_bail!("row encoding for Decimal256 is not yet implemented")
-                    }
-                }
-            }
-            DType::Utf8(_) => {
-                let v = scalar.as_utf8();
-                let bytes = v.value().map(|s| s.as_str().as_bytes()).unwrap_or(&[]);
-                encode_scalar_varlen(Some(bytes), field, &mut out);
-            }
-            DType::Binary(_) => {
-                let v = scalar.as_binary();
-                let bytes = v.value().map(|b| b.as_slice()).unwrap_or(&[]);
-                encode_scalar_varlen(Some(bytes), field, &mut out);
-            }
-            _ => vortex_bail!(
-                "unsupported scalar dtype for row encoding: {}",
-                scalar.dtype()
-            ),
-        }
-    }
-    Ok(out.freeze().into_inner())
-}
diff --git a/vortex-row/src/convert.rs b/vortex-row/src/convert.rs
deleted file mode 100644
index c3b06d92748..00000000000
--- a/vortex-row/src/convert.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright the Vortex contributors
-
-//! User-facing entry point: turn N columnar arrays into one row-encoded `ListView<u8>`.
-
-use vortex_array::ArrayRef;
-use vortex_array::ExecutionCtx;
-use vortex_array::arrays::ListViewArray;
-use vortex_array::scalar_fn::ScalarFnVTable;
-use vortex_array::scalar_fn::VecExecutionArgs;
-use vortex_error::VortexResult;
-use vortex_error::vortex_bail;
-
-use crate::encode::RowEncode;
-use crate::options::RowEncodeOptions;
-use crate::options::SortField;
-use crate::size::RowSize;
-
-/// Convert N columnar arrays into a single row-oriented [`ListViewArray`] of `u8` whose
-/// bytes are lexicographically comparable in the same order as a tuple comparison of the
-/// input values according to `fields`.
-pub fn convert_columns(
-    cols: &[ArrayRef],
-    fields: &[SortField],
-    ctx: &mut ExecutionCtx,
-) -> VortexResult<ListViewArray> {
-    if cols.len() != fields.len() {
-        vortex_bail!(
-            "convert_columns: cols.len() ({}) does not match fields.len() ({})",
-            cols.len(),
-            fields.len()
-        );
-    }
-    if cols.is_empty() {
-        vortex_bail!("convert_columns: at least one column is required");
-    }
-    let nrows = cols[0].len();
-    for (i, col) in cols.iter().enumerate() {
-        if col.len() != nrows {
-            vortex_bail!(
-                "convert_columns: column {} has length {} but expected {}",
-                i,
-                col.len(),
-                nrows
-            );
-        }
-    }
-
-    let options = RowEncodeOptions::new(fields.iter().copied());
-    let args = VecExecutionArgs::new(cols.to_vec(), nrows);
-    let result = RowEncode.execute(&options, &args, ctx)?;
-    result.execute::<ListViewArray>(ctx)
-}
-
-/// Compute only the per-row sizes (in bytes) of the row-encoded form for N columns.
-pub fn compute_row_sizes(
-    cols: &[ArrayRef],
-    fields: &[SortField],
-    ctx: &mut ExecutionCtx,
-) -> VortexResult<ArrayRef> {
-    if cols.len() != fields.len() {
-        vortex_bail!(
-            "compute_row_sizes: cols.len() ({}) does not match fields.len() ({})",
-            cols.len(),
-            fields.len()
-        );
-    }
-    if cols.is_empty() {
-        vortex_bail!("compute_row_sizes: at least one column is required");
-    }
-    let nrows = cols[0].len();
-    let options = RowEncodeOptions::new(fields.iter().copied());
-    let args = VecExecutionArgs::new(cols.to_vec(), nrows);
-    RowSize.execute(&options, &args, ctx)
-}
diff --git a/vortex-row/src/encode.rs b/vortex-row/src/encode.rs
index 1b77d955964..4bc4962503e 100644
--- a/vortex-row/src/encode.rs
+++ b/vortex-row/src/encode.rs
@@ -1,11 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-#![allow(
-    clippy::cast_possible_truncation,
-    reason = "row encoding indexes into u32-sized buffers; lengths are validated to fit in u32"
-)]
-
 //! `RowEncode` variadic scalar function: encode N input columns into a single `ListView<u8>`.
 //!
 //! The output's `(elements, offsets, sizes)` triple is built up in a single left-to-right
@@ -16,11 +11,9 @@
 use std::sync::Arc;
 
 use vortex_array::ArrayRef;
-use vortex_array::ArrayView;
 use vortex_array::Canonical;
 use vortex_array::ExecutionCtx;
 use vortex_array::IntoArray;
-use vortex_array::VTable;
 use vortex_array::arrays::ListViewArray;
 use vortex_array::arrays::PrimitiveArray;
 use vortex_array::dtype::DType;
@@ -40,27 +33,31 @@ use vortex_error::vortex_bail;
 use vortex_session::VortexSession;
 
 use crate::codec;
-use crate::options::RowEncodeOptions;
-use crate::options::SortField;
-use crate::options::deserialize_row_encode_options;
-use crate::options::serialize_row_encode_options;
+use crate::options::RowEncodingOptions;
+use crate::options::RowSortField;
+use crate::options::deserialize_row_encoding_options;
+use crate::options::serialize_row_encoding_options;
 use crate::size::compute_sizes;
 
 /// Variadic scalar function that encodes N input columns into a single `List<u8>`
 /// [`ListViewArray`] where row `i` contains the row-encoded bytes for column values
 /// `cols[0][i], cols[1][i], ...` concatenated left-to-right.
+///
+/// This scalar function is public for session registration and encoding extension work.
+/// Most callers should use [`RowEncoder`](crate::RowEncoder) rather than invoking the scalar
+/// function directly.
 #[derive(Clone, Debug)]
 pub struct RowEncode;
 
 impl ScalarFnVTable for RowEncode {
-    type Options = RowEncodeOptions;
+    type Options = RowEncodingOptions;
 
     fn id(&self) -> ScalarFnId {
         ScalarFnId::from("vortex.row_encode")
     }
 
     fn serialize(&self, options: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
-        Ok(Some(serialize_row_encode_options(options)))
+        Ok(Some(serialize_row_encoding_options(options)))
     }
 
     fn deserialize(
@@ -68,7 +65,7 @@ impl ScalarFnVTable for RowEncode {
         metadata: &[u8],
         _session: &VortexSession,
     ) -> VortexResult<Self::Options> {
-        deserialize_row_encode_options(metadata)
+        deserialize_row_encoding_options(metadata)
     }
 
     fn arity(&self, _options: &Self::Options) -> Arity {
@@ -105,7 +102,7 @@ impl ScalarFnVTable for RowEncode {
 }
 
 fn execute_row_encode(
-    options: &RowEncodeOptions,
+    options: &RowEncodingOptions,
     args: &dyn ExecutionArgs,
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<ArrayRef> {
@@ -115,10 +112,8 @@ fn execute_row_encode(
     let crate::size::SizePassResult {
         fixed_per_row,
         var_lengths,
-        col_kinds: _,
-        first_varlen_idx: _,
         columns,
-    } = compute_sizes(options, args, ctx, "RowEncode")?;
+    } = compute_sizes(options, args, ctx)?;
 
     // ===== Phase 2: totals + buffer =====
     let var_total: u64 = var_lengths
@@ -131,12 +126,11 @@ fn execute_row_encode(
     if total > u32::MAX as u64 {
         vortex_bail!("row-encoded output size {} bytes exceeds u32::MAX", total);
     }
-    let total_len = total as usize;
+    let total_len =
+        usize::try_from(total).vortex_expect("validated row-encoded output size must fit usize");
 
     // Allocate the elements buffer (zero-initialized). The zero-init lets every encoder
-    // assume previously-untouched bytes are zero, simplifying the null-row fill paths.
-    // PR 2 skips this memset because every byte in the output range is written by some
-    // encoder.
+    // assume previously untouched bytes are zero, simplifying the null-row fill paths.
     let mut out_buf: BufferMut<u8> = BufferMut::with_capacity(total_len);
     out_buf.push_n(0u8, total_len);
 
@@ -148,8 +142,10 @@ fn execute_row_encode(
     match var_lengths.as_ref() {
         None => {
             for i in 0..nrows {
+                let row_idx =
+                    u32::try_from(i).vortex_expect("row index must fit in u32 after validation");
                 listview_offsets.push(
-                    (i as u32)
+                    row_idx
                         .checked_mul(fixed_per_row)
                         .vortex_expect("row offset overflow (already validated total fits in u32)"),
                 );
@@ -158,7 +154,9 @@ fn execute_row_encode(
         Some(v) => {
             let mut acc: u32 = 0;
             for (i, &l) in v.iter().enumerate() {
-                let off = (i as u32)
+                let row_idx =
+                    u32::try_from(i).vortex_expect("row index must fit in u32 after validation");
+                let off = row_idx
                     .checked_mul(fixed_per_row)
                     .and_then(|t| t.checked_add(acc))
                     .vortex_expect("row offset overflow");
@@ -201,14 +199,13 @@ fn execute_row_encode(
     )
 }
 
-/// Dispatch a single column's encoding into the shared `out` buffer.
+/// Dispatch a single column's encoding into the shared `out` buffer through the canonical path.
 ///
-/// For PR 1 this is just the canonicalize-then-`codec::field_encode` fallback path.
-/// In-crate fast paths for `Constant`/`Dict`/`Patched` and the inventory-based registry
-/// for downstream encodings are added in PR 3.
-pub fn dispatch_encode(
+/// TODO(row): add per-encoding fast paths here so Constant, Dictionary, and compressed arrays
+/// can write row bytes without canonicalizing.
+pub(crate) fn dispatch_encode(
     col: &ArrayRef,
-    field: SortField,
+    field: RowSortField,
     offsets: &[u32],
     cursors: &mut [u32],
     out: &mut [u8],
@@ -217,22 +214,3 @@ pub fn dispatch_encode(
     let canonical = col.clone().execute::<Canonical>(ctx)?;
     codec::field_encode(&canonical, field, offsets, cursors, out, ctx)
 }
-
-/// Mutate-buffer kernel: write this column's per-row bytes into `out` at
-/// `offsets[i] + cursors[i]`, advancing `cursors[i]` by the bytes written.
-///
-/// Return `Ok(None)` to decline and fall back to the canonical path.
-///
-/// Trait is defined now; per-encoding impls and dispatch wiring land in PR 3.
-pub trait RowEncodeKernel: VTable {
-    /// Write this column's per-row bytes into `out` at `offsets[i] + cursors[i]`, advancing
-    /// `cursors[i]` by the bytes written.
-    fn row_encode_into(
-        column: ArrayView<'_, Self>,
-        field: SortField,
-        offsets: &[u32],
-        cursors: &mut [u32],
-        out: &mut [u8],
-        ctx: &mut ExecutionCtx,
-    ) -> VortexResult<Option<()>>;
-}
diff --git a/vortex-row/src/lib.rs b/vortex-row/src/lib.rs
index fddcca665c1..d921e2998e3 100644
--- a/vortex-row/src/lib.rs
+++ b/vortex-row/src/lib.rs
@@ -1,55 +1,59 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! Row-oriented byte encoder, analogous to Apache Arrow's `arrow-row` crate.
+//! Row-oriented byte encoding for Vortex arrays.
 //!
-//! The encoder converts N columnar arrays into a single `List<u8>` array where each row's
-//! bytes are lexicographically comparable in the same order as a tuple comparison of the
-//! original values. This is useful for sorting, hashing into row containers, and other
-//! operations that benefit from a sort-friendly opaque byte representation of a multi-column
-//! key.
+//! This crate converts one or more columnar arrays into a single `ListView<u8>` array whose
+//! row byte slices can be compared lexicographically. The byte ordering matches tuple
+//! ordering of the input values under the requested [`RowSortField`] settings, making the
+//! representation useful for sort keys and other row-key operations.
 //!
-//! Two variadic scalar functions drive the implementation:
-//! - [`RowSize`] computes per-row byte sizes across all N input columns.
-//! - [`RowEncode`] writes the row-encoded bytes into a single `ListView<u8>` accumulator
-//!   in one left-to-right pass.
+//! The public entry points are:
+//! - [`RowEncoder`], the primary API for encoding columns into row bytes.
+//! - [`RowEncoder::row_sizes`], which computes the fixed and variable byte contributions
+//!   without materializing the encoded rows.
+//! - [`convert_columns`] and [`compute_row_sizes`], compatibility helpers around
+//!   [`RowEncoder`].
+//! - [`initialize`], which registers the [`RowSize`] and [`RowEncode`] scalar functions on a
+//!   [`VortexSession`].
 //!
-//! Each scalar function exposes a per-encoding fast-path trait
-//! ([`RowSizeKernel`] / [`RowEncodeKernel`]) for downstream encodings to plug into; PR 3
-//! adds in-crate impls for `Constant`, `Dict`, and `Patched` and an inventory-based
-//! registry for external encodings.
+//! Internally, encoding is split into two scalar functions. [`RowSize`] performs the sizing
+//! pass and classifies fixed-width versus variable-width input columns. [`RowEncode`] uses
+//! those sizes to allocate one contiguous elements buffer, then writes each column's bytes
+//! into the per-row slots from left to right.
 //!
-//! The user-facing entry point is [`convert_columns`].
-//!
-//! Row-encoding scalar functions are not registered in the default
-//! [`VortexSession`]. Call [`initialize`] on a session to make `RowSize` and `RowEncode`
-//! available via the expression layer.
+//! Supported logical types are nulls, booleans, primitive integers and floats, decimals up to
+//! 128 bits, UTF-8 and binary values, structs, fixed-size lists, and extensions whose storage
+//! type is supported. Variant, union, and variable-size list arrays are rejected because this
+//! crate does not define an ordering for them.
 
-pub mod codec;
-pub mod convert;
-pub mod encode;
-pub mod options;
-pub mod size;
+mod codec;
+mod encode;
+mod encoder;
+mod options;
+mod size;
 
 #[cfg(test)]
 mod tests;
 
-pub use convert::compute_row_sizes;
-pub use convert::convert_columns;
 pub use encode::RowEncode;
-pub use encode::RowEncodeKernel;
-pub use options::RowEncodeOptions;
-pub use options::SortField;
+pub use encoder::RowEncoder;
+pub use encoder::compute_row_sizes;
+pub use encoder::compute_row_sizes_with_options;
+pub use encoder::convert_columns;
+pub use encoder::convert_columns_with_options;
+pub use options::RowEncodingOptions;
+pub use options::RowSortField;
 pub use size::RowSize;
-pub use size::RowSizeKernel;
 use vortex_array::scalar_fn::session::ScalarFnSessionExt;
 use vortex_session::VortexSession;
 
 /// Register the row-encoding scalar functions ([`RowSize`] and [`RowEncode`]) on the given
 /// session.
 ///
-/// Call once on session construction if you want row encoding available via the expression
-/// layer or via [`convert_columns`].
+/// Call this during session construction when row encoding must be available through the
+/// expression layer. The direct [`RowEncoder`] API constructs the scalar-function calls
+/// itself and does not require global registration.
 pub fn initialize(session: &VortexSession) {
     session.scalar_fns().register(RowSize);
     session.scalar_fns().register(RowEncode);
diff --git a/vortex-row/src/options.rs b/vortex-row/src/options.rs
index a9e5e2b18ab..380c9a3827f 100644
--- a/vortex-row/src/options.rs
+++ b/vortex-row/src/options.rs
@@ -6,35 +6,26 @@ use std::fmt::Formatter;
 
 use smallvec::SmallVec;
 
-/// Per-column options for the row-oriented byte encoder.
+/// Per-column ordering options for row-oriented encoding.
 ///
-/// These options control how a single column is encoded into row bytes:
-/// - `descending`: if true, the encoded value bytes are bit-inverted so that
-///   lexicographic byte comparison reflects the reverse of the natural ordering.
-///   The null sentinel byte is NOT inverted, so nulls keep their requested
-///   position relative to non-nulls.
-/// - `nulls_first`: if true, nulls sort before non-nulls. If false, nulls sort
-///   after non-nulls. Implemented via the sentinel byte that precedes every
-///   value's encoded bytes.
+/// A `RowSortField` describes how one input column contributes to a row key. Descending order
+/// bit-inverts the encoded value bytes for that column. Null placement is controlled separately,
+/// so nulls keep the requested position relative to non-null values in either direction.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub struct SortField {
-    /// If true, encoded value bytes are bit-inverted so lexicographic byte
-    /// comparison reflects the reverse of the natural ordering.
+pub struct RowSortField {
+    /// If true, this column sorts in descending order.
     pub descending: bool,
-    /// If true, nulls sort before non-null values; otherwise nulls sort after.
+    /// If true, nulls sort before non-null values; otherwise nulls sort after.
     pub nulls_first: bool,
 }
 
-impl Default for SortField {
+impl Default for RowSortField {
     fn default() -> Self {
-        Self {
-            descending: false,
-            nulls_first: true,
-        }
+        Self::ascending()
     }
 }
 
-impl Display for SortField {
+impl Display for RowSortField {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(
             f,
@@ -44,25 +35,47 @@ impl Display for SortField {
     }
 }
 
-impl SortField {
-    /// Construct a new `SortField` with explicit options.
-    pub fn new(descending: bool, nulls_first: bool) -> Self {
+impl RowSortField {
+    /// Construct a new `RowSortField` with explicit options.
+    pub const fn new(descending: bool, nulls_first: bool) -> Self {
         Self {
             descending,
             nulls_first,
         }
     }
 
+    /// Construct an ascending field with nulls first.
+    pub const fn ascending() -> Self {
+        Self::new(false, true)
+    }
+
+    /// Construct a descending field with nulls first.
+    pub const fn descending() -> Self {
+        Self::new(true, true)
+    }
+
+    /// Return this field with nulls ordered before non-null values.
+    pub const fn nulls_first(mut self) -> Self {
+        self.nulls_first = true;
+        self
+    }
+
+    /// Return this field with nulls ordered after non-null values.
+    pub const fn nulls_last(mut self) -> Self {
+        self.nulls_first = false;
+        self
+    }
+
     /// Returns the sentinel byte to write for a non-null value.
     #[inline]
-    pub fn non_null_sentinel(&self) -> u8 {
+    pub(crate) fn non_null_sentinel(&self) -> u8 {
         // Non-null is always 0x01. Null choices are < or > 0x01.
         0x01
     }
 
     /// Returns the sentinel byte to write for a null value.
     #[inline]
-    pub fn null_sentinel(&self) -> u8 {
+    pub(crate) fn null_sentinel(&self) -> u8 {
         if self.nulls_first {
             // Nulls before non-nulls (smaller byte sorts first).
             0x00
@@ -73,34 +86,55 @@ impl SortField {
     }
 }
 
-/// Inline capacity for [`RowEncodeOptions::fields`]. Up to this many [`SortField`]s
-/// are held inline without a heap allocation; beyond, the storage spills.
-pub const FIELDS_INLINE: usize = 4;
+const FIELDS_INLINE: usize = 4;
 
-/// Options for the variadic [`RowSize`] and [`RowEncode`] scalar functions:
-/// one [`SortField`] per input column.
-///
-/// Stored in a [`SmallVec`] so that typical 1–4 column keys avoid a heap
-/// allocation; longer field lists spill to the heap transparently.
+/// Ordering options for row-oriented encoding.
 ///
-/// [`RowSize`]: super::size::RowSize
-/// [`RowEncode`]: super::encode::RowEncode
+/// The options contain one [`RowSortField`] per input column, in the same order as the columns
+/// passed to [`convert_columns`](crate::convert_columns),
+/// [`compute_row_sizes`](crate::compute_row_sizes), [`RowSize`](crate::RowSize), or
+/// [`RowEncode`](crate::RowEncode).
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct RowEncodeOptions {
-    /// Per-column sort fields, in left-to-right column order.
-    pub fields: SmallVec<[SortField; FIELDS_INLINE]>,
+pub struct RowEncodingOptions {
+    pub(crate) fields: SmallVec<[RowSortField; FIELDS_INLINE]>,
 }
 
-impl RowEncodeOptions {
-    /// Construct a new `RowEncodeOptions` from any iterator of [`SortField`]s.
-    pub fn new(fields: impl IntoIterator<Item = SortField>) -> Self {
+impl RowEncodingOptions {
+    /// Construct a new `RowEncodingOptions` from any iterator of [`RowSortField`]s.
+    pub fn new(fields: impl IntoIterator<Item = RowSortField>) -> Self {
         Self {
             fields: fields.into_iter().collect(),
         }
     }
+
+    /// Construct default ascending, nulls-first options for `column_count` input columns.
+    pub fn default_for_columns(column_count: usize) -> Self {
+        Self::new(std::iter::repeat_n(RowSortField::default(), column_count))
+    }
+
+    /// Borrow the per-column sort fields.
+    pub fn fields(&self) -> &[RowSortField] {
+        &self.fields
+    }
+
+    /// Return the number of input columns described by these options.
+    pub fn len(&self) -> usize {
+        self.fields.len()
+    }
+
+    /// Return true when the options do not describe any input columns.
+    pub fn is_empty(&self) -> bool {
+        self.fields.is_empty()
+    }
+}
+
+impl FromIterator<RowSortField> for RowEncodingOptions {
+    fn from_iter<T: IntoIterator<Item = RowSortField>>(iter: T) -> Self {
+        Self::new(iter)
+    }
 }
 
-impl Display for RowEncodeOptions {
+impl Display for RowEncodingOptions {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(f, "[")?;
         for (i, field) in self.fields.iter().enumerate() {
@@ -113,12 +147,12 @@ impl Display for RowEncodeOptions {
     }
 }
 
-/// Serialize a [`RowEncodeOptions`] to a compact byte vector: 4-byte LE length followed by
+/// Serialize a [`RowEncodingOptions`] to a compact byte vector: 4-byte LE length followed by
 /// `2 * len` bytes (descending + nulls_first booleans for each field).
-pub(crate) fn serialize_row_encode_options(opts: &RowEncodeOptions) -> Vec<u8> {
+pub(crate) fn serialize_row_encoding_options(opts: &RowEncodingOptions) -> Vec<u8> {
     use vortex_error::VortexExpect;
     let n =
-        u32::try_from(opts.fields.len()).vortex_expect("RowEncodeOptions length must fit in u32");
+        u32::try_from(opts.fields.len()).vortex_expect("RowEncodingOptions length must fit in u32");
     let mut out = Vec::with_capacity(4 + 2 * opts.fields.len());
     out.extend_from_slice(&n.to_le_bytes());
     for f in &opts.fields {
@@ -128,30 +162,38 @@ pub(crate) fn serialize_row_encode_options(opts: &RowEncodeOptions) -> Vec<u8> {
     out
 }
 
-/// Deserialize a [`RowEncodeOptions`] produced by [`serialize_row_encode_options`].
-pub(crate) fn deserialize_row_encode_options(
+/// Deserialize a [`RowEncodingOptions`] produced by [`serialize_row_encoding_options`].
+///
+/// The payload is a 4-byte little-endian field count followed by two bytes per field
+/// (`descending`, then `nulls_first`); any non-zero byte is read as `true`. Returns an error
+/// when the length prefix is missing or the payload size does not match the declared count.
+pub(crate) fn deserialize_row_encoding_options(
     bytes: &[u8],
-) -> vortex_error::VortexResult<RowEncodeOptions> {
+) -> vortex_error::VortexResult<RowEncodingOptions> {
     if bytes.len() < 4 {
-        vortex_error::vortex_bail!("RowEncodeOptions metadata must contain a 4-byte length prefix");
+        vortex_error::vortex_bail!(
+            "RowEncodingOptions metadata must contain a 4-byte length prefix"
+        );
     }
     let n = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize;
-    let expected = 4 + 2 * n;
+    // Saturate rather than overflow where `usize` is 32 bits: a saturated size can never equal
+    // `bytes.len()`, so an oversized declared count is rejected by the size check below.
+    let expected = n.saturating_mul(2).saturating_add(4);
     if bytes.len() != expected {
         vortex_error::vortex_bail!(
-            "RowEncodeOptions metadata wrong size: got {}, expected {}",
+            "RowEncodingOptions metadata wrong size: got {}, expected {}",
             bytes.len(),
             expected
         );
     }
-    let mut fields: SmallVec<[SortField; FIELDS_INLINE]> = SmallVec::with_capacity(n);
+    let mut fields: SmallVec<[RowSortField; FIELDS_INLINE]> = SmallVec::with_capacity(n);
     let mut i = 4;
     for _ in 0..n {
-        fields.push(SortField {
+        fields.push(RowSortField {
             descending: bytes[i] != 0,
             nulls_first: bytes[i + 1] != 0,
         });
         i += 2;
     }
-    Ok(RowEncodeOptions { fields })
+    Ok(RowEncodingOptions { fields })
 }
diff --git a/vortex-row/src/size.rs b/vortex-row/src/size.rs
index 7148a2a21d8..48d4f8e4dbc 100644
--- a/vortex-row/src/size.rs
+++ b/vortex-row/src/size.rs
@@ -6,11 +6,9 @@
 use std::sync::Arc;
 
 use vortex_array::ArrayRef;
-use vortex_array::ArrayView;
 use vortex_array::Canonical;
 use vortex_array::ExecutionCtx;
 use vortex_array::IntoArray;
-use vortex_array::VTable;
 use vortex_array::arrays::ConstantArray;
 use vortex_array::arrays::PrimitiveArray;
 use vortex_array::arrays::StructArray;
@@ -35,53 +33,16 @@ use vortex_session::VortexSession;
 
 use crate::codec;
 use crate::codec::RowWidth;
-use crate::options::RowEncodeOptions;
-use crate::options::SortField;
-use crate::options::deserialize_row_encode_options;
-use crate::options::serialize_row_encode_options;
-
-/// Classification of a single input column for the size pass.
-///
-/// Tracks each column's within-row byte offset (the constant prefix from all preceding
-/// fixed-width columns) and, for fixed columns, whether any variable-length column has
-/// appeared yet — the encode pass uses this to choose between the arithmetic-write fast
-/// path (no varlen before this column, so the within-row position is constant) and the
-/// cursor-write path.
-#[derive(Clone, Copy, Debug)]
-#[allow(
-    dead_code,
-    reason = "fields read by the RowEncode pipeline in a later commit"
-)]
-pub(crate) enum ColKind {
-    /// Column has fixed width `width`. `prefix` is the within-row byte offset of this
-    /// column's first byte. If `before_varlen` is true, no variable-length column precedes
-    /// this one, so the within-row offset is constant for every row.
-    Fixed {
-        width: u32,
-        prefix: u32,
-        before_varlen: bool,
-    },
-    /// Column has variable per-row width. `fixed_prefix` is the sum of widths of all
-    /// preceding fixed columns; the varlen contribution from earlier varlen columns is
-    /// added per row.
-    Variable { fixed_prefix: u32 },
-}
+use crate::options::RowEncodingOptions;
+use crate::options::RowSortField;
+use crate::options::deserialize_row_encoding_options;
+use crate::options::serialize_row_encoding_options;
 
 /// Result of the size pass: enough information for both [`RowSize::execute`] and the
 /// downstream [`RowEncode`](super::encode::RowEncode) pipeline.
 pub(crate) struct SizePassResult {
     pub fixed_per_row: u32,
     pub var_lengths: Option<Vec<u32>>,
-    #[allow(
-        dead_code,
-        reason = "consumed by the arithmetic-write fast path added in PR 2"
-    )]
-    pub col_kinds: Vec<ColKind>,
-    #[allow(
-        dead_code,
-        reason = "consumed by the arithmetic-write fast path added in PR 2"
-    )]
-    pub first_varlen_idx: Option<usize>,
     pub columns: Vec<ArrayRef>,
 }
 
@@ -94,40 +55,34 @@ pub(crate) struct SizePassResult {
 ///
 /// This is shared by [`RowSize::execute`] (which wraps the result into a
 /// `Struct { fixed, var }`) and the [`RowEncode`](super::encode::RowEncode) pipeline
-/// (which uses the full result, including `col_kinds`, to drive the encode pass).
+/// (which reuses the collected input columns for the encode pass).
 pub(crate) fn compute_sizes(
-    options: &RowEncodeOptions,
+    options: &RowEncodingOptions,
     args: &dyn ExecutionArgs,
     ctx: &mut ExecutionCtx,
-    op_name: &'static str,
 ) -> VortexResult<SizePassResult> {
     let n_inputs = args.num_inputs();
     if n_inputs == 0 {
-        vortex_bail!("{} requires at least one input column", op_name);
+        vortex_bail!("at least one input column is required");
     }
-    if options.fields.len() != n_inputs {
+    if options.len() != n_inputs {
         vortex_bail!(
-            "{} options.fields.len()={} does not match num_inputs={}",
-            op_name,
-            options.fields.len(),
+            "options len ({}) does not match num_inputs ({})",
+            options.len(),
             n_inputs
         );
     }
     let nrows = args.row_count();
 
     let mut columns: Vec<ArrayRef> = Vec::with_capacity(n_inputs);
-    let mut col_kinds: Vec<ColKind> = Vec::with_capacity(n_inputs);
     let mut fixed_per_row: u32 = 0;
     let mut var_lengths: Option<Vec<u32>> = None;
-    let mut first_varlen_idx: Option<usize> = None;
-    let mut running_fixed_prefix: u32 = 0;
 
     for i in 0..n_inputs {
         let col = args.get(i)?;
         if col.len() != nrows {
             vortex_bail!(
-                "{}: column {} has length {} but expected {}",
-                op_name,
+                "column {} has length {} but expected {}",
                 i,
                 col.len(),
                 nrows
@@ -135,27 +90,13 @@ pub(crate) fn compute_sizes(
         }
         match codec::row_width_for_dtype(col.dtype())? {
             RowWidth::Fixed(w) => {
-                col_kinds.push(ColKind::Fixed {
-                    width: w,
-                    prefix: running_fixed_prefix,
-                    before_varlen: first_varlen_idx.is_none(),
-                });
                 fixed_per_row = fixed_per_row
                     .checked_add(w)
                     .vortex_expect("row width overflow");
-                running_fixed_prefix = running_fixed_prefix
-                    .checked_add(w)
-                    .vortex_expect("row width overflow");
             }
             RowWidth::Variable => {
-                if first_varlen_idx.is_none() {
-                    first_varlen_idx = Some(i);
-                }
                 let v = var_lengths.get_or_insert_with(|| vec![0u32; nrows]);
                 dispatch_size(&col, options.fields[i], v, ctx)?;
-                col_kinds.push(ColKind::Variable {
-                    fixed_prefix: running_fixed_prefix,
-                });
             }
         }
         columns.push(col);
@@ -164,13 +105,11 @@ pub(crate) fn compute_sizes(
     Ok(SizePassResult {
         fixed_per_row,
         var_lengths,
-        col_kinds,
-        first_varlen_idx,
         columns,
     })
 }
 
-/// Variadic scalar function that, given N input columns and per-column [`SortField`]s,
+/// Variadic scalar function that, given N input columns and per-column [`RowSortField`]s,
 /// returns a `Struct { fixed: U32, var: U32 }` array of per-row byte sizes for the
 /// row-oriented encoding produced by [`RowEncode`](super::encode::RowEncode).
 ///
@@ -180,6 +119,10 @@ pub(crate) fn compute_sizes(
 /// [`PrimitiveArray<u32>`] of per-row varlen-byte sums otherwise.
 ///
 /// The total per-row byte size is `fixed + var`.
+///
+/// This scalar function is public for session registration and encoding extension work.
+/// Most callers should use [`RowEncoder::row_sizes`](crate::RowEncoder::row_sizes) rather
+/// than invoking the scalar function directly.
 #[derive(Clone, Debug)]
 pub struct RowSize;
 
@@ -203,14 +146,14 @@ pub(crate) fn row_size_struct_dtype() -> DType {
 }
 
 impl ScalarFnVTable for RowSize {
-    type Options = RowEncodeOptions;
+    type Options = RowEncodingOptions;
 
     fn id(&self) -> ScalarFnId {
         ScalarFnId::from("vortex.row_size")
     }
 
     fn serialize(&self, options: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
-        Ok(Some(serialize_row_encode_options(options)))
+        Ok(Some(serialize_row_encoding_options(options)))
     }
 
     fn deserialize(
@@ -218,7 +161,7 @@ impl ScalarFnVTable for RowSize {
         metadata: &[u8],
         _session: &VortexSession,
     ) -> VortexResult<Self::Options> {
-        deserialize_row_encode_options(metadata)
+        deserialize_row_encoding_options(metadata)
     }
 
     fn arity(&self, _options: &Self::Options) -> Arity {
@@ -240,7 +183,7 @@ impl ScalarFnVTable for RowSize {
         ctx: &mut ExecutionCtx,
     ) -> VortexResult<ArrayRef> {
         let nrows = args.row_count();
-        let result = compute_sizes(options, args, ctx, "RowSize")?;
+        let result = compute_sizes(options, args, ctx)?;
         let fixed_array =
             ConstantArray::new(Scalar::from(result.fixed_per_row), nrows).into_array();
         let var_array = match result.var_lengths {
@@ -266,31 +209,16 @@ impl ScalarFnVTable for RowSize {
     }
 }
 
-/// Dispatch a single column's per-row size contribution.
+/// Dispatch a single column's per-row size contribution through the canonical path.
 ///
-/// For PR 1 this is just the canonicalize-then-`codec::field_size` fallback path. In-crate
-/// fast paths for `Constant`/`Dict`/`Patched` and the inventory-based registry for
-/// downstream encodings are added in PR 3.
-pub fn dispatch_size(
+/// TODO(row): add per-encoding fast paths here so Constant, Dictionary, and compressed arrays
+/// can contribute row sizes without canonicalizing.
+pub(crate) fn dispatch_size(
     col: &ArrayRef,
-    field: SortField,
+    field: RowSortField,
     sizes: &mut [u32],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
     let canonical = col.clone().execute::<Canonical>(ctx)?;
     codec::field_size(&canonical, field, sizes, ctx)
 }
-
-/// Mutate-buffer kernel: add this column's per-row byte contribution into the shared
-/// `sizes` slice. Return `Ok(None)` to decline and fall back to the canonical path.
-///
-/// Trait is defined now; per-encoding impls and dispatch wiring land in PR 3.
-pub trait RowSizeKernel: VTable {
-    /// Add this column's per-row byte contribution into `sizes`.
-    fn row_size_contribution(
-        column: ArrayView<'_, Self>,
-        field: SortField,
-        sizes: &mut [u32],
-        ctx: &mut ExecutionCtx,
-    ) -> VortexResult<Option<()>>;
-}
diff --git a/vortex-row/src/tests.rs b/vortex-row/src/tests.rs
index ff7d8fb274a..a1eaadf3803 100644
--- a/vortex-row/src/tests.rs
+++ b/vortex-row/src/tests.rs
@@ -1,15 +1,10 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-#![allow(
-    clippy::approx_constant,
-    clippy::cloned_ref_to_slice_refs,
-    clippy::redundant_clone,
-    reason = "tests value clarity over micro-optimization"
-)]
-
 //! Tests for the row encoder.
 
+use std::f64::consts::PI;
+
 use rstest::rstest;
 use vortex_array::IntoArray;
 use vortex_array::LEGACY_SESSION;
@@ -21,8 +16,12 @@ use vortex_array::arrays::VarBinViewArray;
 use vortex_array::arrays::listview::ListViewArrayExt;
 use vortex_error::VortexResult;
 
-use crate::SortField;
+use crate::RowEncoder;
+use crate::RowEncodingOptions;
+use crate::RowSortField;
+use crate::compute_row_sizes_with_options;
 use crate::convert_columns;
+use crate::convert_columns_with_options;
 
 fn collect_row_bytes(array: &ListViewArray) -> Vec<Vec<u8>> {
     let mut ctx = LEGACY_SESSION.create_execution_ctx();
@@ -41,10 +40,7 @@ fn collect_row_bytes(array: &ListViewArray) -> Vec<Vec<u8>> {
 fn assert_sort_order_i64(values: Vec<i64>, descending: bool) -> VortexResult<()> {
     let mut ctx = LEGACY_SESSION.create_execution_ctx();
     let col = PrimitiveArray::from_iter(values.clone()).into_array();
-    let field = SortField {
-        descending,
-        nulls_first: true,
-    };
+    let field = RowSortField::new(descending, true);
     let encoded = convert_columns(&[col], &[field], &mut ctx)?;
     let rows = collect_row_bytes(&encoded);
 
@@ -57,7 +53,7 @@ fn assert_sort_order_i64(values: Vec<i64>, descending: bool) -> VortexResult<()>
     }
     let expected_order: Vec<Vec<u8>> = idx.iter().map(|&i| rows[i].clone()).collect();
 
-    let mut sorted = rows.clone();
+    let mut sorted = rows;
     sorted.sort();
     assert_eq!(
         sorted, expected_order,
@@ -79,7 +75,7 @@ fn primitive_u32_sort_order() -> VortexResult<()> {
     let mut ctx = LEGACY_SESSION.create_execution_ctx();
     let values: Vec<u32> = vec![0, 1, 100, u32::MAX, 42, 17];
     let col = PrimitiveArray::from_iter(values.clone()).into_array();
-    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let encoded = convert_columns(&[col], &[RowSortField::default()], &mut ctx)?;
     let rows = collect_row_bytes(&encoded);
 
     let mut sorted_rows = rows.clone();
@@ -98,9 +94,9 @@ fn primitive_f64_sort_order() -> VortexResult<()> {
     // We use IEEE total-ordering semantics: -0.0 < +0.0 in the byte encoding (matches
     // `arrow-row`). Avoid -0.0 in the natural-order baseline since partial_cmp says
     // -0.0 == 0.0.
-    let values: Vec<f64> = vec![-1.5, 0.0, 1.5, f64::INFINITY, f64::NEG_INFINITY, 3.14];
+    let values: Vec<f64> = vec![-1.5, 0.0, 1.5, f64::INFINITY, f64::NEG_INFINITY, PI];
     let col = PrimitiveArray::from_iter(values.clone()).into_array();
-    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let encoded = convert_columns(&[col], &[RowSortField::default()], &mut ctx)?;
     let rows = collect_row_bytes(&encoded);
 
     let mut sorted_rows = rows.clone();
@@ -117,7 +113,7 @@ fn primitive_f64_sort_order() -> VortexResult<()> {
 fn bool_sort_order() -> VortexResult<()> {
     let mut ctx = LEGACY_SESSION.create_execution_ctx();
     let col = BoolArray::from_iter([true, false, true, false]).into_array();
-    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let encoded = convert_columns(&[col], &[RowSortField::default()], &mut ctx)?;
     let rows = collect_row_bytes(&encoded);
 
     let mut sorted = rows.clone();
@@ -142,7 +138,7 @@ fn utf8_sort_order() -> VortexResult<()> {
         "banana_loaf_for_test",
     ];
     let col = VarBinViewArray::from_iter_str(values.clone()).into_array();
-    let encoded = convert_columns(&[col], &[SortField::default()], &mut ctx)?;
+    let encoded = convert_columns(&[col], &[RowSortField::default()], &mut ctx)?;
     let rows = collect_row_bytes(&encoded);
 
     let mut sorted = rows.clone();
@@ -164,7 +160,7 @@ fn multi_column_sort() -> VortexResult<()> {
     let col1 = VarBinViewArray::from_iter_str(strs.clone()).into_array();
     let encoded = convert_columns(
         &[col0, col1],
-        &[SortField::default(), SortField::default()],
+        &[RowSortField::default(), RowSortField::default()],
         &mut ctx,
     )?;
     let rows = collect_row_bytes(&encoded);
@@ -186,15 +182,12 @@ fn nulls_first_and_last() -> VortexResult<()> {
 
     // nulls_first=true
     let encoded = convert_columns(
-        &[col.clone()],
-        &[SortField {
-            descending: false,
-            nulls_first: true,
-        }],
+        std::slice::from_ref(&col),
+        &[RowSortField::ascending()],
         &mut ctx,
     )?;
     let rows = collect_row_bytes(&encoded);
-    let mut sorted = rows.clone();
+    let mut sorted = rows;
     sorted.sort();
     // The first two sorted entries should be nulls
     let null_count = values.iter().filter(|v| v.is_none()).count();
@@ -203,16 +196,9 @@ fn nulls_first_and_last() -> VortexResult<()> {
         assert_eq!(sorted[i][0], 0x00);
     }
     // nulls_first=false
-    let encoded = convert_columns(
-        &[col],
-        &[SortField {
-            descending: false,
-            nulls_first: false,
-        }],
-        &mut ctx,
-    )?;
+    let encoded = convert_columns(&[col], &[RowSortField::ascending().nulls_last()], &mut ctx)?;
     let rows = collect_row_bytes(&encoded);
-    let mut sorted = rows.clone();
+    let mut sorted = rows;
     sorted.sort();
     // The last two sorted entries should be nulls
     for i in 0..null_count {
@@ -222,6 +208,60 @@ fn nulls_first_and_last() -> VortexResult<()> {
     Ok(())
 }
 
+#[test]
+fn reusable_options_helpers() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let options = RowEncodingOptions::new([RowSortField::descending().nulls_last()]);
+    assert_eq!(options.len(), 1);
+    assert!(!options.is_empty());
+    assert_eq!(
+        options.fields(),
+        &[RowSortField {
+            descending: true,
+            nulls_first: false
+        }]
+    );
+
+    let col = PrimitiveArray::from_iter([1i32, 2, 3]).into_array();
+    let encoder = RowEncoder::with_options(options.clone());
+    assert_eq!(encoder.options(), Some(&options));
+
+    let encoded = encoder.encode(std::slice::from_ref(&col), &mut ctx)?;
+    assert_eq!(encoded.len(), 3);
+
+    let sizes = encoder.row_sizes(std::slice::from_ref(&col), &mut ctx)?;
+    assert_eq!(sizes.len(), 3);
+
+    let encoded = convert_columns_with_options(std::slice::from_ref(&col), &options, &mut ctx)?;
+    assert_eq!(encoded.len(), 3);
+
+    let sizes = compute_row_sizes_with_options(std::slice::from_ref(&col), &options, &mut ctx)?;
+    assert_eq!(sizes.len(), 3);
+    Ok(())
+}
+
+#[test]
+fn row_encoder_new_accepts_sort_fields() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let encoder = RowEncoder::new([RowSortField::ascending()]);
+    let col = PrimitiveArray::from_iter([1i32, 2, 3]).into_array();
+
+    let encoded = encoder.encode(std::slice::from_ref(&col), &mut ctx)?;
+    assert_eq!(encoded.len(), 3);
+    Ok(())
+}
+
+#[test]
+fn default_row_encoder_uses_default_fields() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let col0 = PrimitiveArray::from_iter([1i32, 2, 3]).into_array();
+    let col1 = PrimitiveArray::from_iter([4i32, 5, 6]).into_array();
+
+    let encoded = RowEncoder::default().encode(&[col0, col1], &mut ctx)?;
+    assert_eq!(encoded.len(), 3);
+    Ok(())
+}
+
 #[test]
 fn struct_sort_order() -> VortexResult<()> {
     use vortex_array::arrays::StructArray;
@@ -232,7 +272,7 @@ fn struct_sort_order() -> VortexResult<()> {
     let name_arr = VarBinViewArray::from_iter_str(names.clone()).into_array();
     let struct_arr = StructArray::from_fields(&[("id", id_arr), ("name", name_arr)])?.into_array();
 
-    let encoded = convert_columns(&[struct_arr], &[SortField::default()], &mut ctx)?;
+    let encoded = convert_columns(&[struct_arr], &[RowSortField::default()], &mut ctx)?;
     let rows = collect_row_bytes(&encoded);
 
     let mut sorted = rows.clone();
@@ -260,7 +300,7 @@ fn row_size_struct_shape() -> VortexResult<()> {
 
     let sizes = compute_row_sizes(
         &[col0, col1],
-        &[SortField::default(), SortField::default()],
+        &[RowSortField::default(), RowSortField::default()],
         &mut ctx,
     )?;
     // Shape must be Struct { fixed, var }
@@ -299,11 +339,11 @@ fn single_buffer_invariant() -> VortexResult<()> {
     let strings: Vec<String> = (0..nrows)
         .map(|i| format!("row_{}_with_padding", i))
         .collect();
-    let col0 = PrimitiveArray::from_iter(primitives.clone()).into_array();
+    let col0 = PrimitiveArray::from_iter(primitives).into_array();
     let col1 = VarBinViewArray::from_iter_str(strings.iter().map(String::as_str)).into_array();
     let encoded = convert_columns(
         &[col0, col1],
-        &[SortField::default(), SortField::default()],
+        &[RowSortField::default(), RowSortField::default()],
         &mut ctx,
     )?;
 

From 48f59cef9066fd7771e4b8f6c2f6b51fc6d1fb39 Mon Sep 17 00:00:00 2001
From: Joe Isaacs <joe.isaacs@live.co.uk>
Date: Fri, 22 May 2026 15:49:35 +0100
Subject: [PATCH 10/10] vortex-row: distinct varlen sentinels and canonical null bodies

Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
---
 vortex-row/src/codec.rs  | 485 +++++++++++++++++++++++++++++----------
 vortex-row/src/encode.rs |  87 +++----
 vortex-row/src/size.rs   |  42 ++--
 vortex-row/src/tests.rs  | 215 ++++++++++++++++-
 4 files changed, 626 insertions(+), 203 deletions(-)

diff --git a/vortex-row/src/codec.rs b/vortex-row/src/codec.rs
index 33270b0ad43..2818db62aba 100644
--- a/vortex-row/src/codec.rs
+++ b/vortex-row/src/codec.rs
@@ -9,12 +9,20 @@
 //! descending-ness as configured by [`RowSortField`].
 //!
 //! Conventions:
-//! - Every value is preceded by a 1-byte sentinel that orders nulls relative to non-nulls.
-//! - For `descending`, only the **value** bytes are bit-inverted (XOR with 0xFF), not the
-//!   sentinel.
+//! - Every fixed-width value is preceded by a 1-byte sentinel that orders nulls relative to
+//!   non-nulls. For `descending`, only the **value** bytes are bit-inverted (XOR with 0xFF),
+//!   not the sentinel.
+//! - Variable-length (Utf8, Binary) values use **three** distinct leading sentinels — one each
+//!   for null, empty, and non-empty — so byte comparison at position 0 fully categorizes the
+//!   value and column-byte boundaries stay aligned across rows. See
+//!   [`varlen_null_sentinel`], [`varlen_empty_sentinel`], [`varlen_non_empty_sentinel`].
 //! - Fixed-width integers are big-endian, with the sign bit flipped for signed types.
 //! - Floats are bit-pattern big-endian with sign-aware mask: non-negative flips the top
 //!   bit; negative flips all bits.
+//! - Nullable structs and fixed-size lists encode null parent rows with a **canonical null
+//!   body** so two null parent rows produce byte-equal encodings: fixed-width children
+//!   contribute their fixed null encoding, and variable-width children collapse to a single
+//!   null sentinel byte.
 
 use vortex_array::Canonical;
 use vortex_array::ExecutionCtx;
@@ -50,17 +58,23 @@ pub(crate) const VARLEN_BLOCK_SIZE: usize = 32;
 pub(crate) const VARLEN_BLOCK_TOTAL: usize = VARLEN_BLOCK_SIZE + 1;
 const VARLEN_BLOCK_TOTAL_U32: u32 = 33;
 
-/// Returns the size in bytes of the encoded form of a variable-length value of the given length.
+/// Size in bytes of an encoded null varlen value (just the sentinel byte).
+pub(crate) const VARLEN_NULL_SIZE: u32 = 1;
+/// Size in bytes of an encoded empty varlen value (just the sentinel byte).
+pub(crate) const VARLEN_EMPTY_SIZE: u32 = 1;
+
+/// Returns the size in bytes of the encoded form of a non-empty variable-length value.
+///
+/// Includes the leading sentinel byte plus `ceil(len/32) * 33` block bytes (32 content + 1
+/// continuation/length byte). Callers must use [`VARLEN_NULL_SIZE`] for null values and
+/// [`VARLEN_EMPTY_SIZE`] for empty values. Panics if the total exceeds `u32::MAX`: a `u32`
+/// length allows up to `2^27` blocks and `2^27 * 33 > u32::MAX`, so the sum is range-checked.
 #[inline]
-fn encoded_size_for_varlen(len: usize) -> u32 {
-    // 1 sentinel + ceil(len/32)*33 content bytes (or 1 zero terminator if empty)
-    if len == 0 {
-        1 + 1
-    } else {
-        let blocks = u32::try_from(len.div_ceil(VARLEN_BLOCK_SIZE))
-            .vortex_expect("varlen block count must fit in u32");
-        1 + blocks * VARLEN_BLOCK_TOTAL_U32
-    }
+fn encoded_size_for_non_empty_varlen(len: usize) -> u32 {
+    debug_assert!(len > 0);
+    let blocks = len.div_ceil(VARLEN_BLOCK_SIZE) as u128; // widened so `* 33 + 1` cannot wrap
+    let size = 1 + blocks * u128::from(VARLEN_BLOCK_TOTAL_U32);
+    u32::try_from(size).vortex_expect("varlen size overflow")
 }
 
 /// Constant per-row size in bytes for fixed-width encodings (including 1-byte sentinel).
@@ -73,6 +87,43 @@ fn byte_width_u32(width: usize) -> u32 {
     u32::try_from(width).vortex_expect("native byte width must fit in u32")
 }
 
+/// Returns the sentinel byte for a null varlen value.
+///
+/// The choice is positional (0x00 when nulls sort first, 0xFF when nulls sort last) and
+/// independent of `descending`, matching the convention used by `arrow-row`.
+#[inline]
+fn varlen_null_sentinel(field: RowSortField) -> u8 {
+    if field.nulls_first { 0x00 } else { 0xFF }
+}
+
+/// Returns the sentinel byte for an empty varlen value.
+///
+/// Equal to `0x01` in ascending mode and `!0x01 = 0xFE` in descending mode.
+#[inline]
+fn varlen_empty_sentinel(field: RowSortField) -> u8 {
+    if field.descending { !0x01u8 } else { 0x01u8 }
+}
+
+/// Returns the sentinel byte for a non-empty varlen value.
+///
+/// Equal to `0x02` in ascending mode and `!0x02 = 0xFD` in descending mode.
+#[inline]
+fn varlen_non_empty_sentinel(field: RowSortField) -> u8 {
+    if field.descending { !0x02u8 } else { 0x02u8 }
+}
+
+/// Returns the single-byte null sentinel used when a child contributes its canonical null
+/// encoding inside a null parent struct/FSL row.
+///
+/// For varlen children that is the varlen null sentinel; for everything else (including
+/// nested struct/FSL when used as a variable-width child) it is the fixed-width null sentinel.
+fn child_canonical_null_byte(child_dtype: &DType, field: RowSortField) -> u8 {
+    match child_dtype {
+        DType::Utf8(_) | DType::Binary(_) => varlen_null_sentinel(field),
+        _ => field.null_sentinel(),
+    }
+}
+
 /// Per-row width classification for a column.
 ///
 /// `Fixed(w)` means every row encodes to exactly `w` bytes (sentinel + value), regardless
@@ -97,7 +148,8 @@ pub(crate) enum RowWidth {
 ///
 /// # Errors
 ///
-/// Returns an error for dtypes that the row encoder does not support.
+/// Returns an error for dtypes that the row encoder does not support. Width arithmetic that
+/// would overflow `u32` is also reported as an error rather than silently saturating.
 pub(crate) fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
     match dtype {
         DType::Null => Ok(RowWidth::Fixed(1)),
@@ -107,6 +159,9 @@ pub(crate) fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
         )))),
         DType::Decimal(dt, _) => {
             let vt = DecimalType::smallest_decimal_value_type(dt);
+            if matches!(vt, DecimalType::I256) {
+                vortex_bail!("row encoding for Decimal256 is not yet implemented");
+            }
             Ok(RowWidth::Fixed(encoded_size_for_fixed(byte_width_u32(
                 vt.byte_width(),
             ))))
@@ -116,8 +171,13 @@ pub(crate) fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
             // FSL is fixed iff its element type is fixed. Add a sentinel byte for the FSL
             // itself, then `n` copies of the element width.
             RowWidth::Fixed(w) => {
-                let body = w.saturating_mul(*n);
-                Ok(RowWidth::Fixed(body.saturating_add(1)))
+                let body = w
+                    .checked_mul(*n)
+                    .ok_or_else(|| vortex_error::vortex_err!("FSL row width overflows u32"))?;
+                let total = body
+                    .checked_add(1)
+                    .ok_or_else(|| vortex_error::vortex_err!("FSL row width overflows u32"))?;
+                Ok(RowWidth::Fixed(total))
             }
             RowWidth::Variable => Ok(RowWidth::Variable),
         },
@@ -126,13 +186,21 @@ pub(crate) fn row_width_for_dtype(dtype: &DType) -> VortexResult<RowWidth> {
             let mut total: u32 = 1; // outer sentinel
             for field_dtype in fields.fields() {
                 match row_width_for_dtype(&field_dtype)? {
-                    RowWidth::Fixed(w) => total = total.saturating_add(w),
+                    RowWidth::Fixed(w) => {
+                        total = total.checked_add(w).ok_or_else(|| {
+                            vortex_error::vortex_err!("Struct row width overflows u32")
+                        })?;
+                    }
                     RowWidth::Variable => return Ok(RowWidth::Variable),
                 }
             }
             Ok(RowWidth::Fixed(total))
         }
-        DType::List(..) => Ok(RowWidth::Variable),
+        DType::List(..) => {
+            vortex_bail!(
+                "row encoding does not support variable-size List arrays (no well-defined ordering)"
+            )
+        }
         DType::Extension(ext) => row_width_for_dtype(ext.storage_dtype()),
         DType::Variant(_) => {
             vortex_bail!("row encoding does not support Variant arrays (no well-defined ordering)")
@@ -241,13 +309,16 @@ fn add_size_varbinview(
     let mask = arr.as_ref().validity()?.execute_mask(arr.len(), ctx)?;
     let views = arr.views();
     for (i, view) in views.iter().enumerate() {
-        let valid = mask.value(i);
-        if !valid {
-            sizes[i] += 1; // sentinel only
+        let contribution = if !mask.value(i) {
+            VARLEN_NULL_SIZE
+        } else if view.is_empty() {
+            VARLEN_EMPTY_SIZE
         } else {
-            let len = view.len() as usize;
-            sizes[i] += encoded_size_for_varlen(len);
-        }
+            encoded_size_for_non_empty_varlen(view.len() as usize)
+        };
+        sizes[i] = sizes[i]
+            .checked_add(contribution)
+            .vortex_expect("per-row size overflow");
     }
     Ok(())
 }
@@ -258,14 +329,31 @@ fn add_size_struct(
     sizes: &mut [u32],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
-    // null sentinel: 1 byte per row.
+    let n = arr.len();
+    let mask = arr.as_ref().validity()?.execute_mask(n, ctx)?;
+    // Outer sentinel: 1 byte per row.
     for s in sizes.iter_mut() {
-        *s += 1;
+        *s = s.checked_add(1).vortex_expect("per-row size overflow");
     }
-    // Each field adds its own per-row size.
+    // Each child contributes its per-row size when the parent is non-null, and a canonical
+    // null contribution when the parent is null. For fixed-width children both are equal,
+    // so we can simply add the fixed width to every row. For variable-width children the
+    // null contribution collapses to 1 byte, ensuring null parent rows have a constant body.
     for child in arr.iter_unmasked_fields() {
-        let canonical = child.clone().execute::<Canonical>(ctx)?;
-        field_size(&canonical, field, sizes, ctx)?;
+        match row_width_for_dtype(child.dtype())? {
+            RowWidth::Fixed(w) => add_size_const(sizes, w),
+            RowWidth::Variable => {
+                let canonical = child.clone().execute::<Canonical>(ctx)?;
+                let mut child_sizes = vec![0u32; n];
+                field_size(&canonical, field, &mut child_sizes, ctx)?;
+                for i in 0..n {
+                    let contribution = if mask.value(i) { child_sizes[i] } else { 1u32 };
+                    sizes[i] = sizes[i]
+                        .checked_add(contribution)
+                        .vortex_expect("per-row size overflow");
+                }
+            }
+        }
     }
     Ok(())
 }
@@ -279,19 +367,45 @@ fn add_size_fsl(
     let n = arr.len();
     debug_assert_eq!(n, sizes.len());
     let list_size = arr.list_size() as usize;
-    let elements = arr.elements().clone().execute::<Canonical>(ctx)?;
-    debug_assert_eq!(elements.len(), n * list_size);
-    // Sizing: 1 sentinel + sum of element sizes (`list_size` per row).
-    // We compute element-wise sizes into a contiguous scratch buffer then reduce by row.
-    let mut elem_sizes = vec![0u32; n * list_size];
-    field_size(&elements, field, &mut elem_sizes, ctx)?;
-    for i in 0..n {
-        let mut sum: u32 = 1; // sentinel
-        let base = i * list_size;
-        for j in 0..list_size {
-            sum = sum.saturating_add(elem_sizes[base + j]);
+    let mask = arr.as_ref().validity()?.execute_mask(n, ctx)?;
+    let elem_dtype = arr.elements().dtype();
+    // Outer sentinel: 1 byte per row.
+    for s in sizes.iter_mut() {
+        *s = s.checked_add(1).vortex_expect("per-row size overflow");
+    }
+    match row_width_for_dtype(elem_dtype)? {
+        RowWidth::Fixed(w) => {
+            // Each row has `list_size` fixed-width elements regardless of null parent mask.
+            let body = w
+                .checked_mul(u32::try_from(list_size).vortex_expect("list_size fits u32"))
+                .vortex_expect("FSL body width overflow");
+            add_size_const(sizes, body);
+        }
+        RowWidth::Variable => {
+            let elements = arr.elements().clone().execute::<Canonical>(ctx)?;
+            debug_assert_eq!(elements.len(), n * list_size);
+            let mut elem_sizes = vec![0u32; n * list_size];
+            field_size(&elements, field, &mut elem_sizes, ctx)?;
+            for i in 0..n {
+                let body: u32 = if mask.value(i) {
+                    let base = i * list_size;
+                    let mut sum: u32 = 0;
+                    for j in 0..list_size {
+                        sum = sum
+                            .checked_add(elem_sizes[base + j])
+                            .vortex_expect("FSL row body overflow");
+                    }
+                    sum
+                } else {
+                    // Canonical null body for FSL with variable element: one null sentinel
+                    // per element. (Each element contributes `child_null_width = 1`.)
+                    u32::try_from(list_size).vortex_expect("list_size fits u32")
+                };
+                sizes[i] = sizes[i]
+                    .checked_add(body)
+                    .vortex_expect("FSL per-row size overflow");
+            }
         }
-        sizes[i] += sum;
     }
     Ok(())
 }
@@ -462,24 +576,33 @@ fn encode_varbinview(
     row_offsets: &[u32],
     col_offset: &mut [u32],
     out: &mut [u8],
-    ctx: &mut ExecutionCtx,
+    _ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
-    let mask = arr.as_ref().validity()?.execute_mask(arr.len(), ctx)?;
-    let non_null = field.non_null_sentinel();
-    let null = field.null_sentinel();
+    let null_byte = varlen_null_sentinel(field);
+    let empty_byte = varlen_empty_sentinel(field);
+    let non_empty_byte = varlen_non_empty_sentinel(field);
 
+    // `with_iterator` yields `Some(bytes)` for non-null rows and `None` for null rows,
+    // so the iterator alone fully describes validity — no separate mask lookup needed.
     arr.with_iterator(|iter| {
         for (i, maybe) in iter.enumerate() {
             let pos = (row_offsets[i] + col_offset[i]) as usize;
-            if !mask.value(i) {
-                out[pos] = null;
-                col_offset[i] += 1;
-                continue;
+            match maybe {
+                None => {
+                    out[pos] = null_byte;
+                    col_offset[i] += VARLEN_NULL_SIZE;
+                }
+                Some([]) => {
+                    out[pos] = empty_byte;
+                    col_offset[i] += VARLEN_EMPTY_SIZE;
+                }
+                Some(bytes) => {
+                    out[pos] = non_empty_byte;
+                    let written =
+                        encode_non_empty_varlen_body(bytes, &mut out[pos + 1..], field.descending);
+                    col_offset[i] += 1 + written;
+                }
             }
-            let bytes: &[u8] = maybe.unwrap_or(&[]);
-            out[pos] = non_null;
-            let written = encode_varlen_value(bytes, &mut out[pos + 1..], field.descending);
-            col_offset[i] += 1 + written;
         }
     });
     Ok(())
@@ -498,37 +621,37 @@ fn encode_struct(
     let non_null = field.non_null_sentinel();
     let null = field.null_sentinel();
 
-    // First, write the sentinel for each row. We track the post-sentinel cursor offsets
-    // for the body in `body_cursors` (which start exactly at +1 of the input cursor).
-    // For null rows we additionally need to zero-fill the (uniform-width) field bytes,
-    // but because struct widths are variable in general, we record null indexes first
-    // and zero-fill after we know each row's contribution.
-    //
-    // To keep the implementation simple we:
-    //   1) advance the cursor past the sentinel,
-    //   2) recursively encode each field's bytes (the field encoders ignore nullness of
-    //      the struct, but use their own per-field nullness),
-    //   3) for null struct rows, overwrite the body bytes with zeros so the encoded form
-    //      depends only on the sentinel.
-    let body_start: Vec<u32> = (0..n).map(|i| col_offset[i] + 1).collect();
+    // Write the outer sentinel for each row.
     for i in 0..n {
         let pos = (row_offsets[i] + col_offset[i]) as usize;
         out[pos] = if mask.value(i) { non_null } else { null };
         col_offset[i] += 1;
     }
 
+    // Encode each child. For non-null parent rows the child contributes its actual encoding;
+    // for null parent rows the child contributes its canonical null encoding so that two null
+    // parent rows produce byte-equal output regardless of underlying child values.
     for child in arr.iter_unmasked_fields() {
-        let canonical = child.clone().execute::<Canonical>(ctx)?;
-        field_encode(&canonical, field, row_offsets, col_offset, out, ctx)?;
-    }
-
-    // Zero-fill body bytes of null rows (the field encoders may have written values).
-    for i in 0..n {
-        if !mask.value(i) {
-            let start = (row_offsets[i] + body_start[i]) as usize;
-            let end = (row_offsets[i] + col_offset[i]) as usize;
-            for b in &mut out[start..end] {
-                *b = 0;
+        match row_width_for_dtype(child.dtype())? {
+            RowWidth::Fixed(w) => {
+                let canonical = child.clone().execute::<Canonical>(ctx)?;
+                field_encode(&canonical, field, row_offsets, col_offset, out, ctx)?;
+                // Replace null parent rows with the canonical null encoding (the same as a
+                // child-level null: null sentinel followed by zero-padded value bytes).
+                let null_byte = child_canonical_null_byte(child.dtype(), field);
+                for i in 0..n {
+                    if !mask.value(i) {
+                        let end = (row_offsets[i] + col_offset[i]) as usize;
+                        let start = end - w as usize;
+                        out[start] = null_byte;
+                        for b in &mut out[start + 1..end] {
+                            *b = 0;
+                        }
+                    }
+                }
+            }
+            RowWidth::Variable => {
+                encode_variable_child(child, field, &mask, row_offsets, col_offset, out, ctx)?;
             }
         }
     }
@@ -544,58 +667,181 @@ fn encode_fsl(
     out: &mut [u8],
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<()> {
-    let n = arr.len();
+    let nrows = arr.len();
     let list_size = arr.list_size() as usize;
-    let mask = arr.as_ref().validity()?.execute_mask(n, ctx)?;
+    let mask = arr.as_ref().validity()?.execute_mask(nrows, ctx)?;
     let non_null = field.non_null_sentinel();
     let null = field.null_sentinel();
-    let elements = arr.elements().clone().execute::<Canonical>(ctx)?;
-    debug_assert_eq!(elements.len(), n * list_size);
+    let elem_dtype = arr.elements().dtype().clone();
 
-    // Write sentinels and remember body start for null zero-fill.
-    let body_start: Vec<u32> = (0..n).map(|i| col_offset[i] + 1).collect();
-    for i in 0..n {
+    // Outer sentinel.
+    for i in 0..nrows {
         let pos = (row_offsets[i] + col_offset[i]) as usize;
         out[pos] = if mask.value(i) { non_null } else { null };
         col_offset[i] += 1;
     }
 
-    // Encode all `n * list_size` elements into the body. Build a fresh
-    // (offsets, cursors) pair where each element gets one slot. Then sum bytes back
-    // into the parent col_offset.
-    let mut elem_sizes = vec![0u32; n * list_size];
-    field_size(&elements, field, &mut elem_sizes, ctx)?;
-    // Element offsets are sequential starting at each parent's current cursor position.
-    let mut elem_offsets = vec![0u32; n * list_size];
-    for i in 0..n {
-        let mut acc = row_offsets[i] + col_offset[i];
-        for j in 0..list_size {
-            elem_offsets[i * list_size + j] = acc;
-            acc = acc.saturating_add(elem_sizes[i * list_size + j]);
+    match row_width_for_dtype(&elem_dtype)? {
+        RowWidth::Fixed(w) => {
+            // Fixed-width elements: encode the elements array directly (its length is
+            // nrows * list_size) using a derived (offsets, cursors) pair. Then overwrite
+            // the body of null parent rows with the canonical null encoding per element.
+            let elements = arr.elements().clone().execute::<Canonical>(ctx)?;
+            debug_assert_eq!(elements.len(), nrows * list_size);
+            let list_size_u32 = arr.list_size();
+            let row_body_bytes = w
+                .checked_mul(list_size_u32)
+                .vortex_expect("FSL body width overflow");
+            let mut elem_offsets = vec![0u32; nrows * list_size];
+            for i in 0..nrows {
+                let base = row_offsets[i] + col_offset[i];
+                for j in 0u32..list_size_u32 {
+                    elem_offsets[i * list_size + j as usize] = base + j * w;
+                }
+            }
+            let mut elem_cursors = vec![0u32; nrows * list_size];
+            field_encode(&elements, field, &elem_offsets, &mut elem_cursors, out, ctx)?;
+            for i in 0..nrows {
+                col_offset[i] = col_offset[i]
+                    .checked_add(row_body_bytes)
+                    .vortex_expect("FSL row body overflow");
+            }
+            // Canonical null body for null parent rows: one null encoding per element.
+            let null_byte = child_canonical_null_byte(&elem_dtype, field);
+            let elem_width = w as usize;
+            for i in 0..nrows {
+                if !mask.value(i) {
+                    let end = (row_offsets[i] + col_offset[i]) as usize;
+                    let start = end - row_body_bytes as usize;
+                    let mut pos = start;
+                    for _ in 0..list_size {
+                        out[pos] = null_byte;
+                        for b in &mut out[pos + 1..pos + elem_width] {
+                            *b = 0;
+                        }
+                        pos += elem_width;
+                    }
+                }
+            }
         }
-    }
-    let mut elem_cursors = vec![0u32; n * list_size];
-    field_encode(&elements, field, &elem_offsets, &mut elem_cursors, out, ctx)?;
-    // Advance the parent cursors by the total per-row element bytes.
-    for i in 0..n {
-        let mut sum: u32 = 0;
-        for j in 0..list_size {
-            sum = sum.saturating_add(elem_sizes[i * list_size + j]);
+        RowWidth::Variable => {
+            // Variable-width elements: for null parent rows the canonical body is exactly
+            // `list_size` null sentinel bytes (one per element). For non-null parent rows,
+            // encode each element via a scratch buffer and copy into out.
+            let elements = arr.elements().clone().execute::<Canonical>(ctx)?;
+            debug_assert_eq!(elements.len(), nrows * list_size);
+            let mut elem_sizes = vec![0u32; nrows * list_size];
+            field_size(&elements, field, &mut elem_sizes, ctx)?;
+            let total: u64 = elem_sizes.iter().map(|&s| u64::from(s)).sum();
+            let total_usize =
+                usize::try_from(total).vortex_expect("FSL scratch buffer size fits usize");
+            let mut scratch = vec![0u8; total_usize];
+            let mut scratch_offsets = Vec::with_capacity(nrows * list_size);
+            let mut acc: u32 = 0;
+            for &s in &elem_sizes {
+                scratch_offsets.push(acc);
+                acc = acc
+                    .checked_add(s)
+                    .vortex_expect("FSL scratch offset overflow");
+            }
+            let mut scratch_cursors = vec![0u32; nrows * list_size];
+            field_encode(
+                &elements,
+                field,
+                &scratch_offsets,
+                &mut scratch_cursors,
+                &mut scratch,
+                ctx,
+            )?;
+            let null_byte = child_canonical_null_byte(&elem_dtype, field);
+            for i in 0..nrows {
+                let dst = (row_offsets[i] + col_offset[i]) as usize;
+                if mask.value(i) {
+                    let mut body_bytes: u32 = 0;
+                    for j in 0..list_size {
+                        let k = i * list_size + j;
+                        let src = scratch_offsets[k] as usize;
+                        let sz = elem_sizes[k] as usize;
+                        out[dst + body_bytes as usize..dst + body_bytes as usize + sz]
+                            .copy_from_slice(&scratch[src..src + sz]);
+                        body_bytes = body_bytes
+                            .checked_add(elem_sizes[k])
+                            .vortex_expect("FSL body bytes overflow");
+                    }
+                    col_offset[i] = col_offset[i]
+                        .checked_add(body_bytes)
+                        .vortex_expect("FSL row offset overflow");
+                } else {
+                    for offset in 0..list_size {
+                        out[dst + offset] = null_byte;
+                    }
+                    col_offset[i] = col_offset[i]
+                        .checked_add(u32::try_from(list_size).vortex_expect("list_size fits u32"))
+                        .vortex_expect("FSL row offset overflow");
+                }
+            }
         }
-        col_offset[i] = col_offset[i].saturating_add(sum);
     }
 
-    // Zero-fill null bodies.
+    Ok(())
+}
+
+/// Encode one variable-width child of a struct at each row's cursor. Non-null parent rows copy
+/// the child's own encoding out of a scratch buffer; null parent rows get only the one byte from
+/// `child_canonical_null_byte`. Errors (instead of panicking) if scratch offsets overflow `u32`.
+fn encode_variable_child(
+    child: &vortex_array::ArrayRef,
+    field: RowSortField,
+    parent_mask: &vortex_mask::Mask,
+    row_offsets: &[u32],
+    col_offset: &mut [u32],
+    out: &mut [u8],
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<()> {
+    let n = child.len();
+    let canonical = child.clone().execute::<Canonical>(ctx)?;
+
+    // Stage every row (even under null parents) in a scratch buffer; it may exceed the output.
+    let mut child_sizes = vec![0u32; n];
+    field_size(&canonical, field, &mut child_sizes, ctx)?;
+    let total: u64 = child_sizes.iter().map(|&s| u64::from(s)).sum();
+    let total_usize = usize::try_from(total).vortex_expect("child scratch buffer size fits usize");
+    let mut scratch = vec![0u8; total_usize];
+    let mut scratch_offsets = Vec::with_capacity(n);
+    let mut acc: u32 = 0;
+    for &s in &child_sizes {
+        scratch_offsets.push(acc);
+        acc = acc.checked_add(s).ok_or_else(|| {
+            vortex_error::vortex_err!("struct child scratch buffer offset overflows u32")
+        })?;
+    }
+    let mut scratch_cursors = vec![0u32; n];
+    field_encode(
+        &canonical,
+        field,
+        &scratch_offsets,
+        &mut scratch_cursors,
+        &mut scratch,
+        ctx,
+    )?;
+
+    let null_byte = child_canonical_null_byte(child.dtype(), field);
     for i in 0..n {
-        if !mask.value(i) {
-            let start = (row_offsets[i] + body_start[i]) as usize;
-            let end = (row_offsets[i] + col_offset[i]) as usize;
-            for b in &mut out[start..end] {
-                *b = 0;
-            }
+        let dst = (row_offsets[i] + col_offset[i]) as usize;
+        if parent_mask.value(i) {
+            let src = scratch_offsets[i] as usize;
+            let sz = child_sizes[i] as usize;
+            out[dst..dst + sz].copy_from_slice(&scratch[src..src + sz]);
+            col_offset[i] = col_offset[i]
+                .checked_add(child_sizes[i])
+                .vortex_expect("col_offset overflow");
+        } else {
+            out[dst] = null_byte;
+            col_offset[i] = col_offset[i]
+                .checked_add(1)
+                .vortex_expect("col_offset overflow");
         }
     }
-
     Ok(())
 }
 
@@ -611,15 +857,12 @@ fn encode_extension(
     field_encode(&storage, field, row_offsets, col_offset, out, ctx)
 }
 
-/// Encode a variable-length byte slice into `out` in 32-byte blocks with
-/// continuation markers. Returns the number of bytes written.
-fn encode_varlen_value(bytes: &[u8], out: &mut [u8], descending: bool) -> u32 {
+/// Encode a non-empty variable-length byte slice into `out` in 32-byte blocks with
+/// continuation/length markers. Returns the number of bytes written. Empty values are
+/// encoded by the caller as a single sentinel byte and never reach this function.
+fn encode_non_empty_varlen_body(bytes: &[u8], out: &mut [u8], descending: bool) -> u32 {
+    debug_assert!(!bytes.is_empty());
     let xor = if descending { 0xFFu8 } else { 0x00 };
-    if bytes.is_empty() {
-        // Single zero terminator.
-        out[0] = xor;
-        return 1;
-    }
     let mut written = 0usize;
     let mut remaining = bytes;
     while remaining.len() > VARLEN_BLOCK_SIZE {
diff --git a/vortex-row/src/encode.rs b/vortex-row/src/encode.rs
index 4bc4962503e..d3721e49a6e 100644
--- a/vortex-row/src/encode.rs
+++ b/vortex-row/src/encode.rs
@@ -11,7 +11,6 @@
 use std::sync::Arc;
 
 use vortex_array::ArrayRef;
-use vortex_array::Canonical;
 use vortex_array::ExecutionCtx;
 use vortex_array::IntoArray;
 use vortex_array::arrays::ListViewArray;
@@ -25,7 +24,6 @@ use vortex_array::scalar_fn::ExecutionArgs;
 use vortex_array::scalar_fn::ScalarFnId;
 use vortex_array::scalar_fn::ScalarFnVTable;
 use vortex_array::validity::Validity;
-use vortex_buffer::Buffer;
 use vortex_buffer::BufferMut;
 use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
@@ -34,7 +32,6 @@ use vortex_session::VortexSession;
 
 use crate::codec;
 use crate::options::RowEncodingOptions;
-use crate::options::RowSortField;
 use crate::options::deserialize_row_encoding_options;
 use crate::options::serialize_row_encoding_options;
 use crate::size::compute_sizes;
@@ -107,6 +104,9 @@ fn execute_row_encode(
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<ArrayRef> {
     let nrows = args.row_count();
+    if u32::try_from(nrows).is_err() {
+        vortex_bail!("row-encoded input has {} rows, exceeds u32::MAX", nrows);
+    }
 
     // ===== Phase 1: classify + size pass =====
     let crate::size::SizePassResult {
@@ -122,7 +122,9 @@ fn execute_row_encode(
     let total: u64 = (nrows as u64)
         .checked_mul(u64::from(fixed_per_row))
         .and_then(|t| t.checked_add(var_total))
-        .vortex_expect("row-encoded total bytes overflow");
+        .ok_or_else(|| {
+            vortex_error::vortex_err!("row-encoded total bytes overflow u64 (nrows * fixed + var)")
+        })?;
     if total > u32::MAX as u64 {
         vortex_bail!("row-encoded output size {} bytes exceeds u32::MAX", total);
     }
@@ -138,44 +140,42 @@ fn execute_row_encode(
     // listview_offsets[i] is the absolute byte offset where row `i` begins.
     // For pure-fixed: i * fixed_per_row.
     // For mixed: i * fixed_per_row + exclusive prefix sum of var_lengths.
-    let mut listview_offsets: Vec<u32> = Vec::with_capacity(nrows);
+    // Build directly into a BufferMut to avoid a Vec→Buffer copy at the end.
+    let nrows_u32 =
+        u32::try_from(nrows).vortex_expect("nrows fits u32 (validated earlier in this function)");
+    let mut listview_offsets: BufferMut<u32> = BufferMut::with_capacity(nrows);
     match var_lengths.as_ref() {
         None => {
-            for i in 0..nrows {
-                let row_idx =
-                    u32::try_from(i).vortex_expect("row index must fit in u32 after validation");
-                listview_offsets.push(
-                    row_idx
-                        .checked_mul(fixed_per_row)
-                        .vortex_expect("row offset overflow (already validated total fits in u32)"),
-                );
+            for row_idx in 0..nrows_u32 {
+                // Total bytes already fit in u32, so row_idx * fixed_per_row also does.
+                listview_offsets.push(row_idx * fixed_per_row);
             }
         }
         Some(v) => {
             let mut acc: u32 = 0;
-            for (i, &l) in v.iter().enumerate() {
-                let row_idx =
-                    u32::try_from(i).vortex_expect("row index must fit in u32 after validation");
-                let off = row_idx
-                    .checked_mul(fixed_per_row)
-                    .and_then(|t| t.checked_add(acc))
-                    .vortex_expect("row offset overflow");
-                listview_offsets.push(off);
-                acc = acc.checked_add(l).vortex_expect("varlen prefix overflow");
+            for (row_idx, &l) in (0..nrows_u32).zip(v.iter()) {
+                // The arithmetic below cannot overflow because we already verified the
+                // total fits in u32.
+                listview_offsets.push(row_idx * fixed_per_row + acc);
+                acc += l;
             }
         }
     }
+    let listview_offsets_slice: &[u32] = listview_offsets.as_slice();
 
-    // Per-row write cursor (also doubles as the ListView `sizes` slot when done).
-    let mut row_cursors = vec![0u32; nrows];
+    // Per-row write cursor (also doubles as the ListView `sizes` slot when done). We build
+    // it as a BufferMut so we can hand it directly to the output PrimitiveArray.
+    let mut row_cursors: BufferMut<u32> = BufferMut::with_capacity(nrows);
+    row_cursors.push_n(0u32, nrows);
 
     // ===== Phase 4: encode columns via the cursor path =====
-    for (i, col) in columns.iter().enumerate() {
-        dispatch_encode(
-            col,
+    // Each column was canonicalized once during the size pass; reuse that canonical form.
+    for (i, canonical) in columns.iter().enumerate() {
+        codec::field_encode(
+            canonical,
             options.fields[i],
-            &listview_offsets,
-            &mut row_cursors,
+            listview_offsets_slice,
+            row_cursors.as_mut_slice(),
             &mut out_buf,
             ctx,
         )?;
@@ -183,34 +183,11 @@ fn execute_row_encode(
 
     // ===== Phase 5: build ListView output =====
     let elements = PrimitiveArray::new(out_buf.freeze(), Validity::NonNullable).into_array();
-    let offsets_arr = PrimitiveArray::new(
-        Buffer::<u32>::copy_from(&listview_offsets),
-        Validity::NonNullable,
-    )
-    .into_array();
-    let sizes_arr = PrimitiveArray::new(
-        Buffer::<u32>::copy_from(&row_cursors),
-        Validity::NonNullable,
-    )
-    .into_array();
+    let offsets_arr =
+        PrimitiveArray::new(listview_offsets.freeze(), Validity::NonNullable).into_array();
+    let sizes_arr = PrimitiveArray::new(row_cursors.freeze(), Validity::NonNullable).into_array();
     Ok(
         ListViewArray::try_new(elements, offsets_arr, sizes_arr, Validity::NonNullable)?
             .into_array(),
     )
 }
-
-/// Dispatch a single column's encoding into the shared `out` buffer through the canonical path.
-///
-/// TODO(row): add per-encoding fast paths here so Constant, Dictionary, and compressed arrays
-/// can write row bytes without canonicalizing.
-pub(crate) fn dispatch_encode(
-    col: &ArrayRef,
-    field: RowSortField,
-    offsets: &[u32],
-    cursors: &mut [u32],
-    out: &mut [u8],
-    ctx: &mut ExecutionCtx,
-) -> VortexResult<()> {
-    let canonical = col.clone().execute::<Canonical>(ctx)?;
-    codec::field_encode(&canonical, field, offsets, cursors, out, ctx)
-}
diff --git a/vortex-row/src/size.rs b/vortex-row/src/size.rs
index 48d4f8e4dbc..26269081ce7 100644
--- a/vortex-row/src/size.rs
+++ b/vortex-row/src/size.rs
@@ -26,7 +26,6 @@ use vortex_array::scalar_fn::ScalarFnId;
 use vortex_array::scalar_fn::ScalarFnVTable;
 use vortex_array::validity::Validity;
 use vortex_buffer::Buffer;
-use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
 use vortex_error::vortex_bail;
 use vortex_session::VortexSession;
@@ -34,16 +33,19 @@ use vortex_session::VortexSession;
 use crate::codec;
 use crate::codec::RowWidth;
 use crate::options::RowEncodingOptions;
-use crate::options::RowSortField;
 use crate::options::deserialize_row_encoding_options;
 use crate::options::serialize_row_encoding_options;
 
 /// Result of the size pass: enough information for both [`RowSize::execute`] and the
 /// downstream [`RowEncode`](super::encode::RowEncode) pipeline.
+///
+/// `fixed_per_row` totals the widths of the fixed-width columns. `var_lengths` is allocated
+/// (one slot per row) only once a variable-width column is seen. `columns` holds each input's
+/// canonical form so the encode pass reuses it rather than canonicalizing again.
 pub(crate) struct SizePassResult {
     pub fixed_per_row: u32,
     pub var_lengths: Option<Vec<u32>>,
-    pub columns: Vec<ArrayRef>,
+    pub columns: Vec<Canonical>,
 }
 
 /// Walk N input columns once, classifying each as fixed-width or variable-length and
@@ -74,7 +76,7 @@ pub(crate) fn compute_sizes(
     }
     let nrows = args.row_count();
 
-    let mut columns: Vec<ArrayRef> = Vec::with_capacity(n_inputs);
+    let mut columns: Vec<Canonical> = Vec::with_capacity(n_inputs);
     let mut fixed_per_row: u32 = 0;
     let mut var_lengths: Option<Vec<u32>> = None;
 
@@ -88,18 +90,21 @@ pub(crate) fn compute_sizes(
                 nrows
             );
         }
-        match codec::row_width_for_dtype(col.dtype())? {
+        let width = codec::row_width_for_dtype(col.dtype())?;
+        // Canonicalize once and reuse for both sizing (variable columns) and encoding.
+        let canonical = col.execute::<Canonical>(ctx)?;
+        match width {
             RowWidth::Fixed(w) => {
-                fixed_per_row = fixed_per_row
-                    .checked_add(w)
-                    .vortex_expect("row width overflow");
+                fixed_per_row = fixed_per_row.checked_add(w).ok_or_else(|| {
+                    vortex_error::vortex_err!("per-row fixed width overflows u32 at column {}", i)
+                })?;
             }
             RowWidth::Variable => {
                 let v = var_lengths.get_or_insert_with(|| vec![0u32; nrows]);
-                dispatch_size(&col, options.fields[i], v, ctx)?;
+                codec::field_size(&canonical, options.fields[i], v, ctx)?;
             }
         }
-        columns.push(col);
+        columns.push(canonical);
     }
 
     Ok(SizePassResult {
@@ -109,7 +114,8 @@ pub(crate) fn compute_sizes(
     })
 }
 
-/// Variadic scalar function that, given N input columns and per-column [`RowSortField`]s,
+/// Variadic scalar function that, given N input columns and per-column
+/// [`RowSortField`](crate::RowSortField)s,
 /// returns a `Struct { fixed: U32, var: U32 }` array of per-row byte sizes for the
 /// row-oriented encoding produced by [`RowEncode`](super::encode::RowEncode).
 ///
@@ -208,17 +214,3 @@ impl ScalarFnVTable for RowSize {
         false
     }
 }
-
-/// Dispatch a single column's per-row size contribution through the canonical path.
-///
-/// TODO(row): add per-encoding fast paths here so Constant, Dictionary, and compressed arrays
-/// can contribute row sizes without canonicalizing.
-pub(crate) fn dispatch_size(
-    col: &ArrayRef,
-    field: RowSortField,
-    sizes: &mut [u32],
-    ctx: &mut ExecutionCtx,
-) -> VortexResult<()> {
-    let canonical = col.clone().execute::<Canonical>(ctx)?;
-    codec::field_size(&canonical, field, sizes, ctx)
-}
diff --git a/vortex-row/src/tests.rs b/vortex-row/src/tests.rs
index a1eaadf3803..62e0e4cfb98 100644
--- a/vortex-row/src/tests.rs
+++ b/vortex-row/src/tests.rs
@@ -323,8 +323,9 @@ fn row_size_struct_shape() -> VortexResult<()> {
     let var_prim = var.clone().execute::<PrimitiveArray>(&mut ctx)?;
     let v: &[u32] = var_prim.as_slice();
     assert_eq!(v.len(), 5);
-    // empty string: sentinel(1) + 1 byte; non-empty: sentinel(1) + 33 bytes (single block).
-    let expected: Vec<u32> = vec![34, 34, 34, 2, 34];
+    // empty string: just the empty sentinel (1 byte); null or non-empty:
+    // sentinel(1) + 33 bytes (single block).
+    let expected: Vec<u32> = vec![34, 34, 34, 1, 34];
     assert_eq!(v, expected.as_slice());
     Ok(())
 }
@@ -362,3 +363,213 @@ fn single_buffer_invariant() -> VortexResult<()> {
     );
     Ok(())
 }
+
+/// Regression: under the earlier 2-sentinel varlen scheme, a row whose col1 was empty and a
+/// row whose non-empty col1 began with `\0` could break multi-column lexicographic order,
+/// since col2's first byte ended up compared against col1's padding in the longer row.
+/// The 3-sentinel scheme separates empty from non-empty at byte position 0 alone, so
+/// column boundaries always line up and the comparison never reads one column against
+/// another.
+#[test]
+fn multi_column_varlen_empty_vs_nul_byte_string() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    // Varlen column: the empty string next to a lone NUL byte, then two ordinary strings.
+    let strings = VarBinViewArray::from_iter_str(["", "\0", "a", "ab"]).into_array();
+    // Integer column: the same value in every row, so col1 alone decides the order.
+    let ints = PrimitiveArray::from_iter([1i32, 1, 1, 1]).into_array();
+    let encoded = convert_columns(
+        &[strings, ints],
+        &[RowSortField::default(), RowSortField::default()],
+        &mut ctx,
+    )?;
+    let rows = collect_row_bytes(&encoded);
+
+    // The inputs are listed in col1's natural order: "" < "\0" < "a" < "ab".
+    // Ordering the row indices by their encoded bytes must reproduce that order, i.e.
+    // yield the identity permutation 0, 1, 2, 3.
+    let mut byte_order: Vec<usize> = (0..rows.len()).collect();
+    byte_order.sort_by(|&x, &y| rows[x].cmp(&rows[y]));
+    assert_eq!(
+        byte_order,
+        vec![0, 1, 2, 3],
+        "byte sort must match natural col1 order; sorted indices were {:?}",
+        byte_order
+    );
+    Ok(())
+}
+
+/// Regression: a null col1 must stay distinct from an empty col1 even when col2 follows it.
+/// In the 3-sentinel scheme null=0x00 and empty=0x01 already differ at byte 0.
+#[test]
+fn multi_column_varlen_null_vs_empty() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let strings = VarBinViewArray::from_iter_nullable_str([
+        None::<&str>,
+        Some(""),
+        Some("a"),
+        None,
+        Some(""),
+    ])
+    .into_array();
+    let ints = PrimitiveArray::from_iter([1i32, 1, 1, 1, 1]).into_array();
+    let encoded = convert_columns(
+        &[strings, ints],
+        &[RowSortField::ascending(), RowSortField::ascending()],
+        &mut ctx,
+    )?;
+    let rows = collect_row_bytes(&encoded);
+
+    // col2 is identical in every row, so col1 alone decides the order: nulls first, then
+    // empties, then non-empties.
+    // Group the row indices by the leading byte of col1's encoding
+    // (0x00 = null, 0x01 = empty, 0x02 = non-empty).
+    let mut nulls: Vec<usize> = Vec::new();
+    let mut empties: Vec<usize> = Vec::new();
+    let mut non_empties: Vec<usize> = Vec::new();
+    for (idx, row) in rows.iter().enumerate() {
+        match row[0] {
+            0x00 => nulls.push(idx),
+            0x01 => empties.push(idx),
+            0x02 => non_empties.push(idx),
+            other => panic!("unexpected varlen sentinel: {:#x}", other),
+        }
+    }
+    assert_eq!(nulls.len(), 2, "two null col1 rows");
+    assert_eq!(empties.len(), 2, "two empty col1 rows");
+    assert_eq!(non_empties.len(), 1, "one non-empty col1 row");
+
+    // The two null rows agree on col2 and encode col1 as a single sentinel, so their bytes
+    // must be identical.
+    assert_eq!(
+        rows[nulls[0]], rows[nulls[1]],
+        "null col1 rows must be byte-equal"
+    );
+    // The same holds for the two empty rows.
+    assert_eq!(
+        rows[empties[0]], rows[empties[1]],
+        "empty col1 rows must be byte-equal"
+    );
+
+    // After a plain byte sort the leading bytes must read nulls, empties, non-empties,
+    // because the sentinel at byte 0 differs between the three groups.
+    let mut by_bytes = rows.clone();
+    by_bytes.sort();
+    for (row, want) in by_bytes.iter().zip([0x00u8, 0x00, 0x01, 0x01, 0x02]) {
+        assert_eq!(row[0], want);
+    }
+    Ok(())
+}
+
+/// Regression: descending varlen must put non-empty before empty (natural "" < "a" inverts
+/// to "a" < "" under descending). The 3-sentinel scheme uses `!non_empty < !empty` so
+/// non-empty's first byte is smaller than empty's first byte.
+#[test]
+fn varlen_descending_empty_vs_non_empty() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let col = VarBinViewArray::from_iter_str(["a", "", "abc"]).into_array();
+    let encoded = convert_columns(&[col], &[RowSortField::descending()], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+
+    // Natural order: "" < "a" < "abc"; descending byte sort: "abc" first, "" last.
+    let mut sorted = rows.clone();
+    sorted.sort();
+    // sorted[0] = encoded("abc"), sorted[1] = encoded("a"), sorted[2] = encoded("")
+    assert_eq!(sorted[0], rows[2], "abc first in descending");
+    assert_eq!(sorted[1], rows[0], "a second");
+    assert_eq!(sorted[2], rows[1], "empty last");
+    Ok(())
+}
+
+/// Regression: when two parent struct rows are null, their encodings must be byte-equal
+/// even if the child values hidden beneath them differ in length, since a null parent
+/// emits a canonical null body (one null sentinel per variable child) whatever lies below.
+#[test]
+fn null_struct_rows_with_varying_child_lengths_are_byte_equal() -> VortexResult<()> {
+    use vortex_array::arrays::StructArray;
+    use vortex_array::dtype::FieldName;
+    use vortex_array::dtype::FieldNames;
+    use vortex_array::validity::Validity;
+    use vortex_buffer::BitBuffer;
+
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    // A nullable struct{name: utf8}: rows 0 and 2 are null, yet the child data stored under
+    // them has different lengths ("short" vs "much longer text data").
+    let names =
+        VarBinViewArray::from_iter_str(["short", "x", "much longer text data"]).into_array();
+    // Only row 1 is valid.
+    let validity = Validity::from(BitBuffer::from_iter([false, true, false]));
+    let field_names = FieldNames::from([FieldName::from("name")]);
+    let parent = StructArray::try_new(field_names, vec![names], 3, validity)?.into_array();
+
+    let encoded = convert_columns(&[parent], &[RowSortField::ascending()], &mut ctx)?;
+    let rows = collect_row_bytes(&encoded);
+    assert_eq!(rows.len(), 3);
+    // The null parent rows must encode identically despite their differing children.
+    assert_eq!(
+        rows[0], rows[2],
+        "two null parent struct rows must encode to byte-equal slices"
+    );
+    // The non-null row must start with a sentinel different from the null sentinel.
+    assert_ne!(rows[0][0], rows[1][0], "null vs non-null sentinel differs");
+    Ok(())
+}
+
+#[test]
+fn primitive_f32_sort_order() -> VortexResult<()> {
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let values: Vec<f32> = vec![-1.5, 0.0, 1.5, f32::INFINITY, f32::NEG_INFINITY];
+    let column = PrimitiveArray::from_iter(values.clone()).into_array();
+    let encoded = convert_columns(&[column], &[RowSortField::default()], &mut ctx)?;
+    let mut rows = collect_row_bytes(&encoded);
+    // Expected: the encoded rows arranged in ascending order of their source values.
+    let mut order: Vec<usize> = (0..values.len()).collect();
+    order.sort_by(|&x, &y| values[x].partial_cmp(&values[y]).unwrap());
+    let expected: Vec<Vec<u8>> = order.into_iter().map(|idx| rows[idx].clone()).collect();
+    rows.sort();
+    assert_eq!(rows, expected);
+    Ok(())
+}
+
+#[test]
+fn primitive_f16_sort_order() -> VortexResult<()> {
+    use vortex_array::dtype::half::f16;
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let values: Vec<f16> = vec![
+        f16::from_f32(-1.5),
+        f16::from_f32(0.0),
+        f16::from_f32(1.5),
+        f16::INFINITY,
+        f16::NEG_INFINITY,
+    ];
+    let column = PrimitiveArray::from_iter(values.clone()).into_array();
+    let encoded = convert_columns(&[column], &[RowSortField::default()], &mut ctx)?;
+    let mut rows = collect_row_bytes(&encoded);
+    // Expected: the encoded rows arranged in ascending order of their source values.
+    let mut order: Vec<usize> = (0..values.len()).collect();
+    order.sort_by(|&x, &y| values[x].partial_cmp(&values[y]).unwrap());
+    let expected: Vec<Vec<u8>> = order.into_iter().map(|idx| rows[idx].clone()).collect();
+    rows.sort();
+    assert_eq!(rows, expected);
+    Ok(())
+}
+
+/// A `List` column must be rejected with an error whose message mentions `List`.
+#[test]
+fn reject_list_dtype_early() {
+    use vortex_array::ArrayRef;
+    use vortex_array::arrays::ListArray;
+    use vortex_array::validity::Validity;
+    use vortex_buffer::buffer;
+    let mut ctx = LEGACY_SESSION.create_execution_ctx();
+    let elements = PrimitiveArray::from_iter([10i32, 20]).into_array();
+    let offsets = PrimitiveArray::new(buffer![0u32, 1, 2], Validity::NonNullable).into_array();
+    let list_column: ArrayRef = ListArray::try_new(elements, offsets, Validity::NonNullable)
+        .unwrap()
+        .into_array();
+    let err = convert_columns(&[list_column], &[RowSortField::default()], &mut ctx)
+        .expect_err("List should not be accepted");
+    assert!(
+        err.to_string().contains("List"),
+        "expected error mentioning List, got: {err}"
+    );
+}