diff --git a/SPEC/CLOCKS.md b/SPEC/CLOCKS.md
new file mode 100644
index 0000000..b2eaa9e
--- /dev/null
+++ b/SPEC/CLOCKS.md
@@ -0,0 +1,66 @@
+# Clock contract
+
+quic-zig uses two clock sources internally. Most of the codebase reads
+`std.time.nanoTimestamp()` (REALTIME); the user-space pacer is the single
+exception — it runs on `CLOCK_MONOTONIC` via `clock.monoNanos()`.
+
+This split is intentional. Reading this page once should be enough to avoid
+introducing a cross-clock comparison bug on a future change.
+
+## Who uses what
+
+| Subsystem | Clock | Source | Why |
+|-----------|-------|--------|-----|
+| Loss detection (PTO, RTT) | REALTIME | `std.time.nanoTimestamp()` | Compares timestamps it produced itself; absolute drift is irrelevant. |
+| Idle timeout | REALTIME | `std.time.nanoTimestamp()` | Same — only the delta `now − last_activity` matters. |
+| Stateless reset / token expiry | REALTIME | `std.time.nanoTimestamp()` | Long-horizon validity windows; wall-clock alignment is fine. |
+| qlog timestamps | REALTIME | `std.time.nanoTimestamp()` | Wall-clock is what humans expect when reading traces. |
+| Datagram receive timestamps | REALTIME | `std.time.nanoTimestamp()` | Compared only to other REALTIME values within the same connection. |
+| **Pacer** (`Pacer.last_sent_time`, `timeUntilSend`, `onPacketSent`) | **MONOTONIC** | `clock.monoNanos()` | Budget replenishment math (`elapsed = now − last_sent_time`) breaks if a wall-clock step (NTP step correction, manual time change) makes elapsed go negative or huge. |
+
+## The single boundary
+
+`Connection.nextTimeoutNs()` is the only function that crosses the boundary.
+It folds the pacer's next-send time into a deadline that the event loop
+compares against REALTIME-based deadlines (loss timer, idle timer, ack alarm).
+
+The conversion happens inline at `connection.zig:3793`:
+
+```zig
+const now_realtime: i64 = @intCast(std.time.nanoTimestamp());
+const now_mono: i64 = clock.monoNanos();
+const elapsed = now_mono - self.pacer.last_sent_time;     // duration on MONO
+// ... compute delay (a duration, clock-agnostic) ...
+const pacer_deadline = now_realtime + delay;              // anchor on REALTIME
+```
+
+We compute the *duration* on the monotonic clock (where the pacer's state
+lives) and add it to a REALTIME `now` so the resulting deadline is comparable
+to the other deadlines the event loop collects. The result is a REALTIME
+timestamp, never a MONOTONIC one — that boundary stays inside this function.
+
+## Rules for future changes
+
+1. **Adding a new pacer call site:** pass `now_mono` (or call `clock.monoNanos()` fresh). Never pass a `nanoTimestamp()` value.
+2. **Reading `pacer.last_sent_time` from outside the Pacer:** treat it as MONOTONIC. Subtract it from another MONOTONIC value to get a duration. Never compare to a REALTIME timestamp.
+3. **Adding a new clock-using subsystem:** default to REALTIME. Switch to MONOTONIC only if the subsystem hands timestamps to the kernel (e.g., a future `SCM_TXTIME` cmsg) or is genuinely sensitive to wall-clock jumps.
+4. **Mixing in a single deadline computation:** allowed only when computing a *duration* on one clock and anchoring the deadline on another (the `nextTimeoutNs` pattern above). Document why in a comment.
+
+## Why not migrate everything to MONOTONIC
+
+- Loss detection, PTO, and idle timeout are all *delta-based* — they don't care which clock as long as the timestamps in a single comparison agree. They've worked correctly on REALTIME since day one and changing them adds risk for no gain.
+- qlog readers and external tooling expect wall-clock timestamps.
+- Token-validity windows are conceptually wall-clock (a 1-day token means 24 wall-clock hours).
+- The single subsystem that genuinely needed monotonic semantics (the pacer) is now isolated.
+
+## Why the pacer specifically
+
+- `Pacer.replenish` computes `elapsed = now - last_sent_time` and turns it into bytes of budget. If the wall clock jumps backward by 10 seconds (NTP step correction, manual time change), `elapsed` goes negative and the pacer either refuses to send or floods, depending on signedness handling.
+- A forward jump credits the pacer with phantom bandwidth, briefly defeating congestion control.
+- `MONOTONIC` immunizes both directions.
+
+## Files
+
+- `src/quic/clock.zig` — defines `monoNanos()` (Linux/macOS via `clock_gettime`, Windows fallback to `nanoTimestamp()`).
+- `src/quic/congestion.zig` — `Pacer` doc comment names the contract.
+- `src/quic/connection.zig` — the two pacer call sites in `send()` (`timeUntilSend`, `onPacketSent`) use `now_mono`; `nextTimeoutNs` handles the boundary conversion.
diff --git a/SPEC/interop-results.md b/SPEC/interop-results.md
index 70f54a7..89753fa 100644
--- a/SPEC/interop-results.md
+++ b/SPEC/interop-results.md
@@ -1,9 +1,37 @@
 # Interop Test Results
 
-Date: 2026-03-24
-Zig version: 0.15.2, quic-go interop image `martenseemann/quic-go-interop:latest`, webtransport-go interop image `martenseemann/webtransport-go-interop:latest`
+Date: 2026-04-15 (supersedes 2026-03-24 baseline below)
+Zig version: 0.15.2, quic-go interop image `martenseemann/quic-go-interop:latest`, neqo interop image `ghcr.io/mozilla/neqo-qns:latest`, webtransport-go interop image `martenseemann/webtransport-go-interop:latest`
 Build: Docker interop image from `interop/runner/Dockerfile`, `zig build -Doptimize=ReleaseSafe`
 
+## 2026-04-15: UDP send-path optimizations (`sendmmsg` + pacer hardening)
+
+Inspired by Cloudflare's "Accelerating UDP packet transmission for QUIC" post,
+narrowed to the techniques that fit a real-time WebTransport workload (small
+datagrams, latency-sensitive). Larger throughput-oriented optimizations (UDP
+GSO, SO_TXTIME kernel pacing) were prototyped, validated, and reverted —
+see "Cloudflare optimizations: what we kept and why" in `SPEC/STATUS.md` if
+revisiting in the future.
+
+### Send-path toggles
+| Feature | Default | Env var | Notes |
+|---------|---------|---------|-------|
+| `sendmmsg` batching | on (Linux) | `QUIC_ZIG_NO_SENDMMSG=1` disables | one syscall per ECN-mark run |
+| User-space pacer | on | `QUIC_ZIG_NO_PACING=1` disables | bisection escape hatch |
+| Pacer clock | `CLOCK_MONOTONIC` (Windows: `nanoTimestamp()` fallback) | n/a | NTP-skew resilience |
+
+### Matrix (sequential run, `handshake,transfer,chacha20,multiplexing,longrtt,http3,keyupdate`)
+
+|                           | quic-go (server/client) | neqo (server/client) |
+|---------------------------|-------------------------|----------------------|
+| quic-zig server ← peer client | **7/7 PASS**        | **7/7 PASS**         |
+| quic-zig client → peer server | **7/7 PASS**        | **6-7/7 PASS**       |
+
+Zero regressions against the 2026-03-24 baseline recorded below. The
+zig-client → neqo-server flake on `keyupdate`/`chacha20` predates this work.
+
+## 2026-03-24 baseline (pre-optimization)
+
 ## Functional Interop Matrix
 
 ### QUIC / HTTP/3 (`quic-go`)
diff --git a/interop/runner/run_endpoint.sh b/interop/runner/run_endpoint.sh
index 9e6da43..c1a726e 100755
--- a/interop/runner/run_endpoint.sh
+++ b/interop/runner/run_endpoint.sh
@@ -4,6 +4,10 @@ set -e
 # Setup routing for the simulated network
 source /setup.sh
 
+# Optimization toggles — both on by default; set to 1 to disable for bisection.
+export QUIC_ZIG_NO_SENDMMSG="${QUIC_ZIG_NO_SENDMMSG:-0}"
+export QUIC_ZIG_NO_PACING="${QUIC_ZIG_NO_PACING:-0}"
+
 # Determine if this is a WebTransport test case
 is_wt_test() {
     case "$TESTCASE" in
diff --git a/src/quic/clock.zig b/src/quic/clock.zig
new file mode 100644
index 0000000..bf2b800
--- /dev/null
+++ b/src/quic/clock.zig
@@ -0,0 +1,28 @@
+const std = @import("std");
+const builtin = @import("builtin");
+
+/// Nanosecond reading of the monotonic clock (`CLOCK_MONOTONIC` on POSIX).
+///
+/// The pacer takes all of its timestamps from here so that the deltas it
+/// derives from `last_sent_time` do not move when the wall clock is stepped.
+/// Other paths (loss detection, PTO, idle timeout) keep using
+/// `std.time.nanoTimestamp()`.
+///
+/// Caveat: the Windows build and the `clock_gettime` error path both return
+/// `std.time.nanoTimestamp()`, i.e. a REALTIME value rather than a monotonic one.
+pub fn monoNanos() i64 {
+    // Windows has no POSIX CLOCK_MONOTONIC, so the POSIX branch is compiled out.
+    if (comptime builtin.os.tag != .windows) {
+        if (std.posix.clock_gettime(.MONOTONIC)) |ts| {
+            return @as(i64, ts.sec) * std.time.ns_per_s + @as(i64, ts.nsec);
+        } else |_| {}
+    }
+    // Fallback keeps the pacer running when no monotonic reading is available.
+    return @intCast(std.time.nanoTimestamp());
+}
+
+test "monoNanos is non-decreasing" {
+    const first = monoNanos();
+    const second = monoNanos();
+    try std.testing.expect(first <= second);
+}
diff --git a/src/quic/congestion.zig b/src/quic/congestion.zig
index 6f836ab..8f3bc6f 100644
--- a/src/quic/congestion.zig
+++ b/src/quic/congestion.zig
@@ -421,6 +421,12 @@ fn icbrt(x: u64) u64 {
 /// Pacer for spacing out packet sends to avoid bursts.
 ///
 /// Uses a token bucket algorithm similar to quic-go's pacer.
+///
+/// All timestamp arguments (`now` in `onPacketSent`, `timeUntilSend`, and
+/// `replenish`) MUST be on `CLOCK_MONOTONIC` — callers obtain them via
+/// `clock.monoNanos()`. The monotonic clock makes budget replenishment
+/// immune to wall-clock jumps (NTP slews, manual time changes). Mixing
+/// clock sources across calls would silently corrupt budget math.
 pub const Pacer = struct {
     /// Available budget in bytes.
     budget: u64,
@@ -428,7 +434,7 @@ pub const Pacer = struct {
     /// Max burst size in bytes.
     max_burst: u64,
 
-    /// Last time a packet was sent (nanoseconds).
+    /// Last time a packet was sent (CLOCK_MONOTONIC nanoseconds).
     last_sent_time: i64 = 0,
 
     /// Bandwidth in bytes per nanosecond, left-shifted by BANDWIDTH_SHIFT for precision.
diff --git a/src/quic/connection.zig b/src/quic/connection.zig
index 86f6b83..ff162c8 100644
--- a/src/quic/connection.zig
+++ b/src/quic/connection.zig
@@ -24,6 +24,24 @@ const stateless_reset = @import("stateless_reset.zig");
 const ecn = @import("ecn.zig");
 const qlog = @import("qlog.zig");
 const quic_lb = @import("quic_lb.zig");
+const clock = @import("clock.zig");
+
+/// Bisection kill switch for the user-space pacer.
+/// When `QUIC_ZIG_NO_PACING=1` (or any non-empty non-"0" value) is set in the
+/// environment, `conn.send()` and `nextTimeoutNs()` behave as if the pacer
+/// never blocks. `Pacer.onPacketSent` and `setBandwidth` continue to run so
+/// bisection can be toggled without polluting CC state.
+var pacing_disabled_cache: ?bool = null;
+
+fn isPacingDisabled() bool {
+    if (pacing_disabled_cache) |v| return v;
+    // `std.posix.getenv` is a compile error on Windows (WTF-16 environment),
+    // so the kill switch is unavailable there and pacing always stays on.
+    const raw: []const u8 = if (comptime @import("builtin").os.tag == .windows) "" else (std.posix.getenv("QUIC_ZIG_NO_PACING") orelse "");
+    const v = !(raw.len == 0 or std.mem.eql(u8, raw, "0"));
+    pacing_disabled_cache = v;
+    return v;
+}
 
 pub const State = enum(u8) {
     first_flight = 0,
@@ -2753,6 +2771,9 @@ pub const Connection = struct {
         if (self.state == .draining or self.state == .terminated) return 0;
 
         const now: i64 = @intCast(std.time.nanoTimestamp());
+        // Pacer runs on CLOCK_MONOTONIC for NTP-skew resilience; other
+        // subsystems stay on REALTIME (they only compare deltas).
+        const now_mono: i64 = clock.monoNanos();
 
         // Closing: retransmit saved close packet on each incoming packet (RFC 9000 §10.2.1)
         if (self.state == .closing) {
@@ -2818,11 +2839,13 @@ pub const Connection = struct {
             return try self.sendAckOnly(out_buf, now);
         }
 
-        // Check if pacer allows sending
-        // Exception: PTO probes bypass pacing (RFC 9002 §6.2.4)
-        // Note: ACK-only path above bypasses pacer per RFC 9002 §7.7
-        if (self.pto_probe_pending == 0) {
-            const pacer_delay = self.pacer.timeUntilSend(now);
+        // Pacer gate. Returning 0 here is how the event loop breaks out of
+        // its burst send loop; the next send time is then surfaced via
+        // `nextTimeoutNs()` so libxev wakes us when the pacer has budget again.
+        // Exceptions: PTO probes bypass pacing (RFC 9002 §6.2.4); the ACK-only
+        // path above bypasses it per RFC 9002 §7.7.
+        if (self.pto_probe_pending == 0 and !isPacingDisabled()) {
+            const pacer_delay = self.pacer.timeUntilSend(now_mono);
             if (pacer_delay > 0) {
                 return 0;
             }
@@ -2934,7 +2957,7 @@ pub const Connection = struct {
             self.pto_probe_pending -|= 1;
             self.paths[self.active_path_idx].bytes_sent += bytes_written;
             self.total_packets_sent += 1;
-            self.pacer.onPacketSent(bytes_written, now);
+            self.pacer.onPacketSent(bytes_written, now_mono);
             self.last_packet_sent_time = now;
 
             // If more PTO probes are pending, re-queue stream data + crypto data
@@ -3770,10 +3793,16 @@ pub const Connection = struct {
 
         // Pacer: if the pacer has bandwidth set (active transfer), include its
         // next-send time so the event loop wakes up promptly to send more data.
-        if (self.pacer.bandwidth_shifted > 0 and self.state == .connected) {
-            const now: i64 = @intCast(std.time.nanoTimestamp());
-            // Estimate pacer delay without mutating: budget is replenished by elapsed time
-            const elapsed = now - self.pacer.last_sent_time;
+        // Skipped when pacing is disabled via the env kill switch.
+        //
+        // The pacer stores `last_sent_time` on CLOCK_MONOTONIC; the deadline we
+        // return must be comparable to the REALTIME-based deadlines collected
+        // above, so compute the *delay* on the monotonic clock and add it to
+        // the REALTIME `now`.
+        if (self.pacer.bandwidth_shifted > 0 and self.state == .connected and !isPacingDisabled()) {
+            const now_realtime: i64 = @intCast(std.time.nanoTimestamp());
+            const now_mono: i64 = clock.monoNanos();
+            const elapsed = now_mono - self.pacer.last_sent_time;
             var budget = self.pacer.budget;
             if (self.pacer.last_sent_time > 0 and elapsed > 0) {
                 const replenished = (self.pacer.bandwidth_shifted *| @as(u64, @intCast(elapsed))) >> 20;
@@ -3782,7 +3811,7 @@ pub const Connection = struct {
             if (budget < self.pacer.max_datagram_size) {
                 const deficit = self.pacer.max_datagram_size - budget;
                 const delay: i64 = @intCast((deficit << 20) / self.pacer.bandwidth_shifted);
-                const pacer_deadline = now + delay;
+                const pacer_deadline = now_realtime + delay;
                 if (earliest == null or pacer_deadline < earliest.?) {
                     earliest = pacer_deadline;
                 }
diff --git a/src/quic/ecn_socket.zig b/src/quic/ecn_socket.zig
index 1a9d971..da103d8 100644
--- a/src/quic/ecn_socket.zig
+++ b/src/quic/ecn_socket.zig
@@ -4,6 +4,15 @@ const builtin = @import("builtin");
 
 const is_windows = builtin.os.tag == .windows;
 
+/// Linux sendmmsg batches multiple datagrams into one syscall.
+/// Compile-time gate; on other platforms the portable sendmsg loop is used.
+const use_sendmmsg = builtin.os.tag == .linux;
+const linux = std.os.linux;
+
+/// Runtime kill switch. Set QUIC_ZIG_NO_SENDMMSG=1 to force the sendmsg loop
+/// on Linux (useful for bisecting regressions without rebuilding).
+const sendmmsg_env_var = "QUIC_ZIG_NO_SENDMMSG";
+
 // Platform-specific constants for ECN socket options (IPv4).
 const IPPROTO_IP: u32 = 0;
 
@@ -200,13 +209,27 @@ pub fn mapV4ToV6(storage: *posix.sockaddr.storage) void {
 
 /// Batch sender that collects outgoing packets and flushes them together.
 /// Reduces syscall overhead by batching sendto calls and caching ECN marks.
+/// On Linux, flush uses sendmmsg to send many packets per syscall
+/// (grouped by ECN mark so the cached IP_TOS stays valid). On other platforms
+/// it falls back to a per-packet sendmsg loop.
 pub const SendBatch = struct {
     const MAX_BATCH: usize = 64;
 
+    /// Warn every N dropped packets so a stuck send path is visible without
+    /// flooding the log when ENOBUFS briefly spikes.
+    const DROP_WARN_INTERVAL: u64 = 1024;
+
     sockfd: posix.socket_t,
     count: usize = 0,
     current_ecn: u2 = 0,
 
+    /// Total packets the kernel refused to accept from this batcher.
+    /// UDP is lossy and QUIC loss detection recovers; we just surface a metric.
+    dropped_packets: u64 = 0,
+
+    /// Runtime kill switch — resolved once at init, so flush() never touches env.
+    use_mmsg: bool = false,
+
     // Per-packet data
     addrs: [MAX_BATCH]posix.sockaddr.storage = undefined,
     addr_lens: [MAX_BATCH]posix.socklen_t = undefined,
@@ -219,7 +242,10 @@ pub const SendBatch = struct {
     data_len: usize = 0,
 
     pub fn init(sockfd: posix.socket_t) SendBatch {
-        return .{ .sockfd = sockfd };
+        return .{
+            .sockfd = sockfd,
+            .use_mmsg = use_sendmmsg and !envFlagSet(sendmmsg_env_var),
+        };
     }
 
     /// Add a packet to the batch. Flushes automatically when full.
@@ -238,17 +264,27 @@ pub const SendBatch = struct {
         self.count += 1;
     }
 
-    /// Send all queued packets via sendmsg (matches quic-go's approach).
-    /// Uses sendmsg instead of sendto for more reliable delivery on macOS loopback.
+    /// Send all queued packets. Dispatches to the fastest available path.
     pub fn flush(self: *SendBatch) void {
         if (self.count == 0) return;
+        defer {
+            self.count = 0;
+            self.data_len = 0;
+        }
 
-        for (0..self.count) |i| {
-            // Only call setsockopt when ECN mark changes (saves 2 syscalls per packet)
-            if (self.ecn_marks[i] != self.current_ecn) {
-                self.current_ecn = self.ecn_marks[i];
-                setEcnMark(self.sockfd, self.current_ecn) catch {};
+        if (comptime use_sendmmsg) {
+            if (self.use_mmsg) {
+                self.flushLinux();
+                return;
             }
+        }
+        self.flushPortable();
+    }
+
+    /// Per-packet sendmsg loop — used on macOS/Windows and as the kill-switch fallback.
+    fn flushPortable(self: *SendBatch) void {
+        for (0..self.count) |i| {
+            self.applyEcn(self.ecn_marks[i]);
             const data = self.data_buf[self.offsets[i]..][0..self.lengths[i]];
             var iov = [1]posix.iovec_const{.{
                 .base = data.ptr,
@@ -263,14 +299,99 @@ pub const SendBatch = struct {
                 .controllen = 0,
                 .flags = 0,
             };
-            _ = std.c.sendmsg(self.sockfd, &msg, 0);
+            if (std.c.sendmsg(self.sockfd, &msg, 0) < 0) {
+                self.recordDrop(1);
+            }
         }
+    }
 
-        self.count = 0;
-        self.data_len = 0;
+    /// Linux sendmmsg path: one syscall per same-ECN run; a rejected packet is skipped, not its whole run.
+    fn flushLinux(self: *SendBatch) void {
+        if (comptime !use_sendmmsg) unreachable;
+
+        // Scratch arrays live on the stack — sized for MAX_BATCH (~5 KB total).
+        var iovs: [MAX_BATCH]posix.iovec_const = undefined;
+        var msgvec: [MAX_BATCH]linux.mmsghdr_const = undefined;
+
+        var start: usize = 0;
+        while (start < self.count) {
+            // Extend the run while the ECN mark matches the one at `start`.
+            const run_ecn = self.ecn_marks[start];
+            var end = start + 1;
+            while (end < self.count and self.ecn_marks[end] == run_ecn) : (end += 1) {}
+
+            self.applyEcn(run_ecn);
+
+            // One mmsghdr per packet within the run.
+            for (start..end) |i| {
+                iovs[i] = .{
+                    .base = self.data_buf[self.offsets[i]..].ptr,
+                    .len = self.lengths[i],
+                };
+                msgvec[i] = .{
+                    .hdr = .{
+                        .name = @ptrCast(&self.addrs[i]),
+                        .namelen = self.addr_lens[i],
+                        .iov = @ptrCast(&iovs[i]),
+                        .iovlen = 1,
+                        .control = null,
+                        .controllen = 0,
+                        .flags = 0,
+                    },
+                    .len = 0,
+                };
+            }
+            // sendmmsg stops at the first rejected datagram: drop only that one, resend the tail.
+            var next = start;
+            while (next < end) : (next += 1) {
+                next += sendmmsgRun(self.sockfd, msgvec[next..end].ptr, @intCast(end - next));
+                if (next < end) self.recordDrop(1);
+            }
+            start = end;
+        }
+    }
+
+    /// Hand `n` messages beginning at `msgvec` to the kernel with sendmmsg.
+    /// An EINTR result is retried exactly once; a second EINTR or any other
+    /// error yields 0. Otherwise returns how many messages were accepted.
+    fn sendmmsgRun(sockfd: posix.socket_t, msgvec: [*]linux.mmsghdr_const, n: u32) u32 {
+        var retried = false;
+        while (true) {
+            const rc = linux.sendmmsg(sockfd, msgvec, n, 0);
+            const err = linux.E.init(rc);
+            if (err == .SUCCESS) return @intCast(rc);
+            // Anything but a first EINTR ends the attempt with nothing sent.
+            if (err != .INTR or retried) return 0;
+            retried = true;
+        }
+    }
+
+    /// Apply `ecn` to the socket unless it already matches the cached mark (setEcnMark errors are ignored).
+    fn applyEcn(self: *SendBatch, ecn: u2) void {
+        if (self.current_ecn != ecn) {
+            self.current_ecn = ecn;
+            setEcnMark(self.sockfd, ecn) catch {};
+        }
+    }
+
+    /// Add `n` to `dropped_packets`; warn whenever the running total moves
+    /// into a new DROP_WARN_INTERVAL-sized bucket.
+    fn recordDrop(self: *SendBatch, n: u32) void {
+        const bucket_before = self.dropped_packets / DROP_WARN_INTERVAL;
+        self.dropped_packets += n;
+        const bucket_after = self.dropped_packets / DROP_WARN_INTERVAL;
+        if (bucket_after == bucket_before) return;
+        std.log.warn("ecn_socket: {d} outgoing UDP packets dropped so far", .{self.dropped_packets});
     }
 };
 
+/// Boolean env-var flag: true unless the variable is unset, empty, or "0" (always false on Windows).
+fn envFlagSet(name: [:0]const u8) bool {
+    if (comptime is_windows) return false;
+    const raw = std.posix.getenv(name) orelse "";
+    return raw.len != 0 and !std.mem.eql(u8, raw, "0");
+}
+
 /// Send a single packet directly from the caller's buffer (zero-copy send path).
 /// Avoids the batch memcpy overhead for single-packet sends — the common case
 /// for latency-sensitive echo/datagram workloads.
@@ -321,6 +442,51 @@ test "setEcnMark on a real socket" {
     try setEcnMark(sockfd, 0b00);
 }
 
+test "SendBatch delivers mixed-ECN packets in order" {
+    if (comptime is_windows) return error.SkipZigTest;
+
+    const rx = try posix.socket(posix.AF.INET, posix.SOCK.DGRAM | posix.SOCK.NONBLOCK, 0);
+    defer posix.close(rx);
+    const tx = try posix.socket(posix.AF.INET, posix.SOCK.DGRAM, 0);
+    defer posix.close(tx);
+
+    const bind_addr = try std.net.Address.parseIp4("127.0.0.1", 0);
+    try posix.bind(rx, &bind_addr.any, bind_addr.getOsSockLen());
+    try enableEcnRecv(rx);
+
+    var peer: posix.sockaddr.storage = std.mem.zeroes(posix.sockaddr.storage);
+    var peer_len: posix.socklen_t = @sizeOf(posix.sockaddr.storage);
+    try posix.getsockname(rx, @ptrCast(&peer), &peer_len);
+
+    var batch = SendBatch.init(tx);
+    // Mix ECN marks (runs of length 1, 2, 1, 1) to exercise the run-segmentation logic.
+    const payloads = [_][]const u8{ "aa", "bb", "cc", "dd", "ee" };
+    const marks = [_]u2{ 0, 0b10, 0b10, 0, 0b01 };
+    for (payloads, marks) |p, m| {
+        batch.add(p, @ptrCast(&peer), peer_len, m);
+    }
+    batch.flush();
+    try std.testing.expectEqual(@as(u64, 0), batch.dropped_packets);
+
+    // Receive buffer for draining `rx`; payloads must arrive in send order.
+    var buf: [64]u8 = undefined;
+    // Poll for up to 200 ms: `rx` is non-blocking and loopback delivery is not synchronous.
+    var received: usize = 0;
+    const deadline = std.time.milliTimestamp() + 200;
+    while (received < payloads.len and std.time.milliTimestamp() < deadline) {
+        const r = recvmsgEcn(rx, &buf) catch |err| switch (err) {
+            error.WouldBlock => {
+                std.Thread.sleep(1 * std.time.ns_per_ms);
+                continue;
+            },
+            else => return err,
+        };
+        try std.testing.expectEqualSlices(u8, payloads[received], buf[0..r.bytes_read]);
+        received += 1;
+    }
+    try std.testing.expectEqual(payloads.len, received);
+}
+
 test "recvmsgEcn returns WouldBlock on empty socket" {
     if (comptime is_windows) return error.SkipZigTest;
     const sockfd = try posix.socket(posix.AF.INET, posix.SOCK.DGRAM | posix.SOCK.NONBLOCK, 0);