diff --git a/SPEC/CLOCKS.md b/SPEC/CLOCKS.md new file mode 100644 index 0000000..b2eaa9e --- /dev/null +++ b/SPEC/CLOCKS.md @@ -0,0 +1,66 @@ +# Clock contract + +quic-zig uses two clock sources internally. Most of the codebase reads +`std.time.nanoTimestamp()` (REALTIME); the user-space pacer is the single +exception — it runs on `CLOCK_MONOTONIC` via `clock.monoNanos()`. + +This split is intentional. Reading this page once should be enough to avoid +introducing a cross-clock comparison bug on a future change. + +## Who uses what + +| Subsystem | Clock | Source | Why | +|-----------|-------|--------|-----| +| Loss detection (PTO, RTT) | REALTIME | `std.time.nanoTimestamp()` | Compares timestamps it produced itself; absolute drift is irrelevant. | +| Idle timeout | REALTIME | `std.time.nanoTimestamp()` | Same — only the delta `now − last_activity` matters. | +| Stateless reset / token expiry | REALTIME | `std.time.nanoTimestamp()` | Long-horizon validity windows; wall-clock alignment is fine. | +| qlog timestamps | REALTIME | `std.time.nanoTimestamp()` | Wall-clock is what humans expect when reading traces. | +| Datagram receive timestamps | REALTIME | `std.time.nanoTimestamp()` | Compared only to other REALTIME values within the same connection. | +| **Pacer** (`Pacer.last_sent_time`, `timeUntilSend`, `onPacketSent`) | **MONOTONIC** | `clock.monoNanos()` | Budget replenishment math (`elapsed = now − last_sent_time`) breaks if a wall-clock jump (NTP step correction, manual time change) makes elapsed go negative or huge. | + +## The single boundary + +`Connection.nextTimeoutNs()` is the only function that crosses the boundary. +It folds the pacer's next-send time into a deadline that the event loop +compares against REALTIME-based deadlines (loss timer, idle timer, ack alarm).
+ +The conversion happens inline in `Connection.nextTimeoutNs()` (`connection.zig:3793` at the time of writing): + +```zig +const now_realtime: i64 = @intCast(std.time.nanoTimestamp()); +const now_mono: i64 = clock.monoNanos(); +const elapsed = now_mono - self.pacer.last_sent_time; // duration on MONO +// ... compute `delay` (a duration, clock-agnostic) ... +const pacer_deadline = now_realtime + delay; // anchor on REALTIME +``` + +We compute the *duration* on the monotonic clock (where the pacer's state +lives) and add it to a REALTIME `now` so the resulting deadline is comparable +to the other deadlines the event loop collects. The result is a REALTIME +timestamp, never a MONOTONIC one — that boundary stays inside this function. + +## Rules for future changes + +1. **Adding a new pacer call site:** pass `now_mono` (or call `clock.monoNanos()` fresh). Never pass a `nanoTimestamp()` value. +2. **Reading `pacer.last_sent_time` from outside the Pacer:** treat it as MONOTONIC. Subtract it from another MONOTONIC value to get a duration. Never compare to a REALTIME timestamp. +3. **Adding a new clock-using subsystem:** default to REALTIME. Switch to MONOTONIC only if the subsystem hands timestamps to the kernel (e.g., a future `SCM_TXTIME` cmsg) or is genuinely sensitive to wall-clock jumps. +4. **Mixing in a single deadline computation:** allowed only when computing a *duration* on one clock and anchoring the deadline on another (the `nextTimeoutNs` pattern above). Document why in a comment. + +## Why not migrate everything to MONOTONIC + +- Loss detection, PTO, and idle timeout are all *delta-based* — they don't care which clock as long as the timestamps in a single comparison agree. They've worked correctly on REALTIME since day one and changing them adds risk for no gain. +- qlog readers and external tooling expect wall-clock timestamps. +- Token-validity windows are conceptually wall-clock (a 1-day token means 24 wall-clock hours).
+- The single subsystem that genuinely needed monotonic semantics (the pacer) is now isolated. + +## Why the pacer specifically + +- `Pacer.replenish` computes `elapsed = now - last_sent_time` and turns it into bytes of budget. If the wall clock jumps backward by 10 seconds (NTP step correction, manual time change), `elapsed` goes negative and the pacer either refuses to send or floods, depending on signedness handling. +- A forward jump credits the pacer with phantom bandwidth, briefly defeating congestion control. +- `MONOTONIC` immunizes both directions. + +## Files + +- `src/quic/clock.zig` — defines `monoNanos()` (Linux/macOS via `clock_gettime`, Windows fallback to `nanoTimestamp()`). +- `src/quic/congestion.zig` — `Pacer` doc comment names the contract. +- `src/quic/connection.zig` — the pacer call sites in `send()` (`timeUntilSend`, `onPacketSent`) use `now_mono`; `nextTimeoutNs` handles the boundary conversion. diff --git a/SPEC/interop-results.md b/SPEC/interop-results.md index 70f54a7..89753fa 100644 --- a/SPEC/interop-results.md +++ b/SPEC/interop-results.md @@ -1,9 +1,37 @@ # Interop Test Results -Date: 2026-03-24 -Zig version: 0.15.2, quic-go interop image `martenseemann/quic-go-interop:latest`, webtransport-go interop image `martenseemann/webtransport-go-interop:latest` +Date: 2026-04-15 (supersedes 2026-03-24 baseline below) +Zig version: 0.15.2, quic-go interop image `martenseemann/quic-go-interop:latest`, neqo interop image `ghcr.io/mozilla/neqo-qns:latest`, webtransport-go interop image `martenseemann/webtransport-go-interop:latest` Build: Docker interop image from `interop/runner/Dockerfile`, `zig build -Doptimize=ReleaseSafe` +## 2026-04-15: UDP send-path optimizations (`sendmmsg` + pacer hardening) + +Inspired by Cloudflare's "Accelerating UDP packet transmission for QUIC" post, +narrowed to the techniques that fit a real-time WebTransport workload (small +datagrams, latency-sensitive).
Larger throughput-oriented optimizations (UDP +GSO, SO_TXTIME kernel pacing) were prototyped, validated, and reverted — +see "Cloudflare optimizations: what we kept and why" in `SPEC/STATUS.md` if +revisiting in the future. + +### Send-path toggles +| Feature | Default | Env var | Notes | +|---------|---------|---------|-------| +| `sendmmsg` batching | on (Linux) | `QUIC_ZIG_NO_SENDMMSG=1` disables | one syscall per ECN-mark run | +| User-space pacer | on | `QUIC_ZIG_NO_PACING=1` disables | bisection escape hatch | +| Pacer clock | always `CLOCK_MONOTONIC` | n/a | NTP-skew resilience | + +### Matrix (sequential run, `handshake,transfer,chacha20,multiplexing,longrtt,http3,keyupdate`) + +| | quic-go (server/client) | neqo (server/client) | +|---------------------------|-------------------------|----------------------| +| quic-zig server ← peer client | **7/7 PASS** | **7/7 PASS** | +| quic-zig client → peer server | **7/7 PASS** | **6-7/7 PASS** | + +Zero regressions against the 2026-03-24 baseline recorded below. The +zig-client → neqo-server flake on `keyupdate`/`chacha20` predates this work. + +## 2026-03-24 baseline (pre-optimization) + ## Functional Interop Matrix ### QUIC / HTTP/3 (`quic-go`) diff --git a/interop/runner/run_endpoint.sh b/interop/runner/run_endpoint.sh index 9e6da43..c1a726e 100755 --- a/interop/runner/run_endpoint.sh +++ b/interop/runner/run_endpoint.sh @@ -4,6 +4,10 @@ set -e # Setup routing for the simulated network source /setup.sh +# Optimization toggles — both on by default; set to 1 to disable for bisection. 
+export QUIC_ZIG_NO_SENDMMSG="${QUIC_ZIG_NO_SENDMMSG:-0}" +export QUIC_ZIG_NO_PACING="${QUIC_ZIG_NO_PACING:-0}" + # Determine if this is a WebTransport test case is_wt_test() { case "$TESTCASE" in diff --git a/src/quic/clock.zig b/src/quic/clock.zig new file mode 100644 index 0000000..bf2b800 --- /dev/null +++ b/src/quic/clock.zig @@ -0,0 +1,28 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// Read `CLOCK_MONOTONIC` in nanoseconds; falls back to REALTIME `std.time.nanoTimestamp()` on Windows or if `clock_gettime` fails. +/// +/// The Pacer uses this clock so its `last_sent_time` deltas are immune to +/// wall-clock jumps (NTP step corrections, manual clock changes). Loss +/// detection, PTO, and idle-timeout code paths continue to use +/// `std.time.nanoTimestamp()` (REALTIME) — those only compare timestamps to +/// each other within short horizons where the gap matters but the absolute +/// drift does not. +pub fn monoNanos() i64 { + // On Windows there is no POSIX CLOCK_MONOTONIC; fall back to the default + // `nanoTimestamp()` so the pacer still works (without jump immunity on that platform). + if (comptime builtin.os.tag == .windows) { + return @intCast(std.time.nanoTimestamp()); + } + const ts = std.posix.clock_gettime(.MONOTONIC) catch { + return @intCast(std.time.nanoTimestamp()); + }; + return @as(i64, ts.sec) * std.time.ns_per_s + @as(i64, ts.nsec); +} + +test "monoNanos is non-decreasing" { + const a = monoNanos(); + const b = monoNanos(); + try std.testing.expect(b >= a); +} diff --git a/src/quic/congestion.zig b/src/quic/congestion.zig index 6f836ab..8f3bc6f 100644 --- a/src/quic/congestion.zig +++ b/src/quic/congestion.zig @@ -421,6 +421,12 @@ fn icbrt(x: u64) u64 { /// Pacer for spacing out packet sends to avoid bursts. /// /// Uses a token bucket algorithm similar to quic-go's pacer. +/// +/// All timestamp arguments (`now` in `onPacketSent`, `timeUntilSend`, and +/// `replenish`) MUST be on `CLOCK_MONOTONIC` — callers obtain them via +/// `clock.monoNanos()`.
The monotonic clock makes budget replenishment +/// immune to wall-clock jumps (NTP slews, manual time changes). Mixing +/// clock sources across calls would silently corrupt budget math. pub const Pacer = struct { /// Available budget in bytes. budget: u64, @@ -428,7 +434,7 @@ pub const Pacer = struct { /// Max burst size in bytes. max_burst: u64, - /// Last time a packet was sent (nanoseconds). + /// Last time a packet was sent (CLOCK_MONOTONIC nanoseconds). last_sent_time: i64 = 0, /// Bandwidth in bytes per nanosecond, left-shifted by BANDWIDTH_SHIFT for precision. diff --git a/src/quic/connection.zig b/src/quic/connection.zig index 86f6b83..ff162c8 100644 --- a/src/quic/connection.zig +++ b/src/quic/connection.zig @@ -24,6 +24,24 @@ const stateless_reset = @import("stateless_reset.zig"); const ecn = @import("ecn.zig"); const qlog = @import("qlog.zig"); const quic_lb = @import("quic_lb.zig"); +const clock = @import("clock.zig"); + +/// Bisection kill switch for the user-space pacer. +/// When `QUIC_ZIG_NO_PACING=1` (or any non-empty non-"0" value) is set in the +/// environment, `conn.send()` and `nextTimeoutNs()` behave as if the pacer +/// never blocks. `Pacer.onPacketSent` and `setBandwidth` continue to run so +/// bisection can be toggled without polluting CC state. +var pacing_disabled_cache: ?bool = null; + +fn isPacingDisabled() bool { + if (pacing_disabled_cache) |v| return v; + const v = blk: { + const raw = std.posix.getenv("QUIC_ZIG_NO_PACING") orelse break :blk false; + break :blk !(raw.len == 0 or std.mem.eql(u8, raw, "0")); + }; + pacing_disabled_cache = v; + return v; +} pub const State = enum(u8) { first_flight = 0, @@ -2753,6 +2771,9 @@ pub const Connection = struct { if (self.state == .draining or self.state == .terminated) return 0; const now: i64 = @intCast(std.time.nanoTimestamp()); + // Pacer runs on CLOCK_MONOTONIC for NTP-skew resilience; other + // subsystems stay on REALTIME (they only compare deltas). 
+ const now_mono: i64 = clock.monoNanos(); // Closing: retransmit saved close packet on each incoming packet (RFC 9000 §10.2.1) if (self.state == .closing) { @@ -2818,11 +2839,13 @@ pub const Connection = struct { return try self.sendAckOnly(out_buf, now); } - // Check if pacer allows sending - // Exception: PTO probes bypass pacing (RFC 9002 §6.2.4) - // Note: ACK-only path above bypasses pacer per RFC 9002 §7.7 - if (self.pto_probe_pending == 0) { - const pacer_delay = self.pacer.timeUntilSend(now); + // Pacer gate. Returning 0 here is how the event loop breaks out of + // its burst send loop; the next send time is then surfaced via + // `nextTimeoutNs()` so libxev wakes us when the pacer has budget again. + // Exceptions: PTO probes bypass pacing (RFC 9002 §6.2.4); the ACK-only + // path above bypasses it per RFC 9002 §7.7. + if (self.pto_probe_pending == 0 and !isPacingDisabled()) { + const pacer_delay = self.pacer.timeUntilSend(now_mono); if (pacer_delay > 0) { return 0; } @@ -2934,7 +2957,7 @@ pub const Connection = struct { self.pto_probe_pending -|= 1; self.paths[self.active_path_idx].bytes_sent += bytes_written; self.total_packets_sent += 1; - self.pacer.onPacketSent(bytes_written, now); + self.pacer.onPacketSent(bytes_written, now_mono); self.last_packet_sent_time = now; // If more PTO probes are pending, re-queue stream data + crypto data @@ -3770,10 +3793,16 @@ pub const Connection = struct { // Pacer: if the pacer has bandwidth set (active transfer), include its // next-send time so the event loop wakes up promptly to send more data. - if (self.pacer.bandwidth_shifted > 0 and self.state == .connected) { - const now: i64 = @intCast(std.time.nanoTimestamp()); - // Estimate pacer delay without mutating: budget is replenished by elapsed time - const elapsed = now - self.pacer.last_sent_time; + // Skipped when pacing is disabled via the env kill switch. 
+ // + // The pacer stores `last_sent_time` on CLOCK_MONOTONIC; the deadline we + // return must be comparable to the REALTIME-based deadlines collected + // above, so compute the *delay* on the monotonic clock and add it to + // the REALTIME `now`. + if (self.pacer.bandwidth_shifted > 0 and self.state == .connected and !isPacingDisabled()) { + const now_realtime: i64 = @intCast(std.time.nanoTimestamp()); + const now_mono: i64 = clock.monoNanos(); + const elapsed = now_mono - self.pacer.last_sent_time; var budget = self.pacer.budget; if (self.pacer.last_sent_time > 0 and elapsed > 0) { const replenished = (self.pacer.bandwidth_shifted *| @as(u64, @intCast(elapsed))) >> 20; @@ -3782,7 +3811,7 @@ pub const Connection = struct { if (budget < self.pacer.max_datagram_size) { const deficit = self.pacer.max_datagram_size - budget; const delay: i64 = @intCast((deficit << 20) / self.pacer.bandwidth_shifted); - const pacer_deadline = now + delay; + const pacer_deadline = now_realtime + delay; if (earliest == null or pacer_deadline < earliest.?) { earliest = pacer_deadline; } diff --git a/src/quic/ecn_socket.zig b/src/quic/ecn_socket.zig index 1a9d971..da103d8 100644 --- a/src/quic/ecn_socket.zig +++ b/src/quic/ecn_socket.zig @@ -4,6 +4,15 @@ const builtin = @import("builtin"); const is_windows = builtin.os.tag == .windows; +/// Linux sendmmsg batches multiple datagrams into one syscall. +/// Compile-time gate; on other platforms the portable sendmsg loop is used. +const use_sendmmsg = builtin.os.tag == .linux; +const linux = std.os.linux; + +/// Runtime kill switch. Set QUIC_ZIG_NO_SENDMMSG=1 to force the sendmsg loop +/// on Linux (useful for bisecting regressions without rebuilding). +const sendmmsg_env_var = "QUIC_ZIG_NO_SENDMMSG"; + // Platform-specific constants for ECN socket options (IPv4). 
const IPPROTO_IP: u32 = 0; @@ -200,13 +209,27 @@ pub fn mapV4ToV6(storage: *posix.sockaddr.storage) void { /// Batch sender that collects outgoing packets and flushes them together. /// Reduces syscall overhead by batching sendto calls and caching ECN marks. +/// On Linux, flush uses sendmmsg to send many packets per syscall +/// (grouped by ECN mark so the cached IP_TOS stays valid). On other platforms +/// it falls back to a per-packet sendmsg loop. pub const SendBatch = struct { const MAX_BATCH: usize = 64; + /// Warn every N dropped packets so a stuck send path is visible without + /// flooding the log when ENOBUFS briefly spikes. + const DROP_WARN_INTERVAL: u64 = 1024; + sockfd: posix.socket_t, count: usize = 0, current_ecn: u2 = 0, + /// Total packets the kernel refused to accept from this batcher. + /// UDP is lossy and QUIC loss detection recovers; we just surface a metric. + dropped_packets: u64 = 0, + + /// Runtime kill switch — resolved once at init, so flush() never touches env. + use_mmsg: bool = false, + // Per-packet data addrs: [MAX_BATCH]posix.sockaddr.storage = undefined, addr_lens: [MAX_BATCH]posix.socklen_t = undefined, @@ -219,7 +242,10 @@ pub const SendBatch = struct { data_len: usize = 0, pub fn init(sockfd: posix.socket_t) SendBatch { - return .{ .sockfd = sockfd }; + return .{ + .sockfd = sockfd, + .use_mmsg = use_sendmmsg and !envFlagSet(sendmmsg_env_var), + }; } /// Add a packet to the batch. Flushes automatically when full. @@ -238,17 +264,27 @@ pub const SendBatch = struct { self.count += 1; } - /// Send all queued packets via sendmsg (matches quic-go's approach). - /// Uses sendmsg instead of sendto for more reliable delivery on macOS loopback. + /// Send all queued packets. Dispatches to the fastest available path. 
pub fn flush(self: *SendBatch) void { if (self.count == 0) return; + defer { + self.count = 0; + self.data_len = 0; + } - for (0..self.count) |i| { - // Only call setsockopt when ECN mark changes (saves 2 syscalls per packet) - if (self.ecn_marks[i] != self.current_ecn) { - self.current_ecn = self.ecn_marks[i]; - setEcnMark(self.sockfd, self.current_ecn) catch {}; + if (comptime use_sendmmsg) { + if (self.use_mmsg) { + self.flushLinux(); + return; } + } + self.flushPortable(); + } + + /// Per-packet sendmsg loop — used on macOS/Windows and as the kill-switch fallback. + fn flushPortable(self: *SendBatch) void { + for (0..self.count) |i| { + self.applyEcn(self.ecn_marks[i]); const data = self.data_buf[self.offsets[i]..][0..self.lengths[i]]; var iov = [1]posix.iovec_const{.{ .base = data.ptr, @@ -263,14 +299,99 @@ pub const SendBatch = struct { .controllen = 0, .flags = 0, }; - _ = std.c.sendmsg(self.sockfd, &msg, 0); + if (std.c.sendmsg(self.sockfd, &msg, 0) < 0) { + self.recordDrop(1); + } } + } - self.count = 0; - self.data_len = 0; + /// Linux sendmmsg path: walks runs of same ECN mark, issues one syscall per run. + fn flushLinux(self: *SendBatch) void { + if (comptime !use_sendmmsg) unreachable; + + // Scratch arrays live on the stack — sized for MAX_BATCH (~5 KB total). + var iovs: [MAX_BATCH]posix.iovec_const = undefined; + var msgvec: [MAX_BATCH]linux.mmsghdr_const = undefined; + + var start: usize = 0; + while (start < self.count) { + // Extend the run while the ECN mark matches the one at `start`. + const run_ecn = self.ecn_marks[start]; + var end = start + 1; + while (end < self.count and self.ecn_marks[end] == run_ecn) : (end += 1) {} + + self.applyEcn(run_ecn); + + // One mmsghdr per packet within the run. 
+ for (start..end) |i| { + iovs[i] = .{ + .base = self.data_buf[self.offsets[i]..].ptr, + .len = self.lengths[i], + }; + msgvec[i] = .{ + .hdr = .{ + .name = @ptrCast(&self.addrs[i]), + .namelen = self.addr_lens[i], + .iov = @ptrCast(&iovs[i]), + .iovlen = 1, + .control = null, + .controllen = 0, + .flags = 0, + }, + .len = 0, + }; + } + + const run_len: u32 = @intCast(end - start); + const sent = sendmmsgRun(self.sockfd, msgvec[start..end].ptr, run_len); + if (sent < run_len) { + self.recordDrop(run_len - sent); + } + start = end; + } + } + + /// Issue one sendmmsg syscall for `n` packets starting at `msgvec`. + /// Retries once on EINTR when no packets have been sent yet. + /// Returns the number of packets the kernel accepted. + fn sendmmsgRun(sockfd: posix.socket_t, msgvec: [*]linux.mmsghdr_const, n: u32) u32 { + var attempts: u2 = 0; + while (true) : (attempts += 1) { + const rc = linux.sendmmsg(sockfd, msgvec, n, 0); + switch (linux.E.init(rc)) { + .SUCCESS => return @intCast(rc), + .INTR => if (attempts == 0) continue else return 0, + else => return 0, + } + } + } + + /// Update the socket ECN mark via setsockopt, skipping the syscall when + /// the mark hasn't changed since the last send. + fn applyEcn(self: *SendBatch, ecn: u2) void { + if (ecn == self.current_ecn) return; + self.current_ecn = ecn; + setEcnMark(self.sockfd, ecn) catch {}; + } + + fn recordDrop(self: *SendBatch, n: u32) void { + const before = self.dropped_packets; + self.dropped_packets += n; + // Log only when we cross a DROP_WARN_INTERVAL boundary. + const crossed = (before / DROP_WARN_INTERVAL) != (self.dropped_packets / DROP_WARN_INTERVAL); + if (crossed) { + std.log.warn("ecn_socket: {d} outgoing UDP packets dropped so far", .{self.dropped_packets}); + } } }; +/// Treats an env var as a boolean flag: unset, empty, or "0" → false; anything else → true. 
+fn envFlagSet(name: [:0]const u8) bool { + if (comptime is_windows) return false; + const value = std.posix.getenv(name) orelse return false; + return !(value.len == 0 or std.mem.eql(u8, value, "0")); +} + /// Send a single packet directly from the caller's buffer (zero-copy send path). /// Avoids the batch memcpy overhead for single-packet sends — the common case /// for latency-sensitive echo/datagram workloads. @@ -321,6 +442,51 @@ test "setEcnMark on a real socket" { try setEcnMark(sockfd, 0b00); } +test "SendBatch delivers mixed-ECN packets in order" { + if (comptime is_windows) return error.SkipZigTest; + + const rx = try posix.socket(posix.AF.INET, posix.SOCK.DGRAM | posix.SOCK.NONBLOCK, 0); + defer posix.close(rx); + const tx = try posix.socket(posix.AF.INET, posix.SOCK.DGRAM, 0); + defer posix.close(tx); + + const bind_addr = try std.net.Address.parseIp4("127.0.0.1", 0); + try posix.bind(rx, &bind_addr.any, bind_addr.getOsSockLen()); + try enableEcnRecv(rx); + + var peer: posix.sockaddr.storage = std.mem.zeroes(posix.sockaddr.storage); + var peer_len: posix.socklen_t = @sizeOf(posix.sockaddr.storage); + try posix.getsockname(rx, @ptrCast(&peer), &peer_len); + + var batch = SendBatch.init(tx); + // Alternate ECN marks to exercise the run-segmentation logic. + const payloads = [_][]const u8{ "aa", "bb", "cc", "dd", "ee" }; + const marks = [_]u2{ 0, 0b10, 0b10, 0, 0b01 }; + for (payloads, marks) |p, m| { + batch.add(p, @ptrCast(&peer), peer_len, m); + } + batch.flush(); + try std.testing.expectEqual(@as(u64, 0), batch.dropped_packets); + + // Drain the receiver — order should match the send order on loopback. + var buf: [64]u8 = undefined; + // Give the kernel a moment to queue everything (loopback is fast but not sync). 
+ var received: usize = 0; + const deadline = std.time.milliTimestamp() + 200; + while (received < payloads.len and std.time.milliTimestamp() < deadline) { + const r = recvmsgEcn(rx, &buf) catch |err| switch (err) { + error.WouldBlock => { + std.Thread.sleep(1 * std.time.ns_per_ms); + continue; + }, + else => return err, + }; + try std.testing.expectEqualSlices(u8, payloads[received], buf[0..r.bytes_read]); + received += 1; + } + try std.testing.expectEqual(payloads.len, received); +} + test "recvmsgEcn returns WouldBlock on empty socket" { if (comptime is_windows) return error.SkipZigTest; const sockfd = try posix.socket(posix.AF.INET, posix.SOCK.DGRAM | posix.SOCK.NONBLOCK, 0);