From ba6be15059edebe0fc46b93e2df24eaf258f6dc3 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 20 Mar 2026 14:32:17 +0100 Subject: [PATCH 01/13] feat(otel-thread-ctx): add TLS C shim C and build Declares `custom_labels_current_set_v2` in a C file so the linker emits a proper TLSDESC dynamic symbol entry readable by out-of-process tools (e.g. eBPF profiler). Rust's `#[thread_local]` does not produce a TLSDESC dialect, so a C shim is required. Co-Authored-By: Claude Sonnet 4.6 --- Cargo.lock | 1 + libdd-library-config/Cargo.toml | 3 +++ libdd-library-config/build.rs | 11 +++++++++ libdd-library-config/src/lib.rs | 1 + libdd-library-config/src/otel_thread_ctx.rs | 25 +++++++++++++++++++++ libdd-library-config/src/tls_shim.c | 20 +++++++++++++++++ 6 files changed, 61 insertions(+) create mode 100644 libdd-library-config/build.rs create mode 100644 libdd-library-config/src/otel_thread_ctx.rs create mode 100644 libdd-library-config/src/tls_shim.c diff --git a/Cargo.lock b/Cargo.lock index 1961545419..b29f9599ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3039,6 +3039,7 @@ name = "libdd-library-config" version = "1.1.0" dependencies = [ "anyhow", + "cc", "libdd-trace-protobuf", "memfd", "prost", diff --git a/libdd-library-config/Cargo.toml b/libdd-library-config/Cargo.toml index 8c2f2e35a8..ae287abf22 100644 --- a/libdd-library-config/Cargo.toml +++ b/libdd-library-config/Cargo.toml @@ -26,6 +26,9 @@ rmp-serde = "1.3.0" libdd-trace-protobuf = { version = "3.0.0", path = "../libdd-trace-protobuf" } +[build-dependencies] +cc = "1.1.31" + [dev-dependencies] tempfile = { version = "3.3" } serial_test = "3.2" diff --git a/libdd-library-config/build.rs b/libdd-library-config/build.rs new file mode 100644 index 0000000000..6e3701205f --- /dev/null +++ b/libdd-library-config/build.rs @@ -0,0 +1,11 @@ +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache +// License Version 2.0. 
This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. + +fn main() { + // Only compile the TLS shim on Linux; the thread-level context feature is Linux-only. + #[cfg(target_os = "linux")] + { + cc::Build::new().file("src/tls_shim.c").compile("tls_shim"); + println!("cargo:rerun-if-changed=src/tls_shim.c"); + } +} diff --git a/libdd-library-config/src/lib.rs b/libdd-library-config/src/lib.rs index f243678c09..7262511250 100644 --- a/libdd-library-config/src/lib.rs +++ b/libdd-library-config/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 pub mod otel_process_ctx; +pub mod otel_thread_ctx; pub mod tracer_metadata; use std::borrow::Cow; diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs new file mode 100644 index 0000000000..03b3a16db0 --- /dev/null +++ b/libdd-library-config/src/otel_thread_ctx.rs @@ -0,0 +1,25 @@ +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache +// License Version 2.0. This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. + +//! # Thread-level context sharing +//! +//! This module implements the publisher side of the Thread Context OTEP (PR #4947). +//! +//! Since `rustc` doesn't currently support the TLSDESC dialect, we use a C shim to set and get the +//! thread-local storage used for the context. + +#[cfg(target_os = "linux")] +pub mod linux { + use std::ffi::c_void; + + extern "C" { + /// Returns the current thread's value of `custom_labels_current_set_v2`. + fn libdd_get_custom_labels_current_set_v2() -> *mut *mut c_void; + } + + /// Read the TLS pointer for the current thread. 
+ #[allow(clippy::missing_safety_doc)] + pub(crate) unsafe fn get_tls_ptr() -> *mut *mut c_void { + libdd_get_custom_labels_current_set_v2() + } +} diff --git a/libdd-library-config/src/tls_shim.c b/libdd-library-config/src/tls_shim.c new file mode 100644 index 0000000000..4a6d612e03 --- /dev/null +++ b/libdd-library-config/src/tls_shim.c @@ -0,0 +1,20 @@ +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. + +// Declares the thread-local pointer that external readers (e.g. the eBPF +// profiler) discover via the dynsym table. The Rust layer accesses this +// pointer in otel_thread_ctx.rs. +// +// The variable is be declared in C in order to use the TLSDESC dialect for +// thread-local storage, which is required by the OTel thread-level context +// sharing spec. Unfortunately, it's not possible to have Rust use this dialect +// as of today. +#include + +__attribute__((visibility("default"))) +__thread void *custom_labels_current_set_v2 = NULL; + +// Return the resolved address of the thread-local variable. 
+void **libdd_get_custom_labels_current_set_v2(void) { + return &custom_labels_current_set_v2; +} From a664e173147b81e0db59572d35c7b64bac43ee1e Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 24 Mar 2026 00:16:29 +0100 Subject: [PATCH 02/13] feat: add thread context record type and wrappers --- libdd-library-config/build.rs | 11 +- libdd-library-config/src/otel_process_ctx.rs | 5 +- libdd-library-config/src/otel_thread_ctx.rs | 580 ++++++++++++++++++- libdd-library-config/src/tls_shim.c | 6 +- 4 files changed, 589 insertions(+), 13 deletions(-) diff --git a/libdd-library-config/build.rs b/libdd-library-config/build.rs index 6e3701205f..4fa8078d52 100644 --- a/libdd-library-config/build.rs +++ b/libdd-library-config/build.rs @@ -5,7 +5,16 @@ fn main() { // Only compile the TLS shim on Linux; the thread-level context feature is Linux-only. #[cfg(target_os = "linux")] { - cc::Build::new().file("src/tls_shim.c").compile("tls_shim"); + let mut build = cc::Build::new(); + + // - On aarch64, TLSDESC is already the only dynamic TLS model so no flag is needed. + // - On x86-64, we use `-mtls-dialect=gnu2` (supported since GCC 4.4 and Clang 19+) to force + // the use of TLSDESC as mandated by the spec. If it's not supported, this build will + // fail. + #[cfg(target_arch = "x86_64")] + build.flag("-mtls-dialect=gnu2"); + + build.file("src/tls_shim.c").compile("tls_shim"); println!("cargo:rerun-if-changed=src/tls_shim.c"); } } diff --git a/libdd-library-config/src/otel_process_ctx.rs b/libdd-library-config/src/otel_process_ctx.rs index a85221e357..b3efad4aab 100644 --- a/libdd-library-config/src/otel_process_ctx.rs +++ b/libdd-library-config/src/otel_process_ctx.rs @@ -1,4 +1,4 @@ -// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 //! Implementation of the publisher part of the [OTEL process @@ -8,7 +8,8 @@ //! //! 
Process context sharing implies concurrently writing to a memory area that another process //! might be actively reading. However, reading isn't done as direct memory accesses but goes -//! through the OS, so the Rust definition of race conditions doesn't really apply. +//! through the OS, so the Rust definition of race conditions doesn't really apply. We also use +//! atomics and fences, see MappingHeader's documentation. #[cfg(target_os = "linux")] #[cfg(target_has_atomic = "64")] diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs index 03b3a16db0..475e46c086 100644 --- a/libdd-library-config/src/otel_thread_ctx.rs +++ b/libdd-library-config/src/otel_thread_ctx.rs @@ -1,5 +1,5 @@ -// Unless explicitly stated otherwise all files in this repository are licensed under the Apache -// License Version 2.0. This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 //! # Thread-level context sharing //! @@ -7,19 +7,585 @@ //! //! Since `rustc` doesn't currently support the TLSDESC dialect, we use a C shim to set and get the //! thread-local storage used for the context. +//! +//! ## Synchronization +//! +//! Readers are constrained to the same thread as the writer and operate like async-signal +//! handlers: the writer thread is always stopped while a reader runs. There is thus no +//! cross-thread synchronization concerns. The only hazard is compiler reordering or not committing +//! writes to memory, which is handled using volatile writes. #[cfg(target_os = "linux")] pub mod linux { - use std::ffi::c_void; + use std::{ + ffi::c_void, + mem, + ops::{Deref, DerefMut}, + ptr::{self, NonNull}, + }; extern "C" { - /// Returns the current thread's value of `custom_labels_current_set_v2`. 
+ /// Return the address of the current thread's `custom_labels_current_set_v2` local. fn libdd_get_custom_labels_current_set_v2() -> *mut *mut c_void; } - /// Read the TLS pointer for the current thread. + /// Return a pointer to the TLS slot holding the context. The address calculation requires a + /// call to a C shim in order to use the TLSDESC dialect from Rust. Note that the returned + /// address is stable (per thread), so the result can be reused . + /// + /// Note that the address is read by an external process or a signal handler, so it should be + /// written to using [std::ptr::write_volatile]. #[allow(clippy::missing_safety_doc)] - pub(crate) unsafe fn get_tls_ptr() -> *mut *mut c_void { - libdd_get_custom_labels_current_set_v2() + fn get_tls_ptr() -> *mut *mut ThreadContextRecord { + // Safety: this is just an FFI call, but there's no particular pre-condition to uphold: the + // TLS slot should always be accessible. + unsafe { libdd_get_custom_labels_current_set_v2().cast() } + } + + /// Maximum size in bytes of the `attrs_data` field. + /// + /// Chosen so that the total record size (`28 + MAX_ATTRS_DATA_SIZE`) stays within the 640-byte + /// limit recommended by the spec (the eBPF profiler read limit). + pub const MAX_ATTRS_DATA_SIZE: usize = 612; + + /// In-memory layout of a thread-level context. The structure MUST match exactly the + /// specification. + /// + /// # Synchronization + /// + /// Readers are async-signal handlers on the same thread; the writer is always stopped while a + /// reader runs. `valid` is written with `ptr::write_volatile` so the compiler cannot reorder + /// its store past the surrounding field writes: + /// + /// - The writer sets `valid = 1` *after* all other fields are populated (publish). + /// - The writer sets `valid = 0` *before* modifying fields in-place (modify). + #[repr(C)] + pub struct ThreadContextRecord { + /// 128-bit trace identifier; all-zeroes means "no trace". 
+ pub trace_id: [u8; 16], + /// 64-bit span identifier. + pub span_id: [u8; 8], + /// Whether the record is ready/consistent. Written with `ptr::write_volatile`. + pub valid: u8, + pub _reserved: u8, + /// Number of populated bytes in `attrs_data`. + pub attrs_data_size: u16, + /// Packed variable-length key-value records. + /// + /// It's a contiguous list of blocks with layout: + /// + /// 1. 1-byte `key_index` + /// 2. 1-byte `val_len` + /// 3. `val_len` bytes of a string value. + /// + /// # Size + /// + /// Currently, we always allocate the max recommended size. This potentially wastes a few + /// hundred bytes per thread, but it guarantees that we can modify the context in-place + /// without (re)allocation in the hot path. Having a hybrid scheme (starting smaller and + /// resizing up a few times) is not out of the question. + pub attrs_data: [u8; MAX_ATTRS_DATA_SIZE], + } + + impl ThreadContextRecord { + /// Build a record with the given trace id, span id and attributes. + pub fn new(trace_id: [u8; 16], span_id: [u8; 8], attrs: &[(u8, &str)]) -> Self { + let mut record = Self { + trace_id, + span_id, + ..Default::default() + }; + record.set_attrs(attrs); + record + } + + /// Write `value` to `self.valid` using a volatile store, preventing the compiler from + /// eliminating or reordering or this write past other volatile writes to the record. + fn write_valid_volatile(&mut self, value: u8) { + // Safety: we have an exclusive borrow of `self`, so `valid` is live and valid for + // writes. + unsafe { ptr::write_volatile(&mut self.valid as *mut u8, value) } + } + + /// Write `trace_id` to `self.trace_id` using a volatile store, preventing the compiler + /// from eliminating or reordering this write past other volatile writes to the record. + pub fn write_trace_id_volatile(&mut self, trace_id: [u8; 16]) { + // Safety: we have an exclusive borrow of `self`, so `trace_id` is live and valid for + // writes. 
+ unsafe { ptr::write_volatile(&mut self.trace_id, trace_id) } + } + + /// Write `span_id` to `self.span_id` using a volatile store, preventing the compiler from + /// eliminating or reordering this write past other volatile writes to the record. + pub fn write_span_id_volatile(&mut self, span_id: [u8; 8]) { + // Safety: we have an exclusive borrow of `self`, so `span_id` is live and valid for + // writes. + unsafe { ptr::write_volatile(&mut self.span_id, span_id) } + } + + /// Write a single byte of the encoded attributes using a volatile store, preventing the + /// compiler from eliminating or reordering or this write past other volatile writes to the + /// record. + /// + /// # Panic + /// + /// Panics if the offset is out of bound. + fn write_attrs_volatile(&mut self, offset: usize, value: u8) { + // Safety: we have an exclusive borrow of `self`, so `attrs_data` is live and valid for + // writes. + unsafe { ptr::write_volatile(&mut self.attrs_data[offset] as *mut u8, value) } + } + + /// Copy `src` into the encoded attributes starting at `offset` using volatile stores, + /// preventing the compiler from eliminating or reordering or this write past other + /// volatile writes to the record. + /// + /// # Panic + /// + /// Panics if `offset + src.len() > self.attrs_data.len()`. + fn copy_attrs_volatile(&mut self, src: &[u8], offset: usize) { + // Safety: we have an exclusive borrow of `self`, so `attrs_data` is live and valid for + // writes. + for (idx, byte) in src.iter().enumerate() { + unsafe { ptr::write_volatile(&mut self.attrs_data[offset + idx] as *mut u8, *byte) } + } + } + + /// Encode `attributes` into `record.attrs_data` as packed key-value records. Existing data + /// are overridden (and if there were more entires than `attributes.len()`, they aren't + /// zeroed, but they will be ignored by readers). + /// + /// # Arguments + /// + /// Each input entry is a pair of a 1-byte `key_index` and a string value. 
+ /// + /// # Size limits + /// + /// Any value over 255 bytes will be capped at this size. If the total size of the encoded + /// attributes is over [MAX_ATTRS_DATA_SIZE], extra attributes are ignored. We do this + /// instead of raising an error because we encode the attributes on-the-fly. Proper error + /// recovery would require us to be able to rollback to the previous attributes which would + /// hurt the happy path, or leave the record in a inconsistent state. Another possibility + /// would be to error out and reset the record in that situation. + /// + /// # Memory + /// + /// Disclaimer: This note is for internal usage only. As a consumer of this library, please + /// use [ThreadContext::modify] to mutate the currently attached record. + /// + /// Writes are volatile, making `set_attrs` suitable for mutating an attached record + /// in-place (TODO: does this impact the performance of writing to a non-attached record? + /// Should we have two specialized version `set_attrs` and `set_attrs_volatile`?). + pub fn set_attrs(&mut self, attributes: &[(u8, &str)]) { + let mut offset = 0; + + for &(key_index, val) in attributes { + let val_bytes = val.as_bytes(); + let val_len = val_bytes.len().min(255); + let entry_size = 2 + val_len; + + if offset + entry_size > MAX_ATTRS_DATA_SIZE { + break; + } + + self.write_attrs_volatile(offset, key_index); + // `val_len <= 255` thanks to the `min()` + self.write_attrs_volatile(offset + 1, val_len as u8); + self.copy_attrs_volatile(&val_bytes[..val_len], offset + 2); + offset += entry_size; + } + + // `offset < MAX_ATTRS_DATA_SIZE`, which guarantees it fits in a `u16` + self.attrs_data_size = offset as u16; + } + } + + impl Default for ThreadContextRecord { + fn default() -> Self { + Self { + trace_id: [0u8; 16], + span_id: [0u8; 8], + valid: 0, + _reserved: 0, + attrs_data_size: 0, + attrs_data: [0u8; MAX_ATTRS_DATA_SIZE], + } + } + } + + /// An owned (and non-moving) thread context record allocation. 
+ /// + /// We don't use `Box` under the hood because it excludes aliasing, while we share the context + /// to readers through thread-level context and through the FFI. But it is a boxed + /// `ThreadContextRecord` for all intent of purpose. + pub struct ThreadContext(NonNull); + + impl ThreadContext { + /// Create a new thread context with the given trace/span IDs and encoded attributes. + pub fn new(trace_id: [u8; 16], span_id: [u8; 8], attrs: &[(u8, &str)]) -> Self { + Self::from(ThreadContextRecord::new(trace_id, span_id, attrs)) + } + + /// Turn this thread context into a raw pointer to the underlying [ThreadContextRecord]. + /// The pointer must be reconstructed through [`Self::from_raw`] in order to be properly + /// dropped, or the record will leak. + pub fn into_raw(self) -> *mut ThreadContextRecord { + use mem::ManuallyDrop; + + let mdrop = ManuallyDrop::new(self); + mdrop.0.as_ptr() + } + + /// Reconstruct a [ThreadContextRecord] from a raw pointer to `ThreadContextRecord` that is + /// either `null` or comes from [`Self::into_raw`]. Return `None` if `ptr` is null. + /// + /// # Safety + /// + /// - `ptr` must be `null` or come from a prior call to [`Self::into_raw`]. + /// - if `ptr` is aliased, accesses to through aliases must not be interleaved with method + /// calls on the returned [ThreadContextRecord]. More precisely, mutable references might + /// be reconstructed during those calls, so any constraint from either Stacked Borrows, + /// Tree Borrows or whatever is the current aliasing model implemented in Miri applies. 
+ pub unsafe fn from_raw(ptr: *mut ThreadContextRecord) -> Option { + NonNull::new(ptr).map(Self) + } + } + + impl Default for ThreadContext { + fn default() -> Self { + Self::from(ThreadContextRecord::default()) + } + } + + impl From for ThreadContext { + fn from(record: ThreadContextRecord) -> Self { + // Safety: `Box::into_raw` returns a non-null pointer + unsafe { Self(NonNull::new_unchecked(Box::into_raw(Box::new(record)))) } + } + } + + impl ThreadContext { + /// Swap the current context with a pointer value. Return the previously attached context, + /// if any. + fn swap( + slot: *mut *mut ThreadContextRecord, + tgt: *mut ThreadContextRecord, + ) -> Option { + unsafe { + // Safety: TLS slot is always live and valid for reads and writes. + let prev = ThreadContext::from_raw(*slot); + ptr::write_volatile(slot, tgt); + // Safety: a non-null value in the slot came from a prior `into_raw` call. + prev + } + } + + /// Publish a new (or previously detached) thread context record by writing its pointer + /// into the TLS slot. Sets `valid = 1` before publishing. Returns the previously attached + /// context, if any. + pub fn attach(mut self) -> Option { + let slot = get_tls_ptr(); + // Set `valid = 1` written before the TLS pointer is updated, so any reader that + // observes the new pointer also observes `valid = 1`. + self.write_valid_volatile(1); + Self::swap(slot, self.into_raw()) + } + + /// Modify the currently attached record in-place. Sets `valid = 0` before the update and + /// `valid = 1` after, so a reader that fires between the two writes sees an inconsistent + /// record and skips it. + /// + /// If there's currently no attached context, `modify` will create one, and is in this case + /// equivalent to `ThreadContext::new(trace_id, span_id, attrs).attach()`. + pub fn update(trace_id: [u8; 16], span_id: [u8; 8], attrs: &[(u8, &str)]) { + let slot = get_tls_ptr(); + + // Safety: the tls slot is always valid for reads and writes. 
+            if let Some(current) = unsafe { (*slot).as_mut() } {
+                current.write_valid_volatile(0);
+                current.write_trace_id_volatile(trace_id);
+                current.write_span_id_volatile(span_id);
+                current.set_attrs(attrs);
+                current.write_valid_volatile(1);
+            } else {
+                let mut ctx = ThreadContext::new(trace_id, span_id, attrs);
+                ctx.write_valid_volatile(1);
+                let _ = Self::swap(slot, ctx.into_raw());
+            }
+        }
+
+        /// Detach the current record from the TLS slot. Writes null to the slot, sets `valid = 0`
+        /// on the detached record, and returns it.
+        pub fn detach() -> Option<Self> {
+            Self::swap(get_tls_ptr(), ptr::null_mut()).map(|mut ctx| {
+                ctx.write_valid_volatile(0);
+                ctx
+            })
+        }
+    }
+
+    impl Drop for ThreadContext {
+        fn drop(&mut self) {
+            // Safety: `self.0` was obtained from a `Box::new`, and `ThreadContext` represents
+            // ownership of the underlying memory.
+            unsafe {
+                let _ = Box::from_raw(self.0.as_ptr());
+            }
+        }
+    }
+
+    impl Deref for ThreadContext {
+        type Target = ThreadContextRecord;
+
+        fn deref(&self) -> &Self::Target {
+            // Safety: `ThreadContext` represents ownership of a valid, alive
+            // `ThreadContextRecord`.
+            unsafe { self.0.as_ref() }
+        }
+    }
+
+    impl DerefMut for ThreadContext {
+        fn deref_mut(&mut self) -> &mut Self::Target {
+            // Safety: `ThreadContext` represents ownership of a valid, alive
+            // `ThreadContextRecord`.
+            unsafe { self.0.as_mut() }
+        }
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::{ThreadContext, ThreadContextRecord};
+
+        /// Read the TLS pointer for the current thread (the value stored in the TLS slot, not the
+        /// address of the slot itself).
+ fn read_tls_context_ptr() -> *const ThreadContextRecord { + unsafe { *super::get_tls_ptr() as *const ThreadContextRecord } + } + + #[test] + fn tls_lifecycle_basic() { + let trace_id = [1u8; 16]; + let span_id = [2u8; 8]; + + assert!( + read_tls_context_ptr().is_null(), + "TLS must be null initially" + ); + ThreadContext::new(trace_id, span_id, &[]).attach(); + assert!( + !read_tls_context_ptr().is_null(), + "TLS must not be null after attach" + ); + + let prev = ThreadContext::detach().unwrap(); + assert!( + prev.trace_id == trace_id, + "got back a different trace_id than attached" + ); + assert!( + prev.span_id == span_id, + "got back a different span_id than attached" + ); + + assert!( + read_tls_context_ptr().is_null(), + "TLS must be null after detach" + ); + } + + #[test] + fn raw_tls_pointer_read() { + let trace_id = [1u8; 16]; + let span_id = [2u8; 8]; + + ThreadContext::new(trace_id, span_id, &[]).attach(); + + let ptr = read_tls_context_ptr(); + assert!(!ptr.is_null(), "TLS must be non-null after attach"); + + // Safety: context is still live. 
+ let record = unsafe { &*ptr }; + assert_eq!(record.trace_id, trace_id); + assert_eq!(record.span_id, span_id); + assert_eq!(record.valid, 1); + assert_eq!(record.attrs_data_size, 0); + + let _ = ThreadContext::detach(); + } + + #[test] + fn attribute_encoding_basic() { + let attrs: &[(u8, &str)] = &[(0, "GET"), (1, "/api/v1")]; + ThreadContext::new([0u8; 16], [0u8; 8], attrs).attach(); + + let ptr = read_tls_context_ptr(); + assert!(!ptr.is_null()); + let record = unsafe { &*ptr }; + let expected_size: u16 = (2 + 3 + 2 + 7) as u16; + assert_eq!(record.attrs_data_size, expected_size); + assert_eq!(record.attrs_data[0], 0); + assert_eq!(record.attrs_data[1], 3); + assert_eq!(&record.attrs_data[2..5], b"GET"); + assert_eq!(record.attrs_data[5], 1); + assert_eq!(record.attrs_data[6], 7); + assert_eq!(&record.attrs_data[7..14], b"/api/v1"); + + let _ = ThreadContext::detach(); + } + + #[test] + fn attribute_truncation_on_overflow() { + // Build attributes whose combined encoded size exceeds MAX_ATTRS_DATA_SIZE. + // Each max entry: 1 (key) + 1 (len) + 255 (val) = 257 bytes. + // Two such entries: 514 bytes. A third entry of 100 chars would need 102 bytes, + // bringing the total to 616 > 612, so the third entry must be dropped. + let val_a = "a".repeat(255); // 257 bytes encoded + let val_b = "b".repeat(255); // 257 bytes encoded → 514 total + let val_c = "c".repeat(100); // 102 bytes encoded → 616 total: must be dropped + + let attrs: &[(u8, &str)] = &[ + (0, val_a.as_str()), + (1, val_b.as_str()), + (2, val_c.as_str()), + ]; + + ThreadContext::new([0u8; 16], [0u8; 8], attrs).attach(); + + let ptr = read_tls_context_ptr(); + assert!(!ptr.is_null()); + let record = unsafe { &*ptr }; + // Only the first two entries fit (514 bytes). 
+ assert_eq!(record.attrs_data_size, 514); + assert_eq!(record.attrs_data[0], 0); + assert_eq!(record.attrs_data[1], 255); + assert_eq!(record.attrs_data[257], 1); + assert_eq!(record.attrs_data[258], 255); + + let _ = ThreadContext::detach(); + } + + #[test] + fn update_record_in_place() { + let trace_id_1 = [1u8; 16]; + let span_id_1 = [1u8; 8]; + let trace_id_2 = [2u8; 16]; + let span_id_2 = [2u8; 8]; + + // Updating before any context is attached should be equivalent to `attach()` + ThreadContext::update(trace_id_1, span_id_1, &[(0, "v1")]); + + let ptr_before = read_tls_context_ptr(); + assert!(!ptr_before.is_null()); + let record = unsafe { &*ptr_before }; + assert_eq!(record.trace_id, trace_id_1); + assert_eq!(record.span_id, span_id_1); + assert_eq!(record.valid, 1); + assert_eq!(record.attrs_data[0], 0); + assert_eq!(record.attrs_data[1], 2); + assert_eq!(&record.attrs_data[2..4], b"v1"); + + ThreadContext::update(trace_id_2, span_id_2, &[(0, "v2")]); + + let ptr_after = read_tls_context_ptr(); + assert_eq!( + ptr_before, ptr_after, + "modify must not change the TLS pointer" + ); + + let record = unsafe { &*ptr_after }; + assert_eq!(record.trace_id, trace_id_2); + assert_eq!(record.span_id, span_id_2); + assert_eq!(record.valid, 1); + assert_eq!(record.attrs_data[0], 0); + assert_eq!(record.attrs_data[1], 2); + assert_eq!(&record.attrs_data[2..4], b"v2"); + + let _ = ThreadContext::detach(); + assert!(read_tls_context_ptr().is_null()); + } + + #[test] + fn explicit_detach_nulls_tls() { + ThreadContext::new([3u8; 16], [3u8; 8], &[]).attach(); + assert!(!read_tls_context_ptr().is_null()); + + let _ = ThreadContext::detach(); + assert!(read_tls_context_ptr().is_null()); + + // Calling detach again is safe (no-op, returns None). 
+ let _ = ThreadContext::detach(); + assert!(read_tls_context_ptr().is_null()); + } + + #[test] + fn long_value_capped_at_255_bytes() { + let long_val = "a".repeat(300); + ThreadContext::new([0u8; 16], [0u8; 8], &[(0, long_val.as_str())]).attach(); + + let ptr = read_tls_context_ptr(); + assert!(!ptr.is_null()); + let record = unsafe { &*ptr }; + let val_len = record.attrs_data[1] as usize; + assert_eq!(val_len, 255, "value must be capped at 255 bytes"); + assert_eq!(record.attrs_data_size as usize, 2 + 255); + + let _ = ThreadContext::detach(); + } + + // Make sure the C shim is indeed providing a thread-local address. + #[test] + fn tls_slots_are_per_thread() { + use std::sync::{Arc, Barrier}; + + let barrier = Arc::new(Barrier::new(2)); + let b = barrier.clone(); + + let spawned_trace_id = [0xABu8; 16]; + let spawned_span_id = [0xCDu8; 8]; + let main_trace_id = [0x11u8; 16]; + let main_span_id = [0x22u8; 8]; + + let handle = std::thread::spawn(move || { + ThreadContext::new(spawned_trace_id, spawned_span_id, &[]).attach(); + + // Let the main thread attach its own record and verify its slot. + b.wait(); + // Wait for the main thread to finish observing before we verify ours. + b.wait(); + + // The main thread's attach must not have touched this slot. + let ptr = read_tls_context_ptr(); + assert!(!ptr.is_null(), "spawned thread TLS must still be set"); + let record = unsafe { &*ptr }; + assert_eq!(record.trace_id, spawned_trace_id); + assert_eq!(record.span_id, spawned_span_id); + + let _ = ThreadContext::detach(); + assert!(read_tls_context_ptr().is_null()); + }); + + // Wait for the spawned thread to attach its record, then attach our own. 
+ barrier.wait(); + + assert!( + read_tls_context_ptr().is_null(), + "main thread should see a null pointer and not another thread's context" + ); + + ThreadContext::new(main_trace_id, main_span_id, &[]).attach(); + + let ptr = read_tls_context_ptr(); + assert!(!ptr.is_null(), "main thread TLS must be set"); + let record = unsafe { &*ptr }; + assert_eq!(record.trace_id, main_trace_id); + assert_eq!(record.span_id, main_span_id); + + barrier.wait(); + + let _ = ThreadContext::detach(); + assert!(read_tls_context_ptr().is_null()); + + handle.join().unwrap(); + } } } diff --git a/libdd-library-config/src/tls_shim.c b/libdd-library-config/src/tls_shim.c index 4a6d612e03..4b2abd3502 100644 --- a/libdd-library-config/src/tls_shim.c +++ b/libdd-library-config/src/tls_shim.c @@ -1,11 +1,11 @@ -// Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 // Declares the thread-local pointer that external readers (e.g. the eBPF // profiler) discover via the dynsym table. The Rust layer accesses this // pointer in otel_thread_ctx.rs. // -// The variable is be declared in C in order to use the TLSDESC dialect for +// The variable is declared in C in order to use the TLSDESC dialect for // thread-local storage, which is required by the OTel thread-level context // sharing spec. Unfortunately, it's not possible to have Rust use this dialect // as of today. 
From 577aa448163cb82b9c6e45734ab2f345fb66e1bd Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Wed, 25 Mar 2026 14:40:19 +0100
Subject: [PATCH 03/13] chore: exclude test with threads from miri

---
 libdd-library-config/src/otel_thread_ctx.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs
index 475e46c086..e018ab2d9e 100644
--- a/libdd-library-config/src/otel_thread_ctx.rs
+++ b/libdd-library-config/src/otel_thread_ctx.rs
@@ -534,6 +534,7 @@ pub mod linux {
 
         // Make sure the C shim is indeed providing a thread-local address.
         #[test]
+        #[cfg_attr(miri, ignore)]
        fn tls_slots_are_per_thread() {
             use std::sync::{Arc, Barrier};
 

From 95343539061333ef757af959cee29b8c27fc05f3 Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Wed, 25 Mar 2026 15:05:24 +0100
Subject: [PATCH 04/13] chore: exclude otel thread ctx tests from miri

---
 libdd-library-config/src/otel_thread_ctx.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs
index e018ab2d9e..404d5dad2a 100644
--- a/libdd-library-config/src/otel_thread_ctx.rs
+++ b/libdd-library-config/src/otel_thread_ctx.rs
@@ -353,6 +353,8 @@ pub mod linux {
     }
 
     #[cfg(test)]
+    // Accessing the TLS through C isn't supported in Miri
+    #[cfg(not(miri))]
     mod tests {
         use super::{ThreadContext, ThreadContextRecord};
 
@@ -534,7 +536,6 @@ pub mod linux {
 
         // Make sure the C shim is indeed providing a thread-local address.
#[test] - #[cfg_attr(miri, ignore)] fn tls_slots_are_per_thread() { use std::sync::{Arc, Barrier}; From 3ed562ab38ed990c8b4dc1bff7fdbddeaddea6d4 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 25 Mar 2026 17:09:00 +0100 Subject: [PATCH 05/13] chore: improve comment Apply suggestion from @ivoanjo Co-authored-by: Ivo Anjo --- libdd-library-config/src/otel_thread_ctx.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs index 404d5dad2a..482a544a89 100644 --- a/libdd-library-config/src/otel_thread_ctx.rs +++ b/libdd-library-config/src/otel_thread_ctx.rs @@ -57,8 +57,8 @@ pub mod linux { /// reader runs. `valid` is written with `ptr::write_volatile` so the compiler cannot reorder /// its store past the surrounding field writes: /// - /// - The writer sets `valid = 1` *after* all other fields are populated (publish). /// - The writer sets `valid = 0` *before* modifying fields in-place (modify). + /// - The writer sets `valid = 1` *after* all other fields are populated (publish). #[repr(C)] pub struct ThreadContextRecord { /// 128-bit trace identifier; all-zeroes means "no trace". From b5403b96fafc4c7548092d886e20117789635e72 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 26 Mar 2026 14:46:44 +0100 Subject: [PATCH 06/13] feat(otel-thread-ctx): volatile -> atomic + fences It turns out the previous implementation used volatile to synchronize with the async-signal-handler reader, but the proper device to use is actually atomics + compiler-only fences. This commit updates the model to do that, and get rid of all the `xxx_volatile` details. 
--- libdd-library-config/src/otel_thread_ctx.rs | 240 +++++++++----------- 1 file changed, 107 insertions(+), 133 deletions(-) diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs index 482a544a89..a532782bfb 100644 --- a/libdd-library-config/src/otel_thread_ctx.rs +++ b/libdd-library-config/src/otel_thread_ctx.rs @@ -12,8 +12,10 @@ //! //! Readers are constrained to the same thread as the writer and operate like async-signal //! handlers: the writer thread is always stopped while a reader runs. There is thus no -//! cross-thread synchronization concerns. The only hazard is compiler reordering or not committing -//! writes to memory, which is handled using volatile writes. +//! cross-thread synchronization concerns. The only hazard is compiler reordering, which is +//! handled by making `valid` atomic and using compiler-only fences (equivalent to C's +//! `atomic_signal_fence`) to keep field writes boxed between the `valid = 0` and `valid = 1` +//! stores during in-place updates. #[cfg(target_os = "linux")] pub mod linux { @@ -22,24 +24,43 @@ pub mod linux { mem, ops::{Deref, DerefMut}, ptr::{self, NonNull}, + sync::atomic::{compiler_fence, AtomicPtr, AtomicU8, Ordering}, }; extern "C" { /// Return the address of the current thread's `custom_labels_current_set_v2` local. + /// + /// **CAUTION**: do not use this directly, always go through [get_tls_slot] to read and + /// write it atomically. fn libdd_get_custom_labels_current_set_v2() -> *mut *mut c_void; } - /// Return a pointer to the TLS slot holding the context. The address calculation requires a - /// call to a C shim in order to use the TLSDESC dialect from Rust. Note that the returned - /// address is stable (per thread), so the result can be reused . + /// Return an atomic view of the TLS slot. The address calculation requires a call to a C shim + /// in order to use the TLSDESC dialect from Rust. 
The returned address is stable (per thread),
+    /// so the resulting atomic should be reused whenever possible, to reduce the number of calls
+    /// to this function.
     ///
-    /// Note that the address is read by an external process or a signal handler, so it should be
-    /// written to using [std::ptr::write_volatile].
-    #[allow(clippy::missing_safety_doc)]
-    fn get_tls_ptr() -> *mut *mut ThreadContextRecord {
-        // Safety: this is just an FFI call, but there's no particular pre-condition to uphold: the
-        // TLS slot should always be accessible.
-        unsafe { libdd_get_custom_labels_current_set_v2().cast() }
+    /// The slot is read by an async signal handler. Atomic operations should in general use
+    /// [Ordering::Relaxed], but modifications to the record might need additional compiler-only
+    /// fences (see [ThreadContext::update] for an example).
+    fn get_tls_slot<'a>() -> &'a AtomicPtr<ThreadContextRecord> {
+        const {
+            assert!(
+                mem::align_of::<AtomicPtr<ThreadContextRecord>>()
+                    == mem::align_of::<*mut ThreadContextRecord>()
+            )
+        }
+
+        // Safety: the const assertion above ensures the alignment is correct. The TLS slot is
+        // valid for writes during the lifetime of the program.
+        //
+        // We forbid direct usage of `libdd_get_custom_labels_current_set_v2`, which guarantees
+        // that there's never conflicting non-atomic accesses to the TLS slot.
+        unsafe {
+            AtomicPtr::from_ptr(
+                libdd_get_custom_labels_current_set_v2().cast::<*mut ThreadContextRecord>(),
+            )
+        }
     }
 
     /// Maximum size in bytes of the `attrs_data` field.
@@ -48,25 +69,32 @@ pub mod linux {
     /// limit recommended by the spec (the eBPF profiler read limit).
     pub const MAX_ATTRS_DATA_SIZE: usize = 612;
 
-    /// In-memory layout of a thread-level context. The structure MUST match exactly the
-    /// specification.
+    /// In-memory layout of a thread-level context.
+    ///
+    /// **CAUTION**: The structure MUST match exactly the OTel thread-level context specification.
+    /// It is read by external, out-of-process code.
Do not re-order fields or modify in any way, + /// unless you know exactly what you're doing. /// /// # Synchronization /// - /// Readers are async-signal handlers on the same thread; the writer is always stopped while a - /// reader runs. `valid` is written with `ptr::write_volatile` so the compiler cannot reorder - /// its store past the surrounding field writes: + /// Readers are async-signal handlers. The writer is always stopped while a reader runs. + /// Sharing memory with a signal handler still requires some form of synchronization, which is + /// achieved through atomics and compiler fence, using `valid` and/or the TLS slot as + /// synchronization points. /// - /// - The writer sets `valid = 0` *before* modifying fields in-place (modify). - /// - The writer sets `valid = 1` *after* all other fields are populated (publish). + /// - The writer stores `valid = 0` *before* modifying fields in-place, guarded by a fence. + /// - The writer stores `valid = 1` *after* all fields are populated, guarded by a fence. + /// - `valid` starts at `1` on construction and is never set to `0` except during an in-place + /// update. #[repr(C)] pub struct ThreadContextRecord { /// 128-bit trace identifier; all-zeroes means "no trace". pub trace_id: [u8; 16], /// 64-bit span identifier. pub span_id: [u8; 8], - /// Whether the record is ready/consistent. Written with `ptr::write_volatile`. - pub valid: u8, + /// Whether the record is ready/consistent. Always set to `1` except during in-place update + /// of the current record. + pub valid: AtomicU8, pub _reserved: u8, /// Number of populated bytes in `attrs_data`. pub attrs_data_size: u16, @@ -99,58 +127,6 @@ pub mod linux { record } - /// Write `value` to `self.valid` using a volatile store, preventing the compiler from - /// eliminating or reordering or this write past other volatile writes to the record. 
- fn write_valid_volatile(&mut self, value: u8) { - // Safety: we have an exclusive borrow of `self`, so `valid` is live and valid for - // writes. - unsafe { ptr::write_volatile(&mut self.valid as *mut u8, value) } - } - - /// Write `trace_id` to `self.trace_id` using a volatile store, preventing the compiler - /// from eliminating or reordering this write past other volatile writes to the record. - pub fn write_trace_id_volatile(&mut self, trace_id: [u8; 16]) { - // Safety: we have an exclusive borrow of `self`, so `trace_id` is live and valid for - // writes. - unsafe { ptr::write_volatile(&mut self.trace_id, trace_id) } - } - - /// Write `span_id` to `self.span_id` using a volatile store, preventing the compiler from - /// eliminating or reordering this write past other volatile writes to the record. - pub fn write_span_id_volatile(&mut self, span_id: [u8; 8]) { - // Safety: we have an exclusive borrow of `self`, so `span_id` is live and valid for - // writes. - unsafe { ptr::write_volatile(&mut self.span_id, span_id) } - } - - /// Write a single byte of the encoded attributes using a volatile store, preventing the - /// compiler from eliminating or reordering or this write past other volatile writes to the - /// record. - /// - /// # Panic - /// - /// Panics if the offset is out of bound. - fn write_attrs_volatile(&mut self, offset: usize, value: u8) { - // Safety: we have an exclusive borrow of `self`, so `attrs_data` is live and valid for - // writes. - unsafe { ptr::write_volatile(&mut self.attrs_data[offset] as *mut u8, value) } - } - - /// Copy `src` into the encoded attributes starting at `offset` using volatile stores, - /// preventing the compiler from eliminating or reordering or this write past other - /// volatile writes to the record. - /// - /// # Panic - /// - /// Panics if `offset + src.len() > self.attrs_data.len()`. 
- fn copy_attrs_volatile(&mut self, src: &[u8], offset: usize) { - // Safety: we have an exclusive borrow of `self`, so `attrs_data` is live and valid for - // writes. - for (idx, byte) in src.iter().enumerate() { - unsafe { ptr::write_volatile(&mut self.attrs_data[offset + idx] as *mut u8, *byte) } - } - } - /// Encode `attributes` into `record.attrs_data` as packed key-value records. Existing data /// are overridden (and if there were more entires than `attributes.len()`, they aren't /// zeroed, but they will be ignored by readers). @@ -167,15 +143,6 @@ pub mod linux { /// recovery would require us to be able to rollback to the previous attributes which would /// hurt the happy path, or leave the record in a inconsistent state. Another possibility /// would be to error out and reset the record in that situation. - /// - /// # Memory - /// - /// Disclaimer: This note is for internal usage only. As a consumer of this library, please - /// use [ThreadContext::modify] to mutate the currently attached record. - /// - /// Writes are volatile, making `set_attrs` suitable for mutating an attached record - /// in-place (TODO: does this impact the performance of writing to a non-attached record? - /// Should we have two specialized version `set_attrs` and `set_attrs_volatile`?). 
pub fn set_attrs(&mut self, attributes: &[(u8, &str)]) { let mut offset = 0; @@ -188,10 +155,11 @@ pub mod linux { break; } - self.write_attrs_volatile(offset, key_index); + self.attrs_data[offset] = key_index; // `val_len <= 255` thanks to the `min()` - self.write_attrs_volatile(offset + 1, val_len as u8); - self.copy_attrs_volatile(&val_bytes[..val_len], offset + 2); + self.attrs_data[offset + 1] = val_len as u8; + self.attrs_data[offset + 2..offset + 2 + val_len] + .copy_from_slice(&val_bytes[..val_len]); offset += entry_size; } @@ -205,7 +173,7 @@ pub mod linux { Self { trace_id: [0u8; 16], span_id: [0u8; 8], - valid: 0, + valid: AtomicU8::new(1), _reserved: 0, attrs_data_size: 0, attrs_data: [0u8; MAX_ATTRS_DATA_SIZE], @@ -215,7 +183,7 @@ pub mod linux { /// An owned (and non-moving) thread context record allocation. /// - /// We don't use `Box` under the hood because it excludes aliasing, while we share the context + /// We don't use `Box` under the hood because it precludes aliasing, while we share the context /// to readers through thread-level context and through the FFI. But it is a boxed /// `ThreadContextRecord` for all intent of purpose. pub struct ThreadContext(NonNull); @@ -230,14 +198,12 @@ pub mod linux { /// The pointer must be reconstructed through [`Self::from_raw`] in order to be properly /// dropped, or the record will leak. pub fn into_raw(self) -> *mut ThreadContextRecord { - use mem::ManuallyDrop; - - let mdrop = ManuallyDrop::new(self); + let mdrop = mem::ManuallyDrop::new(self); mdrop.0.as_ptr() } - /// Reconstruct a [ThreadContextRecord] from a raw pointer to `ThreadContextRecord` that is - /// either `null` or comes from [`Self::into_raw`]. Return `None` if `ptr` is null. + /// Reconstruct a [ThreadContextRecord] from a raw pointer that is either `null` or comes + /// from [`Self::into_raw`]. Return `None` if `ptr` is null. 
         ///
         /// # Safety
         ///
@@ -265,62 +231,68 @@
     }
 
     impl ThreadContext {
-        /// Swap the current context with a pointer value. Return the previously attached context,
-        /// if any.
+        /// Atomically swap the current context with a pointer value. Return the previously
+        /// attached context, if any.
         fn swap(
-            slot: *mut *mut ThreadContextRecord,
+            slot: &AtomicPtr<ThreadContextRecord>,
             tgt: *mut ThreadContextRecord,
         ) -> Option<ThreadContext> {
-            unsafe {
-                // Safety: TLS slot is always live and valid for reads and writes.
-                let prev = ThreadContext::from_raw(*slot);
-                ptr::write_volatile(slot, tgt);
-                // Safety: a non-null value in the slot came from a prior `into_raw` call.
-                prev
-            }
+            // Safety: a non-null value in the slot came from a prior `into_raw` call.
+            unsafe { ThreadContext::from_raw(slot.swap(tgt, Ordering::Relaxed)) }
        }
 
         /// Publish a new (or previously detached) thread context record by writing its pointer
-        /// into the TLS slot. Sets `valid = 1` before publishing. Returns the previously attached
-        /// context, if any.
-        pub fn attach(mut self) -> Option<ThreadContext> {
-            let slot = get_tls_ptr();
-            // Set `valid = 1` written before the TLS pointer is updated, so any reader that
-            // observes the new pointer also observes `valid = 1`.
-            self.write_valid_volatile(1);
-            Self::swap(slot, self.into_raw())
+        /// into the TLS slot. Returns the previously attached context, if any.
+        ///
+        /// `valid` is already `1` since construction, so any reader that observes the new pointer
+        /// also observes `valid = 1`.
+        pub fn attach(self) -> Option<ThreadContext> {
+            // [^tls-slot-ordering]: since we get back the previous context, we should in principle
+            // use an `Acquire` compiler fence to make sure we don't get back a not-yet-initialized
+            // record.
+            //
+            // However, this thread (excluding the reader signal handler) is the only one to ever
+            // _write_ to the context, so the store we load the value from automatically
+            // happens-before (because it's sequenced-before) the swap.
+ Self::swap(get_tls_slot(), self.into_raw()) } - /// Modify the currently attached record in-place. Sets `valid = 0` before the update and + /// Update the currently attached record in-place. Sets `valid = 0` before the update and /// `valid = 1` after, so a reader that fires between the two writes sees an inconsistent - /// record and skips it. + /// record and skips it. Compiler fences prevent the compiler from reordering field writes + /// outside that window. /// - /// If there's currently no attached context, `modify` will create one, and is in this case + /// If there's currently no attached context, `update` will create one, and is in this case /// equivalent to `ThreadContext::new(trace_id, span_id, attrs).attach()`. pub fn update(trace_id: [u8; 16], span_id: [u8; 8], attrs: &[(u8, &str)]) { - let slot = get_tls_ptr(); + let slot = get_tls_slot(); - // Safety: the tls slot is always valid for reads and writes. - if let Some(current) = unsafe { (*slot).as_mut() } { - current.write_valid_volatile(0); - current.write_trace_id_volatile(trace_id); - current.write_span_id_volatile(span_id); + if let Some(current) = unsafe { slot.load(Ordering::Relaxed).as_mut() } { + current.valid.store(0, Ordering::Relaxed); + compiler_fence(Ordering::SeqCst); + + current.trace_id = trace_id; + current.span_id = span_id; current.set_attrs(attrs); - current.write_valid_volatile(1); + + compiler_fence(Ordering::SeqCst); + current.valid.store(1, Ordering::Relaxed); } else { - let mut ctx = ThreadContext::new(trace_id, span_id, attrs); - ctx.write_valid_volatile(1); - let _ = Self::swap(slot, ctx.into_raw()); + // No need for `AcqRel`, see [^tls-slot-ordering]. + compiler_fence(Ordering::Release); + // `ThreadContext::new` already initialises `valid = 1`. + let _ = Self::swap( + slot, + ThreadContext::new(trace_id, span_id, attrs).into_raw(), + ); } } - /// Detach the current record from the TLS slot. 
Writes null to the slot, sets `valid = 0` - /// on the detached record, and returns it. + /// Detach the current record from the TLS slot. Writes null to the slot and returns the + /// detached record. pub fn detach() -> Option { - Self::swap(get_tls_ptr(), ptr::null_mut()).map(|mut ctx| { - ctx.write_valid_volatile(0); - ctx - }) + // We don't need any fence here, see [^tls-slot-ordering]. + Self::swap(get_tls_slot(), ptr::null_mut()) } } @@ -356,12 +328,14 @@ pub mod linux { // Accessing the TLS through C isn't supported in Miri #[cfg_attr(miri, ignore)] mod tests { + use std::sync::atomic::Ordering; + use super::{ThreadContext, ThreadContextRecord}; /// Read the TLS pointer for the current thread (the value stored in the TLS slot, not the /// address of the slot itself). fn read_tls_context_ptr() -> *const ThreadContextRecord { - unsafe { *super::get_tls_ptr() as *const ThreadContextRecord } + super::get_tls_slot().load(Ordering::Relaxed) } #[test] @@ -409,7 +383,7 @@ pub mod linux { let record = unsafe { &*ptr }; assert_eq!(record.trace_id, trace_id); assert_eq!(record.span_id, span_id); - assert_eq!(record.valid, 1); + assert_eq!(record.valid.load(Ordering::Relaxed), 1); assert_eq!(record.attrs_data_size, 0); let _ = ThreadContext::detach(); @@ -481,7 +455,7 @@ pub mod linux { let record = unsafe { &*ptr_before }; assert_eq!(record.trace_id, trace_id_1); assert_eq!(record.span_id, span_id_1); - assert_eq!(record.valid, 1); + assert_eq!(record.valid.load(Ordering::Relaxed), 1); assert_eq!(record.attrs_data[0], 0); assert_eq!(record.attrs_data[1], 2); assert_eq!(&record.attrs_data[2..4], b"v1"); @@ -497,7 +471,7 @@ pub mod linux { let record = unsafe { &*ptr_after }; assert_eq!(record.trace_id, trace_id_2); assert_eq!(record.span_id, span_id_2); - assert_eq!(record.valid, 1); + assert_eq!(record.valid.load(Ordering::Relaxed), 1); assert_eq!(record.attrs_data[0], 0); assert_eq!(record.attrs_data[1], 2); assert_eq!(&record.attrs_data[2..4], b"v2"); From 
6802b1de2b3fe596ec8845d2c07da6b8af10cc2f Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 26 Mar 2026 14:55:05 +0100 Subject: [PATCH 07/13] chore: indicative return value for set_attrs --- libdd-library-config/src/otel_thread_ctx.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs index a532782bfb..d95b2a677d 100644 --- a/libdd-library-config/src/otel_thread_ctx.rs +++ b/libdd-library-config/src/otel_thread_ctx.rs @@ -131,6 +131,11 @@ pub mod linux { /// are overridden (and if there were more entires than `attributes.len()`, they aren't /// zeroed, but they will be ignored by readers). /// + /// # Return + /// + /// Returns `true` if all attributes were properly encoded, or `false` if some of the data + /// needed to be dropped. See Size limits below. + /// /// # Arguments /// /// Each input entry is a pair of a 1-byte `key_index` and a string value. @@ -143,15 +148,23 @@ pub mod linux { /// recovery would require us to be able to rollback to the previous attributes which would /// hurt the happy path, or leave the record in a inconsistent state. Another possibility /// would be to error out and reset the record in that situation. 
- pub fn set_attrs(&mut self, attributes: &[(u8, &str)]) { + pub fn set_attrs(&mut self, attributes: &[(u8, &str)]) -> bool { let mut offset = 0; + let mut fully_encoded = true; for &(key_index, val) in attributes { let val_bytes = val.as_bytes(); - let val_len = val_bytes.len().min(255); + let val_len = val_bytes.len(); + let val_len = if val_len > 255 { + fully_encoded = false; + 255 + } else { + val_len + }; let entry_size = 2 + val_len; if offset + entry_size > MAX_ATTRS_DATA_SIZE { + fully_encoded = false; break; } @@ -165,6 +178,7 @@ pub mod linux { // `offset < MAX_ATTRS_DATA_SIZE`, which guarantees it fits in a `u16` self.attrs_data_size = offset as u16; + fully_encoded } } From c72399eae5f9f3010dfb5dda5eac8bfb6e164ff7 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 26 Mar 2026 15:05:02 +0100 Subject: [PATCH 08/13] chore: insist on ctx not being thread-safe --- libdd-library-config/src/otel_thread_ctx.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs index d95b2a677d..26f5028d36 100644 --- a/libdd-library-config/src/otel_thread_ctx.rs +++ b/libdd-library-config/src/otel_thread_ctx.rs @@ -200,6 +200,9 @@ pub mod linux { /// We don't use `Box` under the hood because it precludes aliasing, while we share the context /// to readers through thread-level context and through the FFI. But it is a boxed /// `ThreadContextRecord` for all intent of purpose. + /// + /// The context is `!Send` and `!Sync`; it is supposed to stay on the same thread and is thus + /// not thread-safe. 
pub struct ThreadContext(NonNull); impl ThreadContext { From 2e4a5dae92fce2923a7f690f4ce0da48cd926318 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 26 Mar 2026 15:12:47 +0100 Subject: [PATCH 09/13] chore: make ThreadContextRecord private --- libdd-library-config/src/otel_thread_ctx.rs | 57 ++++++++------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs index 26f5028d36..6ca2830d44 100644 --- a/libdd-library-config/src/otel_thread_ctx.rs +++ b/libdd-library-config/src/otel_thread_ctx.rs @@ -22,7 +22,6 @@ pub mod linux { use std::{ ffi::c_void, mem, - ops::{Deref, DerefMut}, ptr::{self, NonNull}, sync::atomic::{compiler_fence, AtomicPtr, AtomicU8, Ordering}, }; @@ -87,17 +86,17 @@ pub mod linux { /// - `valid` starts at `1` on construction and is never set to `0` except during an in-place /// update. #[repr(C)] - pub struct ThreadContextRecord { + struct ThreadContextRecord { /// 128-bit trace identifier; all-zeroes means "no trace". - pub trace_id: [u8; 16], + trace_id: [u8; 16], /// 64-bit span identifier. - pub span_id: [u8; 8], + span_id: [u8; 8], /// Whether the record is ready/consistent. Always set to `1` except during in-place update /// of the current record. - pub valid: AtomicU8, - pub _reserved: u8, + valid: AtomicU8, + _reserved: u8, /// Number of populated bytes in `attrs_data`. - pub attrs_data_size: u16, + attrs_data_size: u16, /// Packed variable-length key-value records. /// /// It's a contiguous list of blocks with layout: @@ -112,7 +111,7 @@ pub mod linux { /// hundred bytes per thread, but it guarantees that we can modify the context in-place /// without (re)allocation in the hot path. Having a hybrid scheme (starting smaller and /// resizing up a few times) is not out of the question. 
- pub attrs_data: [u8; MAX_ATTRS_DATA_SIZE], + attrs_data: [u8; MAX_ATTRS_DATA_SIZE], } impl ThreadContextRecord { @@ -187,6 +186,7 @@ pub mod linux { Self { trace_id: [0u8; 16], span_id: [0u8; 8], + // We only ever set `valid` to `0` during in-place update of an attached context. valid: AtomicU8::new(1), _reserved: 0, attrs_data_size: 0, @@ -214,7 +214,7 @@ pub mod linux { /// Turn this thread context into a raw pointer to the underlying [ThreadContextRecord]. /// The pointer must be reconstructed through [`Self::from_raw`] in order to be properly /// dropped, or the record will leak. - pub fn into_raw(self) -> *mut ThreadContextRecord { + fn into_raw(self) -> *mut ThreadContextRecord { let mdrop = mem::ManuallyDrop::new(self); mdrop.0.as_ptr() } @@ -229,7 +229,7 @@ pub mod linux { /// calls on the returned [ThreadContextRecord]. More precisely, mutable references might /// be reconstructed during those calls, so any constraint from either Stacked Borrows, /// Tree Borrows or whatever is the current aliasing model implemented in Miri applies. - pub unsafe fn from_raw(ptr: *mut ThreadContextRecord) -> Option { + unsafe fn from_raw(ptr: *mut ThreadContextRecord) -> Option { NonNull::new(ptr).map(Self) } } @@ -323,24 +323,6 @@ pub mod linux { } } - impl Deref for ThreadContext { - type Target = ThreadContextRecord; - - fn deref(&self) -> &Self::Target { - // Safety: `ThreadContext` represents ownership of a valid, alive - // `ThreadContextRecord`. - unsafe { self.0.as_ref() } - } - } - - impl DerefMut for ThreadContext { - fn deref_mut(&mut self) -> &mut Self::Target { - // Safety: `ThreadContext` represents ownership of a valid, alive - // `ThreadContextRecord`. 
-            unsafe { self.0.as_mut() }
-        }
-    }
-
     #[cfg(test)]
     // Accessing the TLS through C isn't supported in Miri
     #[cfg_attr(miri, ignore)]
     mod tests {
@@ -371,14 +353,17 @@
             );
 
             let prev = ThreadContext::detach().unwrap();
-            assert!(
-                prev.trace_id == trace_id,
-                "got back a different trace_id than attached"
-            );
-            assert!(
-                prev.span_id == span_id,
-                "got back a different span_id than attached"
-            );
+
+            unsafe {
+                assert!(
+                    prev.0.as_ref().trace_id == trace_id,
+                    "got back a different trace_id than attached"
+                );
+                assert!(
+                    prev.0.as_ref().span_id == span_id,
+                    "got back a different span_id than attached"
+                );
+            }
 
             assert!(
                 read_tls_context_ptr().is_null(),

From 98d5cad665cc524a2d210a2ce60ee10477e4f9f3 Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Fri, 27 Mar 2026 11:28:31 +0100
Subject: [PATCH 10/13] chore: assert total size of thread ctx is the expected one

---
 libdd-library-config/src/otel_thread_ctx.rs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs
index 6ca2830d44..a162b3efca 100644
--- a/libdd-library-config/src/otel_thread_ctx.rs
+++ b/libdd-library-config/src/otel_thread_ctx.rs
@@ -85,11 +85,16 @@ pub mod linux {
     /// - The writer stores `valid = 1` *after* all fields are populated, guarded by a fence.
     /// - `valid` starts at `1` on construction and is never set to `0` except during an in-place
     /// update.
+    // Note: we don't need to make this struct packed, because it's already designed to avoid
+    // padding. Moreover, doing so would make it 1-aligned, potentially making access to
+    // `attrs_data_size` unaligned and thus slower, and prevent us from using `AtomicU8` for
+    // `valid`. We just use a const assertion in `new()` to avoid surprises and make sure this
+    // struct has the right total size.
     #[repr(C)]
     struct ThreadContextRecord {
-        /// 128-bit trace identifier; all-zeroes means "no trace".
+        /// Trace identifier; all-zeroes means "no trace".
         trace_id: [u8; 16],
-        /// 64-bit span identifier.
+        /// Span identifier.
         span_id: [u8; 8],
         /// Whether the record is ready/consistent. Always set to `1` except during in-place update
         /// of the current record.
@@ -117,6 +122,8 @@
     impl ThreadContextRecord {
         /// Build a record with the given trace id, span id and attributes.
         pub fn new(trace_id: [u8; 16], span_id: [u8; 8], attrs: &[(u8, &str)]) -> Self {
+            const { assert!(size_of::<ThreadContextRecord>() == 640) }
+
             let mut record = Self {
                 trace_id,
                 span_id,

From 113036cce881feddde22c194e433415a48a2f679 Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Fri, 27 Mar 2026 11:30:13 +0100
Subject: [PATCH 11/13] chore: add zeroing precision in comment

---
 libdd-library-config/src/otel_thread_ctx.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs
index a162b3efca..15ef466c23 100644
--- a/libdd-library-config/src/otel_thread_ctx.rs
+++ b/libdd-library-config/src/otel_thread_ctx.rs
@@ -182,7 +182,9 @@ pub mod linux {
                 offset += entry_size;
             }
 
-            // `offset < MAX_ATTRS_DATA_SIZE`, which guarantees it fits in a `u16`
+            // `offset < MAX_ATTRS_DATA_SIZE`, which guarantees it fits in a `u16`. This also
+            // effectively hides the remainder of the previous `attrs` bytes, so we don't have to
+            // zero them.
            self.attrs_data_size = offset as u16;
             fully_encoded
         }

From e8655c15423d5c158d3dc949a2c2b3b0c4043536 Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Fri, 27 Mar 2026 16:55:54 +0100
Subject: [PATCH 12/13] chore: add example usage of otel thread ctx

---
 libdd-library-config/src/otel_thread_ctx.rs | 47 +++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-library-config/src/otel_thread_ctx.rs
index 15ef466c23..f14d2fda34 100644
--- a/libdd-library-config/src/otel_thread_ctx.rs
+++ b/libdd-library-config/src/otel_thread_ctx.rs
@@ -8,6 +8,53 @@
 //! Since `rustc` doesn't currently support the TLSDESC dialect, we use a C shim to set and get the
 //! thread-local storage used for the context.
 //!
+//! ## Usage
+//!
+//! There are two main patterns for publishing and updating thread contexts.
+//!
+//! ### In-place update
+//!
+//! The simplest pattern, when applicable, is to attach one record and then mutate it in place.
+//! This avoids allocation in the hot path.
+//!
+//! ```ignore
+//! #use libdd_library_config::otel_thread_ctx::linux::ThreadContext;
+//!
+//! let trace_id = [0u8; 16];
+//! let span_id = [1u8; 8];
+//!
+//! // First call allocates a record and attaches it.
+//! ThreadContext::new(trace_id, span_id, &[(0, "first")]).attach();
+//! ThreadContext::update(trace_id, span_id, &[(0, "second")]);
+//! ThreadContext::detach();
+//! ```
+//!
+//! ### Swapping
+//!
+//! Swapping can be used when it's beneficial to pre-allocate or keep around a bunch of contexts to
+//! be saved and restored repeatedly. Could be the case with async-runtimes where several tasks
+//! might run on the same thread, or even move from one thread to another, for example.
+//!
+//! ```ignore
+//! #use libdd_library_config::otel_thread_ctx::linux::ThreadContext;
+//!
+//! let trace_id = [0u8; 16];
+//! let span_id = [1u8; 8];
+//! let attrs: &[(u8, &str)] = &[(0, "GET"), (1, "/api/v1")];
+//!
+//! 
// Publish a new context and save the previously attached one (if any). +//! let ctx = ThreadContext::new(trace_id, span_id, attrs); +//! let previous = ctx.attach(); +//! +//! // ... do work inside the span ... +//! +//! // Restore the previous context: detach the current one and re-attach the saved one. +//! if let Some(prev) = previous { +//! // here we drop `ctx`, but we could store it for later use +//! let _ = prev.attach(); +//! } +//! ``` +//! //! ## Synchronization //! //! Readers are constrained to the same thread as the writer and operate like async-signal From ce8e143885a0daf6bd8273f7a1e98e6056493308 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 27 Mar 2026 17:04:29 +0100 Subject: [PATCH 13/13] refactor: move otel_thread_ctx to libdd-profiling The module and its C shim (tls_shim.c) logically belong in the profiling crate rather than library-config. Move otel_thread_ctx.rs and tls_shim.c to libdd-profiling/src/, wire up the cc build step in its build.rs, and restore libdd-library-config to its pre-PR state. 
Co-Authored-By: Claude Sonnet 4.6 --- Cargo.lock | 2 +- libdd-library-config/Cargo.toml | 3 --- libdd-library-config/build.rs | 20 ------------------- libdd-library-config/src/lib.rs | 1 - libdd-profiling/Cargo.toml | 1 + libdd-profiling/build.rs | 16 +++++++++++++++ libdd-profiling/src/lib.rs | 1 + .../src/otel_thread_ctx.rs | 0 .../src/tls_shim.c | 0 9 files changed, 19 insertions(+), 25 deletions(-) delete mode 100644 libdd-library-config/build.rs rename {libdd-library-config => libdd-profiling}/src/otel_thread_ctx.rs (100%) rename {libdd-library-config => libdd-profiling}/src/tls_shim.c (100%) diff --git a/Cargo.lock b/Cargo.lock index b29f9599ba..1d3de5f9aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3039,7 +3039,6 @@ name = "libdd-library-config" version = "1.1.0" dependencies = [ "anyhow", - "cc", "libdd-trace-protobuf", "memfd", "prost", @@ -3105,6 +3104,7 @@ dependencies = [ "bolero", "byteorder", "bytes", + "cc", "chrono", "criterion", "crossbeam-channel", diff --git a/libdd-library-config/Cargo.toml b/libdd-library-config/Cargo.toml index ae287abf22..8c2f2e35a8 100644 --- a/libdd-library-config/Cargo.toml +++ b/libdd-library-config/Cargo.toml @@ -26,9 +26,6 @@ rmp-serde = "1.3.0" libdd-trace-protobuf = { version = "3.0.0", path = "../libdd-trace-protobuf" } -[build-dependencies] -cc = "1.1.31" - [dev-dependencies] tempfile = { version = "3.3" } serial_test = "3.2" diff --git a/libdd-library-config/build.rs b/libdd-library-config/build.rs deleted file mode 100644 index 4fa8078d52..0000000000 --- a/libdd-library-config/build.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed under the Apache -// License Version 2.0. This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. - -fn main() { - // Only compile the TLS shim on Linux; the thread-level context feature is Linux-only. 
- #[cfg(target_os = "linux")] - { - let mut build = cc::Build::new(); - - // - On aarch64, TLSDESC is already the only dynamic TLS model so no flag is needed. - // - On x86-64, we use `-mtls-dialect=gnu2` (supported since GCC 4.4 and Clang 19+) to force - // the use of TLSDESC as mandated by the spec. If it's not supported, this build will - // fail. - #[cfg(target_arch = "x86_64")] - build.flag("-mtls-dialect=gnu2"); - - build.file("src/tls_shim.c").compile("tls_shim"); - println!("cargo:rerun-if-changed=src/tls_shim.c"); - } -} diff --git a/libdd-library-config/src/lib.rs b/libdd-library-config/src/lib.rs index 7262511250..f243678c09 100644 --- a/libdd-library-config/src/lib.rs +++ b/libdd-library-config/src/lib.rs @@ -1,7 +1,6 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 pub mod otel_process_ctx; -pub mod otel_thread_ctx; pub mod tracer_metadata; use std::borrow::Cow; diff --git a/libdd-profiling/Cargo.toml b/libdd-profiling/Cargo.toml index 731bef4ff5..37db386fcb 100644 --- a/libdd-profiling/Cargo.toml +++ b/libdd-profiling/Cargo.toml @@ -81,4 +81,5 @@ strum = { version = "0.26", features = ["derive"] } tempfile = "3" [build-dependencies] +cc = "1.1.31" cxx-build = { version = "1.0", optional = true } diff --git a/libdd-profiling/build.rs b/libdd-profiling/build.rs index b32dff0f98..6a67b55d82 100644 --- a/libdd-profiling/build.rs +++ b/libdd-profiling/build.rs @@ -11,4 +11,20 @@ fn main() { println!("cargo:rerun-if-changed=src/cxx.rs"); } + + // Only compile the TLS shim on Linux; the thread-level context feature is Linux-only. + #[cfg(target_os = "linux")] + { + let mut build = cc::Build::new(); + + // - On aarch64, TLSDESC is already the only dynamic TLS model so no flag is needed. + // - On x86-64, we use `-mtls-dialect=gnu2` (supported since GCC 4.4 and Clang 19+) to force + // the use of TLSDESC as mandated by the spec. If it's not supported, this build will + // fail. 
+ #[cfg(target_arch = "x86_64")] + build.flag("-mtls-dialect=gnu2"); + + build.file("src/tls_shim.c").compile("tls_shim"); + println!("cargo:rerun-if-changed=src/tls_shim.c"); + } } diff --git a/libdd-profiling/src/lib.rs b/libdd-profiling/src/lib.rs index b9d7c1df1a..bf02565b90 100644 --- a/libdd-profiling/src/lib.rs +++ b/libdd-profiling/src/lib.rs @@ -14,5 +14,6 @@ pub mod cxx; pub mod exporter; pub mod internal; pub mod iter; +pub mod otel_thread_ctx; pub mod pprof; pub mod profiles; diff --git a/libdd-library-config/src/otel_thread_ctx.rs b/libdd-profiling/src/otel_thread_ctx.rs similarity index 100% rename from libdd-library-config/src/otel_thread_ctx.rs rename to libdd-profiling/src/otel_thread_ctx.rs diff --git a/libdd-library-config/src/tls_shim.c b/libdd-profiling/src/tls_shim.c similarity index 100% rename from libdd-library-config/src/tls_shim.c rename to libdd-profiling/src/tls_shim.c