From 4b711e03057d31559f7ae86aa373900582f4b65a Mon Sep 17 00:00:00 2001 From: Avi Cohen Date: Thu, 14 May 2026 10:43:04 +0300 Subject: [PATCH 1/2] add run_with_libfunc_profile + AotWithProgram variant for ContractExecutor Exposes the libfunc-profiling primitives that downstream consumers (e.g. the blockifier in starkware-libs/sequencer) currently maintain locally. Profile collection is callback-driven so the per-call key (tx hash, etc.) stays out of cairo-native. - metadata::profiler::Profile is now pub (was a private type alias). - AotContractExecutor::run_with_libfunc_profile (gated on with-libfunc-profiling, in new file src/executor/libfunc_profile.rs) wraps run: allocates a unique trace ID, points the executor's cairo_native__profiler__profile_id symbol at it, drains the resulting Profile after run returns, and hands it to a caller-supplied FnOnce(Profile). A ProfilerGuard restores the previous trace ID and drops the LIBFUNC_PROFILE slot on both the success and unwind paths. - ContractExecutor::AotWithProgram(AotWithProgram { executor, program }) is a new variant that bundles an AOT executor with the Sierra program it was built from. From is provided. - ContractExecutor::run dispatches the new variant via run_with_libfunc_profile with a no-op profile callback. - ContractExecutor::run_with_profile is the profile-capturing counterpart of run; for non-AotWithProgram variants it falls through to run (callback never fires). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/executor.rs | 8 ++- src/executor/contract_executor.rs | 70 ++++++++++++++++++- src/executor/libfunc_profile.rs | 111 ++++++++++++++++++++++++++++++ src/metadata/profiler.rs | 2 +- 4 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 src/executor/libfunc_profile.rs diff --git a/src/executor.rs b/src/executor.rs index 48c50722e3..1fbfe3d507 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -3,12 +3,14 @@ //! 
This module provides methods to execute the programs, either via JIT or compiled ahead //! of time. It also provides a cache to avoid recompiling previously compiled programs. -#[cfg(feature = "sierra-emu")] -pub use self::contract_executor::EmuContractInfo; pub use self::{ aot::AotNativeExecutor, contract::AotContractExecutor, contract_executor::ContractExecutor, jit::JitNativeExecutor, }; +#[cfg(feature = "sierra-emu")] +pub use self::contract_executor::EmuContractInfo; +#[cfg(feature = "with-libfunc-profiling")] +pub use self::contract_executor::AotWithProgram; use crate::{ arch::{AbiArgument, ValueWithInfoWrapper}, error::{panic::ToNativeAssertError, Error}, @@ -46,6 +48,8 @@ mod aot; mod contract; mod contract_executor; mod jit; +#[cfg(feature = "with-libfunc-profiling")] +mod libfunc_profile; #[cfg(target_arch = "aarch64")] global_asm!(include_str!("arch/aarch64.s")); diff --git a/src/executor/contract_executor.rs b/src/executor/contract_executor.rs index 32854ec1ef..4ecbe32a0a 100644 --- a/src/executor/contract_executor.rs +++ b/src/executor/contract_executor.rs @@ -5,14 +5,14 @@ //! `H: StarknetSyscallHandler` — sierra-emu and cairo-native re-export the trait from //! `cairo-native-syscalls`, so no adapter is needed. 
-#[cfg(feature = "sierra-emu")] +#[cfg(any(feature = "sierra-emu", feature = "with-libfunc-profiling"))] use cairo_lang_sierra::program::Program; #[cfg(feature = "sierra-emu")] use cairo_lang_starknet_classes::compiler_version::VersionId; #[cfg(feature = "sierra-emu")] use cairo_lang_starknet_classes::contract_class::ContractEntryPoints; use starknet_types_core::felt::Felt; -#[cfg(feature = "sierra-emu")] +#[cfg(any(feature = "sierra-emu", feature = "with-libfunc-profiling"))] use std::sync::Arc; #[cfg(feature = "sierra-emu")] @@ -20,6 +20,8 @@ use crate::error::Error; use crate::error::Result; use crate::execution_result::ContractExecutionResult; use crate::executor::AotContractExecutor; +#[cfg(feature = "with-libfunc-profiling")] +use crate::metadata::profiler::Profile; use crate::starknet::StarknetSyscallHandler; use crate::utils::BuiltinCosts; @@ -33,6 +35,8 @@ pub enum ContractExecutor { Aot(AotContractExecutor), #[cfg(feature = "sierra-emu")] Emu(EmuContractInfo), + #[cfg(feature = "with-libfunc-profiling")] + AotWithProgram(AotWithProgram), } /// Inputs required to construct a `sierra_emu::VirtualMachine` for the `Emu` variant. @@ -44,6 +48,16 @@ pub struct EmuContractInfo { pub sierra_version: VersionId, } +/// AOT executor paired with the Sierra program it was built from. Required by +/// [`ContractExecutor::run_with_profile`] so libfunc samples can be resolved against +/// the program's declarations. +#[cfg(feature = "with-libfunc-profiling")] +#[derive(Debug)] +pub struct AotWithProgram { + pub executor: AotContractExecutor, + pub program: Arc, +} + impl From for ContractExecutor { fn from(value: AotContractExecutor) -> Self { Self::Aot(value) @@ -57,6 +71,13 @@ impl From for ContractExecutor { } } +#[cfg(feature = "with-libfunc-profiling")] +impl From for ContractExecutor { + fn from(value: AotWithProgram) -> Self { + Self::AotWithProgram(value) + } +} + impl ContractExecutor { /// Run the contract entry point identified by `selector`. 
/// @@ -105,6 +126,51 @@ impl ContractExecutor { builtin_stats: Default::default(), }) } + #[cfg(feature = "with-libfunc-profiling")] + ContractExecutor::AotWithProgram(AotWithProgram { executor, program }) => executor + .run_with_libfunc_profile( + program, + selector, + args, + gas, + builtin_costs, + syscall_handler, + // Profile is collected and dropped on this path. Use + // `run_with_profile` to capture it. + |_profile| {}, + ), + } + } + + /// Like [`Self::run`] but, for the `AotWithProgram` variant, hands the captured + /// libfunc profile to `on_profile` after the call returns successfully. For other + /// variants this is identical to `run` and `on_profile` is never invoked. + #[cfg(feature = "with-libfunc-profiling")] + pub fn run_with_profile( + &self, + selector: Felt, + args: &[Felt], + gas: u64, + builtin_costs: Option, + syscall_handler: H, + on_profile: F, + ) -> Result + where + H: StarknetSyscallHandler, + F: FnOnce(Profile), + { + match self { + ContractExecutor::AotWithProgram(AotWithProgram { executor, program }) => executor + .run_with_libfunc_profile( + program, + selector, + args, + gas, + builtin_costs, + syscall_handler, + on_profile, + ), + _ => self.run(selector, args, gas, builtin_costs, syscall_handler), } } } diff --git a/src/executor/libfunc_profile.rs b/src/executor/libfunc_profile.rs new file mode 100644 index 0000000000..100c33c877 --- /dev/null +++ b/src/executor/libfunc_profile.rs @@ -0,0 +1,111 @@ +//! Profiling-instrumented run wrapper around [`AotContractExecutor::run`]. +//! +//! Available under the `with-libfunc-profiling` feature (gated at the `mod` +//! declaration in `src/executor.rs`). 
+ +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; + +use cairo_lang_sierra::program::Program; +use starknet_types_core::felt::Felt; + +use crate::error::Result; +use crate::execution_result::ContractExecutionResult; +use crate::executor::AotContractExecutor; +use crate::metadata::profiler::{Profile, ProfilerBinding, ProfilerImpl, LIBFUNC_PROFILE}; +use crate::starknet::StarknetSyscallHandler; +use crate::utils::BuiltinCosts; + +impl AotContractExecutor { + /// Run the entrypoint with libfunc-level profiling instrumentation. + /// + /// Wraps [`AotContractExecutor::run`] with the bookkeeping the + /// `with-libfunc-profiling` runtime needs: + /// + /// 1. Allocates a unique trace ID and inserts an empty `ProfilerImpl` slot in + /// [`LIBFUNC_PROFILE`]. + /// 2. Points the executor's `cairo_native__profiler__profile_id` symbol at the new + /// trace ID, saving the previous value. + /// 3. Calls `run`. Per-statement samples accumulate in the slot via the runtime + /// `push_stmt` callback. + /// 4. Drains the slot, calls [`ProfilerImpl::get_profile`] with `program`, and hands + /// the resulting [`Profile`] to `on_profile`. + /// 5. A [`ProfilerGuard`] restores the previous trace ID — and removes the slot if + /// the success path didn't — on both success and unwind paths. + /// + /// `program` must be the Sierra program this executor was compiled from; it's used + /// by `get_profile` to map runtime libfunc IDs back to declarations. + /// + /// Profiling is intended to run single-threaded; concurrent calls would race on the + /// global `trace_id` symbol. 
+ pub fn run_with_libfunc_profile( + &self, + program: &Arc, + selector: Felt, + args: &[Felt], + gas: u64, + builtin_costs: Option, + syscall_handler: H, + on_profile: F, + ) -> Result + where + H: StarknetSyscallHandler, + F: FnOnce(Profile), + { + static COUNTER: AtomicU64 = AtomicU64::new(0); + let counter = COUNTER.fetch_add(1, Ordering::Relaxed); + + LIBFUNC_PROFILE + .lock() + .unwrap() + .insert(counter, ProfilerImpl::new()); + + // The pointer targets a global symbol in the executor's shared library; it lives + // for the executor's lifetime. Single-threaded profiling means no concurrent writer. + let trace_id_ptr = self + .find_symbol_ptr(ProfilerBinding::ProfileId.symbol()) + .unwrap() + .cast::(); + // SAFETY: see above. Read/write to a non-null, properly-aligned `*mut u64`. + let old_trace_id = unsafe { *trace_id_ptr }; + unsafe { + *trace_id_ptr = counter; + } + + // Restore on the success path AND on unwind. On success the caller drains the + // slot below; the guard's `remove` is then a no-op. + let _guard = ProfilerGuard { + trace_id_ptr, + old_trace_id, + counter, + }; + + let result = self.run(selector, args, gas, builtin_costs, syscall_handler); + + let profiler = LIBFUNC_PROFILE.lock().unwrap().remove(&counter).unwrap(); + on_profile(profiler.get_profile(program)); + + result + } +} + +/// RAII cleanup for the profiler globals. Restores `*trace_id_ptr` and drops the +/// `LIBFUNC_PROFILE` slot at `counter` if it's still occupied. +struct ProfilerGuard { + trace_id_ptr: *mut u64, + old_trace_id: u64, + counter: u64, +} + +impl Drop for ProfilerGuard { + fn drop(&mut self) { + // SAFETY: same provenance as the construction site; single-threaded use. + unsafe { + *self.trace_id_ptr = self.old_trace_id; + } + // Tolerate a poisoned mutex silently — Drop must not panic. 
if let Ok(mut profile) = LIBFUNC_PROFILE.lock() { profile.remove(&self.counter); } } } diff --git a/src/metadata/profiler.rs b/src/metadata/profiler.rs index feb96daa31..8e58b4b0e6 100644 --- a/src/metadata/profiler.rs +++ b/src/metadata/profiler.rs @@ -330,7 +330,7 @@ impl ProfilerMeta { /// Represents the entire profile of the execution. /// /// It maps the libfunc ID to a libfunc profile. -type Profile = HashMap; +pub type Profile = HashMap; /// Represents the profile data for a particular libfunc. #[derive(Default)] From 94bc24601f2079e15d8a40e4a3e88471fed1d9bd Mon Sep 17 00:00:00 2001 From: Avi Cohen Date: Thu, 14 May 2026 15:25:46 +0300 Subject: [PATCH 2/2] serialize concurrent profiling, propagate missing-symbol as error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two correctness fixes to run_with_libfunc_profile, both inherited from the source branch (lambdaclass/sequencer:libfunc_profiling_support): 1. Race condition. The trace_id global symbol that drives sample routing is process-wide; concurrent calls non-atomically write to the same address, and the second caller's ProfilerGuard clobbers the first's saved old_trace_id. Add a process-wide PROFILE_LOCK (Mutex<()>) and hold it for the entire call. Lock poisoning is recovered — the protected state is the symbol itself, and into_inner() is safe to reuse. 2. Error paths. find_symbol_ptr().unwrap() panicked when the .so was compiled without libfunc-profiling instrumentation; the LIBFUNC_PROFILE slot inserted earlier was leaked. Reorder: look up the symbol first, convert None to Error::UnexpectedValue, then insert the slot. The trailing drain now only invokes on_profile when the inner run succeeded — a partial profile from an aborted run isn't meaningful. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/executor.rs | 8 +-- src/executor/libfunc_profile.rs | 98 +++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 34 deletions(-) diff --git a/src/executor.rs b/src/executor.rs index 1fbfe3d507..b2601c6a8f 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -3,14 +3,14 @@ //! This module provides methods to execute the programs, either via JIT or compiled ahead //! of time. It also provides a cache to avoid recompiling previously compiled programs. +#[cfg(feature = "with-libfunc-profiling")] +pub use self::contract_executor::AotWithProgram; +#[cfg(feature = "sierra-emu")] +pub use self::contract_executor::EmuContractInfo; pub use self::{ aot::AotNativeExecutor, contract::AotContractExecutor, contract_executor::ContractExecutor, jit::JitNativeExecutor, }; -#[cfg(feature = "sierra-emu")] -pub use self::contract_executor::EmuContractInfo; -#[cfg(feature = "with-libfunc-profiling")] -pub use self::contract_executor::AotWithProgram; use crate::{ arch::{AbiArgument, ValueWithInfoWrapper}, error::{panic::ToNativeAssertError, Error}, diff --git a/src/executor/libfunc_profile.rs b/src/executor/libfunc_profile.rs index 100c33c877..110c4af821 100644 --- a/src/executor/libfunc_profile.rs +++ b/src/executor/libfunc_profile.rs @@ -4,40 +4,48 @@ //! declaration in `src/executor.rs`). use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use cairo_lang_sierra::program::Program; use starknet_types_core::felt::Felt; -use crate::error::Result; +use crate::error::{Error, Result}; use crate::execution_result::ContractExecutionResult; use crate::executor::AotContractExecutor; use crate::metadata::profiler::{Profile, ProfilerBinding, ProfilerImpl, LIBFUNC_PROFILE}; use crate::starknet::StarknetSyscallHandler; use crate::utils::BuiltinCosts; +/// Process-wide lock that serializes calls into [`AotContractExecutor::run_with_libfunc_profile`]. 
+/// The profiler hot-swaps a process-global symbol (`cairo_native__profiler__profile_id`); +/// concurrent callers would race on that write and on the [`LIBFUNC_PROFILE`] slot bookkeeping. +static PROFILE_LOCK: Mutex<()> = Mutex::new(()); + impl AotContractExecutor { /// Run the entrypoint with libfunc-level profiling instrumentation. /// /// Wraps [`AotContractExecutor::run`] with the bookkeeping the /// `with-libfunc-profiling` runtime needs: /// - /// 1. Allocates a unique trace ID and inserts an empty `ProfilerImpl` slot in - /// [`LIBFUNC_PROFILE`]. - /// 2. Points the executor's `cairo_native__profiler__profile_id` symbol at the new - /// trace ID, saving the previous value. - /// 3. Calls `run`. Per-statement samples accumulate in the slot via the runtime + /// 1. Acquires [`PROFILE_LOCK`] so concurrent profile calls serialize on the + /// global trace-id symbol. The lock is recovered if poisoned. + /// 2. Looks up the executor's `cairo_native__profiler__profile_id` symbol. If + /// absent (the .so was compiled without profiling instrumentation) the call + /// returns an error before touching any global state. + /// 3. Allocates a unique trace ID and inserts an empty `ProfilerImpl` slot in + /// [`LIBFUNC_PROFILE`]; points the profile-id symbol at the new ID, saving + /// the previous value. + /// 4. Calls `run`. Per-statement samples accumulate in the slot via the runtime /// `push_stmt` callback. - /// 4. Drains the slot, calls [`ProfilerImpl::get_profile`] with `program`, and hands - /// the resulting [`Profile`] to `on_profile`. - /// 5. A [`ProfilerGuard`] restores the previous trace ID — and removes the slot if - /// the success path didn't — on both success and unwind paths. + /// 5. Drains the slot. On success (and only on success) hands the resulting + /// [`Profile`] to `on_profile`; on failure the callback is not invoked + /// (partial profiles aren't meaningful). + /// 6. 
A [`ProfilerGuard`] restores the previous trace ID and clears the slot on + /// both the success and unwind paths. /// /// `program` must be the Sierra program this executor was compiled from; it's used /// by `get_profile` to map runtime libfunc IDs back to declarations. - /// - /// Profiling is intended to run single-threaded; concurrent calls would race on the - /// global `trace_id` symbol. + #[allow(clippy::too_many_arguments)] pub fn run_with_libfunc_profile( &self, program: &Arc, @@ -52,28 +60,42 @@ impl AotContractExecutor { H: StarknetSyscallHandler, F: FnOnce(Profile), { + // Serialize against concurrent profile calls. Recover from a poisoned lock — + // we don't have invariants on the protected state itself; the lock only gates + // access to the global trace-id symbol. + let _profile_lock = PROFILE_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + + // Look up the profile-id symbol before touching any global state. If the + // executor wasn't compiled with libfunc-profiling instrumentation, the + // symbol is absent — return a typed error rather than panicking. + let trace_id_ptr = self + .find_symbol_ptr(ProfilerBinding::ProfileId.symbol()) + .ok_or_else(|| { + Error::UnexpectedValue(format!( + "AOT executor missing libfunc-profiling symbol `{}`; \ + was the program compiled with libfunc-profiling enabled?", + ProfilerBinding::ProfileId.symbol() + )) + })? + .cast::(); + static COUNTER: AtomicU64 = AtomicU64::new(0); let counter = COUNTER.fetch_add(1, Ordering::Relaxed); LIBFUNC_PROFILE .lock() - .unwrap() + .unwrap_or_else(|e| e.into_inner()) .insert(counter, ProfilerImpl::new()); - // The pointer targets a global symbol in the executor's shared library; it lives - // for the executor's lifetime. Single-threaded profiling means no concurrent writer. - let trace_id_ptr = self - .find_symbol_ptr(ProfilerBinding::ProfileId.symbol()) - .unwrap() - .cast::(); - // SAFETY: see above. Read/write to a non-null, properly-aligned `*mut u64`. 
+ // SAFETY: the pointer targets a memref-global emitted into the executor's + // shared library; the executor outlives the call. `PROFILE_LOCK` serializes + // us against any other writer, and the JIT/AOT code reads through the same + // address. Reads/writes are aligned `u64`s. let old_trace_id = unsafe { *trace_id_ptr }; unsafe { *trace_id_ptr = counter; } - // Restore on the success path AND on unwind. On success the caller drains the - // slot below; the guard's `remove` is then a no-op. let _guard = ProfilerGuard { trace_id_ptr, old_trace_id, @@ -82,15 +104,27 @@ impl AotContractExecutor { let result = self.run(selector, args, gas, builtin_costs, syscall_handler); - let profiler = LIBFUNC_PROFILE.lock().unwrap().remove(&counter).unwrap(); - on_profile(profiler.get_profile(program)); + // Drain the slot. `ProfilerGuard::drop` would also remove it; doing it here + // means we hold the lock for the shortest time and can hand the profile to + // the callback. Tolerate a poisoned mutex (we'd lose the profile, not state). + let drained = LIBFUNC_PROFILE + .lock() + .unwrap_or_else(|e| e.into_inner()) + .remove(&counter); + + // Only call the user's callback when `run` succeeded — a partial profile + // captured against an aborted execution wouldn't be meaningful. + if let (Some(profiler), Ok(_)) = (drained, &result) { + on_profile(profiler.get_profile(program)); + } result } } -/// RAII cleanup for the profiler globals. Restores `*trace_id_ptr` and drops the -/// `LIBFUNC_PROFILE` slot at `counter` if it's still occupied. +/// RAII cleanup for the profiler globals. Restores `*trace_id_ptr` on success or +/// unwind. The [`LIBFUNC_PROFILE`] slot at `counter` is normally drained on the +/// success path; this guard removes it if it's still occupied (panic case). 
struct ProfilerGuard { trace_id_ptr: *mut u64, old_trace_id: u64, @@ -99,11 +133,15 @@ struct ProfilerGuard { impl Drop for ProfilerGuard { fn drop(&mut self) { - // SAFETY: same provenance as the construction site; single-threaded use. + // SAFETY: same provenance as the construction site. `PROFILE_LOCK` is held + // by the enclosing scope (still in flight while we drop) so no other thread + // races us. unsafe { *self.trace_id_ptr = self.old_trace_id; } - // Tolerate a poisoned mutex silently — Drop must not panic. + // Tolerate a poisoned mutex silently — Drop must not panic. Slot leak on + // poison is intentional and matches the behavior of other Drop impls in + // this crate; the alternative (panic in Drop) is worse. if let Ok(mut profile) = LIBFUNC_PROFILE.lock() { profile.remove(&self.counter); }