From c760207136079402e4e17ab21fdb3d743477533e Mon Sep 17 00:00:00 2001 From: evalir Date: Tue, 24 Feb 2026 22:25:01 +0100 Subject: [PATCH 1/3] fix(node): Set backfill max job duration to a safe amount of time Our timeout crash is caused by a footgun in Reth's backfill threshold configuration API. Here's how it roughly looks: - reth allows the mdbx tx timeout sentinel thread to have a configurable max timeout, hidden behind a flag (--db.read-transaction-timeout). We currently, AFAIK, do not configure this. The default value is 5 minutes. Any transactions that live beyond that are killed. - Reth, for the exex backfill, sets the max_duration for its backfill processes to 30 seconds. This does not exceed the default max timeout for the mdbx sentinel thread. - The `ExecutionStageThresholds` default for max duration for execution jobs is 10 minutes. This, by far, exceeds the mdbx timeout sentinel thread max duration. On the execution stage for the reth node this is safe as it calls `disable_long_read_transaction_safety()`, which bypasses the timeout. This is not done on backfill, so this default is unsafe. This is therefore a massive footgun for any exex that configures backfill thresholds. - We were only tweaking the `max_blocks` config value, and using the unsafe default for max duration, when setting the backfill thresholds. This leads to us avoiding an OOM crash, but then running into an mdbx crash due to the sentinel thread forcibly killing the transaction. - Before `set_backfill_thresholds` was added, what caused the OOM was the massive number of blocks being processed. `max_duration` had a sane default (30s). Once `set_backfill_thresholds` was introduced, this new bug was added due to the `::default()` usage. - The fix is, therefore, to limit the max duration to a reasonably low time. 30s should be fine. - Note: 30s is a reasonable margin for a few reasons: - We avoid the hard 5m timeout. - The intended default for exex backfill was already 30s. 
- There's a 60s timeout warning already, this avoids it entirely. We've seen this error spuriously over the entirety of the signet node's lifecycle. --- crates/node/src/node.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/node/src/node.rs b/crates/node/src/node.rs index 8b4ed05..401dfe0 100644 --- a/crates/node/src/node.rs +++ b/crates/node/src/node.rs @@ -27,7 +27,7 @@ use signet_node_types::{NodeStatus, NodeTypesDbTrait, SignetNodeTypes}; use signet_rpc::RpcServerGuard; use signet_types::{PairedHeights, constants::SignetSystemConstants}; use std::{fmt, mem::MaybeUninit, sync::Arc}; -use tokio::sync::watch; +use tokio::{sync::watch, time::Duration}; use tracing::{debug, info, instrument}; /// The genesis journal hash for the signet chain. @@ -302,6 +302,9 @@ where if let Some(max_blocks) = self.config.backfill_max_blocks() { self.host.notifications.set_backfill_thresholds(ExecutionStageThresholds { max_blocks: Some(max_blocks), + // Keep the backfill mdbx read transaction open for no longer than 30 seconds. + // This prevents MDBX from killing the read transaction, leading to a crash if reth's default mdbx read transaction timeout is exceeded. 
+ max_duration: Some(Duration::from_secs(30)), ..Default::default() }); debug!(max_blocks, "configured backfill thresholds"); From f72ab0707d00afd7c036290eecc261f803eb6510 Mon Sep 17 00:00:00 2001 From: evalir Date: Tue, 24 Feb 2026 23:54:09 +0100 Subject: [PATCH 2/3] chore: expose backfill max duration config on env var --- crates/node-config/src/core.rs | 18 ++++++++++++++++++ crates/node/src/node.rs | 20 +++++++++----------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/crates/node-config/src/core.rs b/crates/node-config/src/core.rs index 4656c85..08a2959 100644 --- a/crates/node-config/src/core.rs +++ b/crates/node-config/src/core.rs @@ -12,6 +12,7 @@ use std::{ fmt::Display, path::PathBuf, sync::{Arc, OnceLock}, + time::Duration, }; use tracing::warn; use trevm::revm::primitives::hardfork::SpecId; @@ -72,6 +73,15 @@ pub struct SignetNodeConfig { optional )] backfill_max_blocks: Option, + + /// Maximum duration of a backfill batch. + /// This prevents MDBX from killing the read transaction, leading to a crash if reth's default mdbx read transaction timeout is exceeded. + #[from_env( + var = "BACKFILL_MAX_DURATION", + desc = "Maximum duration of a backfill batch, in seconds", + optional + )] + backfill_max_duration: Option, } impl Display for SignetNodeConfig { @@ -105,6 +115,7 @@ impl SignetNodeConfig { genesis, slot_calculator, backfill_max_blocks: None, // Uses default of 10,000 via accessor + backfill_max_duration: None, // Uses default of 30 seconds via accessor } } @@ -252,6 +263,12 @@ impl SignetNodeConfig { // Default to 10,000 if not explicitly configured Some(self.backfill_max_blocks.unwrap_or(10_000)) } + + /// Get the maximum duration of a backfill batch. + /// Returns `Some(30)` seconds by default if not configured. 
+ pub fn backfill_max_duration(&self) -> Option { + Some(Duration::from_secs(self.backfill_max_duration.unwrap_or(30))) + } } #[cfg(test)] @@ -272,6 +289,7 @@ mod defaults { genesis: GenesisSpec::Known(KnownChains::Test), slot_calculator: SlotCalculator::new(0, 0, 12), backfill_max_blocks: None, // Uses default of 10,000 via accessor + backfill_max_duration: None, // Uses default of 30 seconds via accessor } } } diff --git a/crates/node/src/node.rs b/crates/node/src/node.rs index 401dfe0..4b5ac4f 100644 --- a/crates/node/src/node.rs +++ b/crates/node/src/node.rs @@ -27,7 +27,7 @@ use signet_node_types::{NodeStatus, NodeTypesDbTrait, SignetNodeTypes}; use signet_rpc::RpcServerGuard; use signet_types::{PairedHeights, constants::SignetSystemConstants}; use std::{fmt, mem::MaybeUninit, sync::Arc}; -use tokio::{sync::watch, time::Duration}; +use tokio::sync::watch; use tracing::{debug, info, instrument}; /// The genesis journal hash for the signet chain. @@ -299,16 +299,14 @@ where /// This should be called after `set_with_head` to configure how many /// blocks can be processed per backfill batch. fn set_backfill_thresholds(&mut self) { - if let Some(max_blocks) = self.config.backfill_max_blocks() { - self.host.notifications.set_backfill_thresholds(ExecutionStageThresholds { - max_blocks: Some(max_blocks), - // Keep the backfill mdbx read transaction open for no longer than 30 seconds. - // This prevents MDBX from killing the read transaction, leading to a crash if reth's default mdbx read transaction timeout is exceeded. 
- max_duration: Some(Duration::from_secs(30)), - ..Default::default() - }); - debug!(max_blocks, "configured backfill thresholds"); - } + let max_blocks = self.config.backfill_max_blocks(); + let max_duration = self.config.backfill_max_duration(); + self.host.notifications.set_backfill_thresholds(ExecutionStageThresholds { + max_blocks, + max_duration, + ..Default::default() + }); + debug!(?max_blocks, ?max_duration, "configured backfill thresholds"); } /// Runs on any notification received from the ExEx context. From b0c71d78365f96fb30074f4eeefe9cf0ed27328d Mon Sep 17 00:00:00 2001 From: evalir Date: Tue, 24 Feb 2026 23:55:28 +0100 Subject: [PATCH 3/3] chore: version bump --- Cargo.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 712c0db..297ef5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["crates/*"] resolver = "2" [workspace.package] -version = "0.16.0-rc.8" +version = "0.16.0-rc.9" edition = "2024" rust-version = "1.88" authors = ["init4"] @@ -34,15 +34,15 @@ debug = false incremental = false [workspace.dependencies] -signet-blobber = { version = "0.16.0-rc.8", path = "crates/blobber" } -signet-block-processor = { version = "0.16.0-rc.8", path = "crates/block-processor" } -signet-db = { version = "0.16.0-rc.8", path = "crates/db" } -signet-genesis = { version = "0.16.0-rc.8", path = "crates/genesis" } -signet-node = { version = "0.16.0-rc.8", path = "crates/node" } -signet-node-config = { version = "0.16.0-rc.8", path = "crates/node-config" } -signet-node-tests = { version = "0.16.0-rc.8", path = "crates/node-tests" } -signet-node-types = { version = "0.16.0-rc.8", path = "crates/node-types" } -signet-rpc = { version = "0.16.0-rc.8", path = "crates/rpc" } +signet-blobber = { version = "0.16.0-rc.9", path = "crates/blobber" } +signet-block-processor = { version = "0.16.0-rc.9", path = "crates/block-processor" } +signet-db = { version = "0.16.0-rc.9", path = 
"crates/db" } +signet-genesis = { version = "0.16.0-rc.9", path = "crates/genesis" } +signet-node = { version = "0.16.0-rc.9", path = "crates/node" } +signet-node-config = { version = "0.16.0-rc.9", path = "crates/node-config" } +signet-node-tests = { version = "0.16.0-rc.9", path = "crates/node-tests" } +signet-node-types = { version = "0.16.0-rc.9", path = "crates/node-types" } +signet-rpc = { version = "0.16.0-rc.9", path = "crates/rpc" } init4-bin-base = { version = "0.18.0-rc.8", features = ["alloy"] }