From 606d5bcb04b65485aa539c88e340137357a8bfac Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Thu, 14 May 2026 11:16:25 +0700
Subject: [PATCH 01/28] test: update qbft test

---
 crates/core/src/qbft/fake_clock.rs    |   58 +-
 crates/core/src/qbft/internal_test.rs | 1196 ++++++++++++++++++++++---
 2 files changed, 1131 insertions(+), 123 deletions(-)
diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 04e143f5..3b7baeea 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -1,6 +1,6 @@
 use crossbeam::channel as mpmc;
 use std::{
-    collections::HashMap,
+    collections::BTreeMap,
     sync::{Arc, Mutex},
     thread,
     time::{Duration, Instant},
@@ -15,7 +15,7 @@ struct FakeClockInner {
     start: Instant,
     now: Instant,
     last_id: usize,
-    clients: HashMap<usize, (mpmc::Sender<Instant>, Instant)>,
+    clients: BTreeMap<usize, (mpmc::Sender<Instant>, Instant)>,
 }
 
 impl FakeClock {
@@ -98,56 +98,82 @@ impl FakeClock {
     }
 }
 
-impl Drop for FakeClock {
-    fn drop(&mut self) {
-        self.cancel();
-    }
-}
-
 #[test]
 fn multiple_threads_timers() {
     let clock = FakeClock::new(Instant::now());
+    let (done_tx, done_rx) = mpmc::bounded(2);
 
-    let start = Instant::now();
     thread::scope(|s| {
         let c1 = clock.clone();
         let (ch_1, _) = c1.new_timer(Duration::from_secs(5));
+        let done_tx_1 = done_tx.clone();
         s.spawn(move || {
-            let _ = ch_1.recv();
+            done_tx_1.send(ch_1.recv().is_ok()).unwrap();
         });
 
         let c2 = clock.clone();
         let (ch_2, _) = c2.new_timer(Duration::from_secs(5));
+        let done_tx_2 = done_tx.clone();
         s.spawn(move || {
-            let _ = ch_2.recv();
+            done_tx_2.send(ch_2.recv().is_ok()).unwrap();
         });
 
+        clock.advance(Duration::from_secs(4));
+        assert!(done_rx.try_recv().is_err());
         clock.advance(Duration::from_secs(6));
     });
 
-    println!("start={:?}, clock={:?}", start.elapsed(), clock.elapsed());
+    assert_eq!(vec![true, true], done_rx.try_iter().collect::<Vec<_>>());
+    assert_eq!(Duration::from_secs(10), clock.elapsed());
 }
 
 #[test]
 fn multiple_threads_cancellation() {
     let clock = FakeClock::new(Instant::now());
+    let (done_tx, done_rx) = mpmc::bounded(2);
 
-    let start = Instant::now();
     thread::scope(|s| {
         let c1 = clock.clone();
         let (ch_1, _) = c1.new_timer(Duration::from_secs(5));
+        let done_tx_1 = done_tx.clone();
         s.spawn(move || {
-            let _ = ch_1.recv();
+            done_tx_1.send(ch_1.recv().is_err()).unwrap();
         });
 
         let c2 = clock.clone();
         let (ch_2, _) = c2.new_timer(Duration::from_secs(5));
+        let done_tx_2 = done_tx.clone();
         s.spawn(move || {
-            let _ = ch_2.recv();
+            done_tx_2.send(ch_2.recv().is_err()).unwrap();
         });
 
         clock.cancel();
     });
 
-    println!("start={:?}, clock={:?}", start.elapsed(), clock.elapsed());
+    assert_eq!(vec![true, true], done_rx.try_iter().collect::<Vec<_>>());
+    assert_eq!(Duration::ZERO, clock.elapsed());
+}
+
+#[test]
+fn cancel_one_timer_only() {
+    let clock = FakeClock::new(Instant::now());
+    let (ch_1, cancel_1) = clock.new_timer(Duration::from_secs(5));
+    let (ch_2, _) = clock.new_timer(Duration::from_secs(5));
+
+    cancel_1();
+    clock.advance(Duration::from_secs(5));
+
+    assert!(ch_1.try_recv().is_err());
+    assert!(ch_2.try_recv().is_ok());
+}
+
+#[test]
+fn expired_timer_delivers_once() {
+    let clock = FakeClock::new(Instant::now());
+    let (ch, _) = clock.new_timer(Duration::from_secs(5));
+
+    clock.advance(Duration::from_secs(5));
+    assert!(ch.try_recv().is_ok());
+    clock.advance(Duration::from_secs(5));
+    assert!(ch.try_recv().is_err());
 }
diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index b7770251..5c5659f2 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1,10 +1,32 @@
 use crate::qbft::{self, fake_clock::FakeClock, *};
 use cancellation::CancellationTokenSource;
 use crossbeam::channel as mpmc;
-use std::{collections::HashMap, sync::Arc, thread, time::Duration};
+use std::{
+    collections::{BTreeMap, HashMap},
+    fmt::Write as _,
+    panic::{self, AssertUnwindSafe},
+    sync::{Arc, Mutex},
+    thread,
+    time::Duration,
+};
 
 const WRITE_CHAN_ERR: &str = "Failed to write to channel";
 const READ_CHAN_ERR: &str = "Failed to read from channel";
+const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c;
+
+type RunOutcome = std::thread::Result<Result<()>>;
+type TestMsgRef = Msg<i64, i64, i64>;
+
+struct PendingBroadcast {
+    deliver_at: Duration,
+    key: u64,
+    msg: TestMsgRef,
+}
+
+enum BroadcastEvent {
+    Immediate(TestMsgRef),
+    Delayed(PendingBroadcast),
+}
 
 #[derive(Default, Debug)]
 struct Test {
@@ -28,6 +50,8 @@ struct Test {
     pub prepared_val: i32,
     /// Non-deterministic consensus at random round.
     pub random_round: bool,
+    /// Enables fuzzing by node 1.
+    pub fuzz: bool,
 }
 
 fn test_qbft(test: Test) {
@@ -35,20 +59,24 @@ fn test_qbft(test: Test) {
     const MAX_ROUND: usize = 50;
     const FIFO_LIMIT: usize = 100;
 
+    let seed = test_seed(&test);
+    let trace = Trace::new();
     let start_time = time::Instant::now();
+    let real_start = time::Instant::now();
     let clock = FakeClock::new(start_time);
 
     let cts = CancellationTokenSource::new();
-    let mut receives = HashMap::<
+    let mut receives = BTreeMap::<
         i64,
         (
             mpmc::Sender<Msg<i64, i64, i64>>,
             mpmc::Receiver<Msg<i64, i64, i64>>,
         ),
     >::new();
-    let (broadcast_tx, broadcast_rx) = mpmc::unbounded::<Msg<i64, i64, i64>>();
+    let (broadcast_tx, broadcast_rx) = mpmc::unbounded::<BroadcastEvent>();
+    let (unjust_tx, unjust_rx) = mpmc::unbounded::<String>();
     let (result_chan_tx, result_chan_rx) = mpmc::bounded::<Vec<Msg<i64, i64, i64>>>(N);
-    let (run_chan_tx, run_chan_rx) = mpmc::bounded::<Result<()>>(N);
+    let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N);
 
     let is_leader = Box::new(make_is_leader(N as i64));
 
@@ -74,32 +102,46 @@ fn test_qbft(test: Test) {
                 result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR);
             })
         },
-        compare: Box::new(|_, _, _, _, return_err, _| {
-            return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+        compare: Box::new(|_, _, _, input_value_source, return_err, return_value| {
+            return_value
+                .send(*input_value_source)
+                .expect(WRITE_CHAN_ERR);
+            send_compare_result(return_err, return_value, Ok(()));
         }),
         nodes: N as i64,
         fifo_limit: FIFO_LIMIT as i64,
         log_round_change: {
             let clock = clock.clone();
+            let trace = trace.clone();
 
             Box::new(move |_, process, round, new_round, upon_rule, _| {
-                println!(
+                trace.push(format!(
                     "{:?} - {}@{} change to {} ~= {}",
                     clock.elapsed(),
                     process,
                     round,
                     new_round,
                     upon_rule,
-                );
+                ));
+            })
+        },
+        log_unjust: {
+            let trace = trace.clone();
+            let unjust_tx = unjust_tx.clone();
+            let fuzz = test.fuzz;
+            Box::new(move |_, process, msg| {
+                let line = format!("Unjust: process={} msg={:?}", process, msg);
+                trace.push(line.clone());
+                if !fuzz {
+                    unjust_tx.send(line).expect(WRITE_CHAN_ERR);
+                }
             })
         },
-        log_unjust: Box::new(|_, _, msg| {
-            println!("Unjust: {:?}", msg);
-        }),
         log_upon_rule: {
             let clock = clock.clone();
+            let trace = trace.clone();
             Box::new(move |_, process, round, msg, upon_rule| {
-                println!(
+                trace.push(format!(
                     "{:?} {} => {}@{} -> {}@{} ~= {}",
                     clock.elapsed(),
                     msg.source(),
@@ -108,7 +150,7 @@ fn test_qbft(test: Test) {
                     process,
                     round,
                     upon_rule,
-                );
+                ));
             })
         },
     });
@@ -122,6 +164,7 @@ fn test_qbft(test: Test) {
             let trans = Transport {
                 broadcast: {
                     let clock = clock.clone();
+                    let trace = trace.clone();
 
                     Box::new(
                         move |_, type_, instance, source, round, value, pr, pv, justification| {
@@ -130,16 +173,22 @@ fn test_qbft(test: Test) {
                             }
 
                             if type_ == MSG_COMMIT && round <= test.commits_after.into() {
-                                println!(
+                                trace.push(format!(
                                     "{:?} {} dropping commit for round {}",
                                     clock.elapsed(),
                                     source,
                                     round
-                                );
+                                ));
                                 return Ok(());
                             }
 
-                            println!("{:?} {} => {}@{}", clock.elapsed(), source, type_, round);
+                            trace.push(format!(
+                                "{:?} {} => {}@{}",
+                                clock.elapsed(),
+                                source,
+                                type_,
+                                round
+                            ));
 
                             let msg = new_msg(
                                 type_,
@@ -159,7 +208,9 @@ fn test_qbft(test: Test) {
                                 msg.clone(),
                                 test.bcast_jitter_ms,
                                 clock.clone(),
-                            ); // TODO: Add clock
+                                trace.clone(),
+                                seed,
+                            );
 
                             Ok(())
                         },
@@ -168,7 +219,7 @@ fn test_qbft(test: Test) {
                 receive: receiver.clone(),
             };
 
-            let token = cts.token();
+            let token = cts.token().clone();
             let clock = clock.clone();
             let receiver = receiver.clone();
             let start_delay = test.start_delay.get(&i).copied();
@@ -177,99 +228,229 @@ fn test_qbft(test: Test) {
             let run_chan_tx = run_chan_tx.clone();
             let defs = defs.clone();
             let is_leader = is_leader.clone();
+            let trace = trace.clone();
 
             s.spawn(move || {
                 if let Some(delay) = start_delay {
-                    println!("{:?} Node {} start delay {:?}", clock.elapsed(), i, delay);
+                    trace.push(format!(
+                        "{:?} Node {} start delay {:?}",
+                        clock.elapsed(),
+                        i,
+                        delay
+                    ));
                     let (delay_ch, _) = clock.new_timer(delay);
                     _ = delay_ch.recv();
-                    println!("{:?} Node {} starting", clock.elapsed(), i);
+                    trace.push(format!(
+                        "{:?} Node {} starting {:?}",
+                        clock.elapsed(),
+                        i,
+                        delay
+                    ));
                 }
 
-                // Drain any buffered messages
-                while !receiver.is_empty() {
-                    _ = receiver.recv().expect(READ_CHAN_ERR);
+                if start_delay.is_some() {
+                    // Drain any buffered messages
+                    while !receiver.is_empty() {
+                        _ = receiver.recv().expect(READ_CHAN_ERR);
+                    }
                 }
 
                 let (v_chan_tx, v_chan_rx) = mpmc::bounded::<i64>(1);
-                let (_, vs_chan_rx) = mpmc::bounded::<i64>(1);
+                let (vs_chan_tx, vs_chan_rx) = mpmc::bounded::<i64>(1);
+                let mut keep_value_sender = Some(v_chan_tx);
+                let mut input_value_rx = v_chan_rx;
 
                 if let Some(delay) = value_delay {
+                    let v_chan_tx_send = keep_value_sender
+                        .as_ref()
+                        .expect("value sender kept until run returns")
+                        .clone();
                     s.spawn(move || {
                         let (delay_ch, cancel) = clock.new_timer(delay);
                         _ = delay_ch.recv();
-                        _ = v_chan_tx.send(i);
+                        _ = v_chan_tx_send.send(i);
 
                         cancel();
                     });
                 } else if decide_round != 1 {
+                    let v_chan_tx_send = keep_value_sender
+                        .as_ref()
+                        .expect("value sender kept until run returns")
+                        .clone();
                     s.spawn(move || {
-                        _ = v_chan_tx.send(i);
+                        _ = v_chan_tx_send.send(i);
                     });
                 } else if is_leader(&test.instance, 1, i) {
+                    let v_chan_tx_send = keep_value_sender
+                        .as_ref()
+                        .expect("value sender kept until run returns")
+                        .clone();
                     s.spawn(move || {
-                        _ = v_chan_tx.send(i);
+                        _ = v_chan_tx_send.send(i);
                     });
+                } else {
+                    keep_value_sender = None;
+                    input_value_rx = mpmc::never();
                 }
 
-                run_chan_tx
-                    .send(qbft::run(
-                        token,
+                let keepalive = (keep_value_sender, vs_chan_tx);
+                let run_result = panic::catch_unwind(AssertUnwindSafe(|| {
+                    qbft::run(
+                        &token,
                         &defs,
                         &trans,
                         &test.instance,
                         i,
-                        v_chan_rx,
+                        input_value_rx,
                         vs_chan_rx,
-                    ))
-                    .expect(WRITE_CHAN_ERR);
+                    )
+                }));
+                drop(keepalive);
+                run_chan_tx.send((i, run_result)).expect(WRITE_CHAN_ERR);
             });
         }
 
-        let mut results = HashMap::<i64, Msg<i64, i64, i64>>::new();
+        let mut results = BTreeMap::<i64, Msg<i64, i64, i64>>::new();
         let mut count = 0;
         let mut decided = false;
         let mut done = 0;
+        let mut broadcasts = 0usize;
+        let mut pending = Vec::<PendingBroadcast>::new();
+        let mut next_fuzz_at = test.fuzz.then_some(Duration::from_millis(100));
+        let mut fuzz_counter = 0_u64;
 
         loop {
-            mpmc::select! {
-                recv(broadcast_rx) -> msg => {
-                    let msg = msg.expect(READ_CHAN_ERR);
-                    for (target, (out_tx, _)) in receives.iter() {
-                        if *target == msg.source() {
-                            continue; // Do not broadcast to self, we sent to self already.
-                        }
-
-                        if let Some(p) = test.drop_prob.get(&msg.source()) {
-                            if rand::random::<f64>() < *p {
-                                println!("{:?} {} => {}@{} => {} (dropped)", clock.elapsed(), msg.source(), msg.type_(), msg.round(), target);
-                                continue; // Drop
-                            }
-                        }
+            broadcasts += deliver_ready_broadcasts(
+                &mut pending,
+                &receives,
+                &test.drop_prob,
+                seed,
+                &trace,
+                &clock,
+            );
+
+            while let Some(next) = next_fuzz_at {
+                if clock.elapsed() < next {
+                    break;
+                }
 
-                        out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
+                let msg = random_msg(test.instance, 1, seed, fuzz_counter);
+                fuzz_counter = fuzz_counter.wrapping_add(1);
+                trace.push(format!(
+                    "{:?} fuzz {} => {}@{}",
+                    clock.elapsed(),
+                    msg.source(),
+                    msg.type_(),
+                    msg.round()
+                ));
+                broadcasts +=
+                    fanout_broadcast(&receives, &test.drop_prob, seed, &trace, &clock, msg);
+                next_fuzz_at = Some(next + Duration::from_millis(100));
+            }
 
-                        if rand::random::<f64>() < 0.1 { // Send 10% messages twice
-                            out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
+            mpmc::select! {
+                recv(broadcast_rx) -> event => {
+                    match event.expect(READ_CHAN_ERR) {
+                        BroadcastEvent::Immediate(msg) => {
+                            broadcasts += fanout_broadcast(
+                                &receives,
+                                &test.drop_prob,
+                                seed,
+                                &trace,
+                                &clock,
+                                msg,
+                            );
                         }
+                        BroadcastEvent::Delayed(delayed) => pending.push(delayed),
+                    }
+                    clock.advance(Duration::from_millis(1));
+                    if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) {
+                        cts.cancel();
+                        clock.cancel();
+                        panic!(
+                            "qbft test hang: decided={} done={} count={} elapsed={:?} real_elapsed={:?} broadcasts={} seed={}\n{}",
+                            decided,
+                            done,
+                            count,
+                            clock.elapsed(),
+                            real_start.elapsed(),
+                            broadcasts,
+                            seed,
+                            trace.dump()
+                        );
                     }
                 }
 
+                recv(unjust_rx) -> unjust => {
+                    let unjust = unjust.expect(READ_CHAN_ERR);
+                    cts.cancel();
+                    clock.cancel();
+                    panic!("unjust message: {unjust} elapsed={:?} seed={}\n{}", clock.elapsed(), seed, trace.dump());
+                }
+
                 recv(result_chan_rx) -> res => {
                     let q_commit = res.expect(READ_CHAN_ERR);
 
                     for commit in q_commit.clone() {
                         for (_, previous) in results.iter() {
-                            assert_eq!(previous.value(), commit.value(), "commit values");
+                            if previous.value() != commit.value() {
+                                cts.cancel();
+                                clock.cancel();
+                                panic!(
+                                    "commit values differ: previous={:?} commit={:?} elapsed={:?} seed={}\n{}",
+                                    previous,
+                                    commit,
+                                    clock.elapsed(),
+                                    seed,
+                                    trace.dump()
+                                );
+                            }
                         }
 
                         if !test.random_round {
-                            assert_eq!(i64::from(test.decide_round), commit.round(), "wrong decide round");
+                            if i64::from(test.decide_round) != commit.round() {
+                                cts.cancel();
+                                clock.cancel();
+                                panic!(
+                                    "wrong decide round: want={} got={} commit={:?} elapsed={:?} seed={}\n{}",
+                                    test.decide_round,
+                                    commit.round(),
+                                    commit,
+                                    clock.elapsed(),
+                                    seed,
+                                    trace.dump()
+                                );
+                            }
 
                             if test.prepared_val != 0 { // Check prepared value if set
-                                assert_eq!(i64::from(test.prepared_val), commit.value(), "wrong prepared value");
+                                if i64::from(test.prepared_val) != commit.value() {
+                                    cts.cancel();
+                                    clock.cancel();
+                                    panic!(
+                                        "wrong prepared value: want={} got={} commit={:?} elapsed={:?} seed={}\n{}",
+                                        test.prepared_val,
+                                        commit.value(),
+                                        commit,
+                                        clock.elapsed(),
+                                        seed,
+                                        trace.dump()
+                                    );
+                                }
                             } else { // Otherwise check that leader value was used.
-                                assert!(is_leader(&test.instance, commit.round(), commit.value()), "not leader");
+                                if !is_leader(&test.instance, commit.round(), commit.value()) {
+                                    cts.cancel();
+                                    clock.cancel();
+                                    panic!(
+                                        "not leader value: instance={} round={} value={} commit={:?} elapsed={:?} seed={}\n{}",
+                                        test.instance,
+                                        commit.round(),
+                                        commit.value(),
+                                        commit,
+                                        clock.elapsed(),
+                                        seed,
+                                        trace.dump()
+                                    );
+                                }
                             }
                         }
 
@@ -282,7 +463,7 @@ fn test_qbft(test: Test) {
                     }
 
                     let round = q_commit[0].round();
-                    println!("Got all results in round {} after {:?}: {:?}", round, clock.elapsed(), results);
+                    trace.push(format!("Got all results in round {} after {:?}: {:?}", round, clock.elapsed(), results));
 
                     // Trigger shutdown
                     decided = true;
@@ -292,11 +473,24 @@ fn test_qbft(test: Test) {
                 }
 
                 recv(run_chan_rx) -> res => {
-                    let err = res.expect(READ_CHAN_ERR);
+                    let (node, outcome) = res.expect(READ_CHAN_ERR);
 
-                    if err.is_err() {
+                    if !matches!(outcome, Ok(Ok(()))) {
                         if !decided {
-                            panic!("unexpected run error");
+                            cts.cancel();
+                            clock.cancel();
+                            panic!(
+                                "unexpected run error: node={} outcome={} decided={} done={} count={} elapsed={:?} broadcasts={} seed={}\n{}",
+                                node,
+                                format_run_outcome(&outcome),
+                                decided,
+                                done,
+                                count,
+                                clock.elapsed(),
+                                broadcasts,
+                                seed,
+                                trace.dump()
+                            );
                         }
                     }
 
@@ -309,12 +503,98 @@ fn test_qbft(test: Test) {
                 default => {
                     thread::sleep(time::Duration::from_micros(1));
                     clock.advance(Duration::from_millis(1));
+                    if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) {
+                        cts.cancel();
+                        clock.cancel();
+                        panic!(
+                            "qbft test hang: decided={} done={} count={} elapsed={:?} real_elapsed={:?} broadcasts={} seed={}\n{}",
+                            decided,
+                            done,
+                            count,
+                            clock.elapsed(),
+                            real_start.elapsed(),
+                            broadcasts,
+                            seed,
+                            trace.dump()
+                        );
+                    }
                 }
             }
         }
     });
 }
 
+#[derive(Clone, Default)]
+struct Trace(Arc<Mutex<Vec<String>>>);
+
+impl Trace {
+    fn new() -> Self {
+        Self::default()
+    }
+
+    fn push(&self, line: String) {
+        self.0.lock().unwrap().push(line);
+    }
+
+    fn dump(&self) -> String {
+        let lines = self.0.lock().unwrap();
+        let start = lines.len().saturating_sub(200);
+        let mut out = String::new();
+        for line in &lines[start..] {
+            let _ = writeln!(out, "{line}");
+        }
+        out
+    }
+}
+
+fn format_run_outcome(outcome: &RunOutcome) -> String {
+    match outcome {
+        Ok(Ok(())) => "ok".to_string(),
+        Ok(Err(err)) => format!("error {err:?}"),
+        Err(payload) => {
+            if let Some(msg) = payload.downcast_ref::<&str>() {
+                format!("panic {msg}")
+            } else if let Some(msg) = payload.downcast_ref::<String>() {
+                format!("panic {msg}")
+            } else {
+                "panic <non-string payload>".to_string()
+            }
+        }
+    }
+}
+
+fn outcome_is_error(outcome: &RunOutcome, expected: fn(&QbftError) -> bool) -> bool {
+    matches!(outcome, Ok(Err(err)) if expected(err))
+}
+
+fn send_compare_result<C: Send + 'static>(
+    return_err: &mpmc::Sender<Result<()>>,
+    return_value: &mpmc::Sender<C>,
+    result: Result<()>,
+) {
+    // Keep the auxiliary compare channels alive until the result is consumed.
+    // Go channels do not close when the callback returns; Rust senders do.
+    let keepalive = return_value.clone();
+    thread::spawn(move || {
+        thread::sleep(Duration::from_millis(50));
+        drop(keepalive);
+    });
+    return_err.send(result).expect(WRITE_CHAN_ERR);
+}
+
+fn test_seed(test: &Test) -> u64 {
+    let mut seed = 0x5142_4654_5445_5354_u64;
+    seed ^= test.instance as u64;
+    seed ^= u64::from(test.const_period) << 8;
+    seed ^= (test.bcast_jitter_ms as u64) << 16;
+    seed ^= (test.commits_after as u64) << 32;
+    seed ^= (test.decide_round as u64) << 40;
+    seed ^= (test.prepared_val as u64) << 48;
+    seed ^= u64::from(test.random_round) << 56;
+    seed ^= u64::from(test.fuzz) << 57;
+    seed
+}
+
 /// Construct a leader election function.
 fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone {
     move |instance: &i64, round: i64, process: i64| -> bool { (instance + round) % n == process }
@@ -365,32 +645,167 @@ fn new_msg(
 // Delays the message broadcast by between 1x and 2x jitter_ms and drops
 // messages.
 fn bcast(
-    broadcast: mpmc::Sender<Msg<i64, i64, i64>>,
+    broadcast: mpmc::Sender<BroadcastEvent>,
     msg: Msg<i64, i64, i64>,
     jitter_ms: i32,
     clock: FakeClock,
+    trace: Trace,
+    seed: u64,
 ) {
     if jitter_ms == 0 {
-        broadcast.send(msg.clone()).expect(WRITE_CHAN_ERR);
+        broadcast
+            .send(BroadcastEvent::Immediate(msg.clone()))
+            .expect(WRITE_CHAN_ERR);
         return;
     }
 
-    thread::spawn(move || {
-        let delta_ms = (f64::from(jitter_ms) * rand::random::<f64>()) as i32;
-        let delay = Duration::from_millis((jitter_ms + delta_ms) as u64);
-        println!(
-            "{:?} {} => {}@{} (bcast delay {:?})",
-            clock.elapsed(),
-            msg.source(),
-            msg.type_(),
-            msg.round(),
-            delay
-        );
-        let (delay_ch, _) = clock.new_timer(delay);
-        _ = delay_ch.recv();
+    let delta_ms = (f64::from(jitter_ms) * deterministic_unit(seed, &msg, 0, 3)) as i32;
+    let delay = Duration::from_millis((jitter_ms + delta_ms) as u64);
+    trace.push(format!(
+        "{:?} {} => {}@{} (bcast delay {:?})",
+        clock.elapsed(),
+        msg.source(),
+        msg.type_(),
+        msg.round(),
+        delay
+    ));
+    let key = deterministic_msg_u64(seed, &msg, 0, 4);
+    broadcast
+        .send(BroadcastEvent::Delayed(PendingBroadcast {
+            deliver_at: clock.elapsed() + delay,
+            key,
+            msg,
+        }))
+        .expect(WRITE_CHAN_ERR);
+}
 
-        _ = broadcast.send(msg);
-    });
+fn deliver_ready_broadcasts(
+    pending: &mut Vec<PendingBroadcast>,
+    receives: &BTreeMap<i64, (mpmc::Sender<TestMsgRef>, mpmc::Receiver<TestMsgRef>)>,
+    drop_prob: &HashMap<i64, f64>,
+    seed: u64,
+    trace: &Trace,
+    clock: &FakeClock,
+) -> usize {
+    pending.sort_by_key(|delayed| (delayed.deliver_at, delayed.key));
+    let ready_count = pending
+        .iter()
+        .take_while(|delayed| delayed.deliver_at <= clock.elapsed())
+        .count();
+    let ready = pending.drain(..ready_count).collect::<Vec<_>>();
+
+    ready
+        .into_iter()
+        .map(|delayed| fanout_broadcast(receives, drop_prob, seed, trace, clock, delayed.msg))
+        .sum()
+}
+
+fn fanout_broadcast(
+    receives: &BTreeMap<i64, (mpmc::Sender<TestMsgRef>, mpmc::Receiver<TestMsgRef>)>,
+    drop_prob: &HashMap<i64, f64>,
+    seed: u64,
+    trace: &Trace,
+    clock: &FakeClock,
+    msg: TestMsgRef,
+) -> usize {
+    let mut broadcasts = 0;
+    for (target, (out_tx, _)) in receives.iter() {
+        if *target == msg.source() {
+            continue; // Do not broadcast to self, we sent to self already.
+        }
+
+        if let Some(p) = drop_prob.get(&msg.source()) {
+            if deterministic_unit(seed, &msg, *target, 1) < *p {
+                trace.push(format!(
+                    "{:?} {} => {}@{} => {} (dropped)",
+                    clock.elapsed(),
+                    msg.source(),
+                    msg.type_(),
+                    msg.round(),
+                    target
+                ));
+                continue;
+            }
+        }
+
+        out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
+        broadcasts += 1;
+
+        if deterministic_unit(seed, &msg, *target, 2) < 0.1 {
+            out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
+            broadcasts += 1;
+            trace.push(format!(
+                "{:?} {} => {}@{} => {} (duplicate)",
+                clock.elapsed(),
+                msg.source(),
+                msg.type_(),
+                msg.round(),
+                target
+            ));
+        }
+    }
+
+    broadcasts
+}
+
+fn random_msg(instance: i64, peer_idx: i64, seed: u64, counter: u64) -> Msg<i64, i64, i64> {
+    let message_types = [
+        MSG_PRE_PREPARE,
+        MSG_PREPARE,
+        MSG_COMMIT,
+        MSG_ROUND_CHANGE,
+        MSG_DECIDED,
+    ];
+    new_msg(
+        message_types[deterministic_range(seed, counter, 10, message_types.len())],
+        instance,
+        peer_idx,
+        deterministic_i64(seed, counter, 11, 10),
+        deterministic_i64(seed, counter, 12, 10),
+        0,
+        deterministic_i64(seed, counter, 13, 10),
+        deterministic_i64(seed, counter, 14, 10),
+        None,
+    )
+}
+
+fn deterministic_unit(seed: u64, msg: &Msg<i64, i64, i64>, target: i64, salt: u64) -> f64 {
+    let value = deterministic_msg_u64(seed, msg, target, salt) >> 11;
+    value as f64 / ((1_u64 << 53) as f64)
+}
+
+fn deterministic_msg_u64(seed: u64, msg: &Msg<i64, i64, i64>, target: i64, salt: u64) -> u64 {
+    let mut value = splitmix64(seed ^ salt);
+    value = splitmix64(value ^ i64_to_u64(msg.type_().0));
+    value = splitmix64(value ^ i64_to_u64(msg.instance()));
+    value = splitmix64(value ^ i64_to_u64(msg.source()));
+    value = splitmix64(value ^ i64_to_u64(msg.round()));
+    value = splitmix64(value ^ i64_to_u64(msg.value()));
+    value = splitmix64(value ^ i64_to_u64(msg.value_source().unwrap_or_default()));
+    value = splitmix64(value ^ i64_to_u64(msg.prepared_round()));
+    value = splitmix64(value ^ i64_to_u64(msg.prepared_value()));
+    splitmix64(value ^ i64_to_u64(target))
+}
+
+fn deterministic_range(seed: u64, counter: u64, salt: u64, upper: usize) -> usize {
+    let upper = u64::try_from(upper).expect("upper fits in u64");
+    usize::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in usize")
+}
+
+fn deterministic_i64(seed: u64, counter: u64, salt: u64, upper: i64) -> i64 {
+    let upper = u64::try_from(upper).expect("upper is positive");
+    i64::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in i64")
+}
+
+fn splitmix64(mut value: u64) -> u64 {
+    value = value.wrapping_add(0x9e37_79b9_7f4a_7c15);
+    value = (value ^ (value >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9);
+    value = (value ^ (value >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb);
+    value ^ (value >> 31)
+}
+
+fn i64_to_u64(value: i64) -> u64 {
+    u64::from_le_bytes(value.to_le_bytes())
 }
 
 #[derive(Clone, Debug)]
@@ -455,7 +870,6 @@ impl SomeMsg<i64, i64, i64> for TestMsg {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn happy_0() {
     test_qbft(Test {
         instance: 0,
@@ -465,7 +879,6 @@ fn happy_0() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn happy_1() {
     test_qbft(Test {
         instance: 1,
@@ -475,7 +888,6 @@ fn happy_1() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn prepare_round_1_decide_round_2() {
     test_qbft(Test {
         instance: 0,
@@ -487,12 +899,11 @@ fn prepare_round_1_decide_round_2() {
 }
 
 #[test]
-#[ignore = "wrong prepared value"]
 fn prepare_round_2_decide_round_3() {
     test_qbft(Test {
         instance: 0,
         commits_after: 2,
-        value_delay: HashMap::from([(1, Duration::from_millis(200))]),
+        value_delay: HashMap::from([(1, Duration::from_secs(2))]),
         decide_round: 3,
         prepared_val: 2,
         const_period: true,
@@ -501,32 +912,27 @@ fn prepare_round_2_decide_round_3() {
 }
 
 #[test]
-#[ignore = "wrong decide round"]
-fn leader_late_xp() {
+fn leader_late_exp() {
     test_qbft(Test {
         instance: 0,
-        start_delay: HashMap::from([(1, Duration::from_millis(200))]),
+        start_delay: HashMap::from([(1, Duration::from_secs(2))]),
         decide_round: 2,
         ..Default::default()
     });
 }
 
 #[test]
-#[ignore = "wrong decide round"]
 fn leader_down_const() {
     test_qbft(Test {
-        instance: 3,
-        start_delay: HashMap::from([
-            (1, Duration::from_millis(50)),
-            (2, Duration::from_millis(100)),
-        ]),
-        decide_round: 4,
+        instance: 0,
+        start_delay: HashMap::from([(1, Duration::from_secs(2))]),
+        const_period: true,
+        decide_round: 2,
         ..Default::default()
     });
 }
 
 #[test]
-#[ignore = "flaky"]
 fn very_late_exp() {
     test_qbft(Test {
         instance: 3,
@@ -537,7 +943,6 @@ fn very_late_exp() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn very_late_const() {
     test_qbft(Test {
         instance: 1,
@@ -549,7 +954,6 @@ fn very_late_const() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn stagger_start_exp() {
     test_qbft(Test {
         instance: 0,
@@ -565,7 +969,6 @@ fn stagger_start_exp() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn stagger_start_const() {
     test_qbft(Test {
         instance: 0,
@@ -582,7 +985,6 @@ fn stagger_start_const() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn very_delayed_value_exp() {
     test_qbft(Test {
         instance: 3,
@@ -593,7 +995,6 @@ fn very_delayed_value_exp() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn very_delayed_value_const() {
     test_qbft(Test {
         instance: 1,
@@ -605,7 +1006,6 @@ fn very_delayed_value_const() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn stagger_delayed_value_exp() {
     test_qbft(Test {
         instance: 0,
@@ -621,7 +1021,6 @@ fn stagger_delayed_value_exp() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn stagger_delayed_value_const() {
     test_qbft(Test {
         instance: 0,
@@ -638,7 +1037,6 @@ fn stagger_delayed_value_const() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn round1_leader_no_value_round2_leader_offline() {
     test_qbft(Test {
         instance: 0,
@@ -651,7 +1049,6 @@ fn round1_leader_no_value_round2_leader_offline() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn jitter_500ms_exp() {
     test_qbft(Test {
         instance: 3,
@@ -662,7 +1059,6 @@ fn jitter_500ms_exp() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn jitter_200ms_const() {
     test_qbft(Test {
         instance: 3,
@@ -674,7 +1070,6 @@ fn jitter_200ms_const() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn drop_10_percent_const() {
     test_qbft(Test {
         instance: 1,
@@ -686,7 +1081,6 @@ fn drop_10_percent_const() {
 }
 
 #[test]
-#[ignore = "flaky"]
 fn drop_30_percent_const() {
     test_qbft(Test {
         instance: 1,
@@ -697,6 +1091,41 @@ fn drop_30_percent_const() {
     });
 }
 
+#[test]
+fn fuzz() {
+    test_qbft(Test {
+        instance: 1,
+        fuzz: true,
+        const_period: true,
+        decide_round: 1,
+        ..Default::default()
+    });
+}
+
+#[test]
+fn fuzz_with_late_leader() {
+    test_qbft(Test {
+        instance: 1,
+        fuzz: true,
+        start_delay: HashMap::from([(1, Duration::from_secs(2)), (2, Duration::from_secs(2))]),
+        const_period: true,
+        random_round: true,
+        ..Default::default()
+    });
+}
+
+#[test]
+fn fuzz_with_very_late_leader() {
+    test_qbft(Test {
+        instance: 1,
+        fuzz: true,
+        start_delay: HashMap::from([(1, Duration::from_secs(10)), (2, Duration::from_secs(10))]),
+        const_period: true,
+        random_round: true,
+        ..Default::default()
+    });
+}
+
 fn noop_definition() -> Definition<i64, i64, i64> {
     Definition {
         is_leader: Box::new(|_, _, _| false),
@@ -719,7 +1148,71 @@ fn noop_transport() -> Transport<i64, i64, i64> {
 }
 
 #[test]
-#[ignore = "flaky"]
+fn formulas() {
+    let expected = [
+        (1, 1, 0),
+        (2, 2, 0),
+        (3, 2, 0),
+        (4, 3, 1),
+        (5, 4, 1),
+        (6, 4, 1),
+        (7, 5, 2),
+        (8, 6, 2),
+        (9, 6, 2),
+        (10, 7, 3),
+        (11, 8, 3),
+        (12, 8, 3),
+        (13, 9, 4),
+        (14, 10, 4),
+        (15, 10, 4),
+        (16, 11, 5),
+        (17, 12, 5),
+        (18, 12, 5),
+        (19, 13, 6),
+        (20, 14, 6),
+        (21, 14, 6),
+        (22, 15, 7),
+    ];
+
+    for (n, q, f) in expected {
+        let d = Definition::<i64, i64, i64> {
+            nodes: n,
+            ..noop_definition()
+        };
+        assert_eq!(q, d.quorum(), "Quorum given N={n}");
+        assert_eq!(f, d.faulty(), "Faulty given N={n}");
+    }
+}
+
+#[test]
+fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() {
+    let preprepare = new_msg(
+        MSG_PRE_PREPARE,
+        1,
+        3,
+        6,
+        2,
+        0,
+        0,
+        0,
+        Some(&vec![
+            new_msg(MSG_ROUND_CHANGE, 1, 2, 6, 0, 0, 2, 3, None),
+            new_msg(MSG_ROUND_CHANGE, 1, 3, 6, 0, 0, 2, 3, None),
+            new_msg(MSG_ROUND_CHANGE, 1, 1, 6, 0, 0, 2, 2, None),
+            new_msg(MSG_PREPARE, 1, 3, 2, 2, 0, 0, 0, None),
+            new_msg(MSG_PREPARE, 1, 4, 2, 2, 0, 0, 0, None),
+            new_msg(MSG_PREPARE, 1, 1, 2, 2, 0, 0, 0, None),
+            new_msg(MSG_PREPARE, 1, 2, 2, 2, 0, 0, 0, None),
+        ]),
+    );
+    let mut def = noop_definition();
+    def.nodes = 4;
+    def.is_leader = Box::new(make_is_leader(4));
+
+    assert!(is_justified_pre_prepare(&def, &1, &preprepare, 0));
+}
+
+#[test]
 fn duplicate_pre_prepare_rules() {
     let cts = CancellationTokenSource::new();
     let ct = &cts.token().clone();
@@ -760,8 +1253,11 @@ fn duplicate_pre_prepare_rules() {
 
         panic!("unexpected round {}", round);
     });
-    def.compare = Box::new(|_, _, _, _, return_err, _| {
-        _ = return_err.send(Ok(()));
+    def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| {
+        return_value
+            .send(*input_value_source)
+            .expect(WRITE_CHAN_ERR);
+        send_compare_result(return_err, return_value, Ok(()));
     });
 
     let (r_chan_tx, r_chan_rx) = mpmc::bounded::<Msg<i64, i64, i64>>(2);
@@ -788,3 +1284,489 @@ fn duplicate_pre_prepare_rules() {
 
     assert!(res.is_ok());
 }
+
+#[test]
+fn classify_rules() {
+    let mut def = noop_definition();
+    def.nodes = 4;
+    def.is_leader = Box::new(make_is_leader(4));
+
+    let preprepare = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 1, 0, 0, 0, None);
+    assert!(classify(&def, &0, 1, 2, &HashMap::new(), &preprepare).0 == UPON_JUSTIFIED_PRE_PREPARE);
+
+    let prepares = vec![
+        new_msg(MSG_PREPARE, 0, 1, 1, 2, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 2, 1, 2, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 3, 1, 2, 0, 0, 0, None),
+    ];
+    let buffer = buffer_by_source(&prepares);
+    assert!(classify(&def, &0, 1, 2, &buffer, &prepares[2]).0 == UPON_QUORUM_PREPARES);
+
+    let commits = vec![
+        new_msg(MSG_COMMIT, 0, 1, 1, 2, 0, 0, 0, None),
+        new_msg(MSG_COMMIT, 0, 2, 1, 2, 0, 0, 0, None),
+        new_msg(MSG_COMMIT, 0, 3, 1, 2, 0, 0, 0, None),
+    ];
+    let buffer = buffer_by_source(&commits);
+    assert!(classify(&def, &0, 1, 2, &buffer, &commits[2]).0 == UPON_QUORUM_COMMITS);
+
+    let future_round_changes = vec![
+        new_msg(MSG_ROUND_CHANGE, 0, 1, 3, 0, 0, 0, 0, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 2, 3, 0, 0, 0, 0, None),
+    ];
+    let buffer = buffer_by_source(&future_round_changes);
+    assert!(
+        classify(&def, &0, 1, 2, &buffer, &future_round_changes[1]).0 == UPON_F_PLUS1_ROUND_CHANGES
+    );
+
+    let unjust_round_changes = vec![
+        new_msg(MSG_ROUND_CHANGE, 0, 1, 1, 0, 0, 2, 9, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 2, 1, 0, 0, 2, 9, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 3, 1, 0, 0, 2, 9, None),
+    ];
+    let buffer = buffer_by_source(&unjust_round_changes);
+    assert!(
+        classify(&def, &0, 1, 2, &buffer, &unjust_round_changes[2]).0
+            == UPON_UNJUST_QUORUM_ROUND_CHANGES
+    );
+}
+
+#[test]
+fn justified_qrc_j1_and_j2() {
+    let mut def = noop_definition();
+    def.nodes = 4;
+    let j1 = vec![
+        new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 0, 0, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 0, 0, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 0, 0, None),
+    ];
+    assert_eq!(Some(0), contains_justified_qrc(&def, &j1, 2));
+    assert_eq!(3, get_justified_qrc(&def, &j1, 2).unwrap().len());
+
+    let j2 = vec![
+        new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 1, 7, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 1, 7, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 1, 1, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 2, 1, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 3, 1, 7, 0, 0, 0, None),
+    ];
+    assert_eq!(Some(7), contains_justified_qrc(&def, &j2, 2));
+    assert!(get_justified_qrc(&def, &j2, 2).unwrap().len() >= 6);
+}
+
+#[test]
+fn filter_msgs_keeps_one_per_source() {
+    let msgs = vec![
+        new_msg(MSG_PREPARE, 0, 1, 1, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 1, 1, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 2, 1, 7, 0, 0, 0, None),
+    ];
+
+    let filtered = filter_msgs(&msgs, MSG_PREPARE, 1, Some(&7), None, None);
+
+    assert_eq!(2, filtered.len());
+    assert_eq!(
+        vec![1, 2],
+        filtered.iter().map(|msg| msg.source()).collect::<Vec<_>>()
+    );
+}
+
+#[test]
+fn compare_success_error_cached_value_source_and_timeout() {
+    let cts = CancellationTokenSource::new();
+    let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None);
+    let (_vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
+    let timer = mpmc::never();
+    let mut def = noop_definition();
+    def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| {
+        return_value
+            .send(*input_value_source)
+            .expect(WRITE_CHAN_ERR);
+        send_compare_result(return_err, return_value, Ok(()));
+    });
+    assert!(matches!(
+        compare(cts.token(), &def, &msg, &vs_rx, 0, &timer),
+        Ok(0)
+    ));
+
+    let mut def = noop_definition();
+    def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| {
+        return_value
+            .send(*input_value_source)
+            .expect(WRITE_CHAN_ERR);
+        send_compare_result(return_err, return_value, Err(QbftError::CompareError));
+    });
+    assert!(matches!(
+        compare(cts.token(), &def, &msg, &vs_rx, 0, &timer),
+        Err(QbftError::CompareError)
+    ));
+
+    let (vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
+    vs_tx.send(42).expect(WRITE_CHAN_ERR);
+    let mut def = noop_definition();
+    def.compare = Box::new(
+        |_, _, input_value_source_ch, input_value_source, return_err, return_value| {
+            let cached = if *input_value_source == 0 {
+                let value = input_value_source_ch.recv().expect(READ_CHAN_ERR);
+                return_value.send(value).expect(WRITE_CHAN_ERR);
+                value
+            } else {
+                *input_value_source
+            };
+            assert_eq!(42, cached);
+            send_compare_result(return_err, return_value, Ok(()));
+        },
+    );
+    assert!(matches!(
+        compare(cts.token(), &def, &msg, &vs_rx, 0, &timer),
+        Ok(42)
+    ));
+
+    let (timer_tx, timer_rx) = mpmc::bounded(1);
+    timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR);
+    let mut def = noop_definition();
+    def.compare = Box::new(|_, _, _, _, return_err, _| {
+        thread::sleep(Duration::from_millis(20));
+        let _ = return_err.send(Ok(()));
+    });
+    assert!(matches!(
+        compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx),
+        Err(QbftError::TimeoutError)
+    ));
+}
+
+fn buffer_by_source(msgs: &[Msg<i64, i64, i64>]) -> HashMap<i64, Vec<Msg<i64, i64, i64>>> {
+    let mut buffer = HashMap::new();
+    for msg in msgs {
+        buffer
+            .entry(msg.source())
+            .or_insert_with(Vec::new)
+            .push(msg.clone());
+    }
+    buffer
+}
+
+#[derive(Debug)]
+struct ChainSplitTest {
+    value_source: HashMap<i64, i64>,
+    decide_round: i32,
+    prepared_val: i32,
+    should_halt: bool,
+}
+
+#[test]
+fn chain_split_same_value() {
+    test_qbft_chain_split(ChainSplitTest {
+        decide_round: 1,
+        value_source: HashMap::from([(1, 1), (2, 1), (3, 1), (4, 1)]),
+        prepared_val: 1,
+        should_halt: false,
+    });
+}
+
+#[test]
+fn chain_split_non_leader_peer_has_different_value() {
+    test_qbft_chain_split(ChainSplitTest {
+        decide_round: 1,
+        value_source: HashMap::from([(1, 1), (2, 3), (3, 1), (4, 1)]),
+        prepared_val: 1,
+        should_halt: false,
+    });
+}
+
+#[test]
+fn chain_split_first_leader_has_different_value_second_leader_succeeds() {
+    test_qbft_chain_split(ChainSplitTest {
+        decide_round: 2,
+        value_source: HashMap::from([(1, 3), (2, 1), (3, 1), (4, 1)]),
+        prepared_val: 1,
+        should_halt: false,
+    });
+}
+
+#[test]
+fn zz_chain_split_no_consensus_halt() {
+    test_qbft_chain_split(ChainSplitTest {
+        decide_round: 0,
+        value_source: HashMap::from([(1, 1), (2, 1), (3, 3), (4, 3)]),
+        prepared_val: 0,
+        should_halt: true,
+    });
+}
+
+fn test_qbft_chain_split(test: ChainSplitTest) {
+    const N: usize = 4;
+    const MAX_ROUND: i64 = 10;
+    const FIFO_LIMIT: i64 = 100;
+
+    let clock = FakeClock::new(time::Instant::now());
+    let cts = CancellationTokenSource::new();
+    let trace = Trace::new();
+    let mut receives = BTreeMap::<
+        i64,
+        (
+            mpmc::Sender<Msg<i64, i64, i64>>,
+            mpmc::Receiver<Msg<i64, i64, i64>>,
+        ),
+    >::new();
+    let (broadcast_tx, broadcast_rx) = mpmc::unbounded::<Msg<i64, i64, i64>>();
+    let (result_chan_tx, result_chan_rx) = mpmc::bounded::<Vec<Msg<i64, i64, i64>>>(N);
+    let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N);
+    let instance = 0;
+
+    let defs = Arc::new(Definition {
+        is_leader: Box::new(make_is_leader(N as i64)),
+        new_timer: {
+            let clock = clock.clone();
+            Box::new(move |round| {
+                clock.new_timer(Duration::from_secs(u64::pow(2, (round as u32) - 1)))
+            })
+        },
+        decide: {
+            let result_chan_tx = result_chan_tx.clone();
+            Box::new(move |_, _, _, q_commit| {
+                result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR);
+            })
+        },
+        compare: Box::new(
+            |_, qcommit, input_value_source_ch, input_value_source, return_err, return_value| {
+                let leader_value_source = qcommit.value_source().expect("value source");
+                let local = if *input_value_source == 0 {
+                    let value = input_value_source_ch.recv().expect(READ_CHAN_ERR);
+                    return_value.send(value).expect(WRITE_CHAN_ERR);
+                    value
+                } else {
+                    *input_value_source
+                };
+
+                if leader_value_source != local {
+                    return_value.send(local).expect(WRITE_CHAN_ERR);
+                    send_compare_result(return_err, return_value, Err(QbftError::CompareError));
+                    return;
+                }
+
+                return_value.send(local).expect(WRITE_CHAN_ERR);
+                send_compare_result(return_err, return_value, Ok(()));
+            },
+        ),
+        nodes: N as i64,
+        fifo_limit: FIFO_LIMIT,
+        log_round_change: {
+            let clock = clock.clone();
+            let trace = trace.clone();
+            Box::new(move |_, process, round, new_round, upon_rule, _| {
+                trace.push(format!(
+                    "{:?} - {}@{} change to {} ~= {}",
+                    clock.elapsed(),
+                    process,
+                    round,
+                    new_round,
+                    upon_rule
+                ));
+            })
+        },
+        log_unjust: {
+            let trace = trace.clone();
+            Box::new(move |_, process, msg| {
+                trace.push(format!("Unjust: process={} msg={:?}", process, msg))
+            })
+        },
+        log_upon_rule: {
+            let clock = clock.clone();
+            let trace = trace.clone();
+            Box::new(move |_, process, round, msg, upon_rule| {
+                trace.push(format!(
+                    "{:?} {} => {}@{} -> {}@{} ~= {}",
+                    clock.elapsed(),
+                    msg.source(),
+                    msg.type_(),
+                    msg.round(),
+                    process,
+                    round,
+                    upon_rule
+                ));
+            })
+        },
+    });
+
+    thread::scope(|s| {
+        for i in 1..=N as i64 {
+            let (sender, receiver) = mpmc::bounded::<Msg<i64, i64, i64>>(1000);
+            receives.insert(i, (sender.clone(), receiver.clone()));
+            let broadcast_tx = broadcast_tx.clone();
+            let trace = trace.clone();
+            let clock = clock.clone();
+
+            let transport = Transport {
+                broadcast: Box::new(
+                    move |_, type_, instance, source, round, value, pr, pv, justification| {
+                        if round > MAX_ROUND {
+                            return Err(QbftError::MaxRoundReached);
+                        }
+
+                        trace.push(format!(
+                            "{:?} {} => {}@{}",
+                            clock.elapsed(),
+                            source,
+                            type_,
+                            round
+                        ));
+                        let msg = new_msg(
+                            type_,
+                            *instance,
+                            source,
+                            round,
+                            *value,
+                            *value,
+                            pr,
+                            *pv,
+                            justification,
+                        );
+                        sender.send(msg.clone()).expect(WRITE_CHAN_ERR);
+                        broadcast_tx.send(msg).expect(WRITE_CHAN_ERR);
+                        Ok(())
+                    },
+                ),
+                receive: receiver,
+            };
+
+            let token = cts.token().clone();
+            let defs = defs.clone();
+            let run_chan_tx = run_chan_tx.clone();
+            let value_source = test.value_source[&i];
+            s.spawn(move || {
+                let (v_tx, v_rx) = mpmc::bounded::<i64>(1);
+                let (vs_tx, vs_rx) = mpmc::unbounded::<i64>();
+                v_tx.send(value_source).expect(WRITE_CHAN_ERR);
+                for _ in 0..=MAX_ROUND {
+                    vs_tx.send(value_source).expect(WRITE_CHAN_ERR);
+                }
+                let run_result = panic::catch_unwind(AssertUnwindSafe(|| {
+                    qbft::run(&token, &defs, &transport, &instance, i, v_rx, vs_rx)
+                }));
+                drop(v_tx);
+                drop(vs_tx);
+                run_chan_tx.send((i, run_result)).expect(WRITE_CHAN_ERR);
+            });
+        }
+
+        let mut results = BTreeMap::<i64, Msg<i64, i64, i64>>::new();
+        let mut count = 0;
+        let mut decided = false;
+        let mut done = 0;
+
+        loop {
+            mpmc::select! {
+            recv(broadcast_rx) -> msg => {
+                let msg = msg.expect(READ_CHAN_ERR);
+                for (target, (out_tx, _)) in receives.iter() {
+                    if *target == msg.source() {
+                        continue;
+                    }
+                    out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
+                    if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, 1) < 0.1 {
+                        out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
+                    }
+                }
+            }
+            recv(result_chan_rx) -> res => {
+                let q_commit = res.expect(READ_CHAN_ERR);
+                for commit in q_commit.clone() {
+                    for previous in results.values() {
+                        if previous.value() != commit.value() {
+                            cts.cancel();
+                            clock.cancel();
+                            panic!(
+                                "chain split commit values differ: previous={:?} commit={:?} elapsed={:?}\n{}",
+                                previous,
+                                commit,
+                                clock.elapsed(),
+                                trace.dump()
+                            );
+                        }
+                    }
+                    if i64::from(test.decide_round) != commit.round() {
+                        cts.cancel();
+                        clock.cancel();
+                        panic!(
+                            "chain split wrong decide round: want={} got={} commit={:?} elapsed={:?}\n{}",
+                            test.decide_round,
+                            commit.round(),
+                            commit,
+                            clock.elapsed(),
+                            trace.dump()
+                        );
+                    }
+                    if test.prepared_val != 0 {
+                        if i64::from(test.prepared_val) != commit.value() {
+                            cts.cancel();
+                            clock.cancel();
+                            panic!(
+                                "chain split wrong prepared value: want={} got={} commit={:?} elapsed={:?}\n{}",
+                                test.prepared_val,
+                                commit.value(),
+                                commit,
+                                clock.elapsed(),
+                                trace.dump()
+                            );
+                        }
+                    }
+                    results.insert(commit.source(), commit);
+                }
+                count += 1;
+                if count == N {
+                    decided = true;
+                    clock.cancel();
+                    cts.cancel();
+                }
+            }
+            recv(run_chan_rx) -> res => {
+                let (node, outcome) = res.expect(READ_CHAN_ERR);
+                let expected_halt = test.should_halt
+                    && outcome_is_error(&outcome, |err| matches!(err, QbftError::MaxRoundReached));
+                if !(decided || expected_halt) {
+                    cts.cancel();
+                    clock.cancel();
+                    panic!(
+                        "unexpected chain split run error: node={} outcome={} decided={} done={} count={} elapsed={:?}\n{}",
+                        node,
+                        format_run_outcome(&outcome),
+                        decided,
+                        done,
+                        count,
+                        clock.elapsed(),
+                        trace.dump()
+                    );
+                }
+                done += 1;
+                if done == N {
+                    if test.should_halt {
+                        assert!(!decided, "halt case unexpectedly decided");
+                    }
+                    return;
+                }
+            }
+            default => {
+                thread::sleep(Duration::from_micros(1));
+                let tick = if test.should_halt {
+                    Duration::from_millis(100)
+                } else {
+                    Duration::from_millis(1)
+                };
+                clock.advance(tick);
+                let limit = if test.should_halt {
+                    Duration::from_secs(600)
+                } else {
+                    Duration::from_secs(60)
+                };
+                if clock.elapsed() > limit {
+                    cts.cancel();
+                    clock.cancel();
+                    panic!("chain split hang: decided={decided} done={done} count={count} elapsed={:?}\n{}", clock.elapsed(), trace.dump());
+                }
+            }
+            }
+        }
+    });
+}

From 0ec38dede4cac52ff4ba21a1f51d160c89efb4a9 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 10:22:23 +0700
Subject: [PATCH 02/28] fix: compare run on retached thread

---
 crates/core/src/qbft/fake_clock.rs    |   4 +-
 crates/core/src/qbft/internal_test.rs | 231 ++++++++++++++++++++------
 crates/core/src/qbft/mod.rs           | 169 +++++++++++--------
 3 files changed, 283 insertions(+), 121 deletions(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 3b7baeea..0a3ab477 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -1,6 +1,6 @@
 use crossbeam::channel as mpmc;
 use std::{
-    collections::BTreeMap,
+    collections::HashMap,
     sync::{Arc, Mutex},
     thread,
     time::{Duration, Instant},
@@ -15,7 +15,7 @@ struct FakeClockInner {
     start: Instant,
     now: Instant,
     last_id: usize,
-    clients: BTreeMap<usize, (mpmc::Sender<Instant>, Instant)>,
+    clients: HashMap<usize, (mpmc::Sender<Instant>, Instant)>,
 }
 
 impl FakeClock {
diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 5c5659f2..ea072f94 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -12,6 +12,7 @@ use std::{
 
 const WRITE_CHAN_ERR: &str = "Failed to write to channel";
 const READ_CHAN_ERR: &str = "Failed to read from channel";
+// Fixed seed for deterministic chain-split duplicate-message simulation.
 const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c;
 
 type RunOutcome = std::thread::Result<Result<()>>;
@@ -66,6 +67,9 @@ fn test_qbft(test: Test) {
     let clock = FakeClock::new(start_time);
 
     let cts = CancellationTokenSource::new();
+    // Keep peer iteration deterministic. These fake-clock tests assert exact
+    // rounds, and broadcast fanout order affects which node observes quorums
+    // first when tests run in parallel.
     let mut receives = BTreeMap::<
         i64,
         (
@@ -102,11 +106,8 @@ fn test_qbft(test: Test) {
                 result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR);
             })
         },
-        compare: Box::new(|_, _, _, input_value_source, return_err, return_value| {
-            return_value
-                .send(*input_value_source)
-                .expect(WRITE_CHAN_ERR);
-            send_compare_result(return_err, return_value, Ok(()));
+        compare: Arc::new(|_, _, _, _, return_err, _| {
+            return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
         }),
         nodes: N as i64,
         fifo_limit: FIFO_LIMIT as i64,
@@ -329,6 +330,10 @@ fn test_qbft(test: Test) {
                 &clock,
             );
 
+            if decided {
+                next_fuzz_at = None;
+            }
+
             while let Some(next) = next_fuzz_at {
                 if clock.elapsed() < next {
                     break;
@@ -467,6 +472,7 @@ fn test_qbft(test: Test) {
 
                     // Trigger shutdown
                     decided = true;
+                    next_fuzz_at = None;
 
                     clock.cancel();
                     cts.cancel();
@@ -567,21 +573,6 @@ fn outcome_is_error(outcome: &RunOutcome, expected: fn(&QbftError) -> bool) -> b
     matches!(outcome, Ok(Err(err)) if expected(err))
 }
 
-fn send_compare_result<C: Send + 'static>(
-    return_err: &mpmc::Sender<Result<()>>,
-    return_value: &mpmc::Sender<C>,
-    result: Result<()>,
-) {
-    // Keep the auxiliary compare channels alive until the result is consumed.
-    // Go channels do not close when the callback returns; Rust senders do.
-    let keepalive = return_value.clone();
-    thread::spawn(move || {
-        thread::sleep(Duration::from_millis(50));
-        drop(keepalive);
-    });
-    return_err.send(result).expect(WRITE_CHAN_ERR);
-}
-
 fn test_seed(test: &Test) -> u64 {
     let mut seed = 0x5142_4654_5445_5354_u64;
     seed ^= test.instance as u64;
@@ -1131,7 +1122,7 @@ fn noop_definition() -> Definition<i64, i64, i64> {
         is_leader: Box::new(|_, _, _| false),
         new_timer: Box::new(|_| (mpmc::never(), Box::new(|| {}))),
         decide: Box::new(|_, _, _, _| {}),
-        compare: Box::new(|_, _, _, _, _, _| {}),
+        compare: Arc::new(|_, _, _, _, _, _| {}),
         nodes: 0,
         fifo_limit: 0,
         log_round_change: Box::new(|_, _, _, _, _, _| {}),
@@ -1253,11 +1244,8 @@ fn duplicate_pre_prepare_rules() {
 
         panic!("unexpected round {}", round);
     });
-    def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| {
-        return_value
-            .send(*input_value_source)
-            .expect(WRITE_CHAN_ERR);
-        send_compare_result(return_err, return_value, Ok(()));
+    def.compare = Arc::new(|_, _, _, _, return_err, _| {
+        return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
     });
 
     let (r_chan_tx, r_chan_rx) = mpmc::bounded::<Msg<i64, i64, i64>>(2);
@@ -1285,6 +1273,35 @@ fn duplicate_pre_prepare_rules() {
     assert!(res.is_ok());
 }
 
+#[test]
+fn idle_run_returns_when_cancelled() {
+    let cts = CancellationTokenSource::new();
+    let token = cts.token().clone();
+    let def = noop_definition();
+    let transport = noop_transport();
+    let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
+    let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
+    let (done_tx, done_rx) = mpmc::bounded(1);
+
+    thread::spawn(move || {
+        done_tx
+            .send(qbft::run(
+                &token, &def, &transport, &0, 1, input_rx, source_rx,
+            ))
+            .expect(WRITE_CHAN_ERR);
+    });
+
+    thread::sleep(Duration::from_millis(10));
+    cts.cancel();
+
+    assert!(matches!(
+        done_rx
+            .recv_timeout(Duration::from_millis(100))
+            .expect("idle run must unblock on cancellation"),
+        Ok(())
+    ));
+}
+
 #[test]
 fn classify_rules() {
     let mut def = noop_definition();
@@ -1379,33 +1396,42 @@ fn compare_success_error_cached_value_source_and_timeout() {
     let (_vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
     let timer = mpmc::never();
     let mut def = noop_definition();
-    def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| {
-        return_value
-            .send(*input_value_source)
-            .expect(WRITE_CHAN_ERR);
-        send_compare_result(return_err, return_value, Ok(()));
+    def.compare = Arc::new(|_, _, _, _, return_err, _| {
+        return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
     });
     assert!(matches!(
         compare(cts.token(), &def, &msg, &vs_rx, 0, &timer),
-        Ok(0)
+        (0, Ok(()))
+    ));
+
+    let mut def = noop_definition();
+    def.compare = Arc::new(|_, _, _, _, return_err, _| {
+        let return_err = return_err.clone();
+        thread::spawn(move || {
+            thread::sleep(Duration::from_millis(10));
+            return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+        });
+    });
+    assert!(matches!(
+        compare(cts.token(), &def, &msg, &vs_rx, 41, &timer),
+        (41, Ok(()))
     ));
 
     let mut def = noop_definition();
-    def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| {
-        return_value
-            .send(*input_value_source)
+    def.compare = Arc::new(|_, _, _, _, return_err, _| {
+        return_err
+            .send(Err(QbftError::CompareError))
             .expect(WRITE_CHAN_ERR);
-        send_compare_result(return_err, return_value, Err(QbftError::CompareError));
     });
     assert!(matches!(
         compare(cts.token(), &def, &msg, &vs_rx, 0, &timer),
-        Err(QbftError::CompareError)
+        (0, Err(QbftError::CompareError))
     ));
 
     let (vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
     vs_tx.send(42).expect(WRITE_CHAN_ERR);
     let mut def = noop_definition();
-    def.compare = Box::new(
+    def.compare = Arc::new(
         |_, _, input_value_source_ch, input_value_source, return_err, return_value| {
             let cached = if *input_value_source == 0 {
                 let value = input_value_source_ch.recv().expect(READ_CHAN_ERR);
@@ -1415,27 +1441,121 @@ fn compare_success_error_cached_value_source_and_timeout() {
                 *input_value_source
             };
             assert_eq!(42, cached);
-            send_compare_result(return_err, return_value, Ok(()));
+            return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+        },
+    );
+    assert!(matches!(
+        compare(cts.token(), &def, &msg, &vs_rx, 0, &timer),
+        (42, Ok(()))
+    ));
+
+    let (vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
+    vs_tx.send(43).expect(WRITE_CHAN_ERR);
+    let mut def = noop_definition();
+    def.compare = Arc::new(
+        |_, _, input_value_source_ch, input_value_source, return_err, return_value| {
+            let cached = if *input_value_source == 0 {
+                let value = input_value_source_ch.recv().expect(READ_CHAN_ERR);
+                return_value.send(value).expect(WRITE_CHAN_ERR);
+                value
+            } else {
+                *input_value_source
+            };
+            assert_eq!(43, cached);
+            return_err
+                .send(Err(QbftError::CompareError))
+                .expect(WRITE_CHAN_ERR);
         },
     );
     assert!(matches!(
         compare(cts.token(), &def, &msg, &vs_rx, 0, &timer),
-        Ok(42)
+        (43, Err(QbftError::CompareError))
     ));
 
     let (timer_tx, timer_rx) = mpmc::bounded(1);
     timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR);
     let mut def = noop_definition();
-    def.compare = Box::new(|_, _, _, _, return_err, _| {
+    def.compare = Arc::new(|_, _, _, _, return_err, _| {
         thread::sleep(Duration::from_millis(20));
         let _ = return_err.send(Ok(()));
     });
     assert!(matches!(
-        compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx),
-        Err(QbftError::TimeoutError)
+        compare(cts.token(), &def, &msg, &vs_rx, 44, &timer_rx),
+        (44, Err(QbftError::TimeoutError))
+    ));
+}
+
+#[test]
+fn compare_timeout_does_not_wait_for_blocked_callback() {
+    let cts = CancellationTokenSource::new();
+    let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None);
+    let (_vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
+    let (timer_tx, timer_rx) = mpmc::bounded(1);
+    timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR);
+
+    let mut def = noop_definition();
+    def.compare = Arc::new(|ct, _, _, _, return_err, _| {
+        while !ct.is_canceled() {
+            thread::sleep(Duration::from_millis(1));
+        }
+        let _ = return_err.send(Ok(()));
+    });
+
+    let (result_tx, result_rx) = mpmc::bounded(1);
+    thread::spawn(move || {
+        result_tx
+            .send(compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx))
+            .expect(WRITE_CHAN_ERR);
+    });
+
+    assert!(matches!(
+        result_rx
+            .recv_timeout(Duration::from_millis(100))
+            .expect("compare must return on timer without waiting for blocked callback"),
+        (0, Err(QbftError::TimeoutError))
     ));
 }
 
+#[test]
+fn compare_parent_cancel_cancels_callback_token() {
+    let cts = CancellationTokenSource::new();
+    let token = cts.token().clone();
+    let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None);
+    let (_vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
+    let (timer_tx, timer_rx) = mpmc::bounded(1);
+    let (token_cancelled_tx, token_cancelled_rx) = mpmc::bounded(1);
+
+    let mut def = noop_definition();
+    def.compare = Arc::new(move |ct, _, _, _, return_err, _| {
+        while !ct.is_canceled() {
+            thread::sleep(Duration::from_millis(1));
+        }
+        token_cancelled_tx.send(()).expect(WRITE_CHAN_ERR);
+        return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+    });
+
+    let (result_tx, result_rx) = mpmc::bounded(1);
+    thread::spawn(move || {
+        result_tx
+            .send(compare(&token, &def, &msg, &vs_rx, 0, &timer_rx))
+            .expect(WRITE_CHAN_ERR);
+    });
+
+    thread::sleep(Duration::from_millis(10));
+    cts.cancel();
+
+    match result_rx.recv_timeout(Duration::from_millis(100)) {
+        Ok(result) => assert!(matches!(result, (0, Ok(())))),
+        Err(err) => {
+            let _ = timer_tx.send(time::Instant::now());
+            panic!("compare callback token must be canceled by parent token: {err}");
+        }
+    }
+    token_cancelled_rx
+        .recv_timeout(Duration::from_millis(100))
+        .expect("callback token must be canceled by parent token");
+}
+
 fn buffer_by_source(msgs: &[Msg<i64, i64, i64>]) -> HashMap<i64, Vec<Msg<i64, i64, i64>>> {
     let mut buffer = HashMap::new();
     for msg in msgs {
@@ -1503,6 +1623,9 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
     let clock = FakeClock::new(time::Instant::now());
     let cts = CancellationTokenSource::new();
     let trace = Trace::new();
+    // Keep peer iteration deterministic. These fake-clock tests assert exact
+    // rounds, and broadcast fanout order affects which node observes quorums
+    // first when tests run in parallel.
     let mut receives = BTreeMap::<
         i64,
         (
@@ -1529,7 +1652,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
                 result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR);
             })
         },
-        compare: Box::new(
+        compare: Arc::new(
             |_, qcommit, input_value_source_ch, input_value_source, return_err, return_value| {
                 let leader_value_source = qcommit.value_source().expect("value source");
                 let local = if *input_value_source == 0 {
@@ -1541,13 +1664,13 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
                 };
 
                 if leader_value_source != local {
-                    return_value.send(local).expect(WRITE_CHAN_ERR);
-                    send_compare_result(return_err, return_value, Err(QbftError::CompareError));
+                    return_err
+                        .send(Err(QbftError::CompareError))
+                        .expect(WRITE_CHAN_ERR);
                     return;
                 }
 
-                return_value.send(local).expect(WRITE_CHAN_ERR);
-                send_compare_result(return_err, return_value, Ok(()));
+                return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
             },
         ),
         nodes: N as i64,
@@ -1637,11 +1760,9 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
             let value_source = test.value_source[&i];
             s.spawn(move || {
                 let (v_tx, v_rx) = mpmc::bounded::<i64>(1);
-                let (vs_tx, vs_rx) = mpmc::unbounded::<i64>();
+                let (vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
                 v_tx.send(value_source).expect(WRITE_CHAN_ERR);
-                for _ in 0..=MAX_ROUND {
-                    vs_tx.send(value_source).expect(WRITE_CHAN_ERR);
-                }
+                vs_tx.send(value_source).expect(WRITE_CHAN_ERR);
                 let run_result = panic::catch_unwind(AssertUnwindSafe(|| {
                     qbft::run(&token, &defs, &transport, &instance, i, v_rx, vs_rx)
                 }));
@@ -1756,7 +1877,11 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
                 };
                 clock.advance(tick);
                 let limit = if test.should_halt {
-                    Duration::from_secs(600)
+                    let max_round = u32::try_from(MAX_ROUND).expect("MAX_ROUND fits u32");
+                    let seconds = 1_u64
+                        .checked_shl(max_round.checked_add(1).expect("MAX_ROUND permits timeout limit"))
+                        .expect("MAX_ROUND permits timeout limit");
+                    Duration::from_secs(seconds)
                 } else {
                     Duration::from_secs(60)
                 };
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 41d0a2d5..e444373b 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -20,7 +20,7 @@
 #![allow(clippy::cast_possible_truncation)]
 #![allow(clippy::arithmetic_side_effects)]
 
-use cancellation::CancellationToken;
+use cancellation::{CancellationToken, CancellationTokenSource};
 use crossbeam::channel as mpmc;
 use std::{
     any,
@@ -33,6 +33,19 @@ use std::{
 
 type Result<T> = std::result::Result<T, QbftError>;
 
+const RUN_CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1);
+
+type CompareFn<I, V, C> = dyn Fn(
+        /* ct */ &CancellationToken,
+        /* qcommit */ &Msg<I, V, C>,
+        /* input_value_source_ch */ &mpmc::Receiver<C>,
+        /* input_value_source */ &C,
+        /* return_err */ &mpmc::Sender<Result<()>>,
+        /* return_value */ &mpmc::Sender<C>,
+    ) + Send
+    + Sync
+    + 'static;
+
 #[derive(Debug, thiserror::Error)]
 pub enum QbftError {
     #[error("Timeout")]
@@ -41,6 +54,9 @@ pub enum QbftError {
     #[error("Compare leader value with local value failed")]
     CompareError,
 
+    #[error("bug: expected only comparison or timeout error")]
+    UnexpectedCompareError,
+
     #[error("Maximum round reached")]
     MaxRoundReached,
 
@@ -99,20 +115,10 @@ where
             + Sync,
     >,
 
-    /// Called when leader proposes value and we compare it with our local
-    /// value. It's an opt-in feature that should instantly return `None` on
-    /// `return_err` channel if it is not turned on.
-    pub compare: Box<
-        dyn Fn(
-                /* ct */ &CancellationToken,
-                /* qcommit */ &Msg<I, V, C>,
-                /* input_value_source_ch */ &mpmc::Receiver<C>,
-                /* input_value_source */ &C,
-                /* return_err */ &mpmc::Sender<Result<()>>,
-                /* return_value */ &mpmc::Sender<C>,
-            ) + Send
-            + Sync,
-    >,
+    /// Charon parity hook called when the leader proposes a value. The core
+    /// algorithm only runs this callback and reacts to its result; any
+    /// value-source comparison policy belongs to the caller.
+    pub compare: sync::Arc<CompareFn<I, V, C>>,
 
     /// Called when consensus has been reached on a value.
     pub decide: Box<
@@ -194,12 +200,14 @@ pub const MSG_DECIDED: MessageType = MessageType(5);
 const MSG_SENTINEL: MessageType = MessageType(6); // intentionally not public
 
 impl MessageType {
+    /// Returns true when the message type is one of the known QBFT wire types.
     pub fn valid(&self) -> bool {
         self.0 > MSG_UNKNOWN.0 && self.0 < MSG_SENTINEL.0
     }
 }
 
 impl Display for MessageType {
+    /// Formats the message type using the stable wire/debug label.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let s = match self.0 {
             0 => "unknown",
@@ -260,6 +268,7 @@ pub const UPON_JUSTIFIED_DECIDED: UponRule = UponRule(7);
 pub const UPON_ROUND_TIMEOUT: UponRule = UponRule(8); // This is not triggered by a message, but by a timer.
 
 impl Display for UponRule {
+    /// Formats the upon-rule using the stable debug label.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let s = match self.0 {
             0 => "nothing",
@@ -284,11 +293,12 @@ struct DedupKey {
     round: i64,
 }
 
-/// Executes the consensus algorithm until the context is closed.
-/// The generic type `I` is the instance of consensus and can be anything.
-/// The generic type `V` is the arbitrary data value being proposed; it only
-/// requires an Equal method. The generic type `C` is the compare value, used to
-/// compare leader's proposed value with local value and can be anything.
+/// Executes one QBFT consensus instance until it decides, errors, or is
+/// cancelled.
+///
+/// `I` identifies the consensus instance, `V` is the comparable proposed value,
+/// and `C` is the application value used by `Definition::compare` to compare a
+/// leader proposal with the local input source.
 pub fn run<I, V, C>(
     ct: &CancellationToken,
     d: &Definition<I, V, C>,
@@ -299,8 +309,9 @@ pub fn run<I, V, C>(
     input_value_source_ch: mpmc::Receiver<C>,
 ) -> Result<()>
 where
-    V: PartialEq + Eq + Hash + Default,
-    C: Clone + Send + Sync + Default,
+    I: Send + Sync + 'static,
+    V: PartialEq + Eq + Hash + Default + 'static,
+    C: Clone + Send + Sync + Default + 'static,
 {
     // === State ===
     let round: Cell<i64> = Cell::new(1);
@@ -414,7 +425,7 @@ where
         (timer_chan, stop_timer) = (d.new_timer)(round.get());
     }
 
-    while !ct.is_canceled() {
+    loop {
         mpmc::select! {
             recv(input_value_ch) -> result => {
                 let iv = result?;
@@ -472,7 +483,7 @@ where
                         stop_timer();
                         (timer_chan, stop_timer) = (d.new_timer)(round.get());
 
-                        let compare_result = compare(
+                        let (new_input_value_source, compare_result) = compare(
                             ct,
                             d,
                             &msg,
@@ -480,12 +491,10 @@ where
                             input_value_source.clone(),
                             &timer_chan,
                         );
+                        input_value_source = new_input_value_source;
 
                         match compare_result {
-                            Ok(v) => {
-                                input_value_source = v;
-                                broadcast_msg(MSG_PREPARE, &msg.value(), None)?;
-                            }
+                            Ok(()) => broadcast_msg(MSG_PREPARE, &msg.value(), None)?,
                             Err(qbft_err) => {
                                 match qbft_err {
                                     QbftError::CompareError => {
@@ -503,7 +512,7 @@ where
 
                                         broadcast_round_change()?;
                                     }
-                                    _ => panic!("bug: expected only {} or {} error", QbftError::CompareError, QbftError::TimeoutError)
+                                    _ => return Err(QbftError::UnexpectedCompareError),
                                 }
                             }
                         }
@@ -579,7 +588,7 @@ where
                 broadcast_round_change()?;
             }
 
-            default => {
+            default(RUN_CANCELLATION_POLL_INTERVAL) => {
                 if ct.is_canceled() {
                     break;
                 }
@@ -590,6 +599,9 @@ where
     Ok(())
 }
 
+/// The callback may cache the local input source and return success/failure.
+/// This helper only preserves that callback result and lets the round timer win
+/// if the callback blocks.
 fn compare<I, V, C>(
     ct: &CancellationToken,
     d: &Definition<I, V, C>,
@@ -597,13 +609,14 @@ fn compare<I, V, C>(
     input_value_source_ch: &mpmc::Receiver<C>,
     input_value_source: C,
     timer_chan: &mpmc::Receiver<time::Instant>,
-) -> Result<C>
+) -> (C, Result<()>)
 where
-    V: PartialEq,
-    C: Clone + Send + Sync,
+    I: Send + Sync + 'static,
+    V: PartialEq + 'static,
+    C: Clone + Send + Sync + 'static,
 {
     let (compare_err_tx, compare_err_rx) = mpmc::bounded::<Result<()>>(1);
-    let (compare_value_tx, compare_value_rx) = mpmc::bounded::<C>(1);
+    let (compare_value_tx, mut compare_value_rx) = mpmc::bounded::<C>(1);
 
     // d.Compare has 2 roles:
     // 1. Read from the `input_value_source_ch` (if `input_value_source` is empty).
@@ -615,46 +628,69 @@ where
     // If comparison or any other unexpected error occurs, the error is returned on
     // `compare_err` channel.
 
-    thread::scope(|s| {
-        let mut result = input_value_source.clone();
-        let compare = &d.compare;
+    let mut result = input_value_source.clone();
+    let compare = d.compare.clone();
+    let compare_cts = sync::Arc::new(CancellationTokenSource::new());
+    let compare_ct = compare_cts.token().clone();
+    let msg = msg.clone();
+    let input_value_source_ch = input_value_source_ch.clone();
+
+    thread::spawn(move || {
+        (compare)(
+            &compare_ct,
+            &msg,
+            &input_value_source_ch,
+            &input_value_source,
+            &compare_err_tx,
+            &compare_value_tx,
+        );
+    });
 
-        s.spawn(move || {
-            (compare)(
-                ct,
-                msg,
-                input_value_source_ch,
-                &input_value_source,
-                &compare_err_tx,
-                &compare_value_tx,
-            );
-        });
-
-        loop {
-            mpmc::select! {
-                recv(compare_err_rx) -> msg => {
-                    let err = msg?;
-
-                    return match err {
-                        Ok(_) => Ok(result),
-                        Err(_) => Err(QbftError::CompareError),
-                    };
-                },
-
-                recv(compare_value_rx) -> msg => {
-                    let value = msg?;
+    loop {
+        mpmc::select! {
+            recv(compare_err_rx) -> msg => {
+                let err = match msg {
+                    Ok(err) => err,
+                    Err(err) => {
+                        compare_cts.cancel();
+                        return (result, Err(QbftError::ChannelError(err)));
+                    }
+                };
 
+                while let Ok(value) = compare_value_rx.try_recv() {
                     result = value;
-                },
+                }
 
-                recv(timer_chan) -> msg => {
-                    msg?;
+                compare_cts.cancel();
+                return match err {
+                    Ok(()) => (result, Ok(())),
+                    Err(_) => (result, Err(QbftError::CompareError)),
+                };
+            },
 
-                    return Err(QbftError::TimeoutError);
+            recv(compare_value_rx) -> msg => {
+                match msg {
+                    Ok(value) => result = value,
+                    Err(_) => compare_value_rx = mpmc::never(),
+                }
+            },
+
+            recv(timer_chan) -> msg => {
+                compare_cts.cancel();
+                if let Err(err) = msg {
+                    return (result, Err(QbftError::ChannelError(err)));
+                }
+
+                return (result, Err(QbftError::TimeoutError));
+            }
+
+            default(RUN_CANCELLATION_POLL_INTERVAL) => {
+                if ct.is_canceled() {
+                    compare_cts.cancel();
                 }
             }
         }
-    })
+    }
 }
 
 /// Returns all messages from the provided round.
@@ -1122,6 +1158,7 @@ where
     value: V,
 }
 
+/// Returns all unique-source PREPARE quorums grouped by identical round and value.
 fn get_prepare_quorums<I, V, C>(
     d: &Definition<I, V, C>,
     all: &Vec<Msg<I, V, C>>,

From b752caaf8b9d09a8847c44c27141832f14dba1f8 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 10:26:17 +0700
Subject: [PATCH 03/28] fix: removed hard coded salt in tests

---
 crates/core/src/qbft/internal_test.rs | 49 +++++++++++++++++----------
 crates/core/src/qbft/mod.rs           |  3 +-
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index ea072f94..5f32d92f 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -14,6 +14,15 @@ const WRITE_CHAN_ERR: &str = "Failed to write to channel";
 const READ_CHAN_ERR: &str = "Failed to read from channel";
 // Fixed seed for deterministic chain-split duplicate-message simulation.
 const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c;
+const TEST_STREAM_DROP: u64 = 1;
+const TEST_STREAM_DUPLICATE: u64 = 2;
+const TEST_STREAM_JITTER: u64 = 3;
+const TEST_STREAM_DELAY_ORDER: u64 = 4;
+const TEST_STREAM_MSG_TYPE: u64 = 10;
+const TEST_STREAM_MSG_ROUND: u64 = 11;
+const TEST_STREAM_MSG_VALUE: u64 = 12;
+const TEST_STREAM_MSG_PREPARED_ROUND: u64 = 13;
+const TEST_STREAM_MSG_PREPARED_VALUE: u64 = 14;
 
 type RunOutcome = std::thread::Result<Result<()>>;
 type TestMsgRef = Msg<i64, i64, i64>;
@@ -650,7 +659,8 @@ fn bcast(
         return;
     }
 
-    let delta_ms = (f64::from(jitter_ms) * deterministic_unit(seed, &msg, 0, 3)) as i32;
+    let delta_ms =
+        (f64::from(jitter_ms) * deterministic_unit(seed, &msg, 0, TEST_STREAM_JITTER)) as i32;
     let delay = Duration::from_millis((jitter_ms + delta_ms) as u64);
     trace.push(format!(
         "{:?} {} => {}@{} (bcast delay {:?})",
@@ -660,7 +670,7 @@ fn bcast(
         msg.round(),
         delay
     ));
-    let key = deterministic_msg_u64(seed, &msg, 0, 4);
+    let key = deterministic_msg_u64(seed, &msg, 0, TEST_STREAM_DELAY_ORDER);
     broadcast
         .send(BroadcastEvent::Delayed(PendingBroadcast {
             deliver_at: clock.elapsed() + delay,
@@ -706,7 +716,7 @@ fn fanout_broadcast(
         }
 
         if let Some(p) = drop_prob.get(&msg.source()) {
-            if deterministic_unit(seed, &msg, *target, 1) < *p {
+            if deterministic_unit(seed, &msg, *target, TEST_STREAM_DROP) < *p {
                 trace.push(format!(
                     "{:?} {} => {}@{} => {} (dropped)",
                     clock.elapsed(),
@@ -722,7 +732,7 @@ fn fanout_broadcast(
         out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
         broadcasts += 1;
 
-        if deterministic_unit(seed, &msg, *target, 2) < 0.1 {
+        if deterministic_unit(seed, &msg, *target, TEST_STREAM_DUPLICATE) < 0.1 {
             out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
             broadcasts += 1;
             trace.push(format!(
@@ -748,25 +758,26 @@ fn random_msg(instance: i64, peer_idx: i64, seed: u64, counter: u64) -> Msg<i64,
         MSG_DECIDED,
     ];
     new_msg(
-        message_types[deterministic_range(seed, counter, 10, message_types.len())],
+        message_types
+            [deterministic_range(seed, counter, TEST_STREAM_MSG_TYPE, message_types.len())],
         instance,
         peer_idx,
-        deterministic_i64(seed, counter, 11, 10),
-        deterministic_i64(seed, counter, 12, 10),
+        deterministic_i64(seed, counter, TEST_STREAM_MSG_ROUND, 10),
+        deterministic_i64(seed, counter, TEST_STREAM_MSG_VALUE, 10),
         0,
-        deterministic_i64(seed, counter, 13, 10),
-        deterministic_i64(seed, counter, 14, 10),
+        deterministic_i64(seed, counter, TEST_STREAM_MSG_PREPARED_ROUND, 10),
+        deterministic_i64(seed, counter, TEST_STREAM_MSG_PREPARED_VALUE, 10),
         None,
     )
 }
 
-fn deterministic_unit(seed: u64, msg: &Msg<i64, i64, i64>, target: i64, salt: u64) -> f64 {
-    let value = deterministic_msg_u64(seed, msg, target, salt) >> 11;
+fn deterministic_unit(seed: u64, msg: &Msg<i64, i64, i64>, target: i64, stream_id: u64) -> f64 {
+    let value = deterministic_msg_u64(seed, msg, target, stream_id) >> 11;
     value as f64 / ((1_u64 << 53) as f64)
 }
 
-fn deterministic_msg_u64(seed: u64, msg: &Msg<i64, i64, i64>, target: i64, salt: u64) -> u64 {
-    let mut value = splitmix64(seed ^ salt);
+fn deterministic_msg_u64(seed: u64, msg: &Msg<i64, i64, i64>, target: i64, stream_id: u64) -> u64 {
+    let mut value = splitmix64(seed ^ stream_id);
     value = splitmix64(value ^ i64_to_u64(msg.type_().0));
     value = splitmix64(value ^ i64_to_u64(msg.instance()));
     value = splitmix64(value ^ i64_to_u64(msg.source()));
@@ -778,14 +789,14 @@ fn deterministic_msg_u64(seed: u64, msg: &Msg<i64, i64, i64>, target: i64, salt:
     splitmix64(value ^ i64_to_u64(target))
 }
 
-fn deterministic_range(seed: u64, counter: u64, salt: u64, upper: usize) -> usize {
+fn deterministic_range(seed: u64, counter: u64, stream_id: u64, upper: usize) -> usize {
     let upper = u64::try_from(upper).expect("upper fits in u64");
-    usize::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in usize")
+    usize::try_from(splitmix64(seed ^ counter ^ stream_id) % upper).expect("range fits in usize")
 }
 
-fn deterministic_i64(seed: u64, counter: u64, salt: u64, upper: i64) -> i64 {
+fn deterministic_i64(seed: u64, counter: u64, stream_id: u64, upper: i64) -> i64 {
     let upper = u64::try_from(upper).expect("upper is positive");
-    i64::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in i64")
+    i64::try_from(splitmix64(seed ^ counter ^ stream_id) % upper).expect("range fits in i64")
 }
 
 fn splitmix64(mut value: u64) -> u64 {
@@ -1786,7 +1797,9 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
                         continue;
                     }
                     out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
-                    if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, 1) < 0.1 {
+                    if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, TEST_STREAM_DUPLICATE)
+                        < 0.1
+                    {
                         out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
                     }
                 }
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index e444373b..78f111d5 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -1158,7 +1158,8 @@ where
     value: V,
 }
 
-/// Returns all unique-source PREPARE quorums grouped by identical round and value.
+/// Returns all unique-source PREPARE quorums grouped by identical round and
+/// value.
 fn get_prepare_quorums<I, V, C>(
     d: &Definition<I, V, C>,
     all: &Vec<Msg<I, V, C>>,

From ed912eebf1bd871d3da44e884832a602752cf4d3 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 12:52:18 +0700
Subject: [PATCH 04/28] fix: return error on Context cancelled

---
 crates/core/src/qbft/internal_test.rs | 4 ++--
 crates/core/src/qbft/mod.rs           | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 5f32d92f..87ef8b98 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1281,7 +1281,7 @@ fn duplicate_pre_prepare_rules() {
         input_value_source_ch,
     );
 
-    assert!(res.is_ok());
+    assert!(matches!(res, Err(QbftError::ContextCanceled)));
 }
 
 #[test]
@@ -1309,7 +1309,7 @@ fn idle_run_returns_when_cancelled() {
         done_rx
             .recv_timeout(Duration::from_millis(100))
             .expect("idle run must unblock on cancellation"),
-        Ok(())
+        Err(QbftError::ContextCanceled)
     ));
 }
 
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 78f111d5..9cf6d163 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -57,6 +57,9 @@ pub enum QbftError {
     #[error("bug: expected only comparison or timeout error")]
     UnexpectedCompareError,
 
+    #[error("context canceled")]
+    ContextCanceled,
+
     #[error("Maximum round reached")]
     MaxRoundReached,
 
@@ -590,13 +593,11 @@ where
 
             default(RUN_CANCELLATION_POLL_INTERVAL) => {
                 if ct.is_canceled() {
-                    break;
+                    return Err(QbftError::ContextCanceled);
                 }
             }
         }
     }
-
-    Ok(())
 }
 
 /// The callback may cache the local input source and return success/failure.

From e01f64fc4969b7e2746e1d8ecc1f01c3375a04e1 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 14:13:28 +0700
Subject: [PATCH 05/28] fix: hash from string, not magic number

---
 crates/core/src/qbft/internal_test.rs | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 87ef8b98..e2e1b509 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -12,8 +12,8 @@ use std::{
 
 const WRITE_CHAN_ERR: &str = "Failed to write to channel";
 const READ_CHAN_ERR: &str = "Failed to read from channel";
-// Fixed seed for deterministic chain-split duplicate-message simulation.
-const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c;
+const TEST_SEED_LABEL: &str = "qbft-test";
+const CHAIN_SPLIT_SEED_LABEL: &str = "chain-split";
 const TEST_STREAM_DROP: u64 = 1;
 const TEST_STREAM_DUPLICATE: u64 = 2;
 const TEST_STREAM_JITTER: u64 = 3;
@@ -583,7 +583,7 @@ fn outcome_is_error(outcome: &RunOutcome, expected: fn(&QbftError) -> bool) -> b
 }
 
 fn test_seed(test: &Test) -> u64 {
-    let mut seed = 0x5142_4654_5445_5354_u64;
+    let mut seed = seed_from_label(TEST_SEED_LABEL);
     seed ^= test.instance as u64;
     seed ^= u64::from(test.const_period) << 8;
     seed ^= (test.bcast_jitter_ms as u64) << 16;
@@ -595,6 +595,14 @@ fn test_seed(test: &Test) -> u64 {
     seed
 }
 
+fn seed_from_label(label: &str) -> u64 {
+    // Small rolling-hash multiplier; only separates deterministic test labels,
+    // not used for cryptographic randomness or protocol behavior.
+    label.bytes().fold(0_u64, |seed, byte| {
+        seed.wrapping_mul(131).wrapping_add(u64::from(byte))
+    })
+}
+
 /// Construct a leader election function.
 fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone {
     move |instance: &i64, round: i64, process: i64| -> bool { (instance + round) % n == process }
@@ -1797,8 +1805,12 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
                         continue;
                     }
                     out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
-                    if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, TEST_STREAM_DUPLICATE)
-                        < 0.1
+                    if deterministic_unit(
+                        seed_from_label(CHAIN_SPLIT_SEED_LABEL),
+                        &msg,
+                        *target,
+                        TEST_STREAM_DUPLICATE,
+                    ) < 0.1
                     {
                         out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR);
                     }

From b22fb417801ce1d90dab9977ce2da76781e244e7 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 14:15:06 +0700
Subject: [PATCH 06/28] fix: one shot cancel when parent is cancelled

---
 crates/core/src/qbft/mod.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 9cf6d163..4ee494b3 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -635,6 +635,7 @@ where
     let compare_ct = compare_cts.token().clone();
     let msg = msg.clone();
     let input_value_source_ch = input_value_source_ch.clone();
+    let mut compare_parent_cancelled = false;
 
     thread::spawn(move || {
         (compare)(
@@ -686,8 +687,9 @@ where
             }
 
             default(RUN_CANCELLATION_POLL_INTERVAL) => {
-                if ct.is_canceled() {
+                if !compare_parent_cancelled && ct.is_canceled() {
                     compare_cts.cancel();
+                    compare_parent_cancelled = true;
                 }
             }
         }

From 081d8a47862fec96e56b644f2b6a6650989e1357 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 14:18:48 +0700
Subject: [PATCH 07/28] fix: fix make_is_leader test

---
 crates/core/src/qbft/internal_test.rs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index e2e1b509..c52ad70c 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -605,7 +605,11 @@ fn seed_from_label(label: &str) -> u64 {
 
 /// Construct a leader election function.
 fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone {
-    move |instance: &i64, round: i64, process: i64| -> bool { (instance + round) % n == process }
+    move |instance: &i64, round: i64, process: i64| -> bool {
+        // Go's helper uses 0-indexed process IDs. These Rust tests use 1..=n,
+        // so translate the same modulo schedule into the local process range.
+        ((instance + round - 1).rem_euclid(n)) + 1 == process
+    }
 }
 
 /// Returns a new message to be broadcast.
@@ -998,7 +1002,11 @@ fn stagger_start_const() {
 fn very_delayed_value_exp() {
     test_qbft(Test {
         instance: 3,
-        value_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]),
+        value_delay: HashMap::from([
+            (1, Duration::from_secs(5)),
+            (2, Duration::from_secs(10)),
+            (4, Duration::from_secs(5)),
+        ]),
         decide_round: 4,
         ..Default::default()
     });

From 1a0a33700001f6a8ffa8452d1bd5a27b9e024905 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 14:24:10 +0700
Subject: [PATCH 08/28] fix: minors naming and comments

---
 crates/core/src/qbft/fake_clock.rs | 8 ++++++--
 crates/core/src/qbft/mod.rs        | 8 +++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 0a3ab477..8392e7f3 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -123,7 +123,9 @@ fn multiple_threads_timers() {
         clock.advance(Duration::from_secs(6));
     });
 
-    assert_eq!(vec![true, true], done_rx.try_iter().collect::<Vec<_>>());
+    let done = done_rx.try_iter().collect::<Vec<_>>();
+    assert_eq!(2, done.len());
+    assert!(done.into_iter().all(|done| done));
     assert_eq!(Duration::from_secs(10), clock.elapsed());
 }
 
@@ -150,7 +152,9 @@ fn multiple_threads_cancellation() {
         clock.cancel();
     });
 
-    assert_eq!(vec![true, true], done_rx.try_iter().collect::<Vec<_>>());
+    let done = done_rx.try_iter().collect::<Vec<_>>();
+    assert_eq!(2, done.len());
+    assert!(done.into_iter().all(|done| done));
     assert_eq!(Duration::ZERO, clock.elapsed());
 }
 
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 4ee494b3..e064292d 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -33,7 +33,7 @@ use std::{
 
 type Result<T> = std::result::Result<T, QbftError>;
 
-const RUN_CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1);
+const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1);
 
 type CompareFn<I, V, C> = dyn Fn(
         /* ct */ &CancellationToken,
@@ -591,7 +591,7 @@ where
                 broadcast_round_change()?;
             }
 
-            default(RUN_CANCELLATION_POLL_INTERVAL) => {
+            default(CANCELLATION_POLL_INTERVAL) => {
                 if ct.is_canceled() {
                     return Err(QbftError::ContextCanceled);
                 }
@@ -637,6 +637,8 @@ where
     let input_value_source_ch = input_value_source_ch.clone();
     let mut compare_parent_cancelled = false;
 
+    // Detached by design, matching Charon's goroutine behavior: if a caller-provided
+    // compare callback ignores cancellation and never reports, it may outlive this call.
     thread::spawn(move || {
         (compare)(
             &compare_ct,
@@ -686,7 +688,7 @@ where
                 return (result, Err(QbftError::TimeoutError));
             }
 
-            default(RUN_CANCELLATION_POLL_INTERVAL) => {
+            default(CANCELLATION_POLL_INTERVAL) => {
                 if !compare_parent_cancelled && ct.is_canceled() {
                     compare_cts.cancel();
                     compare_parent_cancelled = true;

From 44b9a1335a25924db219d5fff47b62cb4f8433bc Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 14:48:07 +0700
Subject: [PATCH 09/28] fix: linter

---
 crates/core/src/qbft/mod.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index e064292d..5c21e9cc 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -637,8 +637,9 @@ where
     let input_value_source_ch = input_value_source_ch.clone();
     let mut compare_parent_cancelled = false;
 
-    // Detached by design, matching Charon's goroutine behavior: if a caller-provided
-    // compare callback ignores cancellation and never reports, it may outlive this call.
+    // Detached by design, matching Charon's goroutine behavior: if a
+    // caller-provided compare callback ignores cancellation and never reports,
+    // it may outlive this call.
     thread::spawn(move || {
         (compare)(
             &compare_ct,

From cc59baf6ca395e6acdb9a75a5467ea871bfc181b Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 18:28:14 +0700
Subject: [PATCH 10/28] fix: early cancel on the loop

---
 crates/core/src/qbft/internal_test.rs | 52 +++++++++++++++++++++++++++
 crates/core/src/qbft/mod.rs           |  4 +++
 2 files changed, 56 insertions(+)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index c52ad70c..8a150546 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1329,6 +1329,58 @@ fn idle_run_returns_when_cancelled() {
     ));
 }
 
+#[test]
+fn run_cancels_under_hot_receive_stream() {
+    let cts = CancellationTokenSource::new();
+    let token = cts.token().clone();
+    let mut def = noop_definition();
+    def.nodes = 4;
+    def.fifo_limit = 100;
+
+    let (receive_tx, receive_rx) = mpmc::bounded::<Msg<i64, i64, i64>>(1024);
+    let transport = Transport {
+        receive: receive_rx,
+        ..noop_transport()
+    };
+    let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
+    let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
+    let (done_tx, done_rx) = mpmc::bounded(1);
+
+    let sender_cts = CancellationTokenSource::new();
+    let sender_token = sender_cts.token().clone();
+    let sender = thread::spawn(move || {
+        let msg = new_msg(MSG_PREPARE, 0, 1, 2, 1, 0, 0, 0, None);
+        while !sender_token.is_canceled() {
+            match receive_tx.try_send(msg.clone()) {
+                Ok(()) => {}
+                Err(mpmc::TrySendError::Full(_)) => thread::yield_now(),
+                Err(mpmc::TrySendError::Disconnected(_)) => break,
+            }
+        }
+    });
+
+    thread::spawn(move || {
+        done_tx
+            .send(qbft::run(
+                &token, &def, &transport, &0, 1, input_rx, source_rx,
+            ))
+            .expect(WRITE_CHAN_ERR);
+    });
+
+    thread::sleep(Duration::from_millis(10));
+    cts.cancel();
+
+    assert!(matches!(
+        done_rx
+            .recv_timeout(Duration::from_millis(100))
+            .expect("run must unblock on cancellation even with a hot receive stream"),
+        Err(QbftError::ContextCanceled)
+    ));
+
+    sender_cts.cancel();
+    sender.join().expect("sender thread must exit");
+}
+
 #[test]
 fn classify_rules() {
     let mut def = noop_definition();
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 5c21e9cc..c192d240 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -429,6 +429,10 @@ where
     }
 
     loop {
+        if ct.is_canceled() {
+            return Err(QbftError::ContextCanceled);
+        }
+
         mpmc::select! {
             recv(input_value_ch) -> result => {
                 let iv = result?;

From ed82b6e6a09711696f8f8478a012f62153749c37 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 18:32:19 +0700
Subject: [PATCH 11/28] fix: context cancel in compare

---
 crates/core/src/qbft/internal_test.rs | 70 ++++++++++++++++++++++++++-
 crates/core/src/qbft/mod.rs           | 15 ++++--
 2 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 8a150546..4a759253 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1610,7 +1610,7 @@ fn compare_parent_cancel_cancels_callback_token() {
             thread::sleep(Duration::from_millis(1));
         }
         token_cancelled_tx.send(()).expect(WRITE_CHAN_ERR);
-        return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+        let _ = return_err.send(Ok(()));
     });
 
     let (result_tx, result_rx) = mpmc::bounded(1);
@@ -1624,7 +1624,7 @@ fn compare_parent_cancel_cancels_callback_token() {
     cts.cancel();
 
     match result_rx.recv_timeout(Duration::from_millis(100)) {
-        Ok(result) => assert!(matches!(result, (0, Ok(())))),
+        Ok(result) => assert!(matches!(result, (0, Err(QbftError::ContextCanceled)))),
         Err(err) => {
             let _ = timer_tx.send(time::Instant::now());
             panic!("compare callback token must be canceled by parent token: {err}");
@@ -1635,6 +1635,72 @@ fn compare_parent_cancel_cancels_callback_token() {
         .expect("callback token must be canceled by parent token");
 }
 
+#[test]
+fn run_parent_cancel_during_compare_does_not_prepare() {
+    const LEADER: i64 = 1;
+    const PROCESS: i64 = 2;
+
+    let cts = CancellationTokenSource::new();
+    let token = cts.token().clone();
+    let msg = new_msg(MSG_PRE_PREPARE, 0, LEADER, 1, 7, 11, 0, 0, None);
+    let (receive_tx, receive_rx) = mpmc::bounded(1);
+    receive_tx.send(msg).expect(WRITE_CHAN_ERR);
+
+    let (compare_started_tx, compare_started_rx) = mpmc::bounded(1);
+    let (compare_cancelled_tx, compare_cancelled_rx) = mpmc::bounded(1);
+    let mut def = noop_definition();
+    def.nodes = 4;
+    def.fifo_limit = 100;
+    def.is_leader = Box::new(|_, _, process| process == LEADER);
+    def.compare = Arc::new(move |ct, _, _, _, return_err, _| {
+        compare_started_tx.send(()).expect(WRITE_CHAN_ERR);
+        while !ct.is_canceled() {
+            thread::sleep(Duration::from_millis(1));
+        }
+        compare_cancelled_tx.send(()).expect(WRITE_CHAN_ERR);
+        let _ = return_err.send(Ok(()));
+    });
+
+    let (broadcast_tx, broadcast_rx) = mpmc::bounded(1);
+    let transport = Transport {
+        broadcast: Box::new(move |_, type_, _, _, _, _, _, _, _| {
+            broadcast_tx.send(type_).expect(WRITE_CHAN_ERR);
+            Ok(())
+        }),
+        receive: receive_rx,
+    };
+    let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
+    let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
+    let (done_tx, done_rx) = mpmc::bounded(1);
+
+    thread::spawn(move || {
+        done_tx
+            .send(qbft::run(
+                &token, &def, &transport, &0, PROCESS, input_rx, source_rx,
+            ))
+            .expect(WRITE_CHAN_ERR);
+    });
+
+    compare_started_rx
+        .recv_timeout(Duration::from_millis(100))
+        .expect("compare must start");
+    cts.cancel();
+
+    assert!(matches!(
+        done_rx
+            .recv_timeout(Duration::from_millis(100))
+            .expect("run must return parent cancellation from compare"),
+        Err(QbftError::ContextCanceled)
+    ));
+    compare_cancelled_rx
+        .recv_timeout(Duration::from_millis(100))
+        .expect("compare callback token must be canceled");
+    assert!(
+        broadcast_rx.try_iter().all(|type_| type_ != MSG_PREPARE),
+        "parent cancellation during compare must not broadcast PREPARE"
+    );
+}
+
 fn buffer_by_source(msgs: &[Msg<i64, i64, i64>]) -> HashMap<i64, Vec<Msg<i64, i64, i64>>> {
     let mut buffer = HashMap::new();
     for msg in msgs {
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index c192d240..cf43da2d 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -519,6 +519,7 @@ where
 
                                         broadcast_round_change()?;
                                     }
+                                    QbftError::ContextCanceled => return Err(QbftError::ContextCanceled),
                                     _ => return Err(QbftError::UnexpectedCompareError),
                                 }
                             }
@@ -639,7 +640,6 @@ where
     let compare_ct = compare_cts.token().clone();
     let msg = msg.clone();
     let input_value_source_ch = input_value_source_ch.clone();
-    let mut compare_parent_cancelled = false;
 
     // Detached by design, matching Charon's goroutine behavior: if a
     // caller-provided compare callback ignores cancellation and never reports,
@@ -656,6 +656,11 @@ where
     });
 
     loop {
+        if ct.is_canceled() {
+            compare_cts.cancel();
+            return (result, Err(QbftError::ContextCanceled));
+        }
+
         mpmc::select! {
             recv(compare_err_rx) -> msg => {
                 let err = match msg {
@@ -671,6 +676,10 @@ where
                 }
 
                 compare_cts.cancel();
+                if ct.is_canceled() {
+                    return (result, Err(QbftError::ContextCanceled));
+                }
+
                 return match err {
                     Ok(()) => (result, Ok(())),
                     Err(_) => (result, Err(QbftError::CompareError)),
@@ -694,9 +703,9 @@ where
             }
 
             default(CANCELLATION_POLL_INTERVAL) => {
-                if !compare_parent_cancelled && ct.is_canceled() {
+                if ct.is_canceled() {
                     compare_cts.cancel();
-                    compare_parent_cancelled = true;
+                    return (result, Err(QbftError::ContextCanceled));
                 }
             }
         }

From 7911c23493611716d05d510d4ce30e2be27e89e9 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 18:39:21 +0700
Subject: [PATCH 12/28] fix: validate definition

---
 crates/core/src/qbft/internal_test.rs | 52 ++++++++++++++++++++++++---
 crates/core/src/qbft/mod.rs           | 24 +++++++++++++
 2 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 4a759253..b8c14b0b 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1248,12 +1248,14 @@ fn duplicate_pre_prepare_rules() {
             0,
             0,
             0,
-            // Justification not required since nodes and quorum both 0.
+            // Round 2 is accepted after round 1 records a compare failure.
             None,
         )
     };
 
     let mut def = noop_definition();
+    def.nodes = 4;
+    def.fifo_limit = 100;
     def.is_leader = Box::new(|_, _, process| process == LEADER);
     def.log_upon_rule = Box::new(move |_, _, round, msg, upon_rule| {
         println!("UponRule: rule={} round={} ", upon_rule, msg.round());
@@ -1271,8 +1273,13 @@ fn duplicate_pre_prepare_rules() {
 
         panic!("unexpected round {}", round);
     });
-    def.compare = Arc::new(|_, _, _, _, return_err, _| {
-        return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+    def.compare = Arc::new(|_, msg, _, _, return_err, _| {
+        let result = if msg.round() == 1 {
+            Err(QbftError::CompareError)
+        } else {
+            Ok(())
+        };
+        return_err.send(result).expect(WRITE_CHAN_ERR);
     });
 
     let (r_chan_tx, r_chan_rx) = mpmc::bounded::<Msg<i64, i64, i64>>(2);
@@ -1304,7 +1311,9 @@ fn duplicate_pre_prepare_rules() {
 fn idle_run_returns_when_cancelled() {
     let cts = CancellationTokenSource::new();
     let token = cts.token().clone();
-    let def = noop_definition();
+    let mut def = noop_definition();
+    def.nodes = 4;
+    def.fifo_limit = 100;
     let transport = noop_transport();
     let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
     let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
@@ -1329,6 +1338,41 @@ fn idle_run_returns_when_cancelled() {
     ));
 }
 
+#[test]
+fn invalid_definition_rejected() {
+    let cts = CancellationTokenSource::new();
+    let transport = noop_transport();
+    let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
+    let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
+
+    let mut def = noop_definition();
+    def.nodes = 0;
+    def.fifo_limit = 1;
+    assert!(matches!(
+        qbft::run(
+            cts.token(),
+            &def,
+            &transport,
+            &0,
+            1,
+            input_rx.clone(),
+            source_rx.clone(),
+        ),
+        Err(QbftError::InvalidDefinition(
+            "nodes must be greater than zero"
+        ))
+    ));
+
+    def.nodes = 4;
+    def.fifo_limit = 0;
+    assert!(matches!(
+        qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx),
+        Err(QbftError::InvalidDefinition(
+            "fifo_limit must be greater than zero"
+        ))
+    ));
+}
+
 #[test]
 fn run_cancels_under_hot_receive_stream() {
     let cts = CancellationTokenSource::new();
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index cf43da2d..3e0c6ba6 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -66,6 +66,9 @@ pub enum QbftError {
     #[error("Zero input value not supported")]
     ZeroInputValue,
 
+    #[error("Invalid definition: {0}")]
+    InvalidDefinition(&'static str),
+
     #[error("Failed to read from channel: {0}")]
     ChannelError(#[from] mpmc::RecvError),
 }
@@ -316,6 +319,8 @@ where
     V: PartialEq + Eq + Hash + Default + 'static,
     C: Clone + Send + Sync + Default + 'static,
 {
+    validate_definition(d)?;
+
     // === State ===
     let round: Cell<i64> = Cell::new(1);
     let input_value: RefCell<V> = RefCell::new(Default::default());
@@ -605,6 +610,25 @@ where
     }
 }
 
+fn validate_definition<I, V, C>(d: &Definition<I, V, C>) -> Result<()>
+where
+    V: PartialEq,
+{
+    if d.nodes <= 0 {
+        return Err(QbftError::InvalidDefinition(
+            "nodes must be greater than zero",
+        ));
+    }
+
+    if d.fifo_limit <= 0 {
+        return Err(QbftError::InvalidDefinition(
+            "fifo_limit must be greater than zero",
+        ));
+    }
+
+    Ok(())
+}
+
 /// The callback may cache the local input source and return success/failure.
 /// This helper only preserves that callback result and lets the round timer win
 /// if the callback blocks.

From 829ba5f4f8a44bd0b4d4d0b45024f1a7f9eea2ac Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 18:43:51 +0700
Subject: [PATCH 13/28] fix: add check pr < r

---
 crates/core/src/qbft/internal_test.rs | 25 ++++++++++++++++++++++
 crates/core/src/qbft/mod.rs           | 30 +++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index b8c14b0b..33f296b5 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1493,6 +1493,31 @@ fn justified_qrc_j1_and_j2() {
     ];
     assert_eq!(Some(7), contains_justified_qrc(&def, &j2, 2));
     assert!(get_justified_qrc(&def, &j2, 2).unwrap().len() >= 6);
+
+    let invalid_pr = vec![
+        new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 2, 7, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 2, 7, None),
+        new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 2, 7, None),
+        new_msg(MSG_PREPARE, 0, 1, 2, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 2, 2, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 3, 2, 7, 0, 0, 0, None),
+    ];
+    assert_eq!(None, contains_justified_qrc(&def, &invalid_pr, 2));
+    assert!(get_justified_qrc(&def, &invalid_pr, 2).is_none());
+}
+
+#[test]
+fn round_change_prepared_round_must_be_before_round() {
+    let mut def = noop_definition();
+    def.nodes = 4;
+    let prepares = vec![
+        new_msg(MSG_PREPARE, 0, 1, 2, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 2, 2, 7, 0, 0, 0, None),
+        new_msg(MSG_PREPARE, 0, 3, 2, 7, 0, 0, 0, None),
+    ];
+    let invalid = new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 2, 7, Some(&prepares));
+
+    assert!(!is_justified_round_change(&def, &invalid));
 }
 
 #[test]
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 3e0c6ba6..e1786459 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -913,6 +913,13 @@ where
     let pr = msg.prepared_round();
     let pv = msg.prepared_value();
 
+    // The IBFT paper requires ROUND-CHANGE prepared_round to be lower than the
+    // target round. Go core currently omits this check, but valid Charon traffic
+    // already satisfies it.
+    if !valid_round_change_prepared_round(msg) {
+        return false;
+    }
+
     if prepares.is_empty() {
         return pr == 0 && pv == Default::default();
     }
@@ -946,6 +953,14 @@ where
     true
 }
 
+fn valid_round_change_prepared_round<I, V, C>(msg: &Msg<I, V, C>) -> bool
+where
+    V: PartialEq,
+{
+    let pr = msg.prepared_round();
+    pr >= 0 && pr < msg.round()
+}
+
 /// Returns true if the decided message is justified by quorum COMMIT messages
 /// of identical round and value.
 fn is_justified_decided<I, V, C>(d: &Definition<I, V, C>, msg: &Msg<I, V, C>) -> bool
@@ -1018,6 +1033,11 @@ where
     if (qrc.len() as i64) < d.quorum() {
         return None;
     }
+
+    if qrc.iter().any(|rc| !valid_round_change_prepared_round(rc)) {
+        return None;
+    }
+
     // No need to calculate J1 or J2 for all possible combinations,
     // since justification should only contain one.
 
@@ -1111,7 +1131,10 @@ where
         return Some(qrc);
     }
 
-    let round_changes = filter_round_change(all, round);
+    let round_changes = filter_round_change(all, round)
+        .into_iter()
+        .filter(valid_round_change_prepared_round)
+        .collect::<Vec<_>>();
 
     for prepares in get_prepare_quorums(d, all) {
         // See if we have quorum ROUND-CHANGE with HIGHEST_PREPARED(qrc) ==
@@ -1259,7 +1282,10 @@ where
     let null_pr = Default::default();
     let null_pv = Some(&Default::default());
 
-    let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv);
+    let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv)
+        .into_iter()
+        .filter(valid_round_change_prepared_round)
+        .collect::<Vec<_>>();
 
     (
         justification.clone(),

From d834463c68aa53f399ed26002c66aae0561a8761 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Mon, 18 May 2026 18:46:13 +0700
Subject: [PATCH 14/28] fix: test and document on run

---
 crates/core/src/qbft/internal_test.rs | 11 +++++++++++
 crates/core/src/qbft/mod.rs           |  7 +++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 33f296b5..b8fd8f8a 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -2013,6 +2013,17 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
             }
             recv(result_chan_rx) -> res => {
                 let q_commit = res.expect(READ_CHAN_ERR);
+                if test.should_halt {
+                    cts.cancel();
+                    clock.cancel();
+                    panic!(
+                        "halt case unexpectedly decided: q_commit={:?} elapsed={:?}\n{}",
+                        q_commit,
+                        clock.elapsed(),
+                        trace.dump()
+                    );
+                }
+
                 for commit in q_commit.clone() {
                     for previous in results.values() {
                         if previous.value() != commit.value() {
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index e1786459..71eb57b7 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -299,8 +299,11 @@ struct DedupKey {
     round: i64,
 }
 
-/// Executes one QBFT consensus instance until it decides, errors, or is
-/// cancelled.
+/// Executes one QBFT consensus instance until it errors or is cancelled.
+///
+/// Decisions are reported via `Definition::decide`. After deciding, `run`
+/// remains active so it can answer later `ROUND_CHANGE` messages with `DECIDED`
+/// catch-up messages.
 ///
 /// `I` identifies the consensus instance, `V` is the comparable proposed value,
 /// and `C` is the application value used by `Definition::compare` to compare a

From 739e3b389b5b6a65cdf717498307ef4dc179e67a Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 11:04:13 +0700
Subject: [PATCH 15/28] fix: compare callback failed should timeout

---
 crates/core/src/qbft/internal_test.rs | 37 +++++++++++++++++++++++++++
 crates/core/src/qbft/mod.rs           |  8 +++---
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index b8fd8f8a..495de30e 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1664,6 +1664,43 @@ fn compare_timeout_does_not_wait_for_blocked_callback() {
     ));
 }
 
+#[test]
+fn compare_callback_exit_without_status_waits_for_timer() {
+    let cts = CancellationTokenSource::new();
+    let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None);
+    let (_vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
+    let (timer_tx, timer_rx) = mpmc::bounded(1);
+    let (callback_done_tx, callback_done_rx) = mpmc::bounded(1);
+
+    let mut def = noop_definition();
+    def.compare = Arc::new(move |_, _, _, _, _, _| {
+        callback_done_tx.send(()).expect(WRITE_CHAN_ERR);
+    });
+
+    let (result_tx, result_rx) = mpmc::bounded(1);
+    thread::spawn(move || {
+        result_tx
+            .send(compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx))
+            .expect(WRITE_CHAN_ERR);
+    });
+
+    callback_done_rx
+        .recv_timeout(Duration::from_millis(100))
+        .expect("compare callback must exit");
+    assert!(
+        result_rx.try_recv().is_err(),
+        "compare must wait for timer/cancel if callback exits without status"
+    );
+
+    timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR);
+    assert!(matches!(
+        result_rx
+            .recv_timeout(Duration::from_millis(100))
+            .expect("compare must return after timer fires"),
+        (0, Err(QbftError::TimeoutError))
+    ));
+}
+
 #[test]
 fn compare_parent_cancel_cancels_callback_token() {
     let cts = CancellationTokenSource::new();
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 71eb57b7..0c53de21 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -648,7 +648,7 @@ where
     V: PartialEq + 'static,
     C: Clone + Send + Sync + 'static,
 {
-    let (compare_err_tx, compare_err_rx) = mpmc::bounded::<Result<()>>(1);
+    let (compare_err_tx, mut compare_err_rx) = mpmc::bounded::<Result<()>>(1);
     let (compare_value_tx, mut compare_value_rx) = mpmc::bounded::<C>(1);
 
     // d.Compare has 2 roles:
@@ -692,9 +692,9 @@ where
             recv(compare_err_rx) -> msg => {
                 let err = match msg {
                     Ok(err) => err,
-                    Err(err) => {
-                        compare_cts.cancel();
-                        return (result, Err(QbftError::ChannelError(err)));
+                    Err(_) => {
+                        compare_err_rx = mpmc::never();
+                        continue;
                     }
                 };
 

From 27744cff60b58a851631d9aec036618765536213 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 11:13:01 +0700
Subject: [PATCH 16/28] fix: test use timeout channel instead of sleep

---
 crates/core/src/qbft/internal_test.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 495de30e..08dd1960 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1708,10 +1708,12 @@ fn compare_parent_cancel_cancels_callback_token() {
     let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None);
     let (_vs_tx, vs_rx) = mpmc::bounded::<i64>(1);
     let (timer_tx, timer_rx) = mpmc::bounded(1);
+    let (compare_started_tx, compare_started_rx) = mpmc::bounded(1);
     let (token_cancelled_tx, token_cancelled_rx) = mpmc::bounded(1);
 
     let mut def = noop_definition();
     def.compare = Arc::new(move |ct, _, _, _, return_err, _| {
+        compare_started_tx.send(()).expect(WRITE_CHAN_ERR);
         while !ct.is_canceled() {
             thread::sleep(Duration::from_millis(1));
         }
@@ -1726,7 +1728,9 @@ fn compare_parent_cancel_cancels_callback_token() {
             .expect(WRITE_CHAN_ERR);
     });
 
-    thread::sleep(Duration::from_millis(10));
+    compare_started_rx
+        .recv_timeout(Duration::from_millis(100))
+        .expect("compare must start");
     cts.cancel();
 
     match result_rx.recv_timeout(Duration::from_millis(100)) {

From 0bd6a4f1ae8639e3f2f651d11cc5289f3a349235 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 11:31:20 +0700
Subject: [PATCH 17/28] fix: add comment on cancellation poll interval

---
 crates/core/src/qbft/mod.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 0c53de21..3e5c2062 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -33,7 +33,10 @@ use std::{
 
 type Result<T> = std::result::Result<T, QbftError>;
 
-const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1);
+// The `cancellation` crate is callback-based, not channel-based, so it cannot
+// be used directly in `crossbeam::select!`. Keep polling coarse: QBFT shutdown
+// does not need sub-millisecond latency, and idle instances should stay cheap.
+const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(50);
 
 type CompareFn<I, V, C> = dyn Fn(
         /* ct */ &CancellationToken,

From e988c35f4c14899f2582d412d5f9206eaada1057 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 11:36:35 +0700
Subject: [PATCH 18/28] fix: use enum for invalid defnintion error

---
 crates/core/src/qbft/internal_test.rs |  8 ++------
 crates/core/src/qbft/mod.rs           | 17 +++++++++--------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 08dd1960..29f44138 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1358,18 +1358,14 @@ fn invalid_definition_rejected() {
             input_rx.clone(),
             source_rx.clone(),
         ),
-        Err(QbftError::InvalidDefinition(
-            "nodes must be greater than zero"
-        ))
+        Err(QbftError::InvalidNodes { nodes: 0 })
     ));
 
     def.nodes = 4;
     def.fifo_limit = 0;
     assert!(matches!(
         qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx),
-        Err(QbftError::InvalidDefinition(
-            "fifo_limit must be greater than zero"
-        ))
+        Err(QbftError::InvalidFifoLimit { fifo_limit: 0 })
     ));
 }
 
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 3e5c2062..071823c2 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -69,8 +69,11 @@ pub enum QbftError {
     #[error("Zero input value not supported")]
     ZeroInputValue,
 
-    #[error("Invalid definition: {0}")]
-    InvalidDefinition(&'static str),
+    #[error("invalid node count: must be greater than zero, got {nodes}")]
+    InvalidNodes { nodes: i64 },
+
+    #[error("invalid FIFO limit: must be greater than zero, got {fifo_limit}")]
+    InvalidFifoLimit { fifo_limit: i64 },
 
     #[error("Failed to read from channel: {0}")]
     ChannelError(#[from] mpmc::RecvError),
@@ -621,15 +624,13 @@ where
     V: PartialEq,
 {
     if d.nodes <= 0 {
-        return Err(QbftError::InvalidDefinition(
-            "nodes must be greater than zero",
-        ));
+        return Err(QbftError::InvalidNodes { nodes: d.nodes });
     }
 
     if d.fifo_limit <= 0 {
-        return Err(QbftError::InvalidDefinition(
-            "fifo_limit must be greater than zero",
-        ));
+        return Err(QbftError::InvalidFifoLimit {
+            fifo_limit: d.fifo_limit,
+        });
     }
 
     Ok(())

From 6c65b3291d80af3cefc00f3d4d2e367041b8ccd0 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 11:59:03 +0700
Subject: [PATCH 19/28] fix: add more test on check valid round

---
 crates/core/src/qbft/internal_test.rs | 126 ++++++++++++++++++++++++--
 crates/core/src/qbft/mod.rs           |   9 +-
 2 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 29f44138..4226c89a 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -654,6 +654,22 @@ fn new_msg(
     })
 }
 
+fn new_prepare_quorum(round: i64, value: i64) -> Vec<TestMsgRef> {
+    (1..=3)
+        .map(|source| new_msg(MSG_PREPARE, 0, source, round, value, 0, 0, 0, None))
+        .collect()
+}
+
+fn new_round_change(source: i64, round: i64, pr: i64, pv: i64) -> TestMsgRef {
+    new_msg(MSG_ROUND_CHANGE, 0, source, round, 0, 0, pr, pv, None)
+}
+
+fn new_round_change_quorum(round: i64, pr: i64, pv: i64) -> Vec<TestMsgRef> {
+    (1..=3)
+        .map(|source| new_round_change(source, round, pr, pv))
+        .collect()
+}
+
 // Delays the message broadcast by between 1x and 2x jitter_ms and drops
 // messages.
 fn bcast(
@@ -1503,17 +1519,111 @@ fn justified_qrc_j1_and_j2() {
 }
 
 #[test]
-fn round_change_prepared_round_must_be_before_round() {
+fn valid_round_change_prepared_round_boundaries() {
+    let cases = [
+        ("negative", 2, -1, false),
+        ("null at round one", 1, 0, true),
+        ("previous round", 2, 1, true),
+        ("current round", 2, 2, false),
+        ("future round", 2, 3, false),
+    ];
+
+    for (name, round, prepared_round, expected) in cases {
+        let msg = new_msg(MSG_ROUND_CHANGE, 0, 1, round, 0, 0, prepared_round, 7, None);
+        assert_eq!(expected, valid_round_change_prepared_round(&msg), "{name}");
+    }
+}
+
+#[test]
+fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites() {
     let mut def = noop_definition();
     def.nodes = 4;
-    let prepares = vec![
-        new_msg(MSG_PREPARE, 0, 1, 2, 7, 0, 0, 0, None),
-        new_msg(MSG_PREPARE, 0, 2, 2, 7, 0, 0, 0, None),
-        new_msg(MSG_PREPARE, 0, 3, 2, 7, 0, 0, 0, None),
-    ];
-    let invalid = new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 2, 7, Some(&prepares));
+    let target_round = 2;
+    let valid_prepared_round = 1;
+    let value = 7;
+    let prepares = new_prepare_quorum(valid_prepared_round, value);
+    let valid_round_changes = new_round_change_quorum(target_round, valid_prepared_round, value);
+
+    for (name, invalid_pr) in [
+        ("negative", -1),
+        ("current round", target_round),
+        ("future round", target_round + 1),
+    ] {
+        let invalid_prepares = new_prepare_quorum(invalid_pr, value);
+        let invalid_round_change = new_msg(
+            MSG_ROUND_CHANGE,
+            0,
+            1,
+            target_round,
+            0,
+            0,
+            invalid_pr,
+            value,
+            Some(&invalid_prepares),
+        );
+        assert!(
+            !is_justified_round_change(&def, &invalid_round_change),
+            "is_justified_round_change must reject {name} prepared_round"
+        );
+
+        let mut only_invalid = new_round_change_quorum(target_round, invalid_pr, value);
+        only_invalid.extend(invalid_prepares);
+        assert_eq!(
+            None,
+            contains_justified_qrc(&def, &only_invalid, target_round),
+            "contains_justified_qrc must not count {name} prepared_round"
+        );
+        assert!(
+            get_justified_qrc(&def, &only_invalid, target_round).is_none(),
+            "get_justified_qrc must not count {name} prepared_round"
+        );
+
+        let mut with_invalid_extra = valid_round_changes.clone();
+        with_invalid_extra.push(new_round_change(4, target_round, invalid_pr, value));
+        with_invalid_extra.extend(prepares.clone());
+        assert_eq!(
+            Some(value),
+            contains_justified_qrc(&def, &with_invalid_extra, target_round),
+            "contains_justified_qrc must ignore extra {name} prepared_round"
+        );
+        let qrc = get_justified_qrc(&def, &with_invalid_extra, target_round)
+            .expect("valid quorum must remain after filtering invalid prepared_round");
+        assert!(
+            qrc.iter()
+                .filter(|msg| msg.type_() == MSG_ROUND_CHANGE)
+                .all(valid_round_change_prepared_round),
+            "get_justified_qrc must filter extra {name} prepared_round"
+        );
+    }
+}
 
-    assert!(!is_justified_round_change(&def, &invalid));
+#[test]
+fn quorum_null_prepared_filters_invalid_prepared_rounds() {
+    let mut def = noop_definition();
+    def.nodes = 4;
+
+    let valid = new_round_change_quorum(1, 0, 0);
+    let (qrc, ok) = quorum_null_prepared(&def, &valid, 1);
+    assert!(ok);
+    assert_eq!(3, qrc.len());
+
+    for (name, round, invalid_pr) in [
+        ("negative", 1, -1),
+        ("current round", 1, 1),
+        ("future round", 1, 2),
+        ("zero round", 0, 0),
+    ] {
+        let invalid = new_round_change_quorum(round, invalid_pr, 0);
+        let (qrc, ok) = quorum_null_prepared(&def, &invalid, round);
+        assert!(
+            !ok,
+            "quorum_null_prepared must reject {name} prepared_round"
+        );
+        assert!(
+            qrc.is_empty(),
+            "quorum_null_prepared must filter {name} prepared_round"
+        );
+    }
 }
 
 #[test]
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 071823c2..54b77949 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -1036,15 +1036,14 @@ fn contains_justified_qrc<I, V, C>(
 where
     V: Eq + Hash + Default,
 {
-    let qrc = filter_round_change(justification, round);
+    let qrc = filter_round_change(justification, round)
+        .into_iter()
+        .filter(valid_round_change_prepared_round)
+        .collect::<Vec<_>>();
     if (qrc.len() as i64) < d.quorum() {
         return None;
     }
 
-    if qrc.iter().any(|rc| !valid_round_change_prepared_round(rc)) {
-        return None;
-    }
-
     // No need to calculate J1 or J2 for all possible combinations,
     // since justification should only contain one.
 

From b30e616aa0b79a9592d389d912d14eecf242f3a8 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 12:10:03 +0700
Subject: [PATCH 20/28] fix: using test-case

---
 crates/core/src/qbft/internal_test.rs | 553 ++++++++++----------------
 1 file changed, 205 insertions(+), 348 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 4226c89a..38409f61 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -9,6 +9,7 @@ use std::{
     thread,
     time::Duration,
 };
+use test_case::test_case;
 
 const WRITE_CHAN_ERR: &str = "Failed to write to channel";
 const READ_CHAN_ERR: &str = "Failed to read from channel";
@@ -899,107 +900,66 @@ impl SomeMsg<i64, i64, i64> for TestMsg {
     }
 }
 
-#[test]
-fn happy_0() {
+#[test_case(0 ; "happy_0")]
+#[test_case(1 ; "happy_1")]
+fn happy(instance: i64) {
     test_qbft(Test {
-        instance: 0,
-        decide_round: 1,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn happy_1() {
-    test_qbft(Test {
-        instance: 1,
+        instance,
         decide_round: 1,
         ..Default::default()
     });
 }
 
-#[test]
-fn prepare_round_1_decide_round_2() {
-    test_qbft(Test {
-        instance: 0,
-        commits_after: 1,
-        decide_round: 2,
-        prepared_val: 1,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn prepare_round_2_decide_round_3() {
-    test_qbft(Test {
-        instance: 0,
-        commits_after: 2,
-        value_delay: HashMap::from([(1, Duration::from_secs(2))]),
-        decide_round: 3,
-        prepared_val: 2,
-        const_period: true,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn leader_late_exp() {
+#[test_case(1, None, 2, 1, false ; "prepare_round_1_decide_round_2")]
+#[test_case(2, Some(2), 3, 2, true ; "prepare_round_2_decide_round_3")]
+fn prepare_round(
+    commits_after: i32,
+    value_delay_secs: Option<u64>,
+    decide_round: i32,
+    prepared_val: i32,
+    const_period: bool,
+) {
     test_qbft(Test {
         instance: 0,
-        start_delay: HashMap::from([(1, Duration::from_secs(2))]),
-        decide_round: 2,
+        commits_after,
+        value_delay: value_delay_secs
+            .map(|secs| HashMap::from([(1, Duration::from_secs(secs))]))
+            .unwrap_or_default(),
+        decide_round,
+        prepared_val,
+        const_period,
         ..Default::default()
     });
 }
 
-#[test]
-fn leader_down_const() {
+#[test_case(false ; "leader_late_exp")]
+#[test_case(true ; "leader_down_const")]
+fn delayed_leader_start(const_period: bool) {
     test_qbft(Test {
         instance: 0,
         start_delay: HashMap::from([(1, Duration::from_secs(2))]),
-        const_period: true,
         decide_round: 2,
+        const_period,
         ..Default::default()
     });
 }
 
-#[test]
-fn very_late_exp() {
-    test_qbft(Test {
-        instance: 3,
-        start_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]),
-        decide_round: 4,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn very_late_const() {
+#[test_case(3, false, 4, false ; "very_late_exp")]
+#[test_case(1, true, 0, true ; "very_late_const")]
+fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_round: bool) {
     test_qbft(Test {
-        instance: 1,
+        instance,
         start_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]),
-        const_period: true,
-        random_round: true,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn stagger_start_exp() {
-    test_qbft(Test {
-        instance: 0,
-        start_delay: HashMap::from([
-            (1, Duration::from_secs(0)),
-            (2, Duration::from_secs(1)),
-            (3, Duration::from_secs(2)),
-            (4, Duration::from_secs(3)),
-        ]),
-        random_round: true, // Takes 1 or 2 rounds.
+        decide_round,
+        const_period,
+        random_round,
         ..Default::default()
     });
 }
 
-#[test]
-fn stagger_start_const() {
+#[test_case(false ; "stagger_start_exp")]
+#[test_case(true ; "stagger_start_const")]
+fn stagger_start(const_period: bool) {
     test_qbft(Test {
         instance: 0,
         start_delay: HashMap::from([
@@ -1008,54 +968,40 @@ fn stagger_start_const() {
             (3, Duration::from_secs(2)),
             (4, Duration::from_secs(3)),
         ]),
-        const_period: true,
+        const_period,
         random_round: true, // Takes 1 or 2 rounds.
         ..Default::default()
     });
 }
 
-#[test]
-fn very_delayed_value_exp() {
-    test_qbft(Test {
-        instance: 3,
-        value_delay: HashMap::from([
-            (1, Duration::from_secs(5)),
-            (2, Duration::from_secs(10)),
-            (4, Duration::from_secs(5)),
-        ]),
-        decide_round: 4,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn very_delayed_value_const() {
-    test_qbft(Test {
-        instance: 1,
-        value_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]),
-        const_period: true,
-        random_round: true,
-        ..Default::default()
-    });
-}
+#[test_case(3, true, false, 4, false ; "very_delayed_value_exp")]
+#[test_case(1, false, true, 0, true ; "very_delayed_value_const")]
+fn very_delayed_value(
+    instance: i64,
+    include_process_4: bool,
+    const_period: bool,
+    decide_round: i32,
+    random_round: bool,
+) {
+    let mut value_delay =
+        HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]);
+    if include_process_4 {
+        value_delay.insert(4, Duration::from_secs(5));
+    }
 
-#[test]
-fn stagger_delayed_value_exp() {
     test_qbft(Test {
-        instance: 0,
-        value_delay: HashMap::from([
-            (1, Duration::from_secs(0)),
-            (2, Duration::from_secs(1)),
-            (3, Duration::from_secs(2)),
-            (4, Duration::from_secs(3)),
-        ]),
-        random_round: true,
+        instance,
+        value_delay,
+        const_period,
+        decide_round,
+        random_round,
         ..Default::default()
     });
 }
 
-#[test]
-fn stagger_delayed_value_const() {
+#[test_case(false ; "stagger_delayed_value_exp")]
+#[test_case(true ; "stagger_delayed_value_const")]
+fn stagger_delayed_value(const_period: bool) {
     test_qbft(Test {
         instance: 0,
         value_delay: HashMap::from([
@@ -1064,7 +1010,7 @@ fn stagger_delayed_value_const() {
             (3, Duration::from_secs(2)),
             (4, Duration::from_secs(3)),
         ]),
-        const_period: true,
+        const_period,
         random_round: true,
         ..Default::default()
     });
@@ -1082,80 +1028,53 @@ fn round1_leader_no_value_round2_leader_offline() {
     });
 }
 
-#[test]
-fn jitter_500ms_exp() {
-    test_qbft(Test {
-        instance: 3,
-        bcast_jitter_ms: 500,
-        random_round: true,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn jitter_200ms_const() {
+#[test_case(500, false ; "jitter_500ms_exp")]
+#[test_case(200, true ; "jitter_200ms_const")]
+fn jitter(bcast_jitter_ms: i32, const_period: bool) {
     test_qbft(Test {
         instance: 3,
-        bcast_jitter_ms: 200, // 0.2-0.4s network delay * 3msgs/round == 0.6-1.2s delay per 1s round
-        const_period: true,
-        random_round: true,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn drop_10_percent_const() {
-    test_qbft(Test {
-        instance: 1,
-        drop_prob: HashMap::from([(1, 0.1), (2, 0.1), (3, 0.1), (4, 0.1)]),
-        const_period: true,
+        bcast_jitter_ms,
+        const_period,
         random_round: true,
         ..Default::default()
     });
 }
 
-#[test]
-fn drop_30_percent_const() {
-    test_qbft(Test {
-        instance: 1,
-        drop_prob: HashMap::from([(1, 0.3), (2, 0.3), (3, 0.3), (4, 0.3)]),
-        const_period: true,
-        random_round: true,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn fuzz() {
-    test_qbft(Test {
-        instance: 1,
-        fuzz: true,
-        const_period: true,
-        decide_round: 1,
-        ..Default::default()
-    });
-}
-
-#[test]
-fn fuzz_with_late_leader() {
+#[test_case(0.1 ; "drop_10_percent_const")]
+#[test_case(0.3 ; "drop_30_percent_const")]
+fn dropped_messages(drop_probability: f64) {
     test_qbft(Test {
         instance: 1,
-        fuzz: true,
-        start_delay: HashMap::from([(1, Duration::from_secs(2)), (2, Duration::from_secs(2))]),
+        drop_prob: HashMap::from([
+            (1, drop_probability),
+            (2, drop_probability),
+            (3, drop_probability),
+            (4, drop_probability),
+        ]),
         const_period: true,
         random_round: true,
         ..Default::default()
     });
 }
 
-#[test]
-fn fuzz_with_very_late_leader() {
+#[test_case(None, 1, false ; "fuzz")]
+#[test_case(Some(2), 0, true ; "fuzz_with_late_leader")]
+#[test_case(Some(10), 0, true ; "fuzz_with_very_late_leader")]
+fn fuzzed(start_delay_secs: Option<u64>, decide_round: i32, random_round: bool) {
     test_qbft(Test {
         instance: 1,
         fuzz: true,
-        start_delay: HashMap::from([(1, Duration::from_secs(10)), (2, Duration::from_secs(10))]),
+        start_delay: start_delay_secs
+            .map(|secs| {
+                HashMap::from([
+                    (1, Duration::from_secs(secs)),
+                    (2, Duration::from_secs(secs)),
+                ])
+            })
+            .unwrap_or_default(),
         const_period: true,
-        random_round: true,
+        decide_round,
+        random_round,
         ..Default::default()
     });
 }
@@ -1181,41 +1100,35 @@ fn noop_transport() -> Transport<i64, i64, i64> {
     }
 }
 
-#[test]
-fn formulas() {
-    let expected = [
-        (1, 1, 0),
-        (2, 2, 0),
-        (3, 2, 0),
-        (4, 3, 1),
-        (5, 4, 1),
-        (6, 4, 1),
-        (7, 5, 2),
-        (8, 6, 2),
-        (9, 6, 2),
-        (10, 7, 3),
-        (11, 8, 3),
-        (12, 8, 3),
-        (13, 9, 4),
-        (14, 10, 4),
-        (15, 10, 4),
-        (16, 11, 5),
-        (17, 12, 5),
-        (18, 12, 5),
-        (19, 13, 6),
-        (20, 14, 6),
-        (21, 14, 6),
-        (22, 15, 7),
-    ];
-
-    for (n, q, f) in expected {
-        let d = Definition::<i64, i64, i64> {
-            nodes: n,
-            ..noop_definition()
-        };
-        assert_eq!(q, d.quorum(), "Quorum given N={n}");
-        assert_eq!(f, d.faulty(), "Faulty given N={n}");
-    }
+#[test_case(1, 1, 0 ; "n1")]
+#[test_case(2, 2, 0 ; "n2")]
+#[test_case(3, 2, 0 ; "n3")]
+#[test_case(4, 3, 1 ; "n4")]
+#[test_case(5, 4, 1 ; "n5")]
+#[test_case(6, 4, 1 ; "n6")]
+#[test_case(7, 5, 2 ; "n7")]
+#[test_case(8, 6, 2 ; "n8")]
+#[test_case(9, 6, 2 ; "n9")]
+#[test_case(10, 7, 3 ; "n10")]
+#[test_case(11, 8, 3 ; "n11")]
+#[test_case(12, 8, 3 ; "n12")]
+#[test_case(13, 9, 4 ; "n13")]
+#[test_case(14, 10, 4 ; "n14")]
+#[test_case(15, 10, 4 ; "n15")]
+#[test_case(16, 11, 5 ; "n16")]
+#[test_case(17, 12, 5 ; "n17")]
+#[test_case(18, 12, 5 ; "n18")]
+#[test_case(19, 13, 6 ; "n19")]
+#[test_case(20, 14, 6 ; "n20")]
+#[test_case(21, 14, 6 ; "n21")]
+#[test_case(22, 15, 7 ; "n22")]
+fn formulas(n: i64, q: i64, f: i64) {
+    let d = Definition::<i64, i64, i64> {
+        nodes: n,
+        ..noop_definition()
+    };
+    assert_eq!(q, d.quorum(), "Quorum given N={n}");
+    assert_eq!(f, d.faulty(), "Faulty given N={n}");
 }
 
 #[test]
@@ -1354,35 +1267,27 @@ fn idle_run_returns_when_cancelled() {
     ));
 }
 
-#[test]
-fn invalid_definition_rejected() {
+#[test_case(0, 1, true ; "invalid_nodes")]
+#[test_case(4, 0, false ; "invalid_fifo_limit")]
+fn invalid_definition_rejected(nodes: i64, fifo_limit: i64, invalid_nodes: bool) {
     let cts = CancellationTokenSource::new();
     let transport = noop_transport();
     let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
     let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
 
     let mut def = noop_definition();
-    def.nodes = 0;
-    def.fifo_limit = 1;
-    assert!(matches!(
-        qbft::run(
-            cts.token(),
-            &def,
-            &transport,
-            &0,
-            1,
-            input_rx.clone(),
-            source_rx.clone(),
-        ),
-        Err(QbftError::InvalidNodes { nodes: 0 })
-    ));
-
-    def.nodes = 4;
-    def.fifo_limit = 0;
-    assert!(matches!(
-        qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx),
-        Err(QbftError::InvalidFifoLimit { fifo_limit: 0 })
-    ));
+    def.nodes = nodes;
+    def.fifo_limit = fifo_limit;
+
+    let result = qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx);
+    if invalid_nodes {
+        assert!(matches!(result, Err(QbftError::InvalidNodes { nodes: 0 })));
+    } else {
+        assert!(matches!(
+            result,
+            Err(QbftError::InvalidFifoLimit { fifo_limit: 0 })
+        ));
+    }
 }
 
 #[test]
@@ -1518,24 +1423,20 @@ fn justified_qrc_j1_and_j2() {
     assert!(get_justified_qrc(&def, &invalid_pr, 2).is_none());
 }
 
-#[test]
-fn valid_round_change_prepared_round_boundaries() {
-    let cases = [
-        ("negative", 2, -1, false),
-        ("null at round one", 1, 0, true),
-        ("previous round", 2, 1, true),
-        ("current round", 2, 2, false),
-        ("future round", 2, 3, false),
-    ];
-
-    for (name, round, prepared_round, expected) in cases {
-        let msg = new_msg(MSG_ROUND_CHANGE, 0, 1, round, 0, 0, prepared_round, 7, None);
-        assert_eq!(expected, valid_round_change_prepared_round(&msg), "{name}");
-    }
+#[test_case(2, -1, false ; "negative")]
+#[test_case(1, 0, true ; "null_at_round_one")]
+#[test_case(2, 1, true ; "previous_round")]
+#[test_case(2, 2, false ; "current_round")]
+#[test_case(2, 3, false ; "future_round")]
+fn valid_round_change_prepared_round_boundaries(round: i64, prepared_round: i64, expected: bool) {
+    let msg = new_msg(MSG_ROUND_CHANGE, 0, 1, round, 0, 0, prepared_round, 7, None);
+    assert_eq!(expected, valid_round_change_prepared_round(&msg));
 }
 
-#[test]
-fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites() {
+#[test_case(-1 ; "negative")]
+#[test_case(2 ; "current_round")]
+#[test_case(3 ; "future_round")]
+fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr: i64) {
     let mut def = noop_definition();
     def.nodes = 4;
     let target_round = 2;
@@ -1544,61 +1445,49 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites() {
     let prepares = new_prepare_quorum(valid_prepared_round, value);
     let valid_round_changes = new_round_change_quorum(target_round, valid_prepared_round, value);
 
-    for (name, invalid_pr) in [
-        ("negative", -1),
-        ("current round", target_round),
-        ("future round", target_round + 1),
-    ] {
-        let invalid_prepares = new_prepare_quorum(invalid_pr, value);
-        let invalid_round_change = new_msg(
-            MSG_ROUND_CHANGE,
-            0,
-            1,
-            target_round,
-            0,
-            0,
-            invalid_pr,
-            value,
-            Some(&invalid_prepares),
-        );
-        assert!(
-            !is_justified_round_change(&def, &invalid_round_change),
-            "is_justified_round_change must reject {name} prepared_round"
-        );
-
-        let mut only_invalid = new_round_change_quorum(target_round, invalid_pr, value);
-        only_invalid.extend(invalid_prepares);
-        assert_eq!(
-            None,
-            contains_justified_qrc(&def, &only_invalid, target_round),
-            "contains_justified_qrc must not count {name} prepared_round"
-        );
-        assert!(
-            get_justified_qrc(&def, &only_invalid, target_round).is_none(),
-            "get_justified_qrc must not count {name} prepared_round"
-        );
-
-        let mut with_invalid_extra = valid_round_changes.clone();
-        with_invalid_extra.push(new_round_change(4, target_round, invalid_pr, value));
-        with_invalid_extra.extend(prepares.clone());
-        assert_eq!(
-            Some(value),
-            contains_justified_qrc(&def, &with_invalid_extra, target_round),
-            "contains_justified_qrc must ignore extra {name} prepared_round"
-        );
-        let qrc = get_justified_qrc(&def, &with_invalid_extra, target_round)
-            .expect("valid quorum must remain after filtering invalid prepared_round");
-        assert!(
-            qrc.iter()
-                .filter(|msg| msg.type_() == MSG_ROUND_CHANGE)
-                .all(valid_round_change_prepared_round),
-            "get_justified_qrc must filter extra {name} prepared_round"
-        );
-    }
+    let invalid_prepares = new_prepare_quorum(invalid_pr, value);
+    let invalid_round_change = new_msg(
+        MSG_ROUND_CHANGE,
+        0,
+        1,
+        target_round,
+        0,
+        0,
+        invalid_pr,
+        value,
+        Some(&invalid_prepares),
+    );
+    assert!(!is_justified_round_change(&def, &invalid_round_change));
+
+    let mut only_invalid = new_round_change_quorum(target_round, invalid_pr, value);
+    only_invalid.extend(invalid_prepares);
+    assert_eq!(
+        None,
+        contains_justified_qrc(&def, &only_invalid, target_round)
+    );
+    assert!(get_justified_qrc(&def, &only_invalid, target_round).is_none());
+
+    let mut with_invalid_extra = valid_round_changes.clone();
+    with_invalid_extra.push(new_round_change(4, target_round, invalid_pr, value));
+    with_invalid_extra.extend(prepares.clone());
+    assert_eq!(
+        Some(value),
+        contains_justified_qrc(&def, &with_invalid_extra, target_round)
+    );
+    let qrc = get_justified_qrc(&def, &with_invalid_extra, target_round)
+        .expect("valid quorum must remain after filtering invalid prepared_round");
+    assert!(
+        qrc.iter()
+            .filter(|msg| msg.type_() == MSG_ROUND_CHANGE)
+            .all(valid_round_change_prepared_round)
+    );
 }
 
-#[test]
-fn quorum_null_prepared_filters_invalid_prepared_rounds() {
+#[test_case(1, -1 ; "negative")]
+#[test_case(1, 1 ; "current_round")]
+#[test_case(1, 2 ; "future_round")]
+#[test_case(0, 0 ; "zero_round")]
+fn quorum_null_prepared_filters_invalid_prepared_rounds(round: i64, invalid_pr: i64) {
     let mut def = noop_definition();
     def.nodes = 4;
 
@@ -1607,23 +1496,10 @@ fn quorum_null_prepared_filters_invalid_prepared_rounds() {
     assert!(ok);
     assert_eq!(3, qrc.len());
 
-    for (name, round, invalid_pr) in [
-        ("negative", 1, -1),
-        ("current round", 1, 1),
-        ("future round", 1, 2),
-        ("zero round", 0, 0),
-    ] {
-        let invalid = new_round_change_quorum(round, invalid_pr, 0);
-        let (qrc, ok) = quorum_null_prepared(&def, &invalid, round);
-        assert!(
-            !ok,
-            "quorum_null_prepared must reject {name} prepared_round"
-        );
-        assert!(
-            qrc.is_empty(),
-            "quorum_null_prepared must filter {name} prepared_round"
-        );
-    }
+    let invalid = new_round_change_quorum(round, invalid_pr, 0);
+    let (qrc, ok) = quorum_null_prepared(&def, &invalid, round);
+    assert!(!ok);
+    assert!(qrc.is_empty());
 }
 
 #[test]
@@ -1936,43 +1812,24 @@ struct ChainSplitTest {
     should_halt: bool,
 }
 
-#[test]
-fn chain_split_same_value() {
-    test_qbft_chain_split(ChainSplitTest {
-        decide_round: 1,
-        value_source: HashMap::from([(1, 1), (2, 1), (3, 1), (4, 1)]),
-        prepared_val: 1,
-        should_halt: false,
-    });
-}
-
-#[test]
-fn chain_split_non_leader_peer_has_different_value() {
-    test_qbft_chain_split(ChainSplitTest {
-        decide_round: 1,
-        value_source: HashMap::from([(1, 1), (2, 3), (3, 1), (4, 1)]),
-        prepared_val: 1,
-        should_halt: false,
-    });
-}
-
-#[test]
-fn chain_split_first_leader_has_different_value_second_leader_succeeds() {
-    test_qbft_chain_split(ChainSplitTest {
-        decide_round: 2,
-        value_source: HashMap::from([(1, 3), (2, 1), (3, 1), (4, 1)]),
-        prepared_val: 1,
-        should_halt: false,
-    });
-}
-
-#[test]
-fn zz_chain_split_no_consensus_halt() {
+#[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")]
+#[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")]
+#[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")]
+#[test_case(1, 1, 3, 3, 0, 0, true ; "zz_no_consensus_halt")]
+fn chain_split(
+    value_1: i64,
+    value_2: i64,
+    value_3: i64,
+    value_4: i64,
+    decide_round: i32,
+    prepared_val: i32,
+    should_halt: bool,
+) {
     test_qbft_chain_split(ChainSplitTest {
-        decide_round: 0,
-        value_source: HashMap::from([(1, 1), (2, 1), (3, 3), (4, 3)]),
-        prepared_val: 0,
-        should_halt: true,
+        decide_round,
+        value_source: HashMap::from([(1, value_1), (2, value_2), (3, value_3), (4, value_4)]),
+        prepared_val,
+        should_halt,
     });
 }
 

From 4a364dff40811433efe1604c1abc11aea7ee0e0a Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 13:32:36 +0700
Subject: [PATCH 21/28] fix: small fixes

---
 crates/core/src/qbft/fake_clock.rs    |  1 +
 crates/core/src/qbft/internal_test.rs |  2 +-
 crates/core/src/qbft/mod.rs           | 12 +++++++++---
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 8392e7f3..75d07ef6 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -92,6 +92,7 @@ impl FakeClock {
         inner.now - inner.start
     }
 
+    /// Explicit cleanup; dropping one clone must not cancel timers owned by other clones.
     pub fn cancel(&self) {
         let mut inner = self.inner.lock().unwrap();
         inner.clients.clear();
diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 38409f61..1ed16e11 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1815,7 +1815,7 @@ struct ChainSplitTest {
 #[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")]
 #[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")]
 #[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")]
-#[test_case(1, 1, 3, 3, 0, 0, true ; "zz_no_consensus_halt")]
+#[test_case(1, 1, 3, 3, 0, 0, true ; "no_consensus_halt")]
 fn chain_split(
     value_1: i64,
     value_2: i64,
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 54b77949..2537611d 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -57,8 +57,8 @@ pub enum QbftError {
     #[error("Compare leader value with local value failed")]
     CompareError,
 
-    #[error("bug: expected only comparison or timeout error")]
-    UnexpectedCompareError,
+    #[error("bug: expected only comparison or timeout error, got {0}")]
+    UnexpectedCompareError(Box<QbftError>),
 
     #[error("context canceled")]
     ContextCanceled,
@@ -534,7 +534,11 @@ where
                                         broadcast_round_change()?;
                                     }
                                     QbftError::ContextCanceled => return Err(QbftError::ContextCanceled),
-                                    _ => return Err(QbftError::UnexpectedCompareError),
+                                    _ => {
+                                        return Err(QbftError::UnexpectedCompareError(Box::new(
+                                            qbft_err,
+                                        )));
+                                    }
                                 }
                             }
                         }
@@ -1288,6 +1292,8 @@ where
     let null_pr = Default::default();
     let null_pv = Some(&Default::default());
 
+    // Normal callers pass round >= 1, which makes pr=0 valid. Keep the explicit
+    // prepared-round filter so this helper remains safe for direct/future calls.
     let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv)
         .into_iter()
         .filter(valid_round_change_prepared_round)

From e759a81c8112593f68701a78affe82e260c1e07d Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 13:38:42 +0700
Subject: [PATCH 22/28] fix: linter

---
 crates/core/src/qbft/fake_clock.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 75d07ef6..2d6c239d 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -92,7 +92,8 @@ impl FakeClock {
         inner.now - inner.start
     }
 
-    /// Explicit cleanup; dropping one clone must not cancel timers owned by other clones.
+    /// Explicit cleanup; dropping one clone must not cancel timers owned by
+    /// other clones.
     pub fn cancel(&self) {
         let mut inner = self.inner.lock().unwrap();
         inner.clients.clear();

From 998b9ce0bc3e449a29944dfaa698c93f6a291253 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 14:59:02 +0700
Subject: [PATCH 23/28] fix: remove unnecessary filter

---
 crates/core/src/qbft/internal_test.rs | 3 +--
 crates/core/src/qbft/mod.rs           | 7 +------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 1ed16e11..af4cd57d 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1486,8 +1486,7 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr:
 #[test_case(1, -1 ; "negative")]
 #[test_case(1, 1 ; "current_round")]
 #[test_case(1, 2 ; "future_round")]
-#[test_case(0, 0 ; "zero_round")]
-fn quorum_null_prepared_filters_invalid_prepared_rounds(round: i64, invalid_pr: i64) {
+fn quorum_null_prepared_requires_null_prepared_rounds(round: i64, invalid_pr: i64) {
     let mut def = noop_definition();
     def.nodes = 4;
 
diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs
index 2537611d..956f17e8 100644
--- a/crates/core/src/qbft/mod.rs
+++ b/crates/core/src/qbft/mod.rs
@@ -1292,12 +1292,7 @@ where
     let null_pr = Default::default();
     let null_pv = Some(&Default::default());
 
-    // Normal callers pass round >= 1, which makes pr=0 valid. Keep the explicit
-    // prepared-round filter so this helper remains safe for direct/future calls.
-    let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv)
-        .into_iter()
-        .filter(valid_round_change_prepared_round)
-        .collect::<Vec<_>>();
+    let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv);
 
     (
         justification.clone(),

From 0451f2030c14c69a2101e90458e25b32eb780093 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Tue, 19 May 2026 23:42:51 +0700
Subject: [PATCH 24/28] fix: make_is_leader now 0-based

---
 crates/core/src/qbft/internal_test.rs | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index af4cd57d..8b8aa42d 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -607,9 +607,7 @@ fn seed_from_label(label: &str) -> u64 {
 /// Construct a leader election function.
 fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone {
     move |instance: &i64, round: i64, process: i64| -> bool {
-        // Go's helper uses 0-indexed process IDs. These Rust tests use 1..=n,
-        // so translate the same modulo schedule into the local process range.
-        ((instance + round - 1).rem_euclid(n)) + 1 == process
+        (instance + round).rem_euclid(n) == process
     }
 }
 
@@ -974,24 +972,12 @@ fn stagger_start(const_period: bool) {
     });
 }
 
-#[test_case(3, true, false, 4, false ; "very_delayed_value_exp")]
-#[test_case(1, false, true, 0, true ; "very_delayed_value_const")]
-fn very_delayed_value(
-    instance: i64,
-    include_process_4: bool,
-    const_period: bool,
-    decide_round: i32,
-    random_round: bool,
-) {
-    let mut value_delay =
-        HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]);
-    if include_process_4 {
-        value_delay.insert(4, Duration::from_secs(5));
-    }
-
+#[test_case(3, false, 4, false ; "very_delayed_value_exp")]
+#[test_case(1, true, 0, true ; "very_delayed_value_const")]
+fn very_delayed_value(instance: i64, const_period: bool, decide_round: i32, random_round: bool) {
     test_qbft(Test {
         instance,
-        value_delay,
+        value_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]),
         const_period,
         decide_round,
         random_round,

From e0e51f78b21ead3047bb1cfcd669118e531db508 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Wed, 20 May 2026 00:11:31 +0700
Subject: [PATCH 25/28] fix: add cancelled state for fake clock

---
 crates/core/src/qbft/fake_clock.rs | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 2d6c239d..686b549c 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -15,6 +15,7 @@ struct FakeClockInner {
     start: Instant,
     now: Instant,
     last_id: usize,
+    cancelled: bool,
     clients: HashMap<usize, (mpmc::Sender<Instant>, Instant)>,
 }
 
@@ -25,6 +26,7 @@ impl FakeClock {
                 start: now,
                 now,
                 last_id: 1,
+                cancelled: false,
                 clients: Default::default(),
             })),
         }
@@ -41,6 +43,10 @@ impl FakeClock {
 
         let client_id = {
             let mut inner = self.inner.lock().unwrap();
+            if inner.cancelled {
+                return (rx, Box::new(|| {}));
+            }
+
             let id = inner.last_id;
             let deadline = inner.now + duration;
 
@@ -92,10 +98,11 @@ impl FakeClock {
         inner.now - inner.start
     }
 
-    /// Explicit cleanup; dropping one clone must not cancel timers owned by
-    /// other clones.
+    /// Explicit terminal cleanup; do not reintroduce `Drop`, since dropping one
+    /// clone must not cancel timers owned by other clones.
     pub fn cancel(&self) {
         let mut inner = self.inner.lock().unwrap();
+        inner.cancelled = true;
         inner.clients.clear();
     }
 }
@@ -160,6 +167,20 @@ fn multiple_threads_cancellation() {
     assert_eq!(Duration::ZERO, clock.elapsed());
 }
 
+#[test]
+fn timer_created_after_cancel_is_closed() {
+    let clock = FakeClock::new(Instant::now());
+    clock.cancel();
+
+    let (ch, cancel) = clock.new_timer(Duration::from_secs(5));
+
+    assert!(matches!(
+        ch.try_recv(),
+        Err(mpmc::TryRecvError::Disconnected)
+    ));
+    cancel();
+}
+
 #[test]
 fn cancel_one_timer_only() {
     let clock = FakeClock::new(Instant::now());

From 6cc22da8e7566ea3ebac42f99586099e29ed7c6f Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Wed, 20 May 2026 00:52:33 +0700
Subject: [PATCH 26/28] fix: flanky tests by ordering and add small settle
 window time

---
 crates/core/src/qbft/fake_clock.rs    |  8 +++++---
 crates/core/src/qbft/internal_test.rs | 12 +++++++-----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 686b549c..d03063c2 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -1,6 +1,6 @@
 use crossbeam::channel as mpmc;
 use std::{
-    collections::HashMap,
+    collections::BTreeMap,
     sync::{Arc, Mutex},
     thread,
     time::{Duration, Instant},
@@ -16,7 +16,7 @@ struct FakeClockInner {
     now: Instant,
     last_id: usize,
     cancelled: bool,
-    clients: HashMap<usize, (mpmc::Sender<Instant>, Instant)>,
+    clients: BTreeMap<usize, (mpmc::Sender<Instant>, Instant)>,
 }
 
 impl FakeClock {
@@ -75,7 +75,9 @@ impl FakeClock {
             inner.now += duration;
             let now = inner.now;
 
-            for (&id, (ch, deadline)) in inner.clients.iter() {
+            // Preserve timer creation order for equal deadlines. The QBFT
+            // fake-clock harness asserts exact rounds, so timer ordering matters.
+            for (&id, (ch, deadline)) in &inner.clients {
                 if *deadline <= now {
                     expired.push((id, ch.clone()));
                 }
diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 8b8aa42d..4d321b16 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -276,10 +276,11 @@ fn test_qbft(test: Test) {
                         .as_ref()
                         .expect("value sender kept until run returns")
                         .clone();
+                    let (delay_ch, cancel) = clock.new_timer(delay);
                     s.spawn(move || {
-                        let (delay_ch, cancel) = clock.new_timer(delay);
-                        _ = delay_ch.recv();
-                        _ = v_chan_tx_send.send(i);
+                        if delay_ch.recv().is_ok() {
+                            _ = v_chan_tx_send.send(i);
+                        }
 
                         cancel();
                     });
@@ -378,7 +379,6 @@ fn test_qbft(test: Test) {
                         }
                         BroadcastEvent::Delayed(delayed) => pending.push(delayed),
                     }
-                    clock.advance(Duration::from_millis(1));
                     if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) {
                         cts.cancel();
                         clock.cancel();
@@ -517,7 +517,9 @@ fn test_qbft(test: Test) {
                 }
 
                 default => {
-                    thread::sleep(time::Duration::from_micros(1));
+                    // Give worker threads a small scheduling window before
+                    // advancing fake time; these tests assert exact rounds.
+                    thread::sleep(Duration::from_micros(50));
                     clock.advance(Duration::from_millis(1));
                     if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) {
                         cts.cancel();

From 7e48b5658c0dddef31e0c7878263925b4c61d594 Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Wed, 20 May 2026 16:29:31 +0700
Subject: [PATCH 27/28] fix: harden fake-clock to avoid flanky scheduling

---
 crates/core/src/qbft/fake_clock.rs    | 126 +++++++--
 crates/core/src/qbft/internal_test.rs | 357 ++++++++++++++++++++++----
 2 files changed, 411 insertions(+), 72 deletions(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index d03063c2..160cae56 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -1,7 +1,10 @@
 use crossbeam::channel as mpmc;
 use std::{
     collections::BTreeMap,
-    sync::{Arc, Mutex},
+    sync::{
+        Arc, Mutex,
+        atomic::{AtomicIsize, Ordering},
+    },
     thread,
     time::{Duration, Instant},
 };
@@ -11,15 +14,25 @@ pub struct FakeClock {
     inner: Arc<Mutex<FakeClockInner>>,
 }
 
+#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)]
+pub enum TimerPriority {
+    // Match the Go fake-clock harness at equal deadlines: delayed nodes start
+    // before protocol timers, while delayed input values arrive after them.
+    StartDelay,
+    Protocol,
+    InputValue,
+}
+
 struct FakeClockInner {
     start: Instant,
     now: Instant,
     last_id: usize,
     cancelled: bool,
-    clients: BTreeMap<usize, (mpmc::Sender<Instant>, Instant)>,
+    clients: BTreeMap<usize, (mpmc::Sender<Instant>, Instant, TimerPriority)>,
 }
 
 impl FakeClock {
+    /// Create a fake clock pinned to an initial instant.
     pub fn new(now: Instant) -> Self {
         Self {
             inner: Arc::new(Mutex::new(FakeClockInner {
@@ -32,6 +45,7 @@ impl FakeClock {
         }
     }
 
+    /// Register a protocol timer with the default protocol priority.
     pub fn new_timer(
         &self,
         duration: Duration,
@@ -39,7 +53,22 @@ impl FakeClock {
         mpmc::Receiver<Instant>,
         Box<dyn Fn() + Send + Sync + 'static>,
     ) {
-        let (tx, rx) = mpmc::bounded::<Instant>(1);
+        self.new_timer_with_priority(duration, TimerPriority::Protocol)
+    }
+
+    /// Register a timer with explicit same-deadline ordering priority.
+    pub fn new_timer_with_priority(
+        &self,
+        duration: Duration,
+        priority: TimerPriority,
+    ) -> (
+        mpmc::Receiver<Instant>,
+        Box<dyn Fn() + Send + Sync + 'static>,
+    ) {
+        // Synchronous expiry handoff: advancing fake time must wait until the
+        // timer owner observes the tick, otherwise exact-round QBFT tests race
+        // worker scheduling.
+        let (tx, rx) = mpmc::bounded::<Instant>(0);
 
         let client_id = {
             let mut inner = self.inner.lock().unwrap();
@@ -51,7 +80,7 @@ impl FakeClock {
             let deadline = inner.now + duration;
 
             inner.last_id += 1;
-            inner.clients.insert(id, (tx, deadline));
+            inner.clients.insert(id, (tx, deadline, priority));
 
             id
         };
@@ -65,7 +94,26 @@ impl FakeClock {
         (rx, cancel)
     }
 
-    pub fn advance(&self, duration: Duration) {
+    /// Advance fake time and deliver all timers expired by the new time.
+    pub fn advance(&self, duration: Duration) -> usize {
+        self.advance_inner(duration, None)
+    }
+
+    /// Advance fake time and wait for each delivered timer action to complete.
+    pub fn advance_and_wait(
+        &self,
+        duration: Duration,
+        pending_timer_actions: &AtomicIsize,
+    ) -> usize {
+        self.advance_inner(duration, Some(pending_timer_actions))
+    }
+
+    /// Shared advance path; optionally synchronizes timer delivery with the test harness.
+    fn advance_inner(
+        &self,
+        duration: Duration,
+        pending_timer_actions: Option<&AtomicIsize>,
+    ) -> usize {
         // Advance time and collect expired senders under lock, but perform sends
         // without holding lock.
         let mut expired = vec![];
@@ -75,31 +123,55 @@ impl FakeClock {
             inner.now += duration;
             let now = inner.now;
 
-            // Preserve timer creation order for equal deadlines. The QBFT
-            // fake-clock harness asserts exact rounds, so timer ordering matters.
-            for (&id, (ch, deadline)) in &inner.clients {
+            for (&id, (ch, deadline, priority)) in &inner.clients {
                 if *deadline <= now {
-                    expired.push((id, ch.clone()));
+                    expired.push((id, *deadline, *priority, ch.clone()));
                 }
             }
 
-            for (id, _) in expired.iter() {
+            for (id, _, _, _) in expired.iter() {
                 inner.clients.remove(id);
             }
 
             now
         };
 
-        for (_, ch) in expired {
-            let _ = ch.send(now);
+        // Equal-deadline order is part of the test harness contract: these
+        // tests assert exact rounds, and Go's fake-clock scheduling is stable.
+        expired.sort_by_key(|(id, deadline, priority, _)| (*deadline, *priority, *id));
+
+        let mut delivered = 0;
+        for (_, _, _, ch) in expired {
+            if let Some(pending_timer_actions) = pending_timer_actions {
+                pending_timer_actions.fetch_add(1, Ordering::SeqCst);
+            }
+
+            if ch.send(now).is_ok() {
+                delivered += 1;
+                if let Some(pending_timer_actions) = pending_timer_actions {
+                    while pending_timer_actions.load(Ordering::SeqCst) > 0 {
+                        thread::yield_now();
+                    }
+                }
+            } else if let Some(pending_timer_actions) = pending_timer_actions {
+                pending_timer_actions.fetch_sub(1, Ordering::SeqCst);
+            }
         }
+
+        delivered
     }
 
+    /// Return fake time elapsed since clock creation.
     pub fn elapsed(&self) -> Duration {
         let inner = self.inner.lock().unwrap();
         inner.now - inner.start
     }
 
+    /// Return currently registered timers.
+    pub fn timer_count(&self) -> usize {
+        self.inner.lock().unwrap().clients.len()
+    }
+
     /// Explicit terminal cleanup; do not reintroduce `Drop`, since dropping one
     /// clone must not cancel timers owned by other clones.
     pub fn cancel(&self) {
@@ -110,6 +182,7 @@ impl FakeClock {
 }
 
 #[test]
+/// Timers registered by different threads fire after fake time passes deadlines.
 fn multiple_threads_timers() {
     let clock = FakeClock::new(Instant::now());
     let (done_tx, done_rx) = mpmc::bounded(2);
@@ -141,6 +214,7 @@ fn multiple_threads_timers() {
 }
 
 #[test]
+/// Cancelling the clock closes outstanding timers without advancing fake time.
 fn multiple_threads_cancellation() {
     let clock = FakeClock::new(Instant::now());
     let (done_tx, done_rx) = mpmc::bounded(2);
@@ -170,6 +244,7 @@ fn multiple_threads_cancellation() {
 }
 
 #[test]
+/// A timer created after clock cancellation is immediately closed.
 fn timer_created_after_cancel_is_closed() {
     let clock = FakeClock::new(Instant::now());
     clock.cancel();
@@ -184,25 +259,42 @@ fn timer_created_after_cancel_is_closed() {
 }
 
 #[test]
+/// Cancelling one timer does not affect other timers with the same deadline.
 fn cancel_one_timer_only() {
     let clock = FakeClock::new(Instant::now());
     let (ch_1, cancel_1) = clock.new_timer(Duration::from_secs(5));
     let (ch_2, _) = clock.new_timer(Duration::from_secs(5));
+    let (done_tx, done_rx) = mpmc::bounded(1);
 
     cancel_1();
-    clock.advance(Duration::from_secs(5));
+    thread::scope(|s| {
+        s.spawn(move || {
+            done_tx.send(ch_2.recv().is_ok()).unwrap();
+        });
+
+        clock.advance(Duration::from_secs(5));
+    });
 
     assert!(ch_1.try_recv().is_err());
-    assert!(ch_2.try_recv().is_ok());
+    assert_eq!(Ok(true), done_rx.try_recv());
 }
 
 #[test]
+/// An expired timer is delivered once and removed from the clock.
 fn expired_timer_delivers_once() {
     let clock = FakeClock::new(Instant::now());
     let (ch, _) = clock.new_timer(Duration::from_secs(5));
+    let (done_tx, done_rx) = mpmc::bounded(1);
 
+    thread::scope(|s| {
+        s.spawn(move || {
+            done_tx.send(ch.recv().is_ok()).unwrap();
+        });
+
+        clock.advance(Duration::from_secs(5));
+    });
+
+    assert_eq!(Ok(true), done_rx.try_recv());
     clock.advance(Duration::from_secs(5));
-    assert!(ch.try_recv().is_ok());
-    clock.advance(Duration::from_secs(5));
-    assert!(ch.try_recv().is_err());
+    assert!(done_rx.try_recv().is_err());
 }
diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 4d321b16..0973c924 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -1,11 +1,18 @@
-use crate::qbft::{self, fake_clock::FakeClock, *};
+use crate::qbft::{
+    self,
+    fake_clock::{FakeClock, TimerPriority},
+    *,
+};
 use cancellation::CancellationTokenSource;
 use crossbeam::channel as mpmc;
 use std::{
     collections::{BTreeMap, HashMap},
     fmt::Write as _,
     panic::{self, AssertUnwindSafe},
-    sync::{Arc, Mutex},
+    sync::{
+        Arc, Mutex,
+        atomic::{AtomicIsize, AtomicUsize, Ordering},
+    },
     thread,
     time::Duration,
 };
@@ -24,10 +31,28 @@ const TEST_STREAM_MSG_ROUND: u64 = 11;
 const TEST_STREAM_MSG_VALUE: u64 = 12;
 const TEST_STREAM_MSG_PREPARED_ROUND: u64 = 13;
 const TEST_STREAM_MSG_PREPARED_VALUE: u64 = 14;
+const TEST_WAIT_TIMEOUT: Duration = Duration::from_secs(1);
+// Wall-clock guard catches lack of harness progress. Fake time still controls
+// protocol progress, so slow-but-progressing parallel runs should not fail.
+const TEST_STALL_TIMEOUT: Duration = Duration::from_secs(20);
 
 type RunOutcome = std::thread::Result<Result<()>>;
 type TestMsgRef = Msg<i64, i64, i64>;
 
+struct PendingCompareGuard {
+    pending_compares: Arc<AtomicUsize>,
+}
+
+impl Drop for PendingCompareGuard {
+    fn drop(&mut self) {
+        self.pending_compares.fetch_sub(1, Ordering::SeqCst);
+    }
+}
+
+fn complete_timer_action(pending_timer_actions: &AtomicIsize) {
+    pending_timer_actions.fetch_sub(1, Ordering::SeqCst);
+}
+
 struct PendingBroadcast {
     deliver_at: Duration,
     key: u64,
@@ -65,6 +90,12 @@ struct Test {
     pub fuzz: bool,
 }
 
+// Main QBFT simulation harness:
+// 1. build one fake clock, four node transports, and QBFT callbacks;
+// 2. spawn one thread per node running `qbft::run`;
+// 3. route broadcasts through the in-memory network with optional drop/jitter/fuzz;
+// 4. advance fake time only after pending compare/timer work is drained;
+// 5. collect all decisions and assert same value plus expected round/value.
 fn test_qbft(test: Test) {
     const N: usize = 4;
     const MAX_ROUND: usize = 50;
@@ -77,6 +108,8 @@ fn test_qbft(test: Test) {
     let clock = FakeClock::new(start_time);
 
     let cts = CancellationTokenSource::new();
+    let pending_compares = Arc::new(AtomicUsize::new(0));
+    let pending_timer_actions = Arc::new(AtomicIsize::new(0));
     // Keep peer iteration deterministic. These fake-clock tests assert exact
     // rounds, and broadcast fanout order affects which node observes quorums
     // first when tests run in parallel.
@@ -91,6 +124,11 @@ fn test_qbft(test: Test) {
     let (unjust_tx, unjust_rx) = mpmc::unbounded::<String>();
     let (result_chan_tx, result_chan_rx) = mpmc::bounded::<Vec<Msg<i64, i64, i64>>>(N);
     let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N);
+    let expected_initial_timers = N + test
+        .value_delay
+        .keys()
+        .filter(|process| !test.start_delay.contains_key(process))
+        .count();
 
     let is_leader = Box::new(make_is_leader(N as i64));
 
@@ -116,16 +154,27 @@ fn test_qbft(test: Test) {
                 result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR);
             })
         },
-        compare: Arc::new(|_, _, _, _, return_err, _| {
-            return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
-        }),
+        compare: {
+            let pending_compares = pending_compares.clone();
+            Arc::new(move |_, _, _, _, return_err, _| {
+                let _guard = PendingCompareGuard {
+                    pending_compares: pending_compares.clone(),
+                };
+                return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+            })
+        },
         nodes: N as i64,
         fifo_limit: FIFO_LIMIT as i64,
         log_round_change: {
             let clock = clock.clone();
             let trace = trace.clone();
+            let pending_timer_actions = pending_timer_actions.clone();
 
             Box::new(move |_, process, round, new_round, upon_rule, _| {
+                if upon_rule == UPON_ROUND_TIMEOUT {
+                    complete_timer_action(&pending_timer_actions);
+                }
+
                 trace.push(format!(
                     "{:?} - {}@{} change to {} ~= {}",
                     clock.elapsed(),
@@ -151,7 +200,12 @@ fn test_qbft(test: Test) {
         log_upon_rule: {
             let clock = clock.clone();
             let trace = trace.clone();
+            let pending_compares = pending_compares.clone();
             Box::new(move |_, process, round, msg, upon_rule| {
+                if upon_rule == UPON_JUSTIFIED_PRE_PREPARE {
+                    pending_compares.fetch_add(1, Ordering::SeqCst);
+                }
+
                 trace.push(format!(
                     "{:?} {} => {}@{} -> {}@{} ~= {}",
                     clock.elapsed(),
@@ -239,9 +293,11 @@ fn test_qbft(test: Test) {
             let run_chan_tx = run_chan_tx.clone();
             let defs = defs.clone();
             let is_leader = is_leader.clone();
+            let pending_timer_actions = pending_timer_actions.clone();
             let trace = trace.clone();
 
             s.spawn(move || {
+                let mut start_timer_fired = false;
                 if let Some(delay) = start_delay {
                     trace.push(format!(
                         "{:?} Node {} start delay {:?}",
@@ -249,14 +305,17 @@ fn test_qbft(test: Test) {
                         i,
                         delay
                     ));
-                    let (delay_ch, _) = clock.new_timer(delay);
-                    _ = delay_ch.recv();
-                    trace.push(format!(
-                        "{:?} Node {} starting {:?}",
-                        clock.elapsed(),
-                        i,
-                        delay
-                    ));
+                    let (delay_ch, _) =
+                        clock.new_timer_with_priority(delay, TimerPriority::StartDelay);
+                    if delay_ch.recv().is_ok() {
+                        start_timer_fired = true;
+                        trace.push(format!(
+                            "{:?} Node {} starting {:?}",
+                            clock.elapsed(),
+                            i,
+                            delay
+                        ));
+                    }
                 }
 
                 if start_delay.is_some() {
@@ -265,6 +324,9 @@ fn test_qbft(test: Test) {
                         _ = receiver.recv().expect(READ_CHAN_ERR);
                     }
                 }
+                if start_timer_fired {
+                    complete_timer_action(&pending_timer_actions);
+                }
 
                 let (v_chan_tx, v_chan_rx) = mpmc::bounded::<i64>(1);
                 let (vs_chan_tx, vs_chan_rx) = mpmc::bounded::<i64>(1);
@@ -276,10 +338,13 @@ fn test_qbft(test: Test) {
                         .as_ref()
                         .expect("value sender kept until run returns")
                         .clone();
-                    let (delay_ch, cancel) = clock.new_timer(delay);
+                    let pending_timer_actions = pending_timer_actions.clone();
+                    let (delay_ch, cancel) =
+                        clock.new_timer_with_priority(delay, TimerPriority::InputValue);
                     s.spawn(move || {
                         if delay_ch.recv().is_ok() {
                             _ = v_chan_tx_send.send(i);
+                            complete_timer_action(&pending_timer_actions);
                         }
 
                         cancel();
@@ -322,6 +387,21 @@ fn test_qbft(test: Test) {
             });
         }
 
+        while clock.timer_count() < expected_initial_timers {
+            thread::yield_now();
+            if real_start.elapsed() > TEST_STALL_TIMEOUT {
+                cts.cancel();
+                clock.cancel();
+                panic!(
+                    "qbft test setup hang: timers={} expected={} seed={}\n{}",
+                    clock.timer_count(),
+                    expected_initial_timers,
+                    seed,
+                    trace.dump()
+                );
+            }
+        }
+
         let mut results = BTreeMap::<i64, Msg<i64, i64, i64>>::new();
         let mut count = 0;
         let mut decided = false;
@@ -330,9 +410,10 @@ fn test_qbft(test: Test) {
         let mut pending = Vec::<PendingBroadcast>::new();
         let mut next_fuzz_at = test.fuzz.then_some(Duration::from_millis(100));
         let mut fuzz_counter = 0_u64;
+        let mut last_progress = time::Instant::now();
 
         loop {
-            broadcasts += deliver_ready_broadcasts(
+            let delivered = deliver_ready_broadcasts(
                 &mut pending,
                 &receives,
                 &test.drop_prob,
@@ -340,6 +421,10 @@ fn test_qbft(test: Test) {
                 &trace,
                 &clock,
             );
+            broadcasts += delivered;
+            if delivered > 0 {
+                last_progress = time::Instant::now();
+            }
 
             if decided {
                 next_fuzz_at = None;
@@ -361,6 +446,7 @@ fn test_qbft(test: Test) {
                 ));
                 broadcasts +=
                     fanout_broadcast(&receives, &test.drop_prob, seed, &trace, &clock, msg);
+                last_progress = time::Instant::now();
                 next_fuzz_at = Some(next + Duration::from_millis(100));
             }
 
@@ -379,7 +465,8 @@ fn test_qbft(test: Test) {
                         }
                         BroadcastEvent::Delayed(delayed) => pending.push(delayed),
                     }
-                    if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) {
+                    last_progress = time::Instant::now();
+                    if clock.elapsed() > Duration::from_secs(180) {
                         cts.cancel();
                         clock.cancel();
                         panic!(
@@ -405,6 +492,7 @@ fn test_qbft(test: Test) {
 
                 recv(result_chan_rx) -> res => {
                     let q_commit = res.expect(READ_CHAN_ERR);
+                    last_progress = time::Instant::now();
 
                     for commit in q_commit.clone() {
                         for (_, previous) in results.iter() {
@@ -490,6 +578,7 @@ fn test_qbft(test: Test) {
 
                 recv(run_chan_rx) -> res => {
                     let (node, outcome) = res.expect(READ_CHAN_ERR);
+                    last_progress = time::Instant::now();
 
                     if !matches!(outcome, Ok(Ok(()))) {
                         if !decided {
@@ -517,11 +606,36 @@ fn test_qbft(test: Test) {
                 }
 
                 default => {
-                    // Give worker threads a small scheduling window before
-                    // advancing fake time; these tests assert exact rounds.
-                    thread::sleep(Duration::from_micros(50));
-                    clock.advance(Duration::from_millis(1));
-                    if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) {
+                    if pending_compares.load(Ordering::SeqCst) != 0
+                        || pending_timer_actions.load(Ordering::SeqCst) > 0
+                    {
+                        thread::yield_now();
+                        if last_progress.elapsed() > TEST_STALL_TIMEOUT {
+                            cts.cancel();
+                            clock.cancel();
+                            panic!(
+                                "qbft test hang: pending_compares={} pending_timer_actions={} decided={} done={} count={} elapsed={:?} real_elapsed={:?} broadcasts={} seed={}\n{}",
+                                pending_compares.load(Ordering::SeqCst),
+                                pending_timer_actions.load(Ordering::SeqCst),
+                                decided,
+                                done,
+                                count,
+                                clock.elapsed(),
+                                real_start.elapsed(),
+                                broadcasts,
+                                seed,
+                                trace.dump()
+                            );
+                        }
+                        continue;
+                    }
+
+                    // Matches the Go harness throttle; ordering correctness
+                    // comes from the pending-work barriers, not this duration.
+                    thread::sleep(Duration::from_micros(1));
+                    clock.advance_and_wait(Duration::from_millis(1), &pending_timer_actions);
+                    last_progress = time::Instant::now();
+                    if clock.elapsed() > Duration::from_secs(180) {
                         cts.cancel();
                         clock.cancel();
                         panic!(
@@ -900,6 +1014,8 @@ impl SomeMsg<i64, i64, i64> for TestMsg {
     }
 }
 
+// Tests the normal-case path with an available round-1 leader.
+// Expect all nodes to decide in round 1 on the leader value.
 #[test_case(0 ; "happy_0")]
 #[test_case(1 ; "happy_1")]
 fn happy(instance: i64) {
@@ -910,6 +1026,8 @@ fn happy(instance: i64) {
     });
 }
 
+// Tests prepared-value carryover when commits are suppressed in earlier rounds.
+// Expect later rounds to decide the highest prepared value, not a new leader value.
 #[test_case(1, None, 2, 1, false ; "prepare_round_1_decide_round_2")]
 #[test_case(2, Some(2), 3, 2, true ; "prepare_round_2_decide_round_3")]
 fn prepare_round(
@@ -932,6 +1050,8 @@ fn prepare_round(
     });
 }
 
+// Tests round change when the first leader starts late.
+// Expect the next live leader to drive consensus in round 2.
 #[test_case(false ; "leader_late_exp")]
 #[test_case(true ; "leader_down_const")]
 fn delayed_leader_start(const_period: bool) {
@@ -944,6 +1064,8 @@ fn delayed_leader_start(const_period: bool) {
     });
 }
 
+// Tests recovery when two nodes, including early leaders, start much later.
+// Expect consensus after enough round changes, with exact round only when deterministic.
 #[test_case(3, false, 4, false ; "very_late_exp")]
 #[test_case(1, true, 0, true ; "very_late_const")]
 fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_round: bool) {
@@ -957,6 +1079,8 @@ fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_
     });
 }
 
+// Tests staggered node startup and message buffering/draining.
+// Expect consensus once enough live nodes join, with round allowed to vary.
 #[test_case(false ; "stagger_start_exp")]
 #[test_case(true ; "stagger_start_const")]
 fn stagger_start(const_period: bool) {
@@ -974,6 +1098,8 @@ fn stagger_start(const_period: bool) {
     });
 }
 
+// Tests late input values on nodes that otherwise participate in the protocol.
+// Expect round changes until a valid leader value is available.
 #[test_case(3, false, 4, false ; "very_delayed_value_exp")]
 #[test_case(1, true, 0, true ; "very_delayed_value_const")]
 fn very_delayed_value(instance: i64, const_period: bool, decide_round: i32, random_round: bool) {
@@ -987,6 +1113,8 @@ fn very_delayed_value(instance: i64, const_period: bool, decide_round: i32, rand
     });
 }
 
+// Tests input values arriving at different fake times for all nodes.
+// Expect consensus once enough nodes can validate/propose values.
 #[test_case(false ; "stagger_delayed_value_exp")]
 #[test_case(true ; "stagger_delayed_value_const")]
 fn stagger_delayed_value(const_period: bool) {
@@ -1004,6 +1132,8 @@ fn stagger_delayed_value(const_period: bool) {
     });
 }
 
+// Tests a round-1 leader without input and a round-2 leader that is offline.
+// Expect consensus to skip both blocked leaders and decide in round 3.
 #[test]
 fn round1_leader_no_value_round2_leader_offline() {
     test_qbft(Test {
@@ -1016,6 +1146,8 @@ fn round1_leader_no_value_round2_leader_offline() {
     });
 }
 
+// Tests delayed broadcast delivery under fake network jitter.
+// Expect safety and eventual consensus despite delayed messages.
 #[test_case(500, false ; "jitter_500ms_exp")]
 #[test_case(200, true ; "jitter_200ms_const")]
 fn jitter(bcast_jitter_ms: i32, const_period: bool) {
@@ -1028,6 +1160,8 @@ fn jitter(bcast_jitter_ms: i32, const_period: bool) {
     });
 }
 
+// Tests deterministic message loss at 10% and 30%.
+// Expect eventual consensus without conflicting decisions.
 #[test_case(0.1 ; "drop_10_percent_const")]
 #[test_case(0.3 ; "drop_30_percent_const")]
 fn dropped_messages(drop_probability: f64) {
@@ -1045,6 +1179,8 @@ fn dropped_messages(drop_probability: f64) {
     });
 }
 
+// Tests bogus message injection during normal and delayed-leader scenarios.
+// Expect unjust fuzz traffic to be ignored and honest nodes to still decide.
 #[test_case(None, 1, false ; "fuzz")]
 #[test_case(Some(2), 0, true ; "fuzz_with_late_leader")]
 #[test_case(Some(10), 0, true ; "fuzz_with_very_late_leader")]
@@ -1088,6 +1224,8 @@ fn noop_transport() -> Transport<i64, i64, i64> {
     }
 }
 
+// Tests quorum/faulty formulas across node counts.
+// Expect quorum and tolerated-fault counts to match the Charon formula.
 #[test_case(1, 1, 0 ; "n1")]
 #[test_case(2, 2, 0 ; "n2")]
 #[test_case(3, 2, 0 ; "n3")]
@@ -1119,6 +1257,8 @@ fn formulas(n: i64, q: i64, f: i64) {
     assert_eq!(f, d.faulty(), "Faulty given N={n}");
 }
 
+// Tests PRE-PREPARE justification with mixed ROUND_CHANGE and PREPARE evidence.
+// Expect the proposal to be accepted when it carries a justified prepared value.
 #[test]
 fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() {
     let preprepare = new_msg(
@@ -1147,6 +1287,8 @@ fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() {
     assert!(is_justified_pre_prepare(&def, &1, &preprepare, 0));
 }
 
+// Tests duplicate PRE-PREPARE rule handling after compare failure.
+// Expect the next round proposal to trigger once and exit by cancellation.
 #[test]
 fn duplicate_pre_prepare_rules() {
     let cts = CancellationTokenSource::new();
@@ -1224,6 +1366,8 @@ fn duplicate_pre_prepare_rules() {
     assert!(matches!(res, Err(QbftError::ContextCanceled)));
 }
 
+// Tests idle cancellation while no inputs, timers, or messages are available.
+// Expect `run` to unblock and return `ContextCanceled`.
 #[test]
 fn idle_run_returns_when_cancelled() {
     let cts = CancellationTokenSource::new();
@@ -1235,8 +1379,10 @@ fn idle_run_returns_when_cancelled() {
     let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
     let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
     let (done_tx, done_rx) = mpmc::bounded(1);
+    let (started_tx, started_rx) = mpmc::bounded(1);
 
     thread::spawn(move || {
+        started_tx.send(()).expect(WRITE_CHAN_ERR);
         done_tx
             .send(qbft::run(
                 &token, &def, &transport, &0, 1, input_rx, source_rx,
@@ -1244,17 +1390,21 @@ fn idle_run_returns_when_cancelled() {
             .expect(WRITE_CHAN_ERR);
     });
 
-    thread::sleep(Duration::from_millis(10));
+    started_rx
+        .recv_timeout(TEST_WAIT_TIMEOUT)
+        .expect("run thread must start before cancellation");
     cts.cancel();
 
     assert!(matches!(
         done_rx
-            .recv_timeout(Duration::from_millis(100))
+            .recv_timeout(TEST_WAIT_TIMEOUT)
             .expect("idle run must unblock on cancellation"),
         Err(QbftError::ContextCanceled)
     ));
 }
 
+// Tests definition validation at the `run` boundary.
+// Expect invalid node count and FIFO limit to return typed errors.
 #[test_case(0, 1, true ; "invalid_nodes")]
 #[test_case(4, 0, false ; "invalid_fifo_limit")]
 fn invalid_definition_rejected(nodes: i64, fifo_limit: i64, invalid_nodes: bool) {
@@ -1278,6 +1428,8 @@ fn invalid_definition_rejected(nodes: i64, fifo_limit: i64, invalid_nodes: bool)
     }
 }
 
+// Tests cancellation under a continuously hot receive channel.
+// Expect cancellation to win even when incoming traffic is always ready.
 #[test]
 fn run_cancels_under_hot_receive_stream() {
     let cts = CancellationTokenSource::new();
@@ -1294,6 +1446,7 @@ fn run_cancels_under_hot_receive_stream() {
     let (_input_tx, input_rx) = mpmc::bounded::<i64>(1);
     let (_source_tx, source_rx) = mpmc::bounded::<i64>(1);
     let (done_tx, done_rx) = mpmc::bounded(1);
+    let (started_tx, started_rx) = mpmc::bounded(1);
 
     let sender_cts = CancellationTokenSource::new();
     let sender_token = sender_cts.token().clone();
@@ -1309,6 +1462,7 @@ fn run_cancels_under_hot_receive_stream() {
     });
 
     thread::spawn(move || {
+        started_tx.send(()).expect(WRITE_CHAN_ERR);
         done_tx
             .send(qbft::run(
                 &token, &def, &transport, &0, 1, input_rx, source_rx,
@@ -1316,12 +1470,14 @@ fn run_cancels_under_hot_receive_stream() {
             .expect(WRITE_CHAN_ERR);
     });
 
-    thread::sleep(Duration::from_millis(10));
+    started_rx
+        .recv_timeout(TEST_WAIT_TIMEOUT)
+        .expect("run thread must start before cancellation");
     cts.cancel();
 
     assert!(matches!(
         done_rx
-            .recv_timeout(Duration::from_millis(100))
+            .recv_timeout(TEST_WAIT_TIMEOUT)
             .expect("run must unblock on cancellation even with a hot receive stream"),
         Err(QbftError::ContextCanceled)
     ));
@@ -1330,6 +1486,8 @@ fn run_cancels_under_hot_receive_stream() {
     sender.join().expect("sender thread must exit");
 }
 
+// Tests message classification into QBFT upon-rules.
+// Expect each fixture to map to the rule required by the protocol transition.
 #[test]
 fn classify_rules() {
     let mut def = noop_definition();
@@ -1376,6 +1534,8 @@ fn classify_rules() {
     );
 }
 
+// Tests ROUND_CHANGE quorum justification forms J1 and J2.
+// Expect null-prepared and highest-prepared quorums to be accepted, invalid `pr` rejected.
 #[test]
 fn justified_qrc_j1_and_j2() {
     let mut def = noop_definition();
@@ -1411,6 +1571,8 @@ fn justified_qrc_j1_and_j2() {
     assert!(get_justified_qrc(&def, &invalid_pr, 2).is_none());
 }
 
+// Tests ROUND_CHANGE prepared-round bounds.
+// Expect only null prepared round or strictly previous prepared rounds to be valid.
 #[test_case(2, -1, false ; "negative")]
 #[test_case(1, 0, true ; "null_at_round_one")]
 #[test_case(2, 1, true ; "previous_round")]
@@ -1421,6 +1583,8 @@ fn valid_round_change_prepared_round_boundaries(round: i64, prepared_round: i64,
     assert_eq!(expected, valid_round_change_prepared_round(&msg));
 }
 
+// Tests invalid prepared rounds at every justification call site.
+// Expect invalid ROUND_CHANGE messages to be filtered while valid quorums survive.
 #[test_case(-1 ; "negative")]
 #[test_case(2 ; "current_round")]
 #[test_case(3 ; "future_round")]
@@ -1471,6 +1635,8 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr:
     );
 }
 
+// Tests null-prepared quorum filtering.
+// Expect only `prepared_round = 0` and `prepared_value = 0` messages to form J1.
 #[test_case(1, -1 ; "negative")]
 #[test_case(1, 1 ; "current_round")]
 #[test_case(1, 2 ; "future_round")]
@@ -1489,6 +1655,8 @@ fn quorum_null_prepared_requires_null_prepared_rounds(round: i64, invalid_pr: i6
     assert!(qrc.is_empty());
 }
 
+// Tests duplicate sender handling in quorum filters.
+// Expect at most one message per source to count toward a quorum.
 #[test]
 fn filter_msgs_keeps_one_per_source() {
     let msgs = vec![
@@ -1506,6 +1674,8 @@ fn filter_msgs_keeps_one_per_source() {
     );
 }
 
+// Tests compare outcomes: success, error, cached value source, and timeout.
+// Expect cached value sources to be preserved and timer expiry to return `TimeoutError`.
 #[test]
 fn compare_success_error_cached_value_source_and_timeout() {
     let cts = CancellationTokenSource::new();
@@ -1602,6 +1772,8 @@ fn compare_success_error_cached_value_source_and_timeout() {
     ));
 }
 
+// Tests compare timeout with a cooperative but blocked callback.
+// Expect `compare` to return timeout without joining the callback first.
 #[test]
 fn compare_timeout_does_not_wait_for_blocked_callback() {
     let cts = CancellationTokenSource::new();
@@ -1627,12 +1799,14 @@ fn compare_timeout_does_not_wait_for_blocked_callback() {
 
     assert!(matches!(
         result_rx
-            .recv_timeout(Duration::from_millis(100))
+            .recv_timeout(TEST_WAIT_TIMEOUT)
             .expect("compare must return on timer without waiting for blocked callback"),
         (0, Err(QbftError::TimeoutError))
     ));
 }
 
+// Tests a compare callback that exits without sending status.
+// Expect `compare` to wait for timer/cancel instead of treating disconnect as final.
 #[test]
 fn compare_callback_exit_without_status_waits_for_timer() {
     let cts = CancellationTokenSource::new();
@@ -1654,7 +1828,7 @@ fn compare_callback_exit_without_status_waits_for_timer() {
     });
 
     callback_done_rx
-        .recv_timeout(Duration::from_millis(100))
+        .recv_timeout(TEST_WAIT_TIMEOUT)
         .expect("compare callback must exit");
     assert!(
         result_rx.try_recv().is_err(),
@@ -1664,12 +1838,14 @@ fn compare_callback_exit_without_status_waits_for_timer() {
     timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR);
     assert!(matches!(
         result_rx
-            .recv_timeout(Duration::from_millis(100))
+            .recv_timeout(TEST_WAIT_TIMEOUT)
             .expect("compare must return after timer fires"),
         (0, Err(QbftError::TimeoutError))
     ));
 }
 
+// Tests parent cancellation propagation into the compare callback token.
+// Expect `compare` to return `ContextCanceled` and the callback token to be canceled.
 #[test]
 fn compare_parent_cancel_cancels_callback_token() {
     let cts = CancellationTokenSource::new();
@@ -1698,11 +1874,11 @@ fn compare_parent_cancel_cancels_callback_token() {
     });
 
     compare_started_rx
-        .recv_timeout(Duration::from_millis(100))
+        .recv_timeout(TEST_WAIT_TIMEOUT)
         .expect("compare must start");
     cts.cancel();
 
-    match result_rx.recv_timeout(Duration::from_millis(100)) {
+    match result_rx.recv_timeout(TEST_WAIT_TIMEOUT) {
         Ok(result) => assert!(matches!(result, (0, Err(QbftError::ContextCanceled)))),
         Err(err) => {
             let _ = timer_tx.send(time::Instant::now());
@@ -1710,10 +1886,12 @@ fn compare_parent_cancel_cancels_callback_token() {
         }
     }
     token_cancelled_rx
-        .recv_timeout(Duration::from_millis(100))
+        .recv_timeout(TEST_WAIT_TIMEOUT)
         .expect("callback token must be canceled by parent token");
 }
 
+// Tests parent cancellation while `run` is waiting inside compare.
+// Expect `run` to return cancellation without broadcasting PREPARE.
 #[test]
 fn run_parent_cancel_during_compare_does_not_prepare() {
     const LEADER: i64 = 1;
@@ -1761,18 +1939,18 @@ fn run_parent_cancel_during_compare_does_not_prepare() {
     });
 
     compare_started_rx
-        .recv_timeout(Duration::from_millis(100))
+        .recv_timeout(TEST_WAIT_TIMEOUT)
         .expect("compare must start");
     cts.cancel();
 
     assert!(matches!(
         done_rx
-            .recv_timeout(Duration::from_millis(100))
+            .recv_timeout(TEST_WAIT_TIMEOUT)
             .expect("run must return parent cancellation from compare"),
         Err(QbftError::ContextCanceled)
     ));
     compare_cancelled_rx
-        .recv_timeout(Duration::from_millis(100))
+        .recv_timeout(TEST_WAIT_TIMEOUT)
         .expect("compare callback token must be canceled");
     assert!(
         broadcast_rx.try_iter().all(|type_| type_ != MSG_PREPARE),
@@ -1799,6 +1977,8 @@ struct ChainSplitTest {
     should_halt: bool,
 }
 
+// Tests value-source disagreement across nodes.
+// Expect agreement when a quorum can compare equal values, or halt when no quorum can.
 #[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")]
 #[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")]
 #[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")]
@@ -1826,8 +2006,11 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
     const FIFO_LIMIT: i64 = 100;
 
     let clock = FakeClock::new(time::Instant::now());
+    let real_start = time::Instant::now();
     let cts = CancellationTokenSource::new();
     let trace = Trace::new();
+    let pending_compares = Arc::new(AtomicUsize::new(0));
+    let pending_timer_actions = Arc::new(AtomicIsize::new(0));
     // Keep peer iteration deterministic. These fake-clock tests assert exact
     // rounds, and broadcast fanout order affects which node observes quorums
     // first when tests run in parallel.
@@ -1857,33 +2040,49 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
                 result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR);
             })
         },
-        compare: Arc::new(
-            |_, qcommit, input_value_source_ch, input_value_source, return_err, return_value| {
-                let leader_value_source = qcommit.value_source().expect("value source");
-                let local = if *input_value_source == 0 {
-                    let value = input_value_source_ch.recv().expect(READ_CHAN_ERR);
-                    return_value.send(value).expect(WRITE_CHAN_ERR);
-                    value
-                } else {
-                    *input_value_source
-                };
-
-                if leader_value_source != local {
-                    return_err
-                        .send(Err(QbftError::CompareError))
-                        .expect(WRITE_CHAN_ERR);
-                    return;
-                }
+        compare: {
+            let pending_compares = pending_compares.clone();
+            Arc::new(
+                move |_,
+                      qcommit,
+                      input_value_source_ch,
+                      input_value_source,
+                      return_err,
+                      return_value| {
+                    let _guard = PendingCompareGuard {
+                        pending_compares: pending_compares.clone(),
+                    };
+                    let leader_value_source = qcommit.value_source().expect("value source");
+                    let local = if *input_value_source == 0 {
+                        let value = input_value_source_ch.recv().expect(READ_CHAN_ERR);
+                        return_value.send(value).expect(WRITE_CHAN_ERR);
+                        value
+                    } else {
+                        *input_value_source
+                    };
+
+                    if leader_value_source != local {
+                        return_err
+                            .send(Err(QbftError::CompareError))
+                            .expect(WRITE_CHAN_ERR);
+                        return;
+                    }
 
-                return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
-            },
-        ),
+                    return_err.send(Ok(())).expect(WRITE_CHAN_ERR);
+                },
+            )
+        },
         nodes: N as i64,
         fifo_limit: FIFO_LIMIT,
         log_round_change: {
             let clock = clock.clone();
             let trace = trace.clone();
+            let pending_timer_actions = pending_timer_actions.clone();
             Box::new(move |_, process, round, new_round, upon_rule, _| {
+                if upon_rule == UPON_ROUND_TIMEOUT {
+                    complete_timer_action(&pending_timer_actions);
+                }
+
                 trace.push(format!(
                     "{:?} - {}@{} change to {} ~= {}",
                     clock.elapsed(),
@@ -1903,7 +2102,12 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
         log_upon_rule: {
             let clock = clock.clone();
             let trace = trace.clone();
+            let pending_compares = pending_compares.clone();
             Box::new(move |_, process, round, msg, upon_rule| {
+                if upon_rule == UPON_JUSTIFIED_PRE_PREPARE {
+                    pending_compares.fetch_add(1, Ordering::SeqCst);
+                }
+
                 trace.push(format!(
                     "{:?} {} => {}@{} -> {}@{} ~= {}",
                     clock.elapsed(),
@@ -1977,15 +2181,32 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
             });
         }
 
+        while clock.timer_count() < N {
+            thread::yield_now();
+            if real_start.elapsed() > TEST_STALL_TIMEOUT {
+                cts.cancel();
+                clock.cancel();
+                panic!(
+                    "chain split setup hang: timers={} expected={} elapsed={:?}\n{}",
+                    clock.timer_count(),
+                    N,
+                    clock.elapsed(),
+                    trace.dump()
+                );
+            }
+        }
+
         let mut results = BTreeMap::<i64, Msg<i64, i64, i64>>::new();
         let mut count = 0;
         let mut decided = false;
         let mut done = 0;
+        let mut last_progress = time::Instant::now();
 
         loop {
             mpmc::select! {
             recv(broadcast_rx) -> msg => {
                 let msg = msg.expect(READ_CHAN_ERR);
+                last_progress = time::Instant::now();
                 for (target, (out_tx, _)) in receives.iter() {
                     if *target == msg.source() {
                         continue;
@@ -2004,6 +2225,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
             }
             recv(result_chan_rx) -> res => {
                 let q_commit = res.expect(READ_CHAN_ERR);
+                last_progress = time::Instant::now();
                 if test.should_halt {
                     cts.cancel();
                     clock.cancel();
@@ -2066,6 +2288,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
             }
             recv(run_chan_rx) -> res => {
                 let (node, outcome) = res.expect(READ_CHAN_ERR);
+                last_progress = time::Instant::now();
                 let expected_halt = test.should_halt
                     && outcome_is_error(&outcome, |err| matches!(err, QbftError::MaxRoundReached));
                 if !(decided || expected_halt) {
@@ -2091,13 +2314,37 @@ fn test_qbft_chain_split(test: ChainSplitTest) {
                 }
             }
             default => {
+                if pending_compares.load(Ordering::SeqCst) != 0
+                    || pending_timer_actions.load(Ordering::SeqCst) > 0
+                {
+                    thread::yield_now();
+                    if last_progress.elapsed() > TEST_STALL_TIMEOUT {
+                        cts.cancel();
+                        clock.cancel();
+                        panic!(
+                            "chain split hang: pending_compares={} pending_timer_actions={} decided={decided} done={done} count={count} elapsed={:?}\n{}",
+                            pending_compares.load(Ordering::SeqCst),
+                            pending_timer_actions.load(Ordering::SeqCst),
+                            clock.elapsed(),
+                            trace.dump()
+                        );
+                    }
+                    continue;
+                }
+
+                // Matches the Go harness throttle; ordering correctness comes
+                // from the pending-work barriers, not this duration.
                 thread::sleep(Duration::from_micros(1));
+                // The no-consensus halt case must reach round 11; using Go's
+                // 1ms tick here makes this Rust harness exceed its real-time
+                // guard, so only that halt path fast-forwards fake time.
                 let tick = if test.should_halt {
                     Duration::from_millis(100)
                 } else {
                     Duration::from_millis(1)
                 };
-                clock.advance(tick);
+                clock.advance_and_wait(tick, &pending_timer_actions);
+                last_progress = time::Instant::now();
                 let limit = if test.should_halt {
                     let max_round = u32::try_from(MAX_ROUND).expect("MAX_ROUND fits u32");
                     let seconds = 1_u64

From 024bff78d7ae5c8397647dfb486097194e63895c Mon Sep 17 00:00:00 2001
From: Quang Le <iamquang95@gmail.com>
Date: Wed, 20 May 2026 16:34:48 +0700
Subject: [PATCH 28/28] fix: lint

---
 crates/core/src/qbft/fake_clock.rs    |  8 +++---
 crates/core/src/qbft/internal_test.rs | 36 ++++++++++++++++++---------
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs
index 160cae56..6bc32495 100644
--- a/crates/core/src/qbft/fake_clock.rs
+++ b/crates/core/src/qbft/fake_clock.rs
@@ -108,7 +108,8 @@ impl FakeClock {
         self.advance_inner(duration, Some(pending_timer_actions))
     }
 
-    /// Shared advance path; optionally synchronizes timer delivery with the test harness.
+    /// Shared advance path; optionally synchronizes timer delivery with the
+    /// test harness.
     fn advance_inner(
         &self,
         duration: Duration,
@@ -129,7 +130,7 @@ impl FakeClock {
                 }
             }
 
-            for (id, _, _, _) in expired.iter() {
+            for (id, ..) in expired.iter() {
                 inner.clients.remove(id);
             }
 
@@ -182,7 +183,8 @@ impl FakeClock {
 }
 
 #[test]
-/// Timers registered by different threads fire after fake time passes deadlines.
+/// Timers registered by different threads fire after fake time passes
+/// deadlines.
 fn multiple_threads_timers() {
     let clock = FakeClock::new(Instant::now());
     let (done_tx, done_rx) = mpmc::bounded(2);
diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs
index 0973c924..167f2c95 100644
--- a/crates/core/src/qbft/internal_test.rs
+++ b/crates/core/src/qbft/internal_test.rs
@@ -93,7 +93,8 @@ struct Test {
 // Main QBFT simulation harness:
 // 1. build one fake clock, four node transports, and QBFT callbacks;
 // 2. spawn one thread per node running `qbft::run`;
-// 3. route broadcasts through the in-memory network with optional drop/jitter/fuzz;
+// 3. route broadcasts through the in-memory network with optional
+//    drop/jitter/fuzz;
 // 4. advance fake time only after pending compare/timer work is drained;
 // 5. collect all decisions and assert same value plus expected round/value.
 fn test_qbft(test: Test) {
@@ -1027,7 +1028,8 @@ fn happy(instance: i64) {
 }
 
 // Tests prepared-value carryover when commits are suppressed in earlier rounds.
-// Expect later rounds to decide the highest prepared value, not a new leader value.
+// Expect later rounds to decide the highest prepared value, not a new leader
+// value.
 #[test_case(1, None, 2, 1, false ; "prepare_round_1_decide_round_2")]
 #[test_case(2, Some(2), 3, 2, true ; "prepare_round_2_decide_round_3")]
 fn prepare_round(
@@ -1065,7 +1067,8 @@ fn delayed_leader_start(const_period: bool) {
 }
 
 // Tests recovery when two nodes, including early leaders, start much later.
-// Expect consensus after enough round changes, with exact round only when deterministic.
+// Expect consensus after enough round changes, with exact round only when
+// deterministic.
 #[test_case(3, false, 4, false ; "very_late_exp")]
 #[test_case(1, true, 0, true ; "very_late_const")]
 fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_round: bool) {
@@ -1258,7 +1261,8 @@ fn formulas(n: i64, q: i64, f: i64) {
 }
 
 // Tests PRE-PREPARE justification with mixed ROUND_CHANGE and PREPARE evidence.
-// Expect the proposal to be accepted when it carries a justified prepared value.
+// Expect the proposal to be accepted when it carries a justified prepared
+// value.
 #[test]
 fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() {
     let preprepare = new_msg(
@@ -1535,7 +1539,8 @@ fn classify_rules() {
 }
 
 // Tests ROUND_CHANGE quorum justification forms J1 and J2.
-// Expect null-prepared and highest-prepared quorums to be accepted, invalid `pr` rejected.
+// Expect null-prepared and highest-prepared quorums to be accepted, invalid
+// `pr` rejected.
 #[test]
 fn justified_qrc_j1_and_j2() {
     let mut def = noop_definition();
@@ -1572,7 +1577,8 @@ fn justified_qrc_j1_and_j2() {
 }
 
 // Tests ROUND_CHANGE prepared-round bounds.
-// Expect only null prepared round or strictly previous prepared rounds to be valid.
+// Expect only null prepared round or strictly previous prepared rounds to be
+// valid.
 #[test_case(2, -1, false ; "negative")]
 #[test_case(1, 0, true ; "null_at_round_one")]
 #[test_case(2, 1, true ; "previous_round")]
@@ -1584,7 +1590,8 @@ fn valid_round_change_prepared_round_boundaries(round: i64, prepared_round: i64,
 }
 
 // Tests invalid prepared rounds at every justification call site.
-// Expect invalid ROUND_CHANGE messages to be filtered while valid quorums survive.
+// Expect invalid ROUND_CHANGE messages to be filtered while valid quorums
+// survive.
 #[test_case(-1 ; "negative")]
 #[test_case(2 ; "current_round")]
 #[test_case(3 ; "future_round")]
@@ -1636,7 +1643,8 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr:
 }
 
 // Tests null-prepared quorum filtering.
-// Expect only `prepared_round = 0` and `prepared_value = 0` messages to form J1.
+// Expect only `prepared_round = 0` and `prepared_value = 0` messages to form
+// J1.
 #[test_case(1, -1 ; "negative")]
 #[test_case(1, 1 ; "current_round")]
 #[test_case(1, 2 ; "future_round")]
@@ -1675,7 +1683,8 @@ fn filter_msgs_keeps_one_per_source() {
 }
 
 // Tests compare outcomes: success, error, cached value source, and timeout.
-// Expect cached value sources to be preserved and timer expiry to return `TimeoutError`.
+// Expect cached value sources to be preserved and timer expiry to return
+// `TimeoutError`.
 #[test]
 fn compare_success_error_cached_value_source_and_timeout() {
     let cts = CancellationTokenSource::new();
@@ -1806,7 +1815,8 @@ fn compare_timeout_does_not_wait_for_blocked_callback() {
 }
 
 // Tests a compare callback that exits without sending status.
-// Expect `compare` to wait for timer/cancel instead of treating disconnect as final.
+// Expect `compare` to wait for timer/cancel instead of treating disconnect as
+// final.
 #[test]
 fn compare_callback_exit_without_status_waits_for_timer() {
     let cts = CancellationTokenSource::new();
@@ -1845,7 +1855,8 @@ fn compare_callback_exit_without_status_waits_for_timer() {
 }
 
 // Tests parent cancellation propagation into the compare callback token.
-// Expect `compare` to return `ContextCanceled` and the callback token to be canceled.
+// Expect `compare` to return `ContextCanceled` and the callback token to be
+// canceled.
 #[test]
 fn compare_parent_cancel_cancels_callback_token() {
     let cts = CancellationTokenSource::new();
@@ -1978,7 +1989,8 @@ struct ChainSplitTest {
 }
 
 // Tests value-source disagreement across nodes.
-// Expect agreement when a quorum can compare equal values, or halt when no quorum can.
+// Expect agreement when a quorum can compare equal values, or halt when no
+// quorum can.
 #[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")]
 #[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")]
 #[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")]