From 606d5bcb04b65485aa539c88e340137357a8bfac Mon Sep 17 00:00:00 2001 From: Quang Le Date: Thu, 14 May 2026 11:16:25 +0700 Subject: [PATCH 01/34] test: update qbft test --- crates/core/src/qbft/fake_clock.rs | 58 +- crates/core/src/qbft/internal_test.rs | 1196 ++++++++++++++++++++++--- 2 files changed, 1131 insertions(+), 123 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 04e143f5..3b7baeea 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -1,6 +1,6 @@ use crossbeam::channel as mpmc; use std::{ - collections::HashMap, + collections::BTreeMap, sync::{Arc, Mutex}, thread, time::{Duration, Instant}, @@ -15,7 +15,7 @@ struct FakeClockInner { start: Instant, now: Instant, last_id: usize, - clients: HashMap, Instant)>, + clients: BTreeMap, Instant)>, } impl FakeClock { @@ -98,56 +98,82 @@ impl FakeClock { } } -impl Drop for FakeClock { - fn drop(&mut self) { - self.cancel(); - } -} - #[test] fn multiple_threads_timers() { let clock = FakeClock::new(Instant::now()); + let (done_tx, done_rx) = mpmc::bounded(2); - let start = Instant::now(); thread::scope(|s| { let c1 = clock.clone(); let (ch_1, _) = c1.new_timer(Duration::from_secs(5)); + let done_tx_1 = done_tx.clone(); s.spawn(move || { - let _ = ch_1.recv(); + done_tx_1.send(ch_1.recv().is_ok()).unwrap(); }); let c2 = clock.clone(); let (ch_2, _) = c2.new_timer(Duration::from_secs(5)); + let done_tx_2 = done_tx.clone(); s.spawn(move || { - let _ = ch_2.recv(); + done_tx_2.send(ch_2.recv().is_ok()).unwrap(); }); + clock.advance(Duration::from_secs(4)); + assert!(done_rx.try_recv().is_err()); clock.advance(Duration::from_secs(6)); }); - println!("start={:?}, clock={:?}", start.elapsed(), clock.elapsed()); + assert_eq!(vec![true, true], done_rx.try_iter().collect::>()); + assert_eq!(Duration::from_secs(10), clock.elapsed()); } #[test] fn multiple_threads_cancellation() { let clock = FakeClock::new(Instant::now()); + let (done_tx, done_rx) = mpmc::bounded(2); - let start = Instant::now(); thread::scope(|s| { let c1 = clock.clone(); let (ch_1, _) = c1.new_timer(Duration::from_secs(5)); + let done_tx_1 = done_tx.clone(); s.spawn(move || { - let _ = ch_1.recv(); + done_tx_1.send(ch_1.recv().is_err()).unwrap(); }); let c2 = clock.clone(); let (ch_2, _) = c2.new_timer(Duration::from_secs(5)); + let done_tx_2 = done_tx.clone(); s.spawn(move || { - let _ = ch_2.recv(); + done_tx_2.send(ch_2.recv().is_err()).unwrap(); }); clock.cancel(); }); - println!("start={:?}, clock={:?}", start.elapsed(), clock.elapsed()); + assert_eq!(vec![true, true], done_rx.try_iter().collect::>()); + assert_eq!(Duration::ZERO, clock.elapsed()); +} + +#[test] +fn cancel_one_timer_only() { + let clock = FakeClock::new(Instant::now()); + let (ch_1, cancel_1) = clock.new_timer(Duration::from_secs(5)); + let (ch_2, _) = clock.new_timer(Duration::from_secs(5)); + + cancel_1(); + clock.advance(Duration::from_secs(5)); + + assert!(ch_1.try_recv().is_err()); + assert!(ch_2.try_recv().is_ok()); +} + +#[test] +fn expired_timer_delivers_once() { + let clock = FakeClock::new(Instant::now()); + let (ch, _) = clock.new_timer(Duration::from_secs(5)); + + clock.advance(Duration::from_secs(5)); + assert!(ch.try_recv().is_ok()); + clock.advance(Duration::from_secs(5)); + assert!(ch.try_recv().is_err()); } diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index b7770251..5c5659f2 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1,10 +1,32 @@ use crate::qbft::{self, fake_clock::FakeClock, *}; use cancellation::CancellationTokenSource; use crossbeam::channel as mpmc; -use std::{collections::HashMap, sync::Arc, thread, time::Duration}; +use std::{ + collections::{BTreeMap, HashMap}, + fmt::Write as _, + panic::{self, AssertUnwindSafe}, + sync::{Arc, Mutex}, + thread, + time::Duration, +}; const WRITE_CHAN_ERR: &str = "Failed to write to channel"; const READ_CHAN_ERR: &str = "Failed to read from channel"; +const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c; + +type RunOutcome = std::thread::Result>; +type TestMsgRef = Msg; + +struct PendingBroadcast { + deliver_at: Duration, + key: u64, + msg: TestMsgRef, +} + +enum BroadcastEvent { + Immediate(TestMsgRef), + Delayed(PendingBroadcast), +} #[derive(Default, Debug)] struct Test { @@ -28,6 +50,8 @@ struct Test { pub prepared_val: i32, /// Non-deterministic consensus at random round. pub random_round: bool, + /// Enables fuzzing by node 1. + pub fuzz: bool, } fn test_qbft(test: Test) { @@ -35,20 +59,24 @@ fn test_qbft(test: Test) { const MAX_ROUND: usize = 50; const FIFO_LIMIT: usize = 100; + let seed = test_seed(&test); + let trace = Trace::new(); let start_time = time::Instant::now(); + let real_start = time::Instant::now(); let clock = FakeClock::new(start_time); let cts = CancellationTokenSource::new(); - let mut receives = HashMap::< + let mut receives = BTreeMap::< i64, ( mpmc::Sender>, mpmc::Receiver>, ), >::new(); - let (broadcast_tx, broadcast_rx) = mpmc::unbounded::>(); + let (broadcast_tx, broadcast_rx) = mpmc::unbounded::(); + let (unjust_tx, unjust_rx) = mpmc::unbounded::(); let (result_chan_tx, result_chan_rx) = mpmc::bounded::>>(N); - let (run_chan_tx, run_chan_rx) = mpmc::bounded::>(N); + let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N); let is_leader = Box::new(make_is_leader(N as i64)); @@ -74,32 +102,46 @@ fn test_qbft(test: Test) { result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); }) }, - compare: Box::new(|_, _, _, _, return_err, _| { - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + compare: Box::new(|_, _, _, input_value_source, return_err, return_value| { + return_value + .send(*input_value_source) + .expect(WRITE_CHAN_ERR); + send_compare_result(return_err, return_value, Ok(())); }), nodes: N as i64, fifo_limit: FIFO_LIMIT as i64, log_round_change: { let clock = clock.clone(); + let trace = trace.clone(); Box::new(move |_, process, round, new_round, upon_rule, _| { - println!( + trace.push(format!( "{:?} - {}@{} change to {} ~= {}", clock.elapsed(), process, round, new_round, upon_rule, - ); + )); + }) + }, + log_unjust: { + let trace = trace.clone(); + let unjust_tx = unjust_tx.clone(); + let fuzz = test.fuzz; + Box::new(move |_, process, msg| { + let line = format!("Unjust: process={} msg={:?}", process, msg); + trace.push(line.clone()); + if !fuzz { + unjust_tx.send(line).expect(WRITE_CHAN_ERR); + } }) }, - log_unjust: Box::new(|_, _, msg| { - println!("Unjust: {:?}", msg); - }), log_upon_rule: { let clock = clock.clone(); + let trace = trace.clone(); Box::new(move |_, process, round, msg, upon_rule| { - println!( + trace.push(format!( "{:?} {} => {}@{} -> {}@{} ~= {}", clock.elapsed(), msg.source(), @@ -108,7 +150,7 @@ fn test_qbft(test: Test) { process, round, upon_rule, - ); + )); }) }, }); @@ -122,6 +164,7 @@ fn test_qbft(test: Test) { let trans = Transport { broadcast: { let clock = clock.clone(); + let trace = trace.clone(); Box::new( move |_, type_, instance, source, round, value, pr, pv, justification| { @@ -130,16 +173,22 @@ fn test_qbft(test: Test) { } if type_ == MSG_COMMIT && round <= test.commits_after.into() { - println!( + trace.push(format!( "{:?} {} dropping commit for round {}", clock.elapsed(), source, round - ); + )); return Ok(()); } - println!("{:?} {} => {}@{}", clock.elapsed(), source, type_, round); + trace.push(format!( + "{:?} {} => {}@{}", + clock.elapsed(), + source, + type_, + round + )); let msg = new_msg( type_, @@ -159,7 +208,9 @@ fn test_qbft(test: Test) { msg.clone(), test.bcast_jitter_ms, clock.clone(), - ); // TODO: Add clock + trace.clone(), + seed, + ); Ok(()) }, @@ -168,7 +219,7 @@ fn test_qbft(test: Test) { receive: receiver.clone(), }; - let token = cts.token(); + let token = cts.token().clone(); let clock = clock.clone(); let receiver = receiver.clone(); let start_delay = test.start_delay.get(&i).copied(); @@ -177,99 +228,229 @@ fn test_qbft(test: Test) { let run_chan_tx = run_chan_tx.clone(); let defs = defs.clone(); let is_leader = is_leader.clone(); + let trace = trace.clone(); s.spawn(move || { if let Some(delay) = start_delay { - println!("{:?} Node {} start delay {:?}", clock.elapsed(), i, delay); + trace.push(format!( + "{:?} Node {} start delay {:?}", + clock.elapsed(), + i, + delay + )); let (delay_ch, _) = clock.new_timer(delay); _ = delay_ch.recv(); - println!("{:?} Node {} starting", clock.elapsed(), i); + trace.push(format!( + "{:?} Node {} starting {:?}", + clock.elapsed(), + i, + delay + )); } - // Drain any buffered messages - while !receiver.is_empty() { - _ = receiver.recv().expect(READ_CHAN_ERR); + if start_delay.is_some() { + // Drain any buffered messages + while !receiver.is_empty() { + _ = receiver.recv().expect(READ_CHAN_ERR); + } } let (v_chan_tx, v_chan_rx) = mpmc::bounded::(1); - let (_, vs_chan_rx) = mpmc::bounded::(1); + let (vs_chan_tx, vs_chan_rx) = mpmc::bounded::(1); + let mut keep_value_sender = Some(v_chan_tx); + let mut input_value_rx = v_chan_rx; if let Some(delay) = value_delay { + let v_chan_tx_send = keep_value_sender + .as_ref() + .expect("value sender kept until run returns") + .clone(); s.spawn(move || { let (delay_ch, cancel) = clock.new_timer(delay); _ = delay_ch.recv(); - _ = v_chan_tx.send(i); + _ = v_chan_tx_send.send(i); cancel(); }); } else if decide_round != 1 { + let v_chan_tx_send = keep_value_sender + .as_ref() + .expect("value sender kept until run returns") + .clone(); s.spawn(move || { - _ = v_chan_tx.send(i); + _ = v_chan_tx_send.send(i); }); } else if is_leader(&test.instance, 1, i) { + let v_chan_tx_send = keep_value_sender + .as_ref() + .expect("value sender kept until run returns") + .clone(); s.spawn(move || { - _ = v_chan_tx.send(i); + _ = v_chan_tx_send.send(i); }); + } else { + keep_value_sender = None; + input_value_rx = mpmc::never(); } - run_chan_tx - .send(qbft::run( - token, + let keepalive = (keep_value_sender, vs_chan_tx); + let run_result = panic::catch_unwind(AssertUnwindSafe(|| { + qbft::run( + &token, &defs, &trans, &test.instance, i, - v_chan_rx, + input_value_rx, vs_chan_rx, - )) - .expect(WRITE_CHAN_ERR); + ) + })); + drop(keepalive); + run_chan_tx.send((i, run_result)).expect(WRITE_CHAN_ERR); }); } - let mut results = HashMap::>::new(); + let mut results = BTreeMap::>::new(); let mut count = 0; let mut decided = false; let mut done = 0; + let mut broadcasts = 0usize; + let mut pending = Vec::::new(); + let mut next_fuzz_at = test.fuzz.then_some(Duration::from_millis(100)); + let mut fuzz_counter = 0_u64; loop { - mpmc::select! { - recv(broadcast_rx) -> msg => { - let msg = msg.expect(READ_CHAN_ERR); - for (target, (out_tx, _)) in receives.iter() { - if *target == msg.source() { - continue; // Do not broadcast to self, we sent to self already. - } - - if let Some(p) = test.drop_prob.get(&msg.source()) { - if rand::random::() < *p { - println!("{:?} {} => {}@{} => {} (dropped)", clock.elapsed(), msg.source(), msg.type_(), msg.round(), target); - continue; // Drop - } - } + broadcasts += deliver_ready_broadcasts( + &mut pending, + &receives, + &test.drop_prob, + seed, + &trace, + &clock, + ); + + while let Some(next) = next_fuzz_at { + if clock.elapsed() < next { + break; + } - out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); + let msg = random_msg(test.instance, 1, seed, fuzz_counter); + fuzz_counter = fuzz_counter.wrapping_add(1); + trace.push(format!( + "{:?} fuzz {} => {}@{}", + clock.elapsed(), + msg.source(), + msg.type_(), + msg.round() + )); + broadcasts += + fanout_broadcast(&receives, &test.drop_prob, seed, &trace, &clock, msg); + next_fuzz_at = Some(next + Duration::from_millis(100)); + } - if rand::random::() < 0.1 { // Send 10% messages twice - out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); + mpmc::select! { + recv(broadcast_rx) -> event => { + match event.expect(READ_CHAN_ERR) { + BroadcastEvent::Immediate(msg) => { + broadcasts += fanout_broadcast( + &receives, + &test.drop_prob, + seed, + &trace, + &clock, + msg, + ); } + BroadcastEvent::Delayed(delayed) => pending.push(delayed), + } + clock.advance(Duration::from_millis(1)); + if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) { + cts.cancel(); + clock.cancel(); + panic!( + "qbft test hang: decided={} done={} count={} elapsed={:?} real_elapsed={:?} broadcasts={} seed={}\n{}", + decided, + done, + count, + clock.elapsed(), + real_start.elapsed(), + broadcasts, + seed, + trace.dump() + ); } } + recv(unjust_rx) -> unjust => { + let unjust = unjust.expect(READ_CHAN_ERR); + cts.cancel(); + clock.cancel(); + panic!("unjust message: {unjust} elapsed={:?} seed={}\n{}", clock.elapsed(), seed, trace.dump()); + } + recv(result_chan_rx) -> res => { let q_commit = res.expect(READ_CHAN_ERR); for commit in q_commit.clone() { for (_, previous) in results.iter() { - assert_eq!(previous.value(), commit.value(), "commit values"); + if previous.value() != commit.value() { + cts.cancel(); + clock.cancel(); + panic!( + "commit values differ: previous={:?} commit={:?} elapsed={:?} seed={}\n{}", + previous, + commit, + clock.elapsed(), + seed, + trace.dump() + ); + } } if !test.random_round { - assert_eq!(i64::from(test.decide_round), commit.round(), "wrong decide round"); + if i64::from(test.decide_round) != commit.round() { + cts.cancel(); + clock.cancel(); + panic!( + "wrong decide round: want={} got={} commit={:?} elapsed={:?} seed={}\n{}", + test.decide_round, + commit.round(), + commit, + clock.elapsed(), + seed, + trace.dump() + ); + } if test.prepared_val != 0 { // Check prepared value if set - assert_eq!(i64::from(test.prepared_val), commit.value(), "wrong prepared value"); + if i64::from(test.prepared_val) != commit.value() { + cts.cancel(); + clock.cancel(); + panic!( + "wrong prepared value: want={} got={} commit={:?} elapsed={:?} seed={}\n{}", + test.prepared_val, + commit.value(), + commit, + clock.elapsed(), + seed, + trace.dump() + ); + } } else { // Otherwise check that leader value was used. - assert!(is_leader(&test.instance, commit.round(), commit.value()), "not leader"); + if !is_leader(&test.instance, commit.round(), commit.value()) { + cts.cancel(); + clock.cancel(); + panic!( + "not leader value: instance={} round={} value={} commit={:?} elapsed={:?} seed={}\n{}", + test.instance, + commit.round(), + commit.value(), + commit, + clock.elapsed(), + seed, + trace.dump() + ); + } } } @@ -282,7 +463,7 @@ fn test_qbft(test: Test) { } let round = q_commit[0].round(); - println!("Got all results in round {} after {:?}: {:?}", round, clock.elapsed(), results); + trace.push(format!("Got all results in round {} after {:?}: {:?}", round, clock.elapsed(), results)); // Trigger shutdown decided = true; @@ -292,11 +473,24 @@ fn test_qbft(test: Test) { } recv(run_chan_rx) -> res => { - let err = res.expect(READ_CHAN_ERR); + let (node, outcome) = res.expect(READ_CHAN_ERR); - if err.is_err() { + if !matches!(outcome, Ok(Ok(()))) { if !decided { - panic!("unexpected run error"); + cts.cancel(); + clock.cancel(); + panic!( + "unexpected run error: node={} outcome={} decided={} done={} count={} elapsed={:?} broadcasts={} seed={}\n{}", + node, + format_run_outcome(&outcome), + decided, + done, + count, + clock.elapsed(), + broadcasts, + seed, + trace.dump() + ); } } @@ -309,12 +503,98 @@ fn test_qbft(test: Test) { default => { thread::sleep(time::Duration::from_micros(1)); clock.advance(Duration::from_millis(1)); + if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) { + cts.cancel(); + clock.cancel(); + panic!( + "qbft test hang: decided={} done={} count={} elapsed={:?} real_elapsed={:?} broadcasts={} seed={}\n{}", + decided, + done, + count, + clock.elapsed(), + real_start.elapsed(), + broadcasts, + seed, + trace.dump() + ); + } } } } }); } +#[derive(Clone, Default)] +struct Trace(Arc>>); + +impl Trace { + fn new() -> Self { + Self::default() + } + + fn push(&self, line: String) { + self.0.lock().unwrap().push(line); + } + + fn dump(&self) -> String { + let lines = self.0.lock().unwrap(); + let start = lines.len().saturating_sub(200); + let mut out = String::new(); + for line in &lines[start..] { + let _ = writeln!(out, "{line}"); + } + out + } +} + +fn format_run_outcome(outcome: &RunOutcome) -> String { + match outcome { + Ok(Ok(())) => "ok".to_string(), + Ok(Err(err)) => format!("error {err:?}"), + Err(payload) => { + if let Some(msg) = payload.downcast_ref::<&str>() { + format!("panic {msg}") + } else if let Some(msg) = payload.downcast_ref::() { + format!("panic {msg}") + } else { + "panic ".to_string() + } + } + } +} + +fn outcome_is_error(outcome: &RunOutcome, expected: fn(&QbftError) -> bool) -> bool { + matches!(outcome, Ok(Err(err)) if expected(err)) +} + +fn send_compare_result( + return_err: &mpmc::Sender>, + return_value: &mpmc::Sender, + result: Result<()>, +) { + // Keep the auxiliary compare channels alive until the result is consumed. + // Go channels do not close when the callback returns; Rust senders do. + let keepalive = return_value.clone(); + thread::spawn(move || { + thread::sleep(Duration::from_millis(50)); + drop(keepalive); + }); + return_err.send(result).expect(WRITE_CHAN_ERR); +} + +fn test_seed(test: &Test) -> u64 { + let mut seed = 0x5142_4654_5445_5354_u64; + seed ^= test.instance as u64; + seed ^= u64::from(test.const_period) << 8; + seed ^= (test.bcast_jitter_ms as u64) << 16; + seed ^= (test.commits_after as u64) << 32; + seed ^= (test.decide_round as u64) << 40; + seed ^= (test.prepared_val as u64) << 48; + seed ^= u64::from(test.random_round) << 56; + seed ^= u64::from(test.fuzz) << 57; + seed +} + /// Construct a leader election function. fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone { move |instance: &i64, round: i64, process: i64| -> bool { (instance + round) % n == process } @@ -365,32 +645,167 @@ fn new_msg( // Delays the message broadcast by between 1x and 2x jitter_ms and drops // messages. fn bcast( - broadcast: mpmc::Sender>, + broadcast: mpmc::Sender, msg: Msg, jitter_ms: i32, clock: FakeClock, + trace: Trace, + seed: u64, ) { if jitter_ms == 0 { - broadcast.send(msg.clone()).expect(WRITE_CHAN_ERR); + broadcast + .send(BroadcastEvent::Immediate(msg.clone())) + .expect(WRITE_CHAN_ERR); return; } - thread::spawn(move || { - let delta_ms = (f64::from(jitter_ms) * rand::random::()) as i32; - let delay = Duration::from_millis((jitter_ms + delta_ms) as u64); - println!( - "{:?} {} => {}@{} (bcast delay {:?})", - clock.elapsed(), - msg.source(), - msg.type_(), - msg.round(), - delay - ); - let (delay_ch, _) = clock.new_timer(delay); - _ = delay_ch.recv(); + let delta_ms = (f64::from(jitter_ms) * deterministic_unit(seed, &msg, 0, 3)) as i32; + let delay = Duration::from_millis((jitter_ms + delta_ms) as u64); + trace.push(format!( + "{:?} {} => {}@{} (bcast delay {:?})", + clock.elapsed(), + msg.source(), + msg.type_(), + msg.round(), + delay + )); + let key = deterministic_msg_u64(seed, &msg, 0, 4); + broadcast + .send(BroadcastEvent::Delayed(PendingBroadcast { + deliver_at: clock.elapsed() + delay, + key, + msg, + })) + .expect(WRITE_CHAN_ERR); +} - _ = broadcast.send(msg); - }); +fn deliver_ready_broadcasts( + pending: &mut Vec, + receives: &BTreeMap, mpmc::Receiver)>, + drop_prob: &HashMap, + seed: u64, + trace: &Trace, + clock: &FakeClock, +) -> usize { + pending.sort_by_key(|delayed| (delayed.deliver_at, delayed.key)); + let ready_count = pending + .iter() + .take_while(|delayed| delayed.deliver_at <= clock.elapsed()) + .count(); + let ready = pending.drain(..ready_count).collect::>(); + + ready + .into_iter() + .map(|delayed| fanout_broadcast(receives, drop_prob, seed, trace, clock, delayed.msg)) + .sum() +} + +fn fanout_broadcast( + receives: &BTreeMap, mpmc::Receiver)>, + drop_prob: &HashMap, + seed: u64, + trace: &Trace, + clock: &FakeClock, + msg: TestMsgRef, +) -> usize { + let mut broadcasts = 0; + for (target, (out_tx, _)) in receives.iter() { + if *target == msg.source() { + continue; // Do not broadcast to self, we sent to self already. + } + + if let Some(p) = drop_prob.get(&msg.source()) { + if deterministic_unit(seed, &msg, *target, 1) < *p { + trace.push(format!( + "{:?} {} => {}@{} => {} (dropped)", + clock.elapsed(), + msg.source(), + msg.type_(), + msg.round(), + target + )); + continue; + } + } + + out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); + broadcasts += 1; + + if deterministic_unit(seed, &msg, *target, 2) < 0.1 { + out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); + broadcasts += 1; + trace.push(format!( + "{:?} {} => {}@{} => {} (duplicate)", + clock.elapsed(), + msg.source(), + msg.type_(), + msg.round(), + target + )); + } + } + + broadcasts +} + +fn random_msg(instance: i64, peer_idx: i64, seed: u64, counter: u64) -> Msg { + let message_types = [ + MSG_PRE_PREPARE, + MSG_PREPARE, + MSG_COMMIT, + MSG_ROUND_CHANGE, + MSG_DECIDED, + ]; + new_msg( + message_types[deterministic_range(seed, counter, 10, message_types.len())], + instance, + peer_idx, + deterministic_i64(seed, counter, 11, 10), + deterministic_i64(seed, counter, 12, 10), + 0, + deterministic_i64(seed, counter, 13, 10), + deterministic_i64(seed, counter, 14, 10), + None, + ) +} + +fn deterministic_unit(seed: u64, msg: &Msg, target: i64, salt: u64) -> f64 { + let value = deterministic_msg_u64(seed, msg, target, salt) >> 11; + value as f64 / ((1_u64 << 53) as f64) +} + +fn deterministic_msg_u64(seed: u64, msg: &Msg, target: i64, salt: u64) -> u64 { + let mut value = splitmix64(seed ^ salt); + value = splitmix64(value ^ i64_to_u64(msg.type_().0)); + value = splitmix64(value ^ i64_to_u64(msg.instance())); + value = splitmix64(value ^ i64_to_u64(msg.source())); + value = splitmix64(value ^ i64_to_u64(msg.round())); + value = splitmix64(value ^ i64_to_u64(msg.value())); + value = splitmix64(value ^ i64_to_u64(msg.value_source().unwrap_or_default())); + value = splitmix64(value ^ i64_to_u64(msg.prepared_round())); + value = splitmix64(value ^ i64_to_u64(msg.prepared_value())); + splitmix64(value ^ i64_to_u64(target)) +} + +fn deterministic_range(seed: u64, counter: u64, salt: u64, upper: usize) -> usize { + let upper = u64::try_from(upper).expect("upper fits in u64"); + usize::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in usize") +} + +fn deterministic_i64(seed: u64, counter: u64, salt: u64, upper: i64) -> i64 { + let upper = u64::try_from(upper).expect("upper is positive"); + i64::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in i64") +} + +fn splitmix64(mut value: u64) -> u64 { + value = value.wrapping_add(0x9e37_79b9_7f4a_7c15); + value = (value ^ (value >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + value = (value ^ (value >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + value ^ (value >> 31) +} + +fn i64_to_u64(value: i64) -> u64 { + u64::from_le_bytes(value.to_le_bytes()) } #[derive(Clone, Debug)] @@ -455,7 +870,6 @@ impl SomeMsg for TestMsg { } #[test] -#[ignore = "flaky"] fn happy_0() { test_qbft(Test { instance: 0, @@ -465,7 +879,6 @@ fn happy_0() { } #[test] -#[ignore = "flaky"] fn happy_1() { test_qbft(Test { instance: 1, @@ -475,7 +888,6 @@ fn happy_1() { } #[test] -#[ignore = "flaky"] fn prepare_round_1_decide_round_2() { test_qbft(Test { instance: 0, @@ -487,12 +899,11 @@ fn prepare_round_1_decide_round_2() { } #[test] -#[ignore = "wrong prepared value"] fn prepare_round_2_decide_round_3() { test_qbft(Test { instance: 0, commits_after: 2, - value_delay: HashMap::from([(1, Duration::from_millis(200))]), + value_delay: HashMap::from([(1, Duration::from_secs(2))]), decide_round: 3, prepared_val: 2, const_period: true, @@ -501,32 +912,27 @@ fn prepare_round_2_decide_round_3() { } #[test] -#[ignore = "wrong decide round"] -fn leader_late_xp() { +fn leader_late_exp() { test_qbft(Test { instance: 0, - start_delay: HashMap::from([(1, Duration::from_millis(200))]), + start_delay: HashMap::from([(1, Duration::from_secs(2))]), decide_round: 2, ..Default::default() }); } #[test] -#[ignore = "wrong decide round"] fn leader_down_const() { test_qbft(Test { - instance: 3, - start_delay: HashMap::from([ - (1, Duration::from_millis(50)), - (2, Duration::from_millis(100)), - ]), - decide_round: 4, + instance: 0, + start_delay: HashMap::from([(1, Duration::from_secs(2))]), + const_period: true, + decide_round: 2, ..Default::default() }); } #[test] -#[ignore = "flaky"] fn very_late_exp() { test_qbft(Test { instance: 3, @@ -537,7 +943,6 @@ fn very_late_exp() { } #[test] -#[ignore = "flaky"] fn very_late_const() { test_qbft(Test { instance: 1, @@ -549,7 +954,6 @@ fn very_late_const() { } #[test] -#[ignore = "flaky"] fn stagger_start_exp() { test_qbft(Test { instance: 0, @@ -565,7 +969,6 @@ fn stagger_start_exp() { } #[test] -#[ignore = "flaky"] fn stagger_start_const() { test_qbft(Test { instance: 0, @@ -582,7 +985,6 @@ fn stagger_start_const() { } #[test] -#[ignore = "flaky"] fn very_delayed_value_exp() { test_qbft(Test { instance: 3, @@ -593,7 +995,6 @@ fn very_delayed_value_exp() { } #[test] -#[ignore = "flaky"] fn very_delayed_value_const() { test_qbft(Test { instance: 1, @@ -605,7 +1006,6 @@ fn very_delayed_value_const() { } #[test] -#[ignore = "flaky"] fn stagger_delayed_value_exp() { test_qbft(Test { instance: 0, @@ -621,7 +1021,6 @@ fn stagger_delayed_value_exp() { } #[test] -#[ignore = "flaky"] fn stagger_delayed_value_const() { test_qbft(Test { instance: 0, @@ -638,7 +1037,6 @@ fn stagger_delayed_value_const() { } #[test] -#[ignore = "flaky"] fn round1_leader_no_value_round2_leader_offline() { test_qbft(Test { instance: 0, @@ -651,7 +1049,6 @@ fn round1_leader_no_value_round2_leader_offline() { } #[test] -#[ignore = "flaky"] fn jitter_500ms_exp() { test_qbft(Test { instance: 3, @@ -662,7 +1059,6 @@ fn jitter_500ms_exp() { } #[test] -#[ignore = "flaky"] fn jitter_200ms_const() { test_qbft(Test { instance: 3, @@ -674,7 +1070,6 @@ fn jitter_200ms_const() { } #[test] -#[ignore = "flaky"] fn drop_10_percent_const() { test_qbft(Test { instance: 1, @@ -686,7 +1081,6 @@ fn drop_10_percent_const() { } #[test] -#[ignore = "flaky"] fn drop_30_percent_const() { test_qbft(Test { instance: 1, @@ -697,6 +1091,41 @@ fn drop_30_percent_const() { }); } +#[test] +fn fuzz() { + test_qbft(Test { + instance: 1, + fuzz: true, + const_period: true, + decide_round: 1, + ..Default::default() + }); +} + +#[test] +fn fuzz_with_late_leader() { + test_qbft(Test { + instance: 1, + fuzz: true, + start_delay: HashMap::from([(1, Duration::from_secs(2)), (2, Duration::from_secs(2))]), + const_period: true, + random_round: true, + ..Default::default() + }); +} + +#[test] +fn fuzz_with_very_late_leader() { + test_qbft(Test { + instance: 1, + fuzz: true, + start_delay: HashMap::from([(1, Duration::from_secs(10)), (2, Duration::from_secs(10))]), + const_period: true, + random_round: true, + ..Default::default() + }); +} + fn noop_definition() -> Definition { Definition { is_leader: Box::new(|_, _, _| false), @@ -719,7 +1148,71 @@ fn noop_transport() -> Transport { } #[test] -#[ignore = "flaky"] +fn formulas() { + let expected = [ + (1, 1, 0), + (2, 2, 0), + (3, 2, 0), + (4, 3, 1), + (5, 4, 1), + (6, 4, 1), + (7, 5, 2), + (8, 6, 2), + (9, 6, 2), + (10, 7, 3), + (11, 8, 3), + (12, 8, 3), + (13, 9, 4), + (14, 10, 4), + (15, 10, 4), + (16, 11, 5), + (17, 12, 5), + (18, 12, 5), + (19, 13, 6), + (20, 14, 6), + (21, 14, 6), + (22, 15, 7), + ]; + + for (n, q, f) in expected { + let d = Definition:: { + nodes: n, + ..noop_definition() + }; + assert_eq!(q, d.quorum(), "Quorum given N={n}"); + assert_eq!(f, d.faulty(), "Faulty given N={n}"); + } +} + +#[test] +fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() { + let preprepare = new_msg( + MSG_PRE_PREPARE, + 1, + 3, + 6, + 2, + 0, + 0, + 0, + Some(&vec![ + new_msg(MSG_ROUND_CHANGE, 1, 2, 6, 0, 0, 2, 3, None), + new_msg(MSG_ROUND_CHANGE, 1, 3, 6, 0, 0, 2, 3, None), + new_msg(MSG_ROUND_CHANGE, 1, 1, 6, 0, 0, 2, 2, None), + new_msg(MSG_PREPARE, 1, 3, 2, 2, 0, 0, 0, None), + new_msg(MSG_PREPARE, 1, 4, 2, 2, 0, 0, 0, None), + new_msg(MSG_PREPARE, 1, 1, 2, 2, 0, 0, 0, None), + new_msg(MSG_PREPARE, 1, 2, 2, 2, 0, 0, 0, None), + ]), + ); + let mut def = noop_definition(); + def.nodes = 4; + def.is_leader = Box::new(make_is_leader(4)); + + assert!(is_justified_pre_prepare(&def, &1, &preprepare, 0)); +} + +#[test] fn duplicate_pre_prepare_rules() { let cts = CancellationTokenSource::new(); let ct = &cts.token().clone(); @@ -760,8 +1253,11 @@ fn duplicate_pre_prepare_rules() { panic!("unexpected round {}", round); }); - def.compare = Box::new(|_, _, _, _, return_err, _| { - _ = return_err.send(Ok(())); + def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| { + return_value + .send(*input_value_source) + .expect(WRITE_CHAN_ERR); + send_compare_result(return_err, return_value, Ok(())); }); let (r_chan_tx, r_chan_rx) = mpmc::bounded::>(2); @@ -788,3 +1284,489 @@ fn duplicate_pre_prepare_rules() { assert!(res.is_ok()); } + +#[test] +fn classify_rules() { + let mut def = noop_definition(); + def.nodes = 4; + def.is_leader = Box::new(make_is_leader(4)); + + let preprepare = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 1, 0, 0, 0, None); + assert!(classify(&def, &0, 1, 2, &HashMap::new(), &preprepare).0 == UPON_JUSTIFIED_PRE_PREPARE); + + let prepares = vec![ + new_msg(MSG_PREPARE, 0, 1, 1, 2, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 2, 1, 2, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 3, 1, 2, 0, 0, 0, None), + ]; + let buffer = buffer_by_source(&prepares); + assert!(classify(&def, &0, 1, 2, &buffer, &prepares[2]).0 == UPON_QUORUM_PREPARES); + + let commits = vec![ + new_msg(MSG_COMMIT, 0, 1, 1, 2, 0, 0, 0, None), + new_msg(MSG_COMMIT, 0, 2, 1, 2, 0, 0, 0, None), + new_msg(MSG_COMMIT, 0, 3, 1, 2, 0, 0, 0, None), + ]; + let buffer = buffer_by_source(&commits); + assert!(classify(&def, &0, 1, 2, &buffer, &commits[2]).0 == UPON_QUORUM_COMMITS); + + let future_round_changes = vec![ + new_msg(MSG_ROUND_CHANGE, 0, 1, 3, 0, 0, 0, 0, None), + new_msg(MSG_ROUND_CHANGE, 0, 2, 3, 0, 0, 0, 0, None), + ]; + let buffer = buffer_by_source(&future_round_changes); + assert!( + classify(&def, &0, 1, 2, &buffer, &future_round_changes[1]).0 == UPON_F_PLUS1_ROUND_CHANGES + ); + + let unjust_round_changes = vec![ + new_msg(MSG_ROUND_CHANGE, 0, 1, 1, 0, 0, 2, 9, None), + new_msg(MSG_ROUND_CHANGE, 0, 2, 1, 0, 0, 2, 9, None), + new_msg(MSG_ROUND_CHANGE, 0, 3, 1, 0, 0, 2, 9, None), + ]; + let buffer = buffer_by_source(&unjust_round_changes); + assert!( + classify(&def, &0, 1, 2, &buffer, &unjust_round_changes[2]).0 + == UPON_UNJUST_QUORUM_ROUND_CHANGES + ); +} + +#[test] +fn justified_qrc_j1_and_j2() { + let mut def = noop_definition(); + def.nodes = 4; + let j1 = vec![ + new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 0, 0, None), + new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 0, 0, None), + new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 0, 0, None), + ]; + assert_eq!(Some(0), contains_justified_qrc(&def, &j1, 2)); + assert_eq!(3, get_justified_qrc(&def, &j1, 2).unwrap().len()); + + let j2 = vec![ + new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 1, 7, None), + new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 1, 7, None), + new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 1, 1, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 2, 1, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 3, 1, 7, 0, 0, 0, None), + ]; + assert_eq!(Some(7), contains_justified_qrc(&def, &j2, 2)); + assert!(get_justified_qrc(&def, &j2, 2).unwrap().len() >= 6); +} + +#[test] +fn filter_msgs_keeps_one_per_source() { + let msgs = vec![ + new_msg(MSG_PREPARE, 0, 1, 1, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 1, 1, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 2, 1, 7, 0, 0, 0, None), + ]; + + let filtered = filter_msgs(&msgs, MSG_PREPARE, 1, Some(&7), None, None); + + assert_eq!(2, filtered.len()); + assert_eq!( + vec![1, 2], + filtered.iter().map(|msg| msg.source()).collect::>() + ); +} + +#[test] +fn compare_success_error_cached_value_source_and_timeout() { + let cts = CancellationTokenSource::new(); + let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None); + let (_vs_tx, vs_rx) = mpmc::bounded::(1); + let timer = mpmc::never(); + let mut def = noop_definition(); + def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| { + return_value + .send(*input_value_source) + .expect(WRITE_CHAN_ERR); + send_compare_result(return_err, return_value, Ok(())); + }); + assert!(matches!( + compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), + Ok(0) + )); + + let mut def = noop_definition(); + def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| { + return_value + .send(*input_value_source) + .expect(WRITE_CHAN_ERR); + send_compare_result(return_err, return_value, Err(QbftError::CompareError)); + }); + assert!(matches!( + compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), + Err(QbftError::CompareError) + )); + + let (vs_tx, vs_rx) = mpmc::bounded::(1); + vs_tx.send(42).expect(WRITE_CHAN_ERR); + let mut def = noop_definition(); + def.compare = Box::new( + |_, _, input_value_source_ch, input_value_source, return_err, return_value| { + let cached = if *input_value_source == 0 { + let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); + return_value.send(value).expect(WRITE_CHAN_ERR); + value + } else { + *input_value_source + }; + assert_eq!(42, cached); + send_compare_result(return_err, return_value, Ok(())); + }, + ); + assert!(matches!( + compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), + Ok(42) + )); + + let (timer_tx, timer_rx) = mpmc::bounded(1); + timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); + let mut def = noop_definition(); + def.compare = Box::new(|_, _, _, _, return_err, _| { + thread::sleep(Duration::from_millis(20)); + let _ = return_err.send(Ok(())); + }); + assert!(matches!( + compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx), + Err(QbftError::TimeoutError) + )); +} + +fn buffer_by_source(msgs: &[Msg]) -> HashMap>> { + let mut buffer = HashMap::new(); + for msg in msgs { + buffer + .entry(msg.source()) + .or_insert_with(Vec::new) + .push(msg.clone()); + } + buffer +} + +#[derive(Debug)] +struct ChainSplitTest { + value_source: HashMap, + decide_round: i32, + prepared_val: i32, + should_halt: bool, +} + +#[test] +fn chain_split_same_value() { + test_qbft_chain_split(ChainSplitTest { + decide_round: 1, + value_source: HashMap::from([(1, 1), (2, 1), (3, 1), (4, 1)]), + prepared_val: 1, + should_halt: false, + }); +} + +#[test] +fn chain_split_non_leader_peer_has_different_value() { + test_qbft_chain_split(ChainSplitTest { + decide_round: 1, + value_source: HashMap::from([(1, 1), (2, 3), (3, 1), (4, 1)]), + prepared_val: 1, + should_halt: false, + }); +} + +#[test] +fn chain_split_first_leader_has_different_value_second_leader_succeeds() { + test_qbft_chain_split(ChainSplitTest { + decide_round: 2, + value_source: HashMap::from([(1, 3), (2, 1), (3, 1), (4, 1)]), + prepared_val: 1, + should_halt: false, + }); +} + +#[test] +fn zz_chain_split_no_consensus_halt() { + test_qbft_chain_split(ChainSplitTest { + decide_round: 0, + value_source: HashMap::from([(1, 1), (2, 1), (3, 3), (4, 3)]), + prepared_val: 0, + should_halt: true, + }); +} + +fn test_qbft_chain_split(test: ChainSplitTest) { + const N: usize = 4; + const MAX_ROUND: i64 = 10; + const FIFO_LIMIT: i64 = 100; + + let clock = FakeClock::new(time::Instant::now()); + let cts = CancellationTokenSource::new(); + let trace = Trace::new(); + let mut receives = BTreeMap::< + i64, + ( + mpmc::Sender>, + mpmc::Receiver>, + ), + >::new(); + let (broadcast_tx, broadcast_rx) = mpmc::unbounded::>(); + let (result_chan_tx, result_chan_rx) = mpmc::bounded::>>(N); + let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N); + let instance = 0; + + let defs = Arc::new(Definition { + is_leader: Box::new(make_is_leader(N as i64)), + new_timer: { + let clock = clock.clone(); + Box::new(move |round| { + clock.new_timer(Duration::from_secs(u64::pow(2, (round as u32) - 1))) + }) + }, + decide: { + let result_chan_tx = result_chan_tx.clone(); + Box::new(move |_, _, _, q_commit| { + result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); + }) + }, + compare: Box::new( + |_, qcommit, input_value_source_ch, input_value_source, return_err, return_value| { + let leader_value_source = qcommit.value_source().expect("value source"); + let local = if *input_value_source == 0 { + let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); + return_value.send(value).expect(WRITE_CHAN_ERR); + value + } else { + *input_value_source + }; + + if leader_value_source != local { + return_value.send(local).expect(WRITE_CHAN_ERR); + send_compare_result(return_err, return_value, Err(QbftError::CompareError)); + return; + } + + return_value.send(local).expect(WRITE_CHAN_ERR); + send_compare_result(return_err, return_value, Ok(())); + }, + ), + nodes: N as i64, + fifo_limit: FIFO_LIMIT, + log_round_change: { + let clock = clock.clone(); + let trace = trace.clone(); + Box::new(move |_, process, round, new_round, upon_rule, _| { + trace.push(format!( + "{:?} - {}@{} change to {} ~= {}", + clock.elapsed(), + process, + round, + new_round, + upon_rule + )); + }) + }, + log_unjust: { + let trace = trace.clone(); + Box::new(move |_, process, msg| { + trace.push(format!("Unjust: process={} msg={:?}", process, msg)) + }) + }, + log_upon_rule: { + let clock = clock.clone(); + let trace = trace.clone(); + Box::new(move |_, process, round, msg, upon_rule| { + trace.push(format!( + "{:?} {} => {}@{} -> {}@{} ~= {}", + clock.elapsed(), + msg.source(), + msg.type_(), + msg.round(), + process, + round, + upon_rule + )); + }) + }, + }); + + thread::scope(|s| { + for i in 1..=N as i64 { + let (sender, receiver) = mpmc::bounded::>(1000); + receives.insert(i, (sender.clone(), receiver.clone())); + let broadcast_tx = broadcast_tx.clone(); + let trace = trace.clone(); + let clock = clock.clone(); + + let transport = Transport { + broadcast: Box::new( + move |_, type_, instance, source, round, value, pr, pv, justification| { + if round > MAX_ROUND { + return Err(QbftError::MaxRoundReached); + } + + trace.push(format!( + "{:?} {} => {}@{}", + clock.elapsed(), + source, + type_, + round + )); + let msg = new_msg( + type_, + *instance, + source, + round, + *value, + *value, + pr, + *pv, + justification, + ); + sender.send(msg.clone()).expect(WRITE_CHAN_ERR); + broadcast_tx.send(msg).expect(WRITE_CHAN_ERR); + Ok(()) + }, + ), + receive: receiver, + }; + + let token = cts.token().clone(); + let defs = defs.clone(); + let run_chan_tx = run_chan_tx.clone(); + let value_source = test.value_source[&i]; + s.spawn(move || { + let (v_tx, v_rx) = mpmc::bounded::(1); + let (vs_tx, vs_rx) = mpmc::unbounded::(); + v_tx.send(value_source).expect(WRITE_CHAN_ERR); + for _ in 0..=MAX_ROUND { + vs_tx.send(value_source).expect(WRITE_CHAN_ERR); + } + let run_result = panic::catch_unwind(AssertUnwindSafe(|| { + qbft::run(&token, &defs, &transport, &instance, i, v_rx, vs_rx) + })); + drop(v_tx); + drop(vs_tx); + run_chan_tx.send((i, run_result)).expect(WRITE_CHAN_ERR); + }); + } + + let mut results = BTreeMap::>::new(); + let mut count = 0; + let mut decided = false; + let mut done = 0; + + loop { + mpmc::select! { + recv(broadcast_rx) -> msg => { + let msg = msg.expect(READ_CHAN_ERR); + for (target, (out_tx, _)) in receives.iter() { + if *target == msg.source() { + continue; + } + out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); + if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, 1) < 0.1 { + out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); + } + } + } + recv(result_chan_rx) -> res => { + let q_commit = res.expect(READ_CHAN_ERR); + for commit in q_commit.clone() { + for previous in results.values() { + if previous.value() != commit.value() { + cts.cancel(); + clock.cancel(); + panic!( + "chain split commit values differ: previous={:?} commit={:?} elapsed={:?}\n{}", + previous, + commit, + clock.elapsed(), + trace.dump() + ); + } + } + if i64::from(test.decide_round) != commit.round() { + cts.cancel(); + clock.cancel(); + panic!( + "chain split wrong decide round: want={} got={} commit={:?} elapsed={:?}\n{}", + test.decide_round, + commit.round(), + commit, + clock.elapsed(), + trace.dump() + ); + } + if test.prepared_val != 0 { + if i64::from(test.prepared_val) != commit.value() { + cts.cancel(); + clock.cancel(); + panic!( + "chain split wrong prepared value: want={} got={} commit={:?} elapsed={:?}\n{}", + test.prepared_val, + commit.value(), + commit, + clock.elapsed(), + trace.dump() + ); + } + } + results.insert(commit.source(), commit); + } + count += 1; + if count == N { + decided = true; + clock.cancel(); + cts.cancel(); + } + } + recv(run_chan_rx) -> res => { + let (node, outcome) = res.expect(READ_CHAN_ERR); + let expected_halt = test.should_halt + && outcome_is_error(&outcome, |err| matches!(err, QbftError::MaxRoundReached)); + if !(decided || expected_halt) { + cts.cancel(); + clock.cancel(); + panic!( + "unexpected chain split run error: node={} outcome={} decided={} done={} count={} elapsed={:?}\n{}", + node, + format_run_outcome(&outcome), + decided, + done, + count, + clock.elapsed(), + trace.dump() + ); + } + done += 1; + if done == N { + if test.should_halt { + assert!(!decided, "halt case unexpectedly decided"); + } + return; + } + } + default => { + thread::sleep(Duration::from_micros(1)); + let tick = if test.should_halt { + Duration::from_millis(100) + } else { + Duration::from_millis(1) + }; + clock.advance(tick); + let limit = if test.should_halt { + Duration::from_secs(600) + } else { + Duration::from_secs(60) + }; + if clock.elapsed() > limit { + cts.cancel(); + clock.cancel(); + panic!("chain split hang: decided={decided} done={done} count={count} elapsed={:?}\n{}", clock.elapsed(), trace.dump()); + } + } + } + } + }); +} From 0ec38dede4cac52ff4ba21a1f51d160c89efb4a9 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 10:22:23 +0700 Subject: [PATCH 02/34] fix: compare run on retached thread --- crates/core/src/qbft/fake_clock.rs | 4 +- crates/core/src/qbft/internal_test.rs | 231 ++++++++++++++++++++------ crates/core/src/qbft/mod.rs | 169 +++++++++++-------- 3 files changed, 283 insertions(+), 121 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 3b7baeea..0a3ab477 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -1,6 +1,6 @@ use crossbeam::channel as mpmc; use std::{ - collections::BTreeMap, + collections::HashMap, sync::{Arc, Mutex}, thread, time::{Duration, Instant}, @@ -15,7 +15,7 @@ struct FakeClockInner { start: Instant, now: Instant, last_id: usize, - clients: BTreeMap, Instant)>, + clients: HashMap, Instant)>, } impl FakeClock { diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 5c5659f2..ea072f94 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -12,6 +12,7 @@ use std::{ const WRITE_CHAN_ERR: &str = "Failed to write to channel"; const READ_CHAN_ERR: &str = "Failed to read from channel"; +// Fixed seed for deterministic chain-split duplicate-message simulation. const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c; type RunOutcome = std::thread::Result>; @@ -66,6 +67,9 @@ fn test_qbft(test: Test) { let clock = FakeClock::new(start_time); let cts = CancellationTokenSource::new(); + // Keep peer iteration deterministic. These fake-clock tests assert exact + // rounds, and broadcast fanout order affects which node observes quorums + // first when tests run in parallel. let mut receives = BTreeMap::< i64, ( @@ -102,11 +106,8 @@ fn test_qbft(test: Test) { result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); }) }, - compare: Box::new(|_, _, _, input_value_source, return_err, return_value| { - return_value - .send(*input_value_source) - .expect(WRITE_CHAN_ERR); - send_compare_result(return_err, return_value, Ok(())); + compare: Arc::new(|_, _, _, _, return_err, _| { + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); }), nodes: N as i64, fifo_limit: FIFO_LIMIT as i64, @@ -329,6 +330,10 @@ fn test_qbft(test: Test) { &clock, ); + if decided { + next_fuzz_at = None; + } + while let Some(next) = next_fuzz_at { if clock.elapsed() < next { break; @@ -467,6 +472,7 @@ fn test_qbft(test: Test) { // Trigger shutdown decided = true; + next_fuzz_at = None; clock.cancel(); cts.cancel(); @@ -567,21 +573,6 @@ fn outcome_is_error(outcome: &RunOutcome, expected: fn(&QbftError) -> bool) -> b matches!(outcome, Ok(Err(err)) if expected(err)) } -fn send_compare_result( - return_err: &mpmc::Sender>, - return_value: &mpmc::Sender, - result: Result<()>, -) { - // Keep the auxiliary compare channels alive until the result is consumed. - // Go channels do not close when the callback returns; Rust senders do. - let keepalive = return_value.clone(); - thread::spawn(move || { - thread::sleep(Duration::from_millis(50)); - drop(keepalive); - }); - return_err.send(result).expect(WRITE_CHAN_ERR); -} - fn test_seed(test: &Test) -> u64 { let mut seed = 0x5142_4654_5445_5354_u64; seed ^= test.instance as u64; @@ -1131,7 +1122,7 @@ fn noop_definition() -> Definition { is_leader: Box::new(|_, _, _| false), new_timer: Box::new(|_| (mpmc::never(), Box::new(|| {}))), decide: Box::new(|_, _, _, _| {}), - compare: Box::new(|_, _, _, _, _, _| {}), + compare: Arc::new(|_, _, _, _, _, _| {}), nodes: 0, fifo_limit: 0, log_round_change: Box::new(|_, _, _, _, _, _| {}), @@ -1253,11 +1244,8 @@ fn duplicate_pre_prepare_rules() { panic!("unexpected round {}", round); }); - def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| { - return_value - .send(*input_value_source) - .expect(WRITE_CHAN_ERR); - send_compare_result(return_err, return_value, Ok(())); + def.compare = Arc::new(|_, _, _, _, return_err, _| { + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); }); let (r_chan_tx, r_chan_rx) = mpmc::bounded::>(2); @@ -1285,6 +1273,35 @@ fn duplicate_pre_prepare_rules() { assert!(res.is_ok()); } +#[test] +fn idle_run_returns_when_cancelled() { + let cts = CancellationTokenSource::new(); + let token = cts.token().clone(); + let def = noop_definition(); + let transport = noop_transport(); + let (_input_tx, input_rx) = mpmc::bounded::(1); + let (_source_tx, source_rx) = mpmc::bounded::(1); + let (done_tx, done_rx) = mpmc::bounded(1); + + thread::spawn(move || { + done_tx + .send(qbft::run( + &token, &def, &transport, &0, 1, input_rx, source_rx, + )) + .expect(WRITE_CHAN_ERR); + }); + + thread::sleep(Duration::from_millis(10)); + cts.cancel(); + + assert!(matches!( + done_rx + .recv_timeout(Duration::from_millis(100)) + .expect("idle run must unblock on cancellation"), + Ok(()) + )); +} + #[test] fn classify_rules() { let mut def = noop_definition(); @@ -1379,33 +1396,42 @@ fn compare_success_error_cached_value_source_and_timeout() { let (_vs_tx, vs_rx) = mpmc::bounded::(1); let timer = mpmc::never(); let mut def = noop_definition(); - def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| { - return_value - .send(*input_value_source) - .expect(WRITE_CHAN_ERR); - send_compare_result(return_err, return_value, Ok(())); + def.compare = Arc::new(|_, _, _, _, return_err, _| { + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); }); assert!(matches!( compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), - Ok(0) + (0, Ok(())) + )); + + let mut def = noop_definition(); + def.compare = Arc::new(|_, _, _, _, return_err, _| { + let return_err = return_err.clone(); + thread::spawn(move || { + thread::sleep(Duration::from_millis(10)); + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + }); + }); + assert!(matches!( + compare(cts.token(), &def, &msg, &vs_rx, 41, &timer), + (41, Ok(())) )); let mut def = noop_definition(); - def.compare = Box::new(|_, _, _, input_value_source, return_err, return_value| { - return_value - .send(*input_value_source) + def.compare = Arc::new(|_, _, _, _, return_err, _| { + return_err + .send(Err(QbftError::CompareError)) .expect(WRITE_CHAN_ERR); - send_compare_result(return_err, return_value, Err(QbftError::CompareError)); }); assert!(matches!( compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), - Err(QbftError::CompareError) + (0, Err(QbftError::CompareError)) )); let (vs_tx, vs_rx) = mpmc::bounded::(1); vs_tx.send(42).expect(WRITE_CHAN_ERR); let mut def = noop_definition(); - def.compare = Box::new( + def.compare = Arc::new( |_, _, input_value_source_ch, input_value_source, return_err, return_value| { let cached = if *input_value_source == 0 { let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); @@ -1415,27 +1441,121 @@ fn compare_success_error_cached_value_source_and_timeout() { *input_value_source }; assert_eq!(42, cached); - send_compare_result(return_err, return_value, Ok(())); + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + }, + ); + assert!(matches!( + compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), + (42, Ok(())) + )); + + let (vs_tx, vs_rx) = mpmc::bounded::(1); + vs_tx.send(43).expect(WRITE_CHAN_ERR); + let mut def = noop_definition(); + def.compare = Arc::new( + |_, _, input_value_source_ch, input_value_source, return_err, return_value| { + let cached = if *input_value_source == 0 { + let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); + return_value.send(value).expect(WRITE_CHAN_ERR); + value + } else { + *input_value_source + }; + assert_eq!(43, cached); + return_err + .send(Err(QbftError::CompareError)) + .expect(WRITE_CHAN_ERR); }, ); assert!(matches!( compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), - Ok(42) + (43, Err(QbftError::CompareError)) )); let (timer_tx, timer_rx) = mpmc::bounded(1); timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); let mut def = noop_definition(); - def.compare = Box::new(|_, _, _, _, return_err, _| { + def.compare = Arc::new(|_, _, _, _, return_err, _| { thread::sleep(Duration::from_millis(20)); let _ = return_err.send(Ok(())); }); assert!(matches!( - compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx), - Err(QbftError::TimeoutError) + compare(cts.token(), &def, &msg, &vs_rx, 44, &timer_rx), + (44, Err(QbftError::TimeoutError)) + )); +} + +#[test] +fn compare_timeout_does_not_wait_for_blocked_callback() { + let cts = CancellationTokenSource::new(); + let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None); + let (_vs_tx, vs_rx) = mpmc::bounded::(1); + let (timer_tx, timer_rx) = mpmc::bounded(1); + timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); + + let mut def = noop_definition(); + def.compare = Arc::new(|ct, _, _, _, return_err, _| { + while !ct.is_canceled() { + thread::sleep(Duration::from_millis(1)); + } + let _ = return_err.send(Ok(())); + }); + + let (result_tx, result_rx) = mpmc::bounded(1); + thread::spawn(move || { + result_tx + .send(compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx)) + .expect(WRITE_CHAN_ERR); + }); + + assert!(matches!( + result_rx + .recv_timeout(Duration::from_millis(100)) + .expect("compare must return on timer without waiting for blocked callback"), + (0, Err(QbftError::TimeoutError)) )); } +#[test] +fn compare_parent_cancel_cancels_callback_token() { + let cts = CancellationTokenSource::new(); + let token = cts.token().clone(); + let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None); + let (_vs_tx, vs_rx) = mpmc::bounded::(1); + let (timer_tx, timer_rx) = mpmc::bounded(1); + let (token_cancelled_tx, token_cancelled_rx) = mpmc::bounded(1); + + let mut def = noop_definition(); + def.compare = Arc::new(move |ct, _, _, _, return_err, _| { + while !ct.is_canceled() { + thread::sleep(Duration::from_millis(1)); + } + token_cancelled_tx.send(()).expect(WRITE_CHAN_ERR); + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + }); + + let (result_tx, result_rx) = mpmc::bounded(1); + thread::spawn(move || { + result_tx + .send(compare(&token, &def, &msg, &vs_rx, 0, &timer_rx)) + .expect(WRITE_CHAN_ERR); + }); + + thread::sleep(Duration::from_millis(10)); + cts.cancel(); + + match result_rx.recv_timeout(Duration::from_millis(100)) { + Ok(result) => assert!(matches!(result, (0, Ok(())))), + Err(err) => { + let _ = timer_tx.send(time::Instant::now()); + panic!("compare callback token must be canceled by parent token: {err}"); + } + } + token_cancelled_rx + .recv_timeout(Duration::from_millis(100)) + .expect("callback token must be canceled by parent token"); +} + fn buffer_by_source(msgs: &[Msg]) -> HashMap>> { let mut buffer = HashMap::new(); for msg in msgs { @@ -1503,6 +1623,9 @@ fn test_qbft_chain_split(test: ChainSplitTest) { let clock = FakeClock::new(time::Instant::now()); let cts = CancellationTokenSource::new(); let trace = Trace::new(); + // Keep peer iteration deterministic. These fake-clock tests assert exact + // rounds, and broadcast fanout order affects which node observes quorums + // first when tests run in parallel. let mut receives = BTreeMap::< i64, ( @@ -1529,7 +1652,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); }) }, - compare: Box::new( + compare: Arc::new( |_, qcommit, input_value_source_ch, input_value_source, return_err, return_value| { let leader_value_source = qcommit.value_source().expect("value source"); let local = if *input_value_source == 0 { @@ -1541,13 +1664,13 @@ fn test_qbft_chain_split(test: ChainSplitTest) { }; if leader_value_source != local { - return_value.send(local).expect(WRITE_CHAN_ERR); - send_compare_result(return_err, return_value, Err(QbftError::CompareError)); + return_err + .send(Err(QbftError::CompareError)) + .expect(WRITE_CHAN_ERR); return; } - return_value.send(local).expect(WRITE_CHAN_ERR); - send_compare_result(return_err, return_value, Ok(())); + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); }, ), nodes: N as i64, @@ -1637,11 +1760,9 @@ fn test_qbft_chain_split(test: ChainSplitTest) { let value_source = test.value_source[&i]; s.spawn(move || { let (v_tx, v_rx) = mpmc::bounded::(1); - let (vs_tx, vs_rx) = mpmc::unbounded::(); + let (vs_tx, vs_rx) = mpmc::bounded::(1); v_tx.send(value_source).expect(WRITE_CHAN_ERR); - for _ in 0..=MAX_ROUND { - vs_tx.send(value_source).expect(WRITE_CHAN_ERR); - } + vs_tx.send(value_source).expect(WRITE_CHAN_ERR); let run_result = panic::catch_unwind(AssertUnwindSafe(|| { qbft::run(&token, &defs, &transport, &instance, i, v_rx, vs_rx) })); @@ -1756,7 +1877,11 @@ fn test_qbft_chain_split(test: ChainSplitTest) { }; clock.advance(tick); let limit = if test.should_halt { - Duration::from_secs(600) + let max_round = u32::try_from(MAX_ROUND).expect("MAX_ROUND fits u32"); + let seconds = 1_u64 + .checked_shl(max_round.checked_add(1).expect("MAX_ROUND permits timeout limit")) + .expect("MAX_ROUND permits timeout limit"); + Duration::from_secs(seconds) } else { Duration::from_secs(60) }; diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 41d0a2d5..e444373b 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -20,7 +20,7 @@ #![allow(clippy::cast_possible_truncation)] #![allow(clippy::arithmetic_side_effects)] -use cancellation::CancellationToken; +use cancellation::{CancellationToken, CancellationTokenSource}; use crossbeam::channel as mpmc; use std::{ any, @@ -33,6 +33,19 @@ use std::{ type Result = std::result::Result; +const RUN_CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1); + +type CompareFn = dyn Fn( + /* ct */ &CancellationToken, + /* qcommit */ &Msg, + /* input_value_source_ch */ &mpmc::Receiver, + /* input_value_source */ &C, + /* return_err */ &mpmc::Sender>, + /* return_value */ &mpmc::Sender, + ) + Send + + Sync + + 'static; + #[derive(Debug, thiserror::Error)] pub enum QbftError { #[error("Timeout")] @@ -41,6 +54,9 @@ pub enum QbftError { #[error("Compare leader value with local value failed")] CompareError, + #[error("bug: expected only comparison or timeout error")] + UnexpectedCompareError, + #[error("Maximum round reached")] MaxRoundReached, @@ -99,20 +115,10 @@ where + Sync, >, - /// Called when leader proposes value and we compare it with our local - /// value. It's an opt-in feature that should instantly return `None` on - /// `return_err` channel if it is not turned on. - pub compare: Box< - dyn Fn( - /* ct */ &CancellationToken, - /* qcommit */ &Msg, - /* input_value_source_ch */ &mpmc::Receiver, - /* input_value_source */ &C, - /* return_err */ &mpmc::Sender>, - /* return_value */ &mpmc::Sender, - ) + Send - + Sync, - >, + /// Charon parity hook called when the leader proposes a value. The core + /// algorithm only runs this callback and reacts to its result; any + /// value-source comparison policy belongs to the caller. + pub compare: sync::Arc>, /// Called when consensus has been reached on a value. pub decide: Box< @@ -194,12 +200,14 @@ pub const MSG_DECIDED: MessageType = MessageType(5); const MSG_SENTINEL: MessageType = MessageType(6); // intentionally not public impl MessageType { + /// Returns true when the message type is one of the known QBFT wire types. pub fn valid(&self) -> bool { self.0 > MSG_UNKNOWN.0 && self.0 < MSG_SENTINEL.0 } } impl Display for MessageType { + /// Formats the message type using the stable wire/debug label. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self.0 { 0 => "unknown", @@ -260,6 +268,7 @@ pub const UPON_JUSTIFIED_DECIDED: UponRule = UponRule(7); pub const UPON_ROUND_TIMEOUT: UponRule = UponRule(8); // This is not triggered by a message, but by a timer. impl Display for UponRule { + /// Formats the upon-rule using the stable debug label. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self.0 { 0 => "nothing", @@ -284,11 +293,12 @@ struct DedupKey { round: i64, } -/// Executes the consensus algorithm until the context is closed. -/// The generic type `I` is the instance of consensus and can be anything. -/// The generic type `V` is the arbitrary data value being proposed; it only -/// requires an Equal method. The generic type `C` is the compare value, used to -/// compare leader's proposed value with local value and can be anything. +/// Executes one QBFT consensus instance until it decides, errors, or is +/// cancelled. +/// +/// `I` identifies the consensus instance, `V` is the comparable proposed value, +/// and `C` is the application value used by `Definition::compare` to compare a +/// leader proposal with the local input source. pub fn run( ct: &CancellationToken, d: &Definition, @@ -299,8 +309,9 @@ pub fn run( input_value_source_ch: mpmc::Receiver, ) -> Result<()> where - V: PartialEq + Eq + Hash + Default, - C: Clone + Send + Sync + Default, + I: Send + Sync + 'static, + V: PartialEq + Eq + Hash + Default + 'static, + C: Clone + Send + Sync + Default + 'static, { // === State === let round: Cell = Cell::new(1); @@ -414,7 +425,7 @@ where (timer_chan, stop_timer) = (d.new_timer)(round.get()); } - while !ct.is_canceled() { + loop { mpmc::select! { recv(input_value_ch) -> result => { let iv = result?; @@ -472,7 +483,7 @@ where stop_timer(); (timer_chan, stop_timer) = (d.new_timer)(round.get()); - let compare_result = compare( + let (new_input_value_source, compare_result) = compare( ct, d, &msg, @@ -480,12 +491,10 @@ where input_value_source.clone(), &timer_chan, ); + input_value_source = new_input_value_source; match compare_result { - Ok(v) => { - input_value_source = v; - broadcast_msg(MSG_PREPARE, &msg.value(), None)?; - } + Ok(()) => broadcast_msg(MSG_PREPARE, &msg.value(), None)?, Err(qbft_err) => { match qbft_err { QbftError::CompareError => { @@ -503,7 +512,7 @@ where broadcast_round_change()?; } - _ => panic!("bug: expected only {} or {} error", QbftError::CompareError, QbftError::TimeoutError) + _ => return Err(QbftError::UnexpectedCompareError), } } } @@ -579,7 +588,7 @@ where broadcast_round_change()?; } - default => { + default(RUN_CANCELLATION_POLL_INTERVAL) => { if ct.is_canceled() { break; } @@ -590,6 +599,9 @@ where Ok(()) } +/// The callback may cache the local input source and return success/failure. +/// This helper only preserves that callback result and lets the round timer win +/// if the callback blocks. fn compare( ct: &CancellationToken, d: &Definition, @@ -597,13 +609,14 @@ fn compare( input_value_source_ch: &mpmc::Receiver, input_value_source: C, timer_chan: &mpmc::Receiver, -) -> Result +) -> (C, Result<()>) where - V: PartialEq, - C: Clone + Send + Sync, + I: Send + Sync + 'static, + V: PartialEq + 'static, + C: Clone + Send + Sync + 'static, { let (compare_err_tx, compare_err_rx) = mpmc::bounded::>(1); - let (compare_value_tx, compare_value_rx) = mpmc::bounded::(1); + let (compare_value_tx, mut compare_value_rx) = mpmc::bounded::(1); // d.Compare has 2 roles: // 1. Read from the `input_value_source_ch` (if `input_value_source` is empty). @@ -615,46 +628,69 @@ where // If comparison or any other unexpected error occurs, the error is returned on // `compare_err` channel. - thread::scope(|s| { - let mut result = input_value_source.clone(); - let compare = &d.compare; + let mut result = input_value_source.clone(); + let compare = d.compare.clone(); + let compare_cts = sync::Arc::new(CancellationTokenSource::new()); + let compare_ct = compare_cts.token().clone(); + let msg = msg.clone(); + let input_value_source_ch = input_value_source_ch.clone(); + + thread::spawn(move || { + (compare)( + &compare_ct, + &msg, + &input_value_source_ch, + &input_value_source, + &compare_err_tx, + &compare_value_tx, + ); + }); - s.spawn(move || { - (compare)( - ct, - msg, - input_value_source_ch, - &input_value_source, - &compare_err_tx, - &compare_value_tx, - ); - }); - - loop { - mpmc::select! { - recv(compare_err_rx) -> msg => { - let err = msg?; - - return match err { - Ok(_) => Ok(result), - Err(_) => Err(QbftError::CompareError), - }; - }, - - recv(compare_value_rx) -> msg => { - let value = msg?; + loop { + mpmc::select! { + recv(compare_err_rx) -> msg => { + let err = match msg { + Ok(err) => err, + Err(err) => { + compare_cts.cancel(); + return (result, Err(QbftError::ChannelError(err))); + } + }; + while let Ok(value) = compare_value_rx.try_recv() { result = value; - }, + } - recv(timer_chan) -> msg => { - msg?; + compare_cts.cancel(); + return match err { + Ok(()) => (result, Ok(())), + Err(_) => (result, Err(QbftError::CompareError)), + }; + }, - return Err(QbftError::TimeoutError); + recv(compare_value_rx) -> msg => { + match msg { + Ok(value) => result = value, + Err(_) => compare_value_rx = mpmc::never(), + } + }, + + recv(timer_chan) -> msg => { + compare_cts.cancel(); + if let Err(err) = msg { + return (result, Err(QbftError::ChannelError(err))); + } + + return (result, Err(QbftError::TimeoutError)); + } + + default(RUN_CANCELLATION_POLL_INTERVAL) => { + if ct.is_canceled() { + compare_cts.cancel(); } } } - }) + } } /// Returns all messages from the provided round. @@ -1122,6 +1158,7 @@ where value: V, } +/// Returns all unique-source PREPARE quorums grouped by identical round and value. fn get_prepare_quorums( d: &Definition, all: &Vec>, From b752caaf8b9d09a8847c44c27141832f14dba1f8 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 10:26:17 +0700 Subject: [PATCH 03/34] fix: removed hard coded salt in tests --- crates/core/src/qbft/internal_test.rs | 49 +++++++++++++++++---------- crates/core/src/qbft/mod.rs | 3 +- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index ea072f94..5f32d92f 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -14,6 +14,15 @@ const WRITE_CHAN_ERR: &str = "Failed to write to channel"; const READ_CHAN_ERR: &str = "Failed to read from channel"; // Fixed seed for deterministic chain-split duplicate-message simulation. const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c; +const TEST_STREAM_DROP: u64 = 1; +const TEST_STREAM_DUPLICATE: u64 = 2; +const TEST_STREAM_JITTER: u64 = 3; +const TEST_STREAM_DELAY_ORDER: u64 = 4; +const TEST_STREAM_MSG_TYPE: u64 = 10; +const TEST_STREAM_MSG_ROUND: u64 = 11; +const TEST_STREAM_MSG_VALUE: u64 = 12; +const TEST_STREAM_MSG_PREPARED_ROUND: u64 = 13; +const TEST_STREAM_MSG_PREPARED_VALUE: u64 = 14; type RunOutcome = std::thread::Result>; type TestMsgRef = Msg; @@ -650,7 +659,8 @@ fn bcast( return; } - let delta_ms = (f64::from(jitter_ms) * deterministic_unit(seed, &msg, 0, 3)) as i32; + let delta_ms = + (f64::from(jitter_ms) * deterministic_unit(seed, &msg, 0, TEST_STREAM_JITTER)) as i32; let delay = Duration::from_millis((jitter_ms + delta_ms) as u64); trace.push(format!( "{:?} {} => {}@{} (bcast delay {:?})", @@ -660,7 +670,7 @@ fn bcast( msg.round(), delay )); - let key = deterministic_msg_u64(seed, &msg, 0, 4); + let key = deterministic_msg_u64(seed, &msg, 0, TEST_STREAM_DELAY_ORDER); broadcast .send(BroadcastEvent::Delayed(PendingBroadcast { deliver_at: clock.elapsed() + delay, @@ -706,7 +716,7 @@ fn fanout_broadcast( } if let Some(p) = drop_prob.get(&msg.source()) { - if deterministic_unit(seed, &msg, *target, 1) < *p { + if deterministic_unit(seed, &msg, *target, TEST_STREAM_DROP) < *p { trace.push(format!( "{:?} {} => {}@{} => {} (dropped)", clock.elapsed(), @@ -722,7 +732,7 @@ fn fanout_broadcast( out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); broadcasts += 1; - if deterministic_unit(seed, &msg, *target, 2) < 0.1 { + if deterministic_unit(seed, &msg, *target, TEST_STREAM_DUPLICATE) < 0.1 { out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); broadcasts += 1; trace.push(format!( @@ -748,25 +758,26 @@ fn random_msg(instance: i64, peer_idx: i64, seed: u64, counter: u64) -> Msg, target: i64, salt: u64) -> f64 { - let value = deterministic_msg_u64(seed, msg, target, salt) >> 11; +fn deterministic_unit(seed: u64, msg: &Msg, target: i64, stream_id: u64) -> f64 { + let value = deterministic_msg_u64(seed, msg, target, stream_id) >> 11; value as f64 / ((1_u64 << 53) as f64) } -fn deterministic_msg_u64(seed: u64, msg: &Msg, target: i64, salt: u64) -> u64 { - let mut value = splitmix64(seed ^ salt); +fn deterministic_msg_u64(seed: u64, msg: &Msg, target: i64, stream_id: u64) -> u64 { + let mut value = splitmix64(seed ^ stream_id); value = splitmix64(value ^ i64_to_u64(msg.type_().0)); value = splitmix64(value ^ i64_to_u64(msg.instance())); value = splitmix64(value ^ i64_to_u64(msg.source())); @@ -778,14 +789,14 @@ fn deterministic_msg_u64(seed: u64, msg: &Msg, target: i64, salt: splitmix64(value ^ i64_to_u64(target)) } -fn deterministic_range(seed: u64, counter: u64, salt: u64, upper: usize) -> usize { +fn deterministic_range(seed: u64, counter: u64, stream_id: u64, upper: usize) -> usize { let upper = u64::try_from(upper).expect("upper fits in u64"); - usize::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in usize") + usize::try_from(splitmix64(seed ^ counter ^ stream_id) % upper).expect("range fits in usize") } -fn deterministic_i64(seed: u64, counter: u64, salt: u64, upper: i64) -> i64 { +fn deterministic_i64(seed: u64, counter: u64, stream_id: u64, upper: i64) -> i64 { let upper = u64::try_from(upper).expect("upper is positive"); - i64::try_from(splitmix64(seed ^ counter ^ salt) % upper).expect("range fits in i64") + i64::try_from(splitmix64(seed ^ counter ^ stream_id) % upper).expect("range fits in i64") } fn splitmix64(mut value: u64) -> u64 { @@ -1786,7 +1797,9 @@ fn test_qbft_chain_split(test: ChainSplitTest) { continue; } out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); - if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, 1) < 0.1 { + if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, TEST_STREAM_DUPLICATE) + < 0.1 + { out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); } } diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index e444373b..78f111d5 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -1158,7 +1158,8 @@ where value: V, } -/// Returns all unique-source PREPARE quorums grouped by identical round and value. +/// Returns all unique-source PREPARE quorums grouped by identical round and +/// value. fn get_prepare_quorums( d: &Definition, all: &Vec>, From ed912eebf1bd871d3da44e884832a602752cf4d3 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 12:52:18 +0700 Subject: [PATCH 04/34] fix: return error on Context cancelled --- crates/core/src/qbft/internal_test.rs | 4 ++-- crates/core/src/qbft/mod.rs | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 5f32d92f..87ef8b98 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1281,7 +1281,7 @@ fn duplicate_pre_prepare_rules() { input_value_source_ch, ); - assert!(res.is_ok()); + assert!(matches!(res, Err(QbftError::ContextCanceled))); } #[test] @@ -1309,7 +1309,7 @@ fn idle_run_returns_when_cancelled() { done_rx .recv_timeout(Duration::from_millis(100)) .expect("idle run must unblock on cancellation"), - Ok(()) + Err(QbftError::ContextCanceled) )); } diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 78f111d5..9cf6d163 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -57,6 +57,9 @@ pub enum QbftError { #[error("bug: expected only comparison or timeout error")] UnexpectedCompareError, + #[error("context canceled")] + ContextCanceled, + #[error("Maximum round reached")] MaxRoundReached, @@ -590,13 +593,11 @@ where default(RUN_CANCELLATION_POLL_INTERVAL) => { if ct.is_canceled() { - break; + return Err(QbftError::ContextCanceled); } } } } - - Ok(()) } /// The callback may cache the local input source and return success/failure. From e01f64fc4969b7e2746e1d8ecc1f01c3375a04e1 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 14:13:28 +0700 Subject: [PATCH 05/34] fix: hash from string, not magic number --- crates/core/src/qbft/internal_test.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 87ef8b98..e2e1b509 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -12,8 +12,8 @@ use std::{ const WRITE_CHAN_ERR: &str = "Failed to write to channel"; const READ_CHAN_ERR: &str = "Failed to read from channel"; -// Fixed seed for deterministic chain-split duplicate-message simulation. -const CHAIN_SPLIT_SEED: u64 = 0x4348_4149_4e53_504c; +const TEST_SEED_LABEL: &str = "qbft-test"; +const CHAIN_SPLIT_SEED_LABEL: &str = "chain-split"; const TEST_STREAM_DROP: u64 = 1; const TEST_STREAM_DUPLICATE: u64 = 2; const TEST_STREAM_JITTER: u64 = 3; @@ -583,7 +583,7 @@ fn outcome_is_error(outcome: &RunOutcome, expected: fn(&QbftError) -> bool) -> b } fn test_seed(test: &Test) -> u64 { - let mut seed = 0x5142_4654_5445_5354_u64; + let mut seed = seed_from_label(TEST_SEED_LABEL); seed ^= test.instance as u64; seed ^= u64::from(test.const_period) << 8; seed ^= (test.bcast_jitter_ms as u64) << 16; @@ -595,6 +595,14 @@ fn test_seed(test: &Test) -> u64 { seed } +fn seed_from_label(label: &str) -> u64 { + // Small rolling-hash multiplier; only separates deterministic test labels, + // not used for cryptographic randomness or protocol behavior. + label.bytes().fold(0_u64, |seed, byte| { + seed.wrapping_mul(131).wrapping_add(u64::from(byte)) + }) +} + /// Construct a leader election function. fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone { move |instance: &i64, round: i64, process: i64| -> bool { (instance + round) % n == process } @@ -1797,8 +1805,12 @@ fn test_qbft_chain_split(test: ChainSplitTest) { continue; } out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); - if deterministic_unit(CHAIN_SPLIT_SEED, &msg, *target, TEST_STREAM_DUPLICATE) - < 0.1 + if deterministic_unit( + seed_from_label(CHAIN_SPLIT_SEED_LABEL), + &msg, + *target, + TEST_STREAM_DUPLICATE, + ) < 0.1 { out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); } From b22fb417801ce1d90dab9977ce2da76781e244e7 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 14:15:06 +0700 Subject: [PATCH 06/34] fix: one shot cancel when parent is cancelled --- crates/core/src/qbft/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 9cf6d163..4ee494b3 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -635,6 +635,7 @@ where let compare_ct = compare_cts.token().clone(); let msg = msg.clone(); let input_value_source_ch = input_value_source_ch.clone(); + let mut compare_parent_cancelled = false; thread::spawn(move || { (compare)( @@ -686,8 +687,9 @@ where } default(RUN_CANCELLATION_POLL_INTERVAL) => { - if ct.is_canceled() { + if !compare_parent_cancelled && ct.is_canceled() { compare_cts.cancel(); + compare_parent_cancelled = true; } } } From 081d8a47862fec96e56b644f2b6a6650989e1357 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 14:18:48 +0700 Subject: [PATCH 07/34] fix: fix make_is_leader test --- crates/core/src/qbft/internal_test.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index e2e1b509..c52ad70c 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -605,7 +605,11 @@ fn seed_from_label(label: &str) -> u64 { /// Construct a leader election function. fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone { - move |instance: &i64, round: i64, process: i64| -> bool { (instance + round) % n == process } + move |instance: &i64, round: i64, process: i64| -> bool { + // Go's helper uses 0-indexed process IDs. These Rust tests use 1..=n, + // so translate the same modulo schedule into the local process range. + ((instance + round - 1).rem_euclid(n)) + 1 == process + } } /// Returns a new message to be broadcast. @@ -998,7 +1002,11 @@ fn stagger_start_const() { fn very_delayed_value_exp() { test_qbft(Test { instance: 3, - value_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]), + value_delay: HashMap::from([ + (1, Duration::from_secs(5)), + (2, Duration::from_secs(10)), + (4, Duration::from_secs(5)), + ]), decide_round: 4, ..Default::default() }); From 1a0a33700001f6a8ffa8452d1bd5a27b9e024905 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 14:24:10 +0700 Subject: [PATCH 08/34] fix: minors naming and comments --- crates/core/src/qbft/fake_clock.rs | 8 ++++++-- crates/core/src/qbft/mod.rs | 8 +++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 0a3ab477..8392e7f3 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -123,7 +123,9 @@ fn multiple_threads_timers() { clock.advance(Duration::from_secs(6)); }); - assert_eq!(vec![true, true], done_rx.try_iter().collect::>()); + let done = done_rx.try_iter().collect::>(); + assert_eq!(2, done.len()); + assert!(done.into_iter().all(|done| done)); assert_eq!(Duration::from_secs(10), clock.elapsed()); } @@ -150,7 +152,9 @@ fn multiple_threads_cancellation() { clock.cancel(); }); - assert_eq!(vec![true, true], done_rx.try_iter().collect::>()); + let done = done_rx.try_iter().collect::>(); + assert_eq!(2, done.len()); + assert!(done.into_iter().all(|done| done)); assert_eq!(Duration::ZERO, clock.elapsed()); } diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 4ee494b3..e064292d 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -33,7 +33,7 @@ use std::{ type Result = std::result::Result; -const RUN_CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1); +const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1); type CompareFn = dyn Fn( /* ct */ &CancellationToken, @@ -591,7 +591,7 @@ where broadcast_round_change()?; } - default(RUN_CANCELLATION_POLL_INTERVAL) => { + default(CANCELLATION_POLL_INTERVAL) => { if ct.is_canceled() { return Err(QbftError::ContextCanceled); } @@ -637,6 +637,8 @@ where let input_value_source_ch = input_value_source_ch.clone(); let mut compare_parent_cancelled = false; + // Detached by design, matching Charon's goroutine behavior: if a caller-provided + // compare callback ignores cancellation and never reports, it may outlive this call. thread::spawn(move || { (compare)( &compare_ct, @@ -686,7 +688,7 @@ where return (result, Err(QbftError::TimeoutError)); } - default(RUN_CANCELLATION_POLL_INTERVAL) => { + default(CANCELLATION_POLL_INTERVAL) => { if !compare_parent_cancelled && ct.is_canceled() { compare_cts.cancel(); compare_parent_cancelled = true; From 44b9a1335a25924db219d5fff47b62cb4f8433bc Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 14:48:07 +0700 Subject: [PATCH 09/34] fix: linter --- crates/core/src/qbft/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index e064292d..5c21e9cc 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -637,8 +637,9 @@ where let input_value_source_ch = input_value_source_ch.clone(); let mut compare_parent_cancelled = false; - // Detached by design, matching Charon's goroutine behavior: if a caller-provided - // compare callback ignores cancellation and never reports, it may outlive this call. + // Detached by design, matching Charon's goroutine behavior: if a + // caller-provided compare callback ignores cancellation and never reports, + // it may outlive this call. thread::spawn(move || { (compare)( &compare_ct, From cc59baf6ca395e6acdb9a75a5467ea871bfc181b Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 18:28:14 +0700 Subject: [PATCH 10/34] fix: early cancel on the loop --- crates/core/src/qbft/internal_test.rs | 52 +++++++++++++++++++++++++++ crates/core/src/qbft/mod.rs | 4 +++ 2 files changed, 56 insertions(+) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index c52ad70c..8a150546 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1329,6 +1329,58 @@ fn idle_run_returns_when_cancelled() { )); } +#[test] +fn run_cancels_under_hot_receive_stream() { + let cts = CancellationTokenSource::new(); + let token = cts.token().clone(); + let mut def = noop_definition(); + def.nodes = 4; + def.fifo_limit = 100; + + let (receive_tx, receive_rx) = mpmc::bounded::>(1024); + let transport = Transport { + receive: receive_rx, + ..noop_transport() + }; + let (_input_tx, input_rx) = mpmc::bounded::(1); + let (_source_tx, source_rx) = mpmc::bounded::(1); + let (done_tx, done_rx) = mpmc::bounded(1); + + let sender_cts = CancellationTokenSource::new(); + let sender_token = sender_cts.token().clone(); + let sender = thread::spawn(move || { + let msg = new_msg(MSG_PREPARE, 0, 1, 2, 1, 0, 0, 0, None); + while !sender_token.is_canceled() { + match receive_tx.try_send(msg.clone()) { + Ok(()) => {} + Err(mpmc::TrySendError::Full(_)) => thread::yield_now(), + Err(mpmc::TrySendError::Disconnected(_)) => break, + } + } + }); + + thread::spawn(move || { + done_tx + .send(qbft::run( + &token, &def, &transport, &0, 1, input_rx, source_rx, + )) + .expect(WRITE_CHAN_ERR); + }); + + thread::sleep(Duration::from_millis(10)); + cts.cancel(); + + assert!(matches!( + done_rx + .recv_timeout(Duration::from_millis(100)) + .expect("run must unblock on cancellation even with a hot receive stream"), + Err(QbftError::ContextCanceled) + )); + + sender_cts.cancel(); + sender.join().expect("sender thread must exit"); +} + #[test] fn classify_rules() { let mut def = noop_definition(); diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 5c21e9cc..c192d240 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -429,6 +429,10 @@ where } loop { + if ct.is_canceled() { + return Err(QbftError::ContextCanceled); + } + mpmc::select! { recv(input_value_ch) -> result => { let iv = result?; From ed82b6e6a09711696f8f8478a012f62153749c37 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 18:32:19 +0700 Subject: [PATCH 11/34] fix: context cancel in compare --- crates/core/src/qbft/internal_test.rs | 70 ++++++++++++++++++++++++++- crates/core/src/qbft/mod.rs | 15 ++++-- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 8a150546..4a759253 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1610,7 +1610,7 @@ fn compare_parent_cancel_cancels_callback_token() { thread::sleep(Duration::from_millis(1)); } token_cancelled_tx.send(()).expect(WRITE_CHAN_ERR); - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + let _ = return_err.send(Ok(())); }); let (result_tx, result_rx) = mpmc::bounded(1); @@ -1624,7 +1624,7 @@ fn compare_parent_cancel_cancels_callback_token() { cts.cancel(); match result_rx.recv_timeout(Duration::from_millis(100)) { - Ok(result) => assert!(matches!(result, (0, Ok(())))), + Ok(result) => assert!(matches!(result, (0, Err(QbftError::ContextCanceled)))), Err(err) => { let _ = timer_tx.send(time::Instant::now()); panic!("compare callback token must be canceled by parent token: {err}"); @@ -1635,6 +1635,72 @@ fn compare_parent_cancel_cancels_callback_token() { .expect("callback token must be canceled by parent token"); } +#[test] +fn run_parent_cancel_during_compare_does_not_prepare() { + const LEADER: i64 = 1; + const PROCESS: i64 = 2; + + let cts = CancellationTokenSource::new(); + let token = cts.token().clone(); + let msg = new_msg(MSG_PRE_PREPARE, 0, LEADER, 1, 7, 11, 0, 0, None); + let (receive_tx, receive_rx) = mpmc::bounded(1); + receive_tx.send(msg).expect(WRITE_CHAN_ERR); + + let (compare_started_tx, compare_started_rx) = mpmc::bounded(1); + let (compare_cancelled_tx, compare_cancelled_rx) = mpmc::bounded(1); + let mut def = noop_definition(); + def.nodes = 4; + def.fifo_limit = 100; + def.is_leader = Box::new(|_, _, process| process == LEADER); + def.compare = Arc::new(move |ct, _, _, _, return_err, _| { + compare_started_tx.send(()).expect(WRITE_CHAN_ERR); + while !ct.is_canceled() { + thread::sleep(Duration::from_millis(1)); + } + compare_cancelled_tx.send(()).expect(WRITE_CHAN_ERR); + let _ = return_err.send(Ok(())); + }); + + let (broadcast_tx, broadcast_rx) = mpmc::bounded(1); + let transport = Transport { + broadcast: Box::new(move |_, type_, _, _, _, _, _, _, _| { + broadcast_tx.send(type_).expect(WRITE_CHAN_ERR); + Ok(()) + }), + receive: receive_rx, + }; + let (_input_tx, input_rx) = mpmc::bounded::(1); + let (_source_tx, source_rx) = mpmc::bounded::(1); + let (done_tx, done_rx) = mpmc::bounded(1); + + thread::spawn(move || { + done_tx + .send(qbft::run( + &token, &def, &transport, &0, PROCESS, input_rx, source_rx, + )) + .expect(WRITE_CHAN_ERR); + }); + + compare_started_rx + .recv_timeout(Duration::from_millis(100)) + .expect("compare must start"); + cts.cancel(); + + assert!(matches!( + done_rx + .recv_timeout(Duration::from_millis(100)) + .expect("run must return parent cancellation from compare"), + Err(QbftError::ContextCanceled) + )); + compare_cancelled_rx + .recv_timeout(Duration::from_millis(100)) + .expect("compare callback token must be canceled"); + assert!( + broadcast_rx.try_iter().all(|type_| type_ != MSG_PREPARE), + "parent cancellation during compare must not broadcast PREPARE" + ); +} + fn buffer_by_source(msgs: &[Msg]) -> HashMap>> { let mut buffer = HashMap::new(); for msg in msgs { diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index c192d240..cf43da2d 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -519,6 +519,7 @@ where broadcast_round_change()?; } + QbftError::ContextCanceled => return Err(QbftError::ContextCanceled), _ => return Err(QbftError::UnexpectedCompareError), } } @@ -639,7 +640,6 @@ where let compare_ct = compare_cts.token().clone(); let msg = msg.clone(); let input_value_source_ch = input_value_source_ch.clone(); - let mut compare_parent_cancelled = false; // Detached by design, matching Charon's goroutine behavior: if a // caller-provided compare callback ignores cancellation and never reports, @@ -656,6 +656,11 @@ where }); loop { + if ct.is_canceled() { + compare_cts.cancel(); + return (result, Err(QbftError::ContextCanceled)); + } + mpmc::select! { recv(compare_err_rx) -> msg => { let err = match msg { @@ -671,6 +676,10 @@ where } compare_cts.cancel(); + if ct.is_canceled() { + return (result, Err(QbftError::ContextCanceled)); + } + return match err { Ok(()) => (result, Ok(())), Err(_) => (result, Err(QbftError::CompareError)), @@ -694,9 +703,9 @@ where } default(CANCELLATION_POLL_INTERVAL) => { - if !compare_parent_cancelled && ct.is_canceled() { + if ct.is_canceled() { compare_cts.cancel(); - compare_parent_cancelled = true; + return (result, Err(QbftError::ContextCanceled)); } } } From 7911c23493611716d05d510d4ce30e2be27e89e9 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 18:39:21 +0700 Subject: [PATCH 12/34] fix: validate definition --- crates/core/src/qbft/internal_test.rs | 52 ++++++++++++++++++++++++--- crates/core/src/qbft/mod.rs | 24 +++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 4a759253..b8c14b0b 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1248,12 +1248,14 @@ fn duplicate_pre_prepare_rules() { 0, 0, 0, - // Justification not required since nodes and quorum both 0. + // Round 2 is accepted after round 1 records a compare failure. None, ) }; let mut def = noop_definition(); + def.nodes = 4; + def.fifo_limit = 100; def.is_leader = Box::new(|_, _, process| process == LEADER); def.log_upon_rule = Box::new(move |_, _, round, msg, upon_rule| { println!("UponRule: rule={} round={} ", upon_rule, msg.round()); @@ -1271,8 +1273,13 @@ fn duplicate_pre_prepare_rules() { panic!("unexpected round {}", round); }); - def.compare = Arc::new(|_, _, _, _, return_err, _| { - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + def.compare = Arc::new(|_, msg, _, _, return_err, _| { + let result = if msg.round() == 1 { + Err(QbftError::CompareError) + } else { + Ok(()) + }; + return_err.send(result).expect(WRITE_CHAN_ERR); }); let (r_chan_tx, r_chan_rx) = mpmc::bounded::>(2); @@ -1304,7 +1311,9 @@ fn duplicate_pre_prepare_rules() { fn idle_run_returns_when_cancelled() { let cts = CancellationTokenSource::new(); let token = cts.token().clone(); - let def = noop_definition(); + let mut def = noop_definition(); + def.nodes = 4; + def.fifo_limit = 100; let transport = noop_transport(); let (_input_tx, input_rx) = mpmc::bounded::(1); let (_source_tx, source_rx) = mpmc::bounded::(1); @@ -1329,6 +1338,41 @@ fn idle_run_returns_when_cancelled() { )); } +#[test] +fn invalid_definition_rejected() { + let cts = CancellationTokenSource::new(); + let transport = noop_transport(); + let (_input_tx, input_rx) = mpmc::bounded::(1); + let (_source_tx, source_rx) = mpmc::bounded::(1); + + let mut def = noop_definition(); + def.nodes = 0; + def.fifo_limit = 1; + assert!(matches!( + qbft::run( + cts.token(), + &def, + &transport, + &0, + 1, + input_rx.clone(), + source_rx.clone(), + ), + Err(QbftError::InvalidDefinition( + "nodes must be greater than zero" + )) + )); + + def.nodes = 4; + def.fifo_limit = 0; + assert!(matches!( + qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx), + Err(QbftError::InvalidDefinition( + "fifo_limit must be greater than zero" + )) + )); +} + #[test] fn run_cancels_under_hot_receive_stream() { let cts = CancellationTokenSource::new(); diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index cf43da2d..3e0c6ba6 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -66,6 +66,9 @@ pub enum QbftError { #[error("Zero input value not supported")] ZeroInputValue, + #[error("Invalid definition: {0}")] + InvalidDefinition(&'static str), + #[error("Failed to read from channel: {0}")] ChannelError(#[from] mpmc::RecvError), } @@ -316,6 +319,8 @@ where V: PartialEq + Eq + Hash + Default + 'static, C: Clone + Send + Sync + Default + 'static, { + validate_definition(d)?; + // === State === let round: Cell = Cell::new(1); let input_value: RefCell = RefCell::new(Default::default()); @@ -605,6 +610,25 @@ where } } +fn validate_definition(d: &Definition) -> Result<()> +where + V: PartialEq, +{ + if d.nodes <= 0 { + return Err(QbftError::InvalidDefinition( + "nodes must be greater than zero", + )); + } + + if d.fifo_limit <= 0 { + return Err(QbftError::InvalidDefinition( + "fifo_limit must be greater than zero", + )); + } + + Ok(()) +} + /// The callback may cache the local input source and return success/failure. /// This helper only preserves that callback result and lets the round timer win /// if the callback blocks. From 829ba5f4f8a44bd0b4d4d0b45024f1a7f9eea2ac Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 18:43:51 +0700 Subject: [PATCH 13/34] fix: add check pr < r --- crates/core/src/qbft/internal_test.rs | 25 ++++++++++++++++++++++ crates/core/src/qbft/mod.rs | 30 +++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index b8c14b0b..33f296b5 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1493,6 +1493,31 @@ fn justified_qrc_j1_and_j2() { ]; assert_eq!(Some(7), contains_justified_qrc(&def, &j2, 2)); assert!(get_justified_qrc(&def, &j2, 2).unwrap().len() >= 6); + + let invalid_pr = vec![ + new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 2, 7, None), + new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 2, 7, None), + new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 2, 7, None), + new_msg(MSG_PREPARE, 0, 1, 2, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 2, 2, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 3, 2, 7, 0, 0, 0, None), + ]; + assert_eq!(None, contains_justified_qrc(&def, &invalid_pr, 2)); + assert!(get_justified_qrc(&def, &invalid_pr, 2).is_none()); +} + +#[test] +fn round_change_prepared_round_must_be_before_round() { + let mut def = noop_definition(); + def.nodes = 4; + let prepares = vec![ + new_msg(MSG_PREPARE, 0, 1, 2, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 2, 2, 7, 0, 0, 0, None), + new_msg(MSG_PREPARE, 0, 3, 2, 7, 0, 0, 0, None), + ]; + let invalid = new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 2, 7, Some(&prepares)); + + assert!(!is_justified_round_change(&def, &invalid)); } #[test] diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 3e0c6ba6..e1786459 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -913,6 +913,13 @@ where let pr = msg.prepared_round(); let pv = msg.prepared_value(); + // The IBFT paper requires ROUND-CHANGE prepared_round to be lower than the + // target round. Go core currently omits this check, but valid Charon traffic + // already satisfies it. + if !valid_round_change_prepared_round(msg) { + return false; + } + if prepares.is_empty() { return pr == 0 && pv == Default::default(); } @@ -946,6 +953,14 @@ where true } +fn valid_round_change_prepared_round(msg: &Msg) -> bool +where + V: PartialEq, +{ + let pr = msg.prepared_round(); + pr >= 0 && pr < msg.round() +} + /// Returns true if the decided message is justified by quorum COMMIT messages /// of identical round and value. fn is_justified_decided(d: &Definition, msg: &Msg) -> bool @@ -1018,6 +1033,11 @@ where if (qrc.len() as i64) < d.quorum() { return None; } + + if qrc.iter().any(|rc| !valid_round_change_prepared_round(rc)) { + return None; + } + // No need to calculate J1 or J2 for all possible combinations, // since justification should only contain one. @@ -1111,7 +1131,10 @@ where return Some(qrc); } - let round_changes = filter_round_change(all, round); + let round_changes = filter_round_change(all, round) + .into_iter() + .filter(valid_round_change_prepared_round) + .collect::>(); for prepares in get_prepare_quorums(d, all) { // See if we have quorum ROUND-CHANGE with HIGHEST_PREPARED(qrc) == @@ -1259,7 +1282,10 @@ where let null_pr = Default::default(); let null_pv = Some(&Default::default()); - let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv); + let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv) + .into_iter() + .filter(valid_round_change_prepared_round) + .collect::>(); ( justification.clone(), From d834463c68aa53f399ed26002c66aae0561a8761 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 18 May 2026 18:46:13 +0700 Subject: [PATCH 14/34] fix: test and document on run --- crates/core/src/qbft/internal_test.rs | 11 +++++++++++ crates/core/src/qbft/mod.rs | 7 +++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 33f296b5..b8fd8f8a 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -2013,6 +2013,17 @@ fn test_qbft_chain_split(test: ChainSplitTest) { } recv(result_chan_rx) -> res => { let q_commit = res.expect(READ_CHAN_ERR); + if test.should_halt { + cts.cancel(); + clock.cancel(); + panic!( + "halt case unexpectedly decided: q_commit={:?} elapsed={:?}\n{}", + q_commit, + clock.elapsed(), + trace.dump() + ); + } + for commit in q_commit.clone() { for previous in results.values() { if previous.value() != commit.value() { diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index e1786459..71eb57b7 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -299,8 +299,11 @@ struct DedupKey { round: i64, } -/// Executes one QBFT consensus instance until it decides, errors, or is -/// cancelled. +/// Executes one QBFT consensus instance until it errors or is cancelled. +/// +/// Decisions are reported via `Definition::decide`. After deciding, `run` +/// remains active so it can answer later `ROUND_CHANGE` messages with `DECIDED` +/// catch-up messages. /// /// `I` identifies the consensus instance, `V` is the comparable proposed value, /// and `C` is the application value used by `Definition::compare` to compare a From 739e3b389b5b6a65cdf717498307ef4dc179e67a Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 11:04:13 +0700 Subject: [PATCH 15/34] fix: compare callback failed should timeout --- crates/core/src/qbft/internal_test.rs | 37 +++++++++++++++++++++++++++ crates/core/src/qbft/mod.rs | 8 +++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index b8fd8f8a..495de30e 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1664,6 +1664,43 @@ fn compare_timeout_does_not_wait_for_blocked_callback() { )); } +#[test] +fn compare_callback_exit_without_status_waits_for_timer() { + let cts = CancellationTokenSource::new(); + let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None); + let (_vs_tx, vs_rx) = mpmc::bounded::(1); + let (timer_tx, timer_rx) = mpmc::bounded(1); + let (callback_done_tx, callback_done_rx) = mpmc::bounded(1); + + let mut def = noop_definition(); + def.compare = Arc::new(move |_, _, _, _, _, _| { + callback_done_tx.send(()).expect(WRITE_CHAN_ERR); + }); + + let (result_tx, result_rx) = mpmc::bounded(1); + thread::spawn(move || { + result_tx + .send(compare(cts.token(), &def, &msg, &vs_rx, 0, &timer_rx)) + .expect(WRITE_CHAN_ERR); + }); + + callback_done_rx + .recv_timeout(Duration::from_millis(100)) + .expect("compare callback must exit"); + assert!( + result_rx.try_recv().is_err(), + "compare must wait for timer/cancel if callback exits without status" + ); + + timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); + assert!(matches!( + result_rx + .recv_timeout(Duration::from_millis(100)) + .expect("compare must return after timer fires"), + (0, Err(QbftError::TimeoutError)) + )); +} + #[test] fn compare_parent_cancel_cancels_callback_token() { let cts = CancellationTokenSource::new(); diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 71eb57b7..0c53de21 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -648,7 +648,7 @@ where V: PartialEq + 'static, C: Clone + Send + Sync + 'static, { - let (compare_err_tx, compare_err_rx) = mpmc::bounded::>(1); + let (compare_err_tx, mut compare_err_rx) = mpmc::bounded::>(1); let (compare_value_tx, mut compare_value_rx) = mpmc::bounded::(1); // d.Compare has 2 roles: @@ -692,9 +692,9 @@ where recv(compare_err_rx) -> msg => { let err = match msg { Ok(err) => err, - Err(err) => { - compare_cts.cancel(); - return (result, Err(QbftError::ChannelError(err))); + Err(_) => { + compare_err_rx = mpmc::never(); + continue; } }; From 27744cff60b58a851631d9aec036618765536213 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 11:13:01 +0700 Subject: [PATCH 16/34] fix: test use timeout channel instead of sleep --- crates/core/src/qbft/internal_test.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 495de30e..08dd1960 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1708,10 +1708,12 @@ fn compare_parent_cancel_cancels_callback_token() { let msg = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 11, 0, 0, None); let (_vs_tx, vs_rx) = mpmc::bounded::(1); let (timer_tx, timer_rx) = mpmc::bounded(1); + let (compare_started_tx, compare_started_rx) = mpmc::bounded(1); let (token_cancelled_tx, token_cancelled_rx) = mpmc::bounded(1); let mut def = noop_definition(); def.compare = Arc::new(move |ct, _, _, _, return_err, _| { + compare_started_tx.send(()).expect(WRITE_CHAN_ERR); while !ct.is_canceled() { thread::sleep(Duration::from_millis(1)); } @@ -1726,7 +1728,9 @@ fn compare_parent_cancel_cancels_callback_token() { .expect(WRITE_CHAN_ERR); }); - thread::sleep(Duration::from_millis(10)); + compare_started_rx + .recv_timeout(Duration::from_millis(100)) + .expect("compare must start"); cts.cancel(); match result_rx.recv_timeout(Duration::from_millis(100)) { From 0bd6a4f1ae8639e3f2f651d11cc5289f3a349235 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 11:31:20 +0700 Subject: [PATCH 17/34] fix: add comment on cancellation poll interval --- crates/core/src/qbft/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 0c53de21..3e5c2062 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -33,7 +33,10 @@ use std::{ type Result = std::result::Result; -const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(1); +// The `cancellation` crate is callback-based, not channel-based, so it cannot +// be used directly in `crossbeam::select!`. Keep polling coarse: QBFT shutdown +// does not need sub-millisecond latency, and idle instances should stay cheap. +const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(50); type CompareFn = dyn Fn( /* ct */ &CancellationToken, From e988c35f4c14899f2582d412d5f9206eaada1057 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 11:36:35 +0700 Subject: [PATCH 18/34] fix: use enum for invalid defnintion error --- crates/core/src/qbft/internal_test.rs | 8 ++------ crates/core/src/qbft/mod.rs | 17 +++++++++-------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 08dd1960..29f44138 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1358,18 +1358,14 @@ fn invalid_definition_rejected() { input_rx.clone(), source_rx.clone(), ), - Err(QbftError::InvalidDefinition( - "nodes must be greater than zero" - )) + Err(QbftError::InvalidNodes { nodes: 0 }) )); def.nodes = 4; def.fifo_limit = 0; assert!(matches!( qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx), - Err(QbftError::InvalidDefinition( - "fifo_limit must be greater than zero" - )) + Err(QbftError::InvalidFifoLimit { fifo_limit: 0 }) )); } diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 3e5c2062..071823c2 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -69,8 +69,11 @@ pub enum QbftError { #[error("Zero input value not supported")] ZeroInputValue, - #[error("Invalid definition: {0}")] - InvalidDefinition(&'static str), + #[error("invalid node count: must be greater than zero, got {nodes}")] + InvalidNodes { nodes: i64 }, + + #[error("invalid FIFO limit: must be greater than zero, got {fifo_limit}")] + InvalidFifoLimit { fifo_limit: i64 }, #[error("Failed to read from channel: {0}")] ChannelError(#[from] mpmc::RecvError), @@ -621,15 +624,13 @@ where V: PartialEq, { if d.nodes <= 0 { - return Err(QbftError::InvalidDefinition( - "nodes must be greater than zero", - )); + return Err(QbftError::InvalidNodes { nodes: d.nodes }); } if d.fifo_limit <= 0 { - return Err(QbftError::InvalidDefinition( - "fifo_limit must be greater than zero", - )); + return Err(QbftError::InvalidFifoLimit { + fifo_limit: d.fifo_limit, + }); } Ok(()) From 6c65b3291d80af3cefc00f3d4d2e367041b8ccd0 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 11:59:03 +0700 Subject: [PATCH 19/34] fix: add more test on check valid round --- crates/core/src/qbft/internal_test.rs | 126 ++++++++++++++++++++++++-- crates/core/src/qbft/mod.rs | 9 +- 2 files changed, 122 insertions(+), 13 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 29f44138..4226c89a 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -654,6 +654,22 @@ fn new_msg( }) } +fn new_prepare_quorum(round: i64, value: i64) -> Vec { + (1..=3) + .map(|source| new_msg(MSG_PREPARE, 0, source, round, value, 0, 0, 0, None)) + .collect() +} + +fn new_round_change(source: i64, round: i64, pr: i64, pv: i64) -> TestMsgRef { + new_msg(MSG_ROUND_CHANGE, 0, source, round, 0, 0, pr, pv, None) +} + +fn new_round_change_quorum(round: i64, pr: i64, pv: i64) -> Vec { + (1..=3) + .map(|source| new_round_change(source, round, pr, pv)) + .collect() +} + // Delays the message broadcast by between 1x and 2x jitter_ms and drops // messages. fn bcast( @@ -1503,17 +1519,111 @@ fn justified_qrc_j1_and_j2() { } #[test] -fn round_change_prepared_round_must_be_before_round() { +fn valid_round_change_prepared_round_boundaries() { + let cases = [ + ("negative", 2, -1, false), + ("null at round one", 1, 0, true), + ("previous round", 2, 1, true), + ("current round", 2, 2, false), + ("future round", 2, 3, false), + ]; + + for (name, round, prepared_round, expected) in cases { + let msg = new_msg(MSG_ROUND_CHANGE, 0, 1, round, 0, 0, prepared_round, 7, None); + assert_eq!(expected, valid_round_change_prepared_round(&msg), "{name}"); + } +} + +#[test] +fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites() { let mut def = noop_definition(); def.nodes = 4; - let prepares = vec![ - new_msg(MSG_PREPARE, 0, 1, 2, 7, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 2, 2, 7, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 3, 2, 7, 0, 0, 0, None), - ]; - let invalid = new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 2, 7, Some(&prepares)); + let target_round = 2; + let valid_prepared_round = 1; + let value = 7; + let prepares = new_prepare_quorum(valid_prepared_round, value); + let valid_round_changes = new_round_change_quorum(target_round, valid_prepared_round, value); + + for (name, invalid_pr) in [ + ("negative", -1), + ("current round", target_round), + ("future round", target_round + 1), + ] { + let invalid_prepares = new_prepare_quorum(invalid_pr, value); + let invalid_round_change = new_msg( + MSG_ROUND_CHANGE, + 0, + 1, + target_round, + 0, + 0, + invalid_pr, + value, + Some(&invalid_prepares), + ); + assert!( + !is_justified_round_change(&def, &invalid_round_change), + "is_justified_round_change must reject {name} prepared_round" + ); + + let mut only_invalid = new_round_change_quorum(target_round, invalid_pr, value); + only_invalid.extend(invalid_prepares); + assert_eq!( + None, + contains_justified_qrc(&def, &only_invalid, target_round), + "contains_justified_qrc must not count {name} prepared_round" + ); + assert!( + get_justified_qrc(&def, &only_invalid, target_round).is_none(), + "get_justified_qrc must not count {name} prepared_round" + ); + + let mut with_invalid_extra = valid_round_changes.clone(); + with_invalid_extra.push(new_round_change(4, target_round, invalid_pr, value)); + with_invalid_extra.extend(prepares.clone()); + assert_eq!( + Some(value), + contains_justified_qrc(&def, &with_invalid_extra, target_round), + "contains_justified_qrc must ignore extra {name} prepared_round" + ); + let qrc = get_justified_qrc(&def, &with_invalid_extra, target_round) + .expect("valid quorum must remain after filtering invalid prepared_round"); + assert!( + qrc.iter() + .filter(|msg| msg.type_() == MSG_ROUND_CHANGE) + .all(valid_round_change_prepared_round), + "get_justified_qrc must filter extra {name} prepared_round" + ); + } +} - assert!(!is_justified_round_change(&def, &invalid)); +#[test] +fn quorum_null_prepared_filters_invalid_prepared_rounds() { + let mut def = noop_definition(); + def.nodes = 4; + + let valid = new_round_change_quorum(1, 0, 0); + let (qrc, ok) = quorum_null_prepared(&def, &valid, 1); + assert!(ok); + assert_eq!(3, qrc.len()); + + for (name, round, invalid_pr) in [ + ("negative", 1, -1), + ("current round", 1, 1), + ("future round", 1, 2), + ("zero round", 0, 0), + ] { + let invalid = new_round_change_quorum(round, invalid_pr, 0); + let (qrc, ok) = quorum_null_prepared(&def, &invalid, round); + assert!( + !ok, + "quorum_null_prepared must reject {name} prepared_round" + ); + assert!( + qrc.is_empty(), + "quorum_null_prepared must filter {name} prepared_round" + ); + } } #[test] diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 071823c2..54b77949 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -1036,15 +1036,14 @@ fn contains_justified_qrc( where V: Eq + Hash + Default, { - let qrc = filter_round_change(justification, round); + let qrc = filter_round_change(justification, round) + .into_iter() + .filter(valid_round_change_prepared_round) + .collect::>(); if (qrc.len() as i64) < d.quorum() { return None; } - if qrc.iter().any(|rc| !valid_round_change_prepared_round(rc)) { - return None; - } - // No need to calculate J1 or J2 for all possible combinations, // since justification should only contain one. From b30e616aa0b79a9592d389d912d14eecf242f3a8 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 12:10:03 +0700 Subject: [PATCH 20/34] fix: using test-case --- crates/core/src/qbft/internal_test.rs | 553 ++++++++++---------------- 1 file changed, 205 insertions(+), 348 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 4226c89a..38409f61 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -9,6 +9,7 @@ use std::{ thread, time::Duration, }; +use test_case::test_case; const WRITE_CHAN_ERR: &str = "Failed to write to channel"; const READ_CHAN_ERR: &str = "Failed to read from channel"; @@ -899,107 +900,66 @@ impl SomeMsg for TestMsg { } } -#[test] -fn happy_0() { +#[test_case(0 ; "happy_0")] +#[test_case(1 ; "happy_1")] +fn happy(instance: i64) { test_qbft(Test { - instance: 0, - decide_round: 1, - ..Default::default() - }); -} - -#[test] -fn happy_1() { - test_qbft(Test { - instance: 1, + instance, decide_round: 1, ..Default::default() }); } -#[test] -fn prepare_round_1_decide_round_2() { - test_qbft(Test { - instance: 0, - commits_after: 1, - decide_round: 2, - prepared_val: 1, - ..Default::default() - }); -} - -#[test] -fn prepare_round_2_decide_round_3() { - test_qbft(Test { - instance: 0, - commits_after: 2, - value_delay: HashMap::from([(1, Duration::from_secs(2))]), - decide_round: 3, - prepared_val: 2, - const_period: true, - ..Default::default() - }); -} - -#[test] -fn leader_late_exp() { +#[test_case(1, None, 2, 1, false ; "prepare_round_1_decide_round_2")] +#[test_case(2, Some(2), 3, 2, true ; "prepare_round_2_decide_round_3")] +fn prepare_round( + commits_after: i32, + value_delay_secs: Option, + decide_round: i32, + prepared_val: i32, + const_period: bool, +) { test_qbft(Test { instance: 0, - start_delay: HashMap::from([(1, Duration::from_secs(2))]), - decide_round: 2, + commits_after, + value_delay: value_delay_secs + .map(|secs| HashMap::from([(1, Duration::from_secs(secs))])) + .unwrap_or_default(), + decide_round, + prepared_val, + const_period, ..Default::default() }); } -#[test] -fn leader_down_const() { +#[test_case(false ; "leader_late_exp")] +#[test_case(true ; "leader_down_const")] +fn delayed_leader_start(const_period: bool) { test_qbft(Test { instance: 0, start_delay: HashMap::from([(1, Duration::from_secs(2))]), - const_period: true, decide_round: 2, + const_period, ..Default::default() }); } -#[test] -fn very_late_exp() { - test_qbft(Test { - instance: 3, - start_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]), - decide_round: 4, - ..Default::default() - }); -} - -#[test] -fn very_late_const() { +#[test_case(3, false, 4, false ; "very_late_exp")] +#[test_case(1, true, 0, true ; "very_late_const")] +fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_round: bool) { test_qbft(Test { - instance: 1, + instance, start_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]), - const_period: true, - random_round: true, - ..Default::default() - }); -} - -#[test] -fn stagger_start_exp() { - test_qbft(Test { - instance: 0, - start_delay: HashMap::from([ - (1, Duration::from_secs(0)), - (2, Duration::from_secs(1)), - (3, Duration::from_secs(2)), - (4, Duration::from_secs(3)), - ]), - random_round: true, // Takes 1 or 2 rounds. + decide_round, + const_period, + random_round, ..Default::default() }); } -#[test] -fn stagger_start_const() { +#[test_case(false ; "stagger_start_exp")] +#[test_case(true ; "stagger_start_const")] +fn stagger_start(const_period: bool) { test_qbft(Test { instance: 0, start_delay: HashMap::from([ @@ -1008,54 +968,40 @@ fn stagger_start_const() { (3, Duration::from_secs(2)), (4, Duration::from_secs(3)), ]), - const_period: true, + const_period, random_round: true, // Takes 1 or 2 rounds. ..Default::default() }); } -#[test] -fn very_delayed_value_exp() { - test_qbft(Test { - instance: 3, - value_delay: HashMap::from([ - (1, Duration::from_secs(5)), - (2, Duration::from_secs(10)), - (4, Duration::from_secs(5)), - ]), - decide_round: 4, - ..Default::default() - }); -} - -#[test] -fn very_delayed_value_const() { - test_qbft(Test { - instance: 1, - value_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]), - const_period: true, - random_round: true, - ..Default::default() - }); -} +#[test_case(3, true, false, 4, false ; "very_delayed_value_exp")] +#[test_case(1, false, true, 0, true ; "very_delayed_value_const")] +fn very_delayed_value( + instance: i64, + include_process_4: bool, + const_period: bool, + decide_round: i32, + random_round: bool, +) { + let mut value_delay = + HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]); + if include_process_4 { + value_delay.insert(4, Duration::from_secs(5)); + } -#[test] -fn stagger_delayed_value_exp() { test_qbft(Test { - instance: 0, - value_delay: HashMap::from([ - (1, Duration::from_secs(0)), - (2, Duration::from_secs(1)), - (3, Duration::from_secs(2)), - (4, Duration::from_secs(3)), - ]), - random_round: true, + instance, + value_delay, + const_period, + decide_round, + random_round, ..Default::default() }); } -#[test] -fn stagger_delayed_value_const() { +#[test_case(false ; "stagger_delayed_value_exp")] +#[test_case(true ; "stagger_delayed_value_const")] +fn stagger_delayed_value(const_period: bool) { test_qbft(Test { instance: 0, value_delay: HashMap::from([ @@ -1064,7 +1010,7 @@ fn stagger_delayed_value_const() { (3, Duration::from_secs(2)), (4, Duration::from_secs(3)), ]), - const_period: true, + const_period, random_round: true, ..Default::default() }); @@ -1082,80 +1028,53 @@ fn round1_leader_no_value_round2_leader_offline() { }); } -#[test] -fn jitter_500ms_exp() { - test_qbft(Test { - instance: 3, - bcast_jitter_ms: 500, - random_round: true, - ..Default::default() - }); -} - -#[test] -fn jitter_200ms_const() { +#[test_case(500, false ; "jitter_500ms_exp")] +#[test_case(200, true ; "jitter_200ms_const")] +fn jitter(bcast_jitter_ms: i32, const_period: bool) { test_qbft(Test { instance: 3, - bcast_jitter_ms: 200, // 0.2-0.4s network delay * 3msgs/round == 0.6-1.2s delay per 1s round - const_period: true, - random_round: true, - ..Default::default() - }); -} - -#[test] -fn drop_10_percent_const() { - test_qbft(Test { - instance: 1, - drop_prob: HashMap::from([(1, 0.1), (2, 0.1), (3, 0.1), (4, 0.1)]), - const_period: true, + bcast_jitter_ms, + const_period, random_round: true, ..Default::default() }); } -#[test] -fn drop_30_percent_const() { - test_qbft(Test { - instance: 1, - drop_prob: HashMap::from([(1, 0.3), (2, 0.3), (3, 0.3), (4, 0.3)]), - const_period: true, - random_round: true, - ..Default::default() - }); -} - -#[test] -fn fuzz() { - test_qbft(Test { - instance: 1, - fuzz: true, - const_period: true, - decide_round: 1, - ..Default::default() - }); -} - -#[test] -fn fuzz_with_late_leader() { +#[test_case(0.1 ; "drop_10_percent_const")] +#[test_case(0.3 ; "drop_30_percent_const")] +fn dropped_messages(drop_probability: f64) { test_qbft(Test { instance: 1, - fuzz: true, - start_delay: HashMap::from([(1, Duration::from_secs(2)), (2, Duration::from_secs(2))]), + drop_prob: HashMap::from([ + (1, drop_probability), + (2, drop_probability), + (3, drop_probability), + (4, drop_probability), + ]), const_period: true, random_round: true, ..Default::default() }); } -#[test] -fn fuzz_with_very_late_leader() { +#[test_case(None, 1, false ; "fuzz")] +#[test_case(Some(2), 0, true ; "fuzz_with_late_leader")] +#[test_case(Some(10), 0, true ; "fuzz_with_very_late_leader")] +fn fuzzed(start_delay_secs: Option, decide_round: i32, random_round: bool) { test_qbft(Test { instance: 1, fuzz: true, - start_delay: HashMap::from([(1, Duration::from_secs(10)), (2, Duration::from_secs(10))]), + start_delay: start_delay_secs + .map(|secs| { + HashMap::from([ + (1, Duration::from_secs(secs)), + (2, Duration::from_secs(secs)), + ]) + }) + .unwrap_or_default(), const_period: true, - random_round: true, + decide_round, + random_round, ..Default::default() }); } @@ -1181,41 +1100,35 @@ fn noop_transport() -> Transport { } } -#[test] -fn formulas() { - let expected = [ - (1, 1, 0), - (2, 2, 0), - (3, 2, 0), - (4, 3, 1), - (5, 4, 1), - (6, 4, 1), - (7, 5, 2), - (8, 6, 2), - (9, 6, 2), - (10, 7, 3), - (11, 8, 3), - (12, 8, 3), - (13, 9, 4), - (14, 10, 4), - (15, 10, 4), - (16, 11, 5), - (17, 12, 5), - (18, 12, 5), - (19, 13, 6), - (20, 14, 6), - (21, 14, 6), - (22, 15, 7), - ]; - - for (n, q, f) in expected { - let d = Definition:: { - nodes: n, - ..noop_definition() - }; - assert_eq!(q, d.quorum(), "Quorum given N={n}"); - assert_eq!(f, d.faulty(), "Faulty given N={n}"); - } +#[test_case(1, 1, 0 ; "n1")] +#[test_case(2, 2, 0 ; "n2")] +#[test_case(3, 2, 0 ; "n3")] +#[test_case(4, 3, 1 ; "n4")] +#[test_case(5, 4, 1 ; "n5")] +#[test_case(6, 4, 1 ; "n6")] +#[test_case(7, 5, 2 ; "n7")] +#[test_case(8, 6, 2 ; "n8")] +#[test_case(9, 6, 2 ; "n9")] +#[test_case(10, 7, 3 ; "n10")] +#[test_case(11, 8, 3 ; "n11")] +#[test_case(12, 8, 3 ; "n12")] +#[test_case(13, 9, 4 ; "n13")] +#[test_case(14, 10, 4 ; "n14")] +#[test_case(15, 10, 4 ; "n15")] +#[test_case(16, 11, 5 ; "n16")] +#[test_case(17, 12, 5 ; "n17")] +#[test_case(18, 12, 5 ; "n18")] +#[test_case(19, 13, 6 ; "n19")] +#[test_case(20, 14, 6 ; "n20")] +#[test_case(21, 14, 6 ; "n21")] +#[test_case(22, 15, 7 ; "n22")] +fn formulas(n: i64, q: i64, f: i64) { + let d = Definition:: { + nodes: n, + ..noop_definition() + }; + assert_eq!(q, d.quorum(), "Quorum given N={n}"); + assert_eq!(f, d.faulty(), "Faulty given N={n}"); } #[test] @@ -1354,35 +1267,27 @@ fn idle_run_returns_when_cancelled() { )); } -#[test] -fn invalid_definition_rejected() { +#[test_case(0, 1, true ; "invalid_nodes")] +#[test_case(4, 0, false ; "invalid_fifo_limit")] +fn invalid_definition_rejected(nodes: i64, fifo_limit: i64, invalid_nodes: bool) { let cts = CancellationTokenSource::new(); let transport = noop_transport(); let (_input_tx, input_rx) = mpmc::bounded::(1); let (_source_tx, source_rx) = mpmc::bounded::(1); let mut def = noop_definition(); - def.nodes = 0; - def.fifo_limit = 1; - assert!(matches!( - qbft::run( - cts.token(), - &def, - &transport, - &0, - 1, - input_rx.clone(), - source_rx.clone(), - ), - Err(QbftError::InvalidNodes { nodes: 0 }) - )); - - def.nodes = 4; - def.fifo_limit = 0; - assert!(matches!( - qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx), - Err(QbftError::InvalidFifoLimit { fifo_limit: 0 }) - )); + def.nodes = nodes; + def.fifo_limit = fifo_limit; + + let result = qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx); + if invalid_nodes { + assert!(matches!(result, Err(QbftError::InvalidNodes { nodes: 0 }))); + } else { + assert!(matches!( + result, + Err(QbftError::InvalidFifoLimit { fifo_limit: 0 }) + )); + } } #[test] @@ -1518,24 +1423,20 @@ fn justified_qrc_j1_and_j2() { assert!(get_justified_qrc(&def, &invalid_pr, 2).is_none()); } -#[test] -fn valid_round_change_prepared_round_boundaries() { - let cases = [ - ("negative", 2, -1, false), - ("null at round one", 1, 0, true), - ("previous round", 2, 1, true), - ("current round", 2, 2, false), - ("future round", 2, 3, false), - ]; - - for (name, round, prepared_round, expected) in cases { - let msg = new_msg(MSG_ROUND_CHANGE, 0, 1, round, 0, 0, prepared_round, 7, None); - assert_eq!(expected, valid_round_change_prepared_round(&msg), "{name}"); - } +#[test_case(2, -1, false ; "negative")] +#[test_case(1, 0, true ; "null_at_round_one")] +#[test_case(2, 1, true ; "previous_round")] +#[test_case(2, 2, false ; "current_round")] +#[test_case(2, 3, false ; "future_round")] +fn valid_round_change_prepared_round_boundaries(round: i64, prepared_round: i64, expected: bool) { + let msg = new_msg(MSG_ROUND_CHANGE, 0, 1, round, 0, 0, prepared_round, 7, None); + assert_eq!(expected, valid_round_change_prepared_round(&msg)); } -#[test] -fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites() { +#[test_case(-1 ; "negative")] +#[test_case(2 ; "current_round")] +#[test_case(3 ; "future_round")] +fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr: i64) { let mut def = noop_definition(); def.nodes = 4; let target_round = 2; @@ -1544,61 +1445,49 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites() { let prepares = new_prepare_quorum(valid_prepared_round, value); let valid_round_changes = new_round_change_quorum(target_round, valid_prepared_round, value); - for (name, invalid_pr) in [ - ("negative", -1), - ("current round", target_round), - ("future round", target_round + 1), - ] { - let invalid_prepares = new_prepare_quorum(invalid_pr, value); - let invalid_round_change = new_msg( - MSG_ROUND_CHANGE, - 0, - 1, - target_round, - 0, - 0, - invalid_pr, - value, - Some(&invalid_prepares), - ); - assert!( - !is_justified_round_change(&def, &invalid_round_change), - "is_justified_round_change must reject {name} prepared_round" - ); - - let mut only_invalid = new_round_change_quorum(target_round, invalid_pr, value); - only_invalid.extend(invalid_prepares); - assert_eq!( - None, - contains_justified_qrc(&def, &only_invalid, target_round), - "contains_justified_qrc must not count {name} prepared_round" - ); - assert!( - get_justified_qrc(&def, &only_invalid, target_round).is_none(), - "get_justified_qrc must not count {name} prepared_round" - ); - - let mut with_invalid_extra = valid_round_changes.clone(); - with_invalid_extra.push(new_round_change(4, target_round, invalid_pr, value)); - with_invalid_extra.extend(prepares.clone()); - assert_eq!( - Some(value), - contains_justified_qrc(&def, &with_invalid_extra, target_round), - "contains_justified_qrc must ignore extra {name} prepared_round" - ); - let qrc = get_justified_qrc(&def, &with_invalid_extra, target_round) - .expect("valid quorum must remain after filtering invalid prepared_round"); - assert!( - qrc.iter() - .filter(|msg| msg.type_() == MSG_ROUND_CHANGE) - .all(valid_round_change_prepared_round), - "get_justified_qrc must filter extra {name} prepared_round" - ); - } + let invalid_prepares = new_prepare_quorum(invalid_pr, value); + let invalid_round_change = new_msg( + MSG_ROUND_CHANGE, + 0, + 1, + target_round, + 0, + 0, + invalid_pr, + value, + Some(&invalid_prepares), + ); + assert!(!is_justified_round_change(&def, &invalid_round_change)); + + let mut only_invalid = new_round_change_quorum(target_round, invalid_pr, value); + only_invalid.extend(invalid_prepares); + assert_eq!( + None, + contains_justified_qrc(&def, &only_invalid, target_round) + ); + assert!(get_justified_qrc(&def, &only_invalid, target_round).is_none()); + + let mut with_invalid_extra = valid_round_changes.clone(); + with_invalid_extra.push(new_round_change(4, target_round, invalid_pr, value)); + with_invalid_extra.extend(prepares.clone()); + assert_eq!( + Some(value), + contains_justified_qrc(&def, &with_invalid_extra, target_round) + ); + let qrc = get_justified_qrc(&def, &with_invalid_extra, target_round) + .expect("valid quorum must remain after filtering invalid prepared_round"); + assert!( + qrc.iter() + .filter(|msg| msg.type_() == MSG_ROUND_CHANGE) + .all(valid_round_change_prepared_round) + ); } -#[test] -fn quorum_null_prepared_filters_invalid_prepared_rounds() { +#[test_case(1, -1 ; "negative")] +#[test_case(1, 1 ; "current_round")] +#[test_case(1, 2 ; "future_round")] +#[test_case(0, 0 ; "zero_round")] +fn quorum_null_prepared_filters_invalid_prepared_rounds(round: i64, invalid_pr: i64) { let mut def = noop_definition(); def.nodes = 4; @@ -1607,23 +1496,10 @@ fn quorum_null_prepared_filters_invalid_prepared_rounds() { assert!(ok); assert_eq!(3, qrc.len()); - for (name, round, invalid_pr) in [ - ("negative", 1, -1), - ("current round", 1, 1), - ("future round", 1, 2), - ("zero round", 0, 0), - ] { - let invalid = new_round_change_quorum(round, invalid_pr, 0); - let (qrc, ok) = quorum_null_prepared(&def, &invalid, round); - assert!( - !ok, - "quorum_null_prepared must reject {name} prepared_round" - ); - assert!( - qrc.is_empty(), - "quorum_null_prepared must filter {name} prepared_round" - ); - } + let invalid = new_round_change_quorum(round, invalid_pr, 0); + let (qrc, ok) = quorum_null_prepared(&def, &invalid, round); + assert!(!ok); + assert!(qrc.is_empty()); } #[test] @@ -1936,43 +1812,24 @@ struct ChainSplitTest { should_halt: bool, } -#[test] -fn chain_split_same_value() { - test_qbft_chain_split(ChainSplitTest { - decide_round: 1, - value_source: HashMap::from([(1, 1), (2, 1), (3, 1), (4, 1)]), - prepared_val: 1, - should_halt: false, - }); -} - -#[test] -fn chain_split_non_leader_peer_has_different_value() { - test_qbft_chain_split(ChainSplitTest { - decide_round: 1, - value_source: HashMap::from([(1, 1), (2, 3), (3, 1), (4, 1)]), - prepared_val: 1, - should_halt: false, - }); -} - -#[test] -fn chain_split_first_leader_has_different_value_second_leader_succeeds() { - test_qbft_chain_split(ChainSplitTest { - decide_round: 2, - value_source: HashMap::from([(1, 3), (2, 1), (3, 1), (4, 1)]), - prepared_val: 1, - should_halt: false, - }); -} - -#[test] -fn zz_chain_split_no_consensus_halt() { +#[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")] +#[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")] +#[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")] +#[test_case(1, 1, 3, 3, 0, 0, true ; "zz_no_consensus_halt")] +fn chain_split( + value_1: i64, + value_2: i64, + value_3: i64, + value_4: i64, + decide_round: i32, + prepared_val: i32, + should_halt: bool, +) { test_qbft_chain_split(ChainSplitTest { - decide_round: 0, - value_source: HashMap::from([(1, 1), (2, 1), (3, 3), (4, 3)]), - prepared_val: 0, - should_halt: true, + decide_round, + value_source: HashMap::from([(1, value_1), (2, value_2), (3, value_3), (4, value_4)]), + prepared_val, + should_halt, }); } From 4a364dff40811433efe1604c1abc11aea7ee0e0a Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 13:32:36 +0700 Subject: [PATCH 21/34] fix: small fixes --- crates/core/src/qbft/fake_clock.rs | 1 + crates/core/src/qbft/internal_test.rs | 2 +- crates/core/src/qbft/mod.rs | 12 +++++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 8392e7f3..75d07ef6 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -92,6 +92,7 @@ impl FakeClock { inner.now - inner.start } + /// Explicit cleanup; dropping one clone must not cancel timers owned by other clones. pub fn cancel(&self) { let mut inner = self.inner.lock().unwrap(); inner.clients.clear(); diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 38409f61..1ed16e11 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1815,7 +1815,7 @@ struct ChainSplitTest { #[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")] #[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")] #[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")] -#[test_case(1, 1, 3, 3, 0, 0, true ; "zz_no_consensus_halt")] +#[test_case(1, 1, 3, 3, 0, 0, true ; "no_consensus_halt")] fn chain_split( value_1: i64, value_2: i64, diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 54b77949..2537611d 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -57,8 +57,8 @@ pub enum QbftError { #[error("Compare leader value with local value failed")] CompareError, - #[error("bug: expected only comparison or timeout error")] - UnexpectedCompareError, + #[error("bug: expected only comparison or timeout error, got {0}")] + UnexpectedCompareError(Box), #[error("context canceled")] ContextCanceled, @@ -534,7 +534,11 @@ where broadcast_round_change()?; } QbftError::ContextCanceled => return Err(QbftError::ContextCanceled), - _ => return Err(QbftError::UnexpectedCompareError), + _ => { + return Err(QbftError::UnexpectedCompareError(Box::new( + qbft_err, + ))); + } } } } @@ -1288,6 +1292,8 @@ where let null_pr = Default::default(); let null_pv = Some(&Default::default()); + // Normal callers pass round >= 1, which makes pr=0 valid. Keep the explicit + // prepared-round filter so this helper remains safe for direct/future calls. let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv) .into_iter() .filter(valid_round_change_prepared_round) From e759a81c8112593f68701a78affe82e260c1e07d Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 13:38:42 +0700 Subject: [PATCH 22/34] fix: linter --- crates/core/src/qbft/fake_clock.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 75d07ef6..2d6c239d 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -92,7 +92,8 @@ impl FakeClock { inner.now - inner.start } - /// Explicit cleanup; dropping one clone must not cancel timers owned by other clones. + /// Explicit cleanup; dropping one clone must not cancel timers owned by + /// other clones. pub fn cancel(&self) { let mut inner = self.inner.lock().unwrap(); inner.clients.clear(); From 998b9ce0bc3e449a29944dfaa698c93f6a291253 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 14:59:02 +0700 Subject: [PATCH 23/34] fix: remove unnecessary filter --- crates/core/src/qbft/internal_test.rs | 3 +-- crates/core/src/qbft/mod.rs | 7 +------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 1ed16e11..af4cd57d 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1486,8 +1486,7 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr: #[test_case(1, -1 ; "negative")] #[test_case(1, 1 ; "current_round")] #[test_case(1, 2 ; "future_round")] -#[test_case(0, 0 ; "zero_round")] -fn quorum_null_prepared_filters_invalid_prepared_rounds(round: i64, invalid_pr: i64) { +fn quorum_null_prepared_requires_null_prepared_rounds(round: i64, invalid_pr: i64) { let mut def = noop_definition(); def.nodes = 4; diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 2537611d..956f17e8 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -1292,12 +1292,7 @@ where let null_pr = Default::default(); let null_pv = Some(&Default::default()); - // Normal callers pass round >= 1, which makes pr=0 valid. Keep the explicit - // prepared-round filter so this helper remains safe for direct/future calls. - let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv) - .into_iter() - .filter(valid_round_change_prepared_round) - .collect::>(); + let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv); ( justification.clone(), From 0451f2030c14c69a2101e90458e25b32eb780093 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 23:42:51 +0700 Subject: [PATCH 24/34] fix: make_is_leader now 0-based --- crates/core/src/qbft/internal_test.rs | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index af4cd57d..8b8aa42d 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -607,9 +607,7 @@ fn seed_from_label(label: &str) -> u64 { /// Construct a leader election function. fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone { move |instance: &i64, round: i64, process: i64| -> bool { - // Go's helper uses 0-indexed process IDs. These Rust tests use 1..=n, - // so translate the same modulo schedule into the local process range. - ((instance + round - 1).rem_euclid(n)) + 1 == process + (instance + round).rem_euclid(n) == process } } @@ -974,24 +972,12 @@ fn stagger_start(const_period: bool) { }); } -#[test_case(3, true, false, 4, false ; "very_delayed_value_exp")] -#[test_case(1, false, true, 0, true ; "very_delayed_value_const")] -fn very_delayed_value( - instance: i64, - include_process_4: bool, - const_period: bool, - decide_round: i32, - random_round: bool, -) { - let mut value_delay = - HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]); - if include_process_4 { - value_delay.insert(4, Duration::from_secs(5)); - } - +#[test_case(3, false, 4, false ; "very_delayed_value_exp")] +#[test_case(1, true, 0, true ; "very_delayed_value_const")] +fn very_delayed_value(instance: i64, const_period: bool, decide_round: i32, random_round: bool) { test_qbft(Test { instance, - value_delay, + value_delay: HashMap::from([(1, Duration::from_secs(5)), (2, Duration::from_secs(10))]), const_period, decide_round, random_round, From e0e51f78b21ead3047bb1cfcd669118e531db508 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Wed, 20 May 2026 00:11:31 +0700 Subject: [PATCH 25/34] fix: add cancelled state for fake clock --- crates/core/src/qbft/fake_clock.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 2d6c239d..686b549c 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -15,6 +15,7 @@ struct FakeClockInner { start: Instant, now: Instant, last_id: usize, + cancelled: bool, clients: HashMap, Instant)>, } @@ -25,6 +26,7 @@ impl FakeClock { start: now, now, last_id: 1, + cancelled: false, clients: Default::default(), })), } @@ -41,6 +43,10 @@ impl FakeClock { let client_id = { let mut inner = self.inner.lock().unwrap(); + if inner.cancelled { + return (rx, Box::new(|| {})); + } + let id = inner.last_id; let deadline = inner.now + duration; @@ -92,10 +98,11 @@ impl FakeClock { inner.now - inner.start } - /// Explicit cleanup; dropping one clone must not cancel timers owned by - /// other clones. + /// Explicit terminal cleanup; do not reintroduce `Drop`, since dropping one + /// clone must not cancel timers owned by other clones. pub fn cancel(&self) { let mut inner = self.inner.lock().unwrap(); + inner.cancelled = true; inner.clients.clear(); } } @@ -160,6 +167,20 @@ fn multiple_threads_cancellation() { assert_eq!(Duration::ZERO, clock.elapsed()); } +#[test] +fn timer_created_after_cancel_is_closed() { + let clock = FakeClock::new(Instant::now()); + clock.cancel(); + + let (ch, cancel) = clock.new_timer(Duration::from_secs(5)); + + assert!(matches!( + ch.try_recv(), + Err(mpmc::TryRecvError::Disconnected) + )); + cancel(); +} + #[test] fn cancel_one_timer_only() { let clock = FakeClock::new(Instant::now()); From 6cc22da8e7566ea3ebac42f99586099e29ed7c6f Mon Sep 17 00:00:00 2001 From: Quang Le Date: Wed, 20 May 2026 00:52:33 +0700 Subject: [PATCH 26/34] fix: flanky tests by ordering and add small settle window time --- crates/core/src/qbft/fake_clock.rs | 8 +++++--- crates/core/src/qbft/internal_test.rs | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 686b549c..d03063c2 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -1,6 +1,6 @@ use crossbeam::channel as mpmc; use std::{ - collections::HashMap, + collections::BTreeMap, sync::{Arc, Mutex}, thread, time::{Duration, Instant}, @@ -16,7 +16,7 @@ struct FakeClockInner { now: Instant, last_id: usize, cancelled: bool, - clients: HashMap, Instant)>, + clients: BTreeMap, Instant)>, } impl FakeClock { @@ -75,7 +75,9 @@ impl FakeClock { inner.now += duration; let now = inner.now; - for (&id, (ch, deadline)) in inner.clients.iter() { + // Preserve timer creation order for equal deadlines. The QBFT + // fake-clock harness asserts exact rounds, so timer ordering matters. + for (&id, (ch, deadline)) in &inner.clients { if *deadline <= now { expired.push((id, ch.clone())); } diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 8b8aa42d..4d321b16 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -276,10 +276,11 @@ fn test_qbft(test: Test) { .as_ref() .expect("value sender kept until run returns") .clone(); + let (delay_ch, cancel) = clock.new_timer(delay); s.spawn(move || { - let (delay_ch, cancel) = clock.new_timer(delay); - _ = delay_ch.recv(); - _ = v_chan_tx_send.send(i); + if delay_ch.recv().is_ok() { + _ = v_chan_tx_send.send(i); + } cancel(); }); @@ -378,7 +379,6 @@ fn test_qbft(test: Test) { } BroadcastEvent::Delayed(delayed) => pending.push(delayed), } - clock.advance(Duration::from_millis(1)); if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) { cts.cancel(); clock.cancel(); @@ -517,7 +517,9 @@ fn test_qbft(test: Test) { } default => { - thread::sleep(time::Duration::from_micros(1)); + // Give worker threads a small scheduling window before + // advancing fake time; these tests assert exact rounds. + thread::sleep(Duration::from_micros(50)); clock.advance(Duration::from_millis(1)); if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) { cts.cancel(); From 7e48b5658c0dddef31e0c7878263925b4c61d594 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Wed, 20 May 2026 16:29:31 +0700 Subject: [PATCH 27/34] fix: harden fake-clock to avoid flanky scheduling --- crates/core/src/qbft/fake_clock.rs | 126 +++++++-- crates/core/src/qbft/internal_test.rs | 357 ++++++++++++++++++++++---- 2 files changed, 411 insertions(+), 72 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index d03063c2..160cae56 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -1,7 +1,10 @@ use crossbeam::channel as mpmc; use std::{ collections::BTreeMap, - sync::{Arc, Mutex}, + sync::{ + Arc, Mutex, + atomic::{AtomicIsize, Ordering}, + }, thread, time::{Duration, Instant}, }; @@ -11,15 +14,25 @@ pub struct FakeClock { inner: Arc>, } +#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] +pub enum TimerPriority { + // Match the Go fake-clock harness at equal deadlines: delayed nodes start + // before protocol timers, while delayed input values arrive after them. + StartDelay, + Protocol, + InputValue, +} + struct FakeClockInner { start: Instant, now: Instant, last_id: usize, cancelled: bool, - clients: BTreeMap, Instant)>, + clients: BTreeMap, Instant, TimerPriority)>, } impl FakeClock { + /// Create a fake clock pinned to an initial instant. pub fn new(now: Instant) -> Self { Self { inner: Arc::new(Mutex::new(FakeClockInner { @@ -32,6 +45,7 @@ impl FakeClock { } } + /// Register a protocol timer with the default protocol priority. pub fn new_timer( &self, duration: Duration, @@ -39,7 +53,22 @@ impl FakeClock { mpmc::Receiver, Box, ) { - let (tx, rx) = mpmc::bounded::(1); + self.new_timer_with_priority(duration, TimerPriority::Protocol) + } + + /// Register a timer with explicit same-deadline ordering priority. + pub fn new_timer_with_priority( + &self, + duration: Duration, + priority: TimerPriority, + ) -> ( + mpmc::Receiver, + Box, + ) { + // Synchronous expiry handoff: advancing fake time must wait until the + // timer owner observes the tick, otherwise exact-round QBFT tests race + // worker scheduling. + let (tx, rx) = mpmc::bounded::(0); let client_id = { let mut inner = self.inner.lock().unwrap(); @@ -51,7 +80,7 @@ impl FakeClock { let deadline = inner.now + duration; inner.last_id += 1; - inner.clients.insert(id, (tx, deadline)); + inner.clients.insert(id, (tx, deadline, priority)); id }; @@ -65,7 +94,26 @@ impl FakeClock { (rx, cancel) } - pub fn advance(&self, duration: Duration) { + /// Advance fake time and deliver all timers expired by the new time. + pub fn advance(&self, duration: Duration) -> usize { + self.advance_inner(duration, None) + } + + /// Advance fake time and wait for each delivered timer action to complete. + pub fn advance_and_wait( + &self, + duration: Duration, + pending_timer_actions: &AtomicIsize, + ) -> usize { + self.advance_inner(duration, Some(pending_timer_actions)) + } + + /// Shared advance path; optionally synchronizes timer delivery with the test harness. + fn advance_inner( + &self, + duration: Duration, + pending_timer_actions: Option<&AtomicIsize>, + ) -> usize { // Advance time and collect expired senders under lock, but perform sends // without holding lock. let mut expired = vec![]; @@ -75,31 +123,55 @@ impl FakeClock { inner.now += duration; let now = inner.now; - // Preserve timer creation order for equal deadlines. The QBFT - // fake-clock harness asserts exact rounds, so timer ordering matters. - for (&id, (ch, deadline)) in &inner.clients { + for (&id, (ch, deadline, priority)) in &inner.clients { if *deadline <= now { - expired.push((id, ch.clone())); + expired.push((id, *deadline, *priority, ch.clone())); } } - for (id, _) in expired.iter() { + for (id, _, _, _) in expired.iter() { inner.clients.remove(id); } now }; - for (_, ch) in expired { - let _ = ch.send(now); + // Equal-deadline order is part of the test harness contract: these + // tests assert exact rounds, and Go's fake-clock scheduling is stable. + expired.sort_by_key(|(id, deadline, priority, _)| (*deadline, *priority, *id)); + + let mut delivered = 0; + for (_, _, _, ch) in expired { + if let Some(pending_timer_actions) = pending_timer_actions { + pending_timer_actions.fetch_add(1, Ordering::SeqCst); + } + + if ch.send(now).is_ok() { + delivered += 1; + if let Some(pending_timer_actions) = pending_timer_actions { + while pending_timer_actions.load(Ordering::SeqCst) > 0 { + thread::yield_now(); + } + } + } else if let Some(pending_timer_actions) = pending_timer_actions { + pending_timer_actions.fetch_sub(1, Ordering::SeqCst); + } } + + delivered } + /// Return fake time elapsed since clock creation. pub fn elapsed(&self) -> Duration { let inner = self.inner.lock().unwrap(); inner.now - inner.start } + /// Return currently registered timers. + pub fn timer_count(&self) -> usize { + self.inner.lock().unwrap().clients.len() + } + /// Explicit terminal cleanup; do not reintroduce `Drop`, since dropping one /// clone must not cancel timers owned by other clones. pub fn cancel(&self) { @@ -110,6 +182,7 @@ impl FakeClock { } #[test] +/// Timers registered by different threads fire after fake time passes deadlines. fn multiple_threads_timers() { let clock = FakeClock::new(Instant::now()); let (done_tx, done_rx) = mpmc::bounded(2); @@ -141,6 +214,7 @@ fn multiple_threads_timers() { } #[test] +/// Cancelling the clock closes outstanding timers without advancing fake time. fn multiple_threads_cancellation() { let clock = FakeClock::new(Instant::now()); let (done_tx, done_rx) = mpmc::bounded(2); @@ -170,6 +244,7 @@ fn multiple_threads_cancellation() { } #[test] +/// A timer created after clock cancellation is immediately closed. fn timer_created_after_cancel_is_closed() { let clock = FakeClock::new(Instant::now()); clock.cancel(); @@ -184,25 +259,42 @@ fn timer_created_after_cancel_is_closed() { } #[test] +/// Cancelling one timer does not affect other timers with the same deadline. fn cancel_one_timer_only() { let clock = FakeClock::new(Instant::now()); let (ch_1, cancel_1) = clock.new_timer(Duration::from_secs(5)); let (ch_2, _) = clock.new_timer(Duration::from_secs(5)); + let (done_tx, done_rx) = mpmc::bounded(1); cancel_1(); - clock.advance(Duration::from_secs(5)); + thread::scope(|s| { + s.spawn(move || { + done_tx.send(ch_2.recv().is_ok()).unwrap(); + }); + + clock.advance(Duration::from_secs(5)); + }); assert!(ch_1.try_recv().is_err()); - assert!(ch_2.try_recv().is_ok()); + assert_eq!(Ok(true), done_rx.try_recv()); } #[test] +/// An expired timer is delivered once and removed from the clock. fn expired_timer_delivers_once() { let clock = FakeClock::new(Instant::now()); let (ch, _) = clock.new_timer(Duration::from_secs(5)); + let (done_tx, done_rx) = mpmc::bounded(1); + thread::scope(|s| { + s.spawn(move || { + done_tx.send(ch.recv().is_ok()).unwrap(); + }); + + clock.advance(Duration::from_secs(5)); + }); + + assert_eq!(Ok(true), done_rx.try_recv()); clock.advance(Duration::from_secs(5)); - assert!(ch.try_recv().is_ok()); - clock.advance(Duration::from_secs(5)); - assert!(ch.try_recv().is_err()); + assert!(done_rx.try_recv().is_err()); } diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 4d321b16..0973c924 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1,11 +1,18 @@ -use crate::qbft::{self, fake_clock::FakeClock, *}; +use crate::qbft::{ + self, + fake_clock::{FakeClock, TimerPriority}, + *, +}; use cancellation::CancellationTokenSource; use crossbeam::channel as mpmc; use std::{ collections::{BTreeMap, HashMap}, fmt::Write as _, panic::{self, AssertUnwindSafe}, - sync::{Arc, Mutex}, + sync::{ + Arc, Mutex, + atomic::{AtomicIsize, AtomicUsize, Ordering}, + }, thread, time::Duration, }; @@ -24,10 +31,28 @@ const TEST_STREAM_MSG_ROUND: u64 = 11; const TEST_STREAM_MSG_VALUE: u64 = 12; const TEST_STREAM_MSG_PREPARED_ROUND: u64 = 13; const TEST_STREAM_MSG_PREPARED_VALUE: u64 = 14; +const TEST_WAIT_TIMEOUT: Duration = Duration::from_secs(1); +// Wall-clock guard catches lack of harness progress. Fake time still controls +// protocol progress, so slow-but-progressing parallel runs should not fail. +const TEST_STALL_TIMEOUT: Duration = Duration::from_secs(20); type RunOutcome = std::thread::Result>; type TestMsgRef = Msg; +struct PendingCompareGuard { + pending_compares: Arc, +} + +impl Drop for PendingCompareGuard { + fn drop(&mut self) { + self.pending_compares.fetch_sub(1, Ordering::SeqCst); + } +} + +fn complete_timer_action(pending_timer_actions: &AtomicIsize) { + pending_timer_actions.fetch_sub(1, Ordering::SeqCst); +} + struct PendingBroadcast { deliver_at: Duration, key: u64, @@ -65,6 +90,12 @@ struct Test { pub fuzz: bool, } +// Main QBFT simulation harness: +// 1. build one fake clock, four node transports, and QBFT callbacks; +// 2. spawn one thread per node running `qbft::run`; +// 3. route broadcasts through the in-memory network with optional drop/jitter/fuzz; +// 4. advance fake time only after pending compare/timer work is drained; +// 5. collect all decisions and assert same value plus expected round/value. fn test_qbft(test: Test) { const N: usize = 4; const MAX_ROUND: usize = 50; @@ -77,6 +108,8 @@ fn test_qbft(test: Test) { let clock = FakeClock::new(start_time); let cts = CancellationTokenSource::new(); + let pending_compares = Arc::new(AtomicUsize::new(0)); + let pending_timer_actions = Arc::new(AtomicIsize::new(0)); // Keep peer iteration deterministic. These fake-clock tests assert exact // rounds, and broadcast fanout order affects which node observes quorums // first when tests run in parallel. @@ -91,6 +124,11 @@ fn test_qbft(test: Test) { let (unjust_tx, unjust_rx) = mpmc::unbounded::(); let (result_chan_tx, result_chan_rx) = mpmc::bounded::>>(N); let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N); + let expected_initial_timers = N + test + .value_delay + .keys() + .filter(|process| !test.start_delay.contains_key(process)) + .count(); let is_leader = Box::new(make_is_leader(N as i64)); @@ -116,16 +154,27 @@ fn test_qbft(test: Test) { result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); }) }, - compare: Arc::new(|_, _, _, _, return_err, _| { - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); - }), + compare: { + let pending_compares = pending_compares.clone(); + Arc::new(move |_, _, _, _, return_err, _| { + let _guard = PendingCompareGuard { + pending_compares: pending_compares.clone(), + }; + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + }) + }, nodes: N as i64, fifo_limit: FIFO_LIMIT as i64, log_round_change: { let clock = clock.clone(); let trace = trace.clone(); + let pending_timer_actions = pending_timer_actions.clone(); Box::new(move |_, process, round, new_round, upon_rule, _| { + if upon_rule == UPON_ROUND_TIMEOUT { + complete_timer_action(&pending_timer_actions); + } + trace.push(format!( "{:?} - {}@{} change to {} ~= {}", clock.elapsed(), @@ -151,7 +200,12 @@ fn test_qbft(test: Test) { log_upon_rule: { let clock = clock.clone(); let trace = trace.clone(); + let pending_compares = pending_compares.clone(); Box::new(move |_, process, round, msg, upon_rule| { + if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { + pending_compares.fetch_add(1, Ordering::SeqCst); + } + trace.push(format!( "{:?} {} => {}@{} -> {}@{} ~= {}", clock.elapsed(), @@ -239,9 +293,11 @@ fn test_qbft(test: Test) { let run_chan_tx = run_chan_tx.clone(); let defs = defs.clone(); let is_leader = is_leader.clone(); + let pending_timer_actions = pending_timer_actions.clone(); let trace = trace.clone(); s.spawn(move || { + let mut start_timer_fired = false; if let Some(delay) = start_delay { trace.push(format!( "{:?} Node {} start delay {:?}", @@ -249,14 +305,17 @@ fn test_qbft(test: Test) { i, delay )); - let (delay_ch, _) = clock.new_timer(delay); - _ = delay_ch.recv(); - trace.push(format!( - "{:?} Node {} starting {:?}", - clock.elapsed(), - i, - delay - )); + let (delay_ch, _) = + clock.new_timer_with_priority(delay, TimerPriority::StartDelay); + if delay_ch.recv().is_ok() { + start_timer_fired = true; + trace.push(format!( + "{:?} Node {} starting {:?}", + clock.elapsed(), + i, + delay + )); + } } if start_delay.is_some() { @@ -265,6 +324,9 @@ fn test_qbft(test: Test) { _ = receiver.recv().expect(READ_CHAN_ERR); } } + if start_timer_fired { + complete_timer_action(&pending_timer_actions); + } let (v_chan_tx, v_chan_rx) = mpmc::bounded::(1); let (vs_chan_tx, vs_chan_rx) = mpmc::bounded::(1); @@ -276,10 +338,13 @@ fn test_qbft(test: Test) { .as_ref() .expect("value sender kept until run returns") .clone(); - let (delay_ch, cancel) = clock.new_timer(delay); + let pending_timer_actions = pending_timer_actions.clone(); + let (delay_ch, cancel) = + clock.new_timer_with_priority(delay, TimerPriority::InputValue); s.spawn(move || { if delay_ch.recv().is_ok() { _ = v_chan_tx_send.send(i); + complete_timer_action(&pending_timer_actions); } cancel(); @@ -322,6 +387,21 @@ fn test_qbft(test: Test) { }); } + while clock.timer_count() < expected_initial_timers { + thread::yield_now(); + if real_start.elapsed() > TEST_STALL_TIMEOUT { + cts.cancel(); + clock.cancel(); + panic!( + "qbft test setup hang: timers={} expected={} seed={}\n{}", + clock.timer_count(), + expected_initial_timers, + seed, + trace.dump() + ); + } + } + let mut results = BTreeMap::>::new(); let mut count = 0; let mut decided = false; @@ -330,9 +410,10 @@ fn test_qbft(test: Test) { let mut pending = Vec::::new(); let mut next_fuzz_at = test.fuzz.then_some(Duration::from_millis(100)); let mut fuzz_counter = 0_u64; + let mut last_progress = time::Instant::now(); loop { - broadcasts += deliver_ready_broadcasts( + let delivered = deliver_ready_broadcasts( &mut pending, &receives, &test.drop_prob, @@ -340,6 +421,10 @@ fn test_qbft(test: Test) { &trace, &clock, ); + broadcasts += delivered; + if delivered > 0 { + last_progress = time::Instant::now(); + } if decided { next_fuzz_at = None; @@ -361,6 +446,7 @@ fn test_qbft(test: Test) { )); broadcasts += fanout_broadcast(&receives, &test.drop_prob, seed, &trace, &clock, msg); + last_progress = time::Instant::now(); next_fuzz_at = Some(next + Duration::from_millis(100)); } @@ -379,7 +465,8 @@ fn test_qbft(test: Test) { } BroadcastEvent::Delayed(delayed) => pending.push(delayed), } - if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) { + last_progress = time::Instant::now(); + if clock.elapsed() > Duration::from_secs(180) { cts.cancel(); clock.cancel(); panic!( @@ -405,6 +492,7 @@ fn test_qbft(test: Test) { recv(result_chan_rx) -> res => { let q_commit = res.expect(READ_CHAN_ERR); + last_progress = time::Instant::now(); for commit in q_commit.clone() { for (_, previous) in results.iter() { @@ -490,6 +578,7 @@ fn test_qbft(test: Test) { recv(run_chan_rx) -> res => { let (node, outcome) = res.expect(READ_CHAN_ERR); + last_progress = time::Instant::now(); if !matches!(outcome, Ok(Ok(()))) { if !decided { @@ -517,11 +606,36 @@ fn test_qbft(test: Test) { } default => { - // Give worker threads a small scheduling window before - // advancing fake time; these tests assert exact rounds. - thread::sleep(Duration::from_micros(50)); - clock.advance(Duration::from_millis(1)); - if clock.elapsed() > Duration::from_secs(180) || real_start.elapsed() > Duration::from_secs(20) { + if pending_compares.load(Ordering::SeqCst) != 0 + || pending_timer_actions.load(Ordering::SeqCst) > 0 + { + thread::yield_now(); + if last_progress.elapsed() > TEST_STALL_TIMEOUT { + cts.cancel(); + clock.cancel(); + panic!( + "qbft test hang: pending_compares={} pending_timer_actions={} decided={} done={} count={} elapsed={:?} real_elapsed={:?} broadcasts={} seed={}\n{}", + pending_compares.load(Ordering::SeqCst), + pending_timer_actions.load(Ordering::SeqCst), + decided, + done, + count, + clock.elapsed(), + real_start.elapsed(), + broadcasts, + seed, + trace.dump() + ); + } + continue; + } + + // Matches the Go harness throttle; ordering correctness + // comes from the pending-work barriers, not this duration. + thread::sleep(Duration::from_micros(1)); + clock.advance_and_wait(Duration::from_millis(1), &pending_timer_actions); + last_progress = time::Instant::now(); + if clock.elapsed() > Duration::from_secs(180) { cts.cancel(); clock.cancel(); panic!( @@ -900,6 +1014,8 @@ impl SomeMsg for TestMsg { } } +// Tests the normal-case path with an available round-1 leader. +// Expect all nodes to decide in round 1 on the leader value. #[test_case(0 ; "happy_0")] #[test_case(1 ; "happy_1")] fn happy(instance: i64) { @@ -910,6 +1026,8 @@ fn happy(instance: i64) { }); } +// Tests prepared-value carryover when commits are suppressed in earlier rounds. +// Expect later rounds to decide the highest prepared value, not a new leader value. #[test_case(1, None, 2, 1, false ; "prepare_round_1_decide_round_2")] #[test_case(2, Some(2), 3, 2, true ; "prepare_round_2_decide_round_3")] fn prepare_round( @@ -932,6 +1050,8 @@ fn prepare_round( }); } +// Tests round change when the first leader starts late. +// Expect the next live leader to drive consensus in round 2. #[test_case(false ; "leader_late_exp")] #[test_case(true ; "leader_down_const")] fn delayed_leader_start(const_period: bool) { @@ -944,6 +1064,8 @@ fn delayed_leader_start(const_period: bool) { }); } +// Tests recovery when two nodes, including early leaders, start much later. +// Expect consensus after enough round changes, with exact round only when deterministic. #[test_case(3, false, 4, false ; "very_late_exp")] #[test_case(1, true, 0, true ; "very_late_const")] fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_round: bool) { @@ -957,6 +1079,8 @@ fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_ }); } +// Tests staggered node startup and message buffering/draining. +// Expect consensus once enough live nodes join, with round allowed to vary. #[test_case(false ; "stagger_start_exp")] #[test_case(true ; "stagger_start_const")] fn stagger_start(const_period: bool) { @@ -974,6 +1098,8 @@ fn stagger_start(const_period: bool) { }); } +// Tests late input values on nodes that otherwise participate in the protocol. +// Expect round changes until a valid leader value is available. #[test_case(3, false, 4, false ; "very_delayed_value_exp")] #[test_case(1, true, 0, true ; "very_delayed_value_const")] fn very_delayed_value(instance: i64, const_period: bool, decide_round: i32, random_round: bool) { @@ -987,6 +1113,8 @@ fn very_delayed_value(instance: i64, const_period: bool, decide_round: i32, rand }); } +// Tests input values arriving at different fake times for all nodes. +// Expect consensus once enough nodes can validate/propose values. #[test_case(false ; "stagger_delayed_value_exp")] #[test_case(true ; "stagger_delayed_value_const")] fn stagger_delayed_value(const_period: bool) { @@ -1004,6 +1132,8 @@ fn stagger_delayed_value(const_period: bool) { }); } +// Tests a round-1 leader without input and a round-2 leader that is offline. +// Expect consensus to skip both blocked leaders and decide in round 3. #[test] fn round1_leader_no_value_round2_leader_offline() { test_qbft(Test { @@ -1016,6 +1146,8 @@ fn round1_leader_no_value_round2_leader_offline() { }); } +// Tests delayed broadcast delivery under fake network jitter. +// Expect safety and eventual consensus despite delayed messages. #[test_case(500, false ; "jitter_500ms_exp")] #[test_case(200, true ; "jitter_200ms_const")] fn jitter(bcast_jitter_ms: i32, const_period: bool) { @@ -1028,6 +1160,8 @@ fn jitter(bcast_jitter_ms: i32, const_period: bool) { }); } +// Tests deterministic message loss at 10% and 30%. +// Expect eventual consensus without conflicting decisions. #[test_case(0.1 ; "drop_10_percent_const")] #[test_case(0.3 ; "drop_30_percent_const")] fn dropped_messages(drop_probability: f64) { @@ -1045,6 +1179,8 @@ fn dropped_messages(drop_probability: f64) { }); } +// Tests bogus message injection during normal and delayed-leader scenarios. +// Expect unjust fuzz traffic to be ignored and honest nodes to still decide. #[test_case(None, 1, false ; "fuzz")] #[test_case(Some(2), 0, true ; "fuzz_with_late_leader")] #[test_case(Some(10), 0, true ; "fuzz_with_very_late_leader")] @@ -1088,6 +1224,8 @@ fn noop_transport() -> Transport { } } +// Tests quorum/faulty formulas across node counts. +// Expect quorum and tolerated-fault counts to match the Charon formula. #[test_case(1, 1, 0 ; "n1")] #[test_case(2, 2, 0 ; "n2")] #[test_case(3, 2, 0 ; "n3")] @@ -1119,6 +1257,8 @@ fn formulas(n: i64, q: i64, f: i64) { assert_eq!(f, d.faulty(), "Faulty given N={n}"); } +// Tests PRE-PREPARE justification with mixed ROUND_CHANGE and PREPARE evidence. +// Expect the proposal to be accepted when it carries a justified prepared value. #[test] fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() { let preprepare = new_msg( @@ -1147,6 +1287,8 @@ fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() { assert!(is_justified_pre_prepare(&def, &1, &preprepare, 0)); } +// Tests duplicate PRE-PREPARE rule handling after compare failure. +// Expect the next round proposal to trigger once and exit by cancellation. #[test] fn duplicate_pre_prepare_rules() { let cts = CancellationTokenSource::new(); @@ -1224,6 +1366,8 @@ fn duplicate_pre_prepare_rules() { assert!(matches!(res, Err(QbftError::ContextCanceled))); } +// Tests idle cancellation while no inputs, timers, or messages are available. +// Expect `run` to unblock and return `ContextCanceled`. #[test] fn idle_run_returns_when_cancelled() { let cts = CancellationTokenSource::new(); @@ -1235,8 +1379,10 @@ fn idle_run_returns_when_cancelled() { let (_input_tx, input_rx) = mpmc::bounded::(1); let (_source_tx, source_rx) = mpmc::bounded::(1); let (done_tx, done_rx) = mpmc::bounded(1); + let (started_tx, started_rx) = mpmc::bounded(1); thread::spawn(move || { + started_tx.send(()).expect(WRITE_CHAN_ERR); done_tx .send(qbft::run( &token, &def, &transport, &0, 1, input_rx, source_rx, @@ -1244,17 +1390,21 @@ fn idle_run_returns_when_cancelled() { .expect(WRITE_CHAN_ERR); }); - thread::sleep(Duration::from_millis(10)); + started_rx + .recv_timeout(TEST_WAIT_TIMEOUT) + .expect("run thread must start before cancellation"); cts.cancel(); assert!(matches!( done_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("idle run must unblock on cancellation"), Err(QbftError::ContextCanceled) )); } +// Tests definition validation at the `run` boundary. +// Expect invalid node count and FIFO limit to return typed errors. #[test_case(0, 1, true ; "invalid_nodes")] #[test_case(4, 0, false ; "invalid_fifo_limit")] fn invalid_definition_rejected(nodes: i64, fifo_limit: i64, invalid_nodes: bool) { @@ -1278,6 +1428,8 @@ fn invalid_definition_rejected(nodes: i64, fifo_limit: i64, invalid_nodes: bool) } } +// Tests cancellation under a continuously hot receive channel. +// Expect cancellation to win even when incoming traffic is always ready. #[test] fn run_cancels_under_hot_receive_stream() { let cts = CancellationTokenSource::new(); @@ -1294,6 +1446,7 @@ fn run_cancels_under_hot_receive_stream() { let (_input_tx, input_rx) = mpmc::bounded::(1); let (_source_tx, source_rx) = mpmc::bounded::(1); let (done_tx, done_rx) = mpmc::bounded(1); + let (started_tx, started_rx) = mpmc::bounded(1); let sender_cts = CancellationTokenSource::new(); let sender_token = sender_cts.token().clone(); @@ -1309,6 +1462,7 @@ fn run_cancels_under_hot_receive_stream() { }); thread::spawn(move || { + started_tx.send(()).expect(WRITE_CHAN_ERR); done_tx .send(qbft::run( &token, &def, &transport, &0, 1, input_rx, source_rx, @@ -1316,12 +1470,14 @@ fn run_cancels_under_hot_receive_stream() { .expect(WRITE_CHAN_ERR); }); - thread::sleep(Duration::from_millis(10)); + started_rx + .recv_timeout(TEST_WAIT_TIMEOUT) + .expect("run thread must start before cancellation"); cts.cancel(); assert!(matches!( done_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("run must unblock on cancellation even with a hot receive stream"), Err(QbftError::ContextCanceled) )); @@ -1330,6 +1486,8 @@ fn run_cancels_under_hot_receive_stream() { sender.join().expect("sender thread must exit"); } +// Tests message classification into QBFT upon-rules. +// Expect each fixture to map to the rule required by the protocol transition. #[test] fn classify_rules() { let mut def = noop_definition(); @@ -1376,6 +1534,8 @@ fn classify_rules() { ); } +// Tests ROUND_CHANGE quorum justification forms J1 and J2. +// Expect null-prepared and highest-prepared quorums to be accepted, invalid `pr` rejected. #[test] fn justified_qrc_j1_and_j2() { let mut def = noop_definition(); @@ -1411,6 +1571,8 @@ fn justified_qrc_j1_and_j2() { assert!(get_justified_qrc(&def, &invalid_pr, 2).is_none()); } +// Tests ROUND_CHANGE prepared-round bounds. +// Expect only null prepared round or strictly previous prepared rounds to be valid. #[test_case(2, -1, false ; "negative")] #[test_case(1, 0, true ; "null_at_round_one")] #[test_case(2, 1, true ; "previous_round")] @@ -1421,6 +1583,8 @@ fn valid_round_change_prepared_round_boundaries(round: i64, prepared_round: i64, assert_eq!(expected, valid_round_change_prepared_round(&msg)); } +// Tests invalid prepared rounds at every justification call site. +// Expect invalid ROUND_CHANGE messages to be filtered while valid quorums survive. #[test_case(-1 ; "negative")] #[test_case(2 ; "current_round")] #[test_case(3 ; "future_round")] @@ -1471,6 +1635,8 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr: ); } +// Tests null-prepared quorum filtering. +// Expect only `prepared_round = 0` and `prepared_value = 0` messages to form J1. #[test_case(1, -1 ; "negative")] #[test_case(1, 1 ; "current_round")] #[test_case(1, 2 ; "future_round")] @@ -1489,6 +1655,8 @@ fn quorum_null_prepared_requires_null_prepared_rounds(round: i64, invalid_pr: i6 assert!(qrc.is_empty()); } +// Tests duplicate sender handling in quorum filters. +// Expect at most one message per source to count toward a quorum. #[test] fn filter_msgs_keeps_one_per_source() { let msgs = vec![ @@ -1506,6 +1674,8 @@ fn filter_msgs_keeps_one_per_source() { ); } +// Tests compare outcomes: success, error, cached value source, and timeout. +// Expect cached value sources to be preserved and timer expiry to return `TimeoutError`. #[test] fn compare_success_error_cached_value_source_and_timeout() { let cts = CancellationTokenSource::new(); @@ -1602,6 +1772,8 @@ fn compare_success_error_cached_value_source_and_timeout() { )); } +// Tests compare timeout with a cooperative but blocked callback. +// Expect `compare` to return timeout without joining the callback first. #[test] fn compare_timeout_does_not_wait_for_blocked_callback() { let cts = CancellationTokenSource::new(); @@ -1627,12 +1799,14 @@ fn compare_timeout_does_not_wait_for_blocked_callback() { assert!(matches!( result_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("compare must return on timer without waiting for blocked callback"), (0, Err(QbftError::TimeoutError)) )); } +// Tests a compare callback that exits without sending status. +// Expect `compare` to wait for timer/cancel instead of treating disconnect as final. #[test] fn compare_callback_exit_without_status_waits_for_timer() { let cts = CancellationTokenSource::new(); @@ -1654,7 +1828,7 @@ fn compare_callback_exit_without_status_waits_for_timer() { }); callback_done_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("compare callback must exit"); assert!( result_rx.try_recv().is_err(), @@ -1664,12 +1838,14 @@ fn compare_callback_exit_without_status_waits_for_timer() { timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); assert!(matches!( result_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("compare must return after timer fires"), (0, Err(QbftError::TimeoutError)) )); } +// Tests parent cancellation propagation into the compare callback token. +// Expect `compare` to return `ContextCanceled` and the callback token to be canceled. #[test] fn compare_parent_cancel_cancels_callback_token() { let cts = CancellationTokenSource::new(); @@ -1698,11 +1874,11 @@ fn compare_parent_cancel_cancels_callback_token() { }); compare_started_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("compare must start"); cts.cancel(); - match result_rx.recv_timeout(Duration::from_millis(100)) { + match result_rx.recv_timeout(TEST_WAIT_TIMEOUT) { Ok(result) => assert!(matches!(result, (0, Err(QbftError::ContextCanceled)))), Err(err) => { let _ = timer_tx.send(time::Instant::now()); @@ -1710,10 +1886,12 @@ fn compare_parent_cancel_cancels_callback_token() { } } token_cancelled_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("callback token must be canceled by parent token"); } +// Tests parent cancellation while `run` is waiting inside compare. +// Expect `run` to return cancellation without broadcasting PREPARE. #[test] fn run_parent_cancel_during_compare_does_not_prepare() { const LEADER: i64 = 1; @@ -1761,18 +1939,18 @@ fn run_parent_cancel_during_compare_does_not_prepare() { }); compare_started_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("compare must start"); cts.cancel(); assert!(matches!( done_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("run must return parent cancellation from compare"), Err(QbftError::ContextCanceled) )); compare_cancelled_rx - .recv_timeout(Duration::from_millis(100)) + .recv_timeout(TEST_WAIT_TIMEOUT) .expect("compare callback token must be canceled"); assert!( broadcast_rx.try_iter().all(|type_| type_ != MSG_PREPARE), @@ -1799,6 +1977,8 @@ struct ChainSplitTest { should_halt: bool, } +// Tests value-source disagreement across nodes. +// Expect agreement when a quorum can compare equal values, or halt when no quorum can. #[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")] #[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")] #[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")] @@ -1826,8 +2006,11 @@ fn test_qbft_chain_split(test: ChainSplitTest) { const FIFO_LIMIT: i64 = 100; let clock = FakeClock::new(time::Instant::now()); + let real_start = time::Instant::now(); let cts = CancellationTokenSource::new(); let trace = Trace::new(); + let pending_compares = Arc::new(AtomicUsize::new(0)); + let pending_timer_actions = Arc::new(AtomicIsize::new(0)); // Keep peer iteration deterministic. These fake-clock tests assert exact // rounds, and broadcast fanout order affects which node observes quorums // first when tests run in parallel. @@ -1857,33 +2040,49 @@ fn test_qbft_chain_split(test: ChainSplitTest) { result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); }) }, - compare: Arc::new( - |_, qcommit, input_value_source_ch, input_value_source, return_err, return_value| { - let leader_value_source = qcommit.value_source().expect("value source"); - let local = if *input_value_source == 0 { - let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); - return_value.send(value).expect(WRITE_CHAN_ERR); - value - } else { - *input_value_source - }; - - if leader_value_source != local { - return_err - .send(Err(QbftError::CompareError)) - .expect(WRITE_CHAN_ERR); - return; - } + compare: { + let pending_compares = pending_compares.clone(); + Arc::new( + move |_, + qcommit, + input_value_source_ch, + input_value_source, + return_err, + return_value| { + let _guard = PendingCompareGuard { + pending_compares: pending_compares.clone(), + }; + let leader_value_source = qcommit.value_source().expect("value source"); + let local = if *input_value_source == 0 { + let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); + return_value.send(value).expect(WRITE_CHAN_ERR); + value + } else { + *input_value_source + }; + + if leader_value_source != local { + return_err + .send(Err(QbftError::CompareError)) + .expect(WRITE_CHAN_ERR); + return; + } - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); - }, - ), + return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + }, + ) + }, nodes: N as i64, fifo_limit: FIFO_LIMIT, log_round_change: { let clock = clock.clone(); let trace = trace.clone(); + let pending_timer_actions = pending_timer_actions.clone(); Box::new(move |_, process, round, new_round, upon_rule, _| { + if upon_rule == UPON_ROUND_TIMEOUT { + complete_timer_action(&pending_timer_actions); + } + trace.push(format!( "{:?} - {}@{} change to {} ~= {}", clock.elapsed(), @@ -1903,7 +2102,12 @@ fn test_qbft_chain_split(test: ChainSplitTest) { log_upon_rule: { let clock = clock.clone(); let trace = trace.clone(); + let pending_compares = pending_compares.clone(); Box::new(move |_, process, round, msg, upon_rule| { + if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { + pending_compares.fetch_add(1, Ordering::SeqCst); + } + trace.push(format!( "{:?} {} => {}@{} -> {}@{} ~= {}", clock.elapsed(), @@ -1977,15 +2181,32 @@ fn test_qbft_chain_split(test: ChainSplitTest) { }); } + while clock.timer_count() < N { + thread::yield_now(); + if real_start.elapsed() > TEST_STALL_TIMEOUT { + cts.cancel(); + clock.cancel(); + panic!( + "chain split setup hang: timers={} expected={} elapsed={:?}\n{}", + clock.timer_count(), + N, + clock.elapsed(), + trace.dump() + ); + } + } + let mut results = BTreeMap::>::new(); let mut count = 0; let mut decided = false; let mut done = 0; + let mut last_progress = time::Instant::now(); loop { mpmc::select! { recv(broadcast_rx) -> msg => { let msg = msg.expect(READ_CHAN_ERR); + last_progress = time::Instant::now(); for (target, (out_tx, _)) in receives.iter() { if *target == msg.source() { continue; @@ -2004,6 +2225,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { } recv(result_chan_rx) -> res => { let q_commit = res.expect(READ_CHAN_ERR); + last_progress = time::Instant::now(); if test.should_halt { cts.cancel(); clock.cancel(); @@ -2066,6 +2288,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { } recv(run_chan_rx) -> res => { let (node, outcome) = res.expect(READ_CHAN_ERR); + last_progress = time::Instant::now(); let expected_halt = test.should_halt && outcome_is_error(&outcome, |err| matches!(err, QbftError::MaxRoundReached)); if !(decided || expected_halt) { @@ -2091,13 +2314,37 @@ fn test_qbft_chain_split(test: ChainSplitTest) { } } default => { + if pending_compares.load(Ordering::SeqCst) != 0 + || pending_timer_actions.load(Ordering::SeqCst) > 0 + { + thread::yield_now(); + if last_progress.elapsed() > TEST_STALL_TIMEOUT { + cts.cancel(); + clock.cancel(); + panic!( + "chain split hang: pending_compares={} pending_timer_actions={} decided={decided} done={done} count={count} elapsed={:?}\n{}", + pending_compares.load(Ordering::SeqCst), + pending_timer_actions.load(Ordering::SeqCst), + clock.elapsed(), + trace.dump() + ); + } + continue; + } + + // Matches the Go harness throttle; ordering correctness comes + // from the pending-work barriers, not this duration. thread::sleep(Duration::from_micros(1)); + // The no-consensus halt case must reach round 11; using Go's + // 1ms tick here makes this Rust harness exceed its real-time + // guard, so only that halt path fast-forwards fake time. let tick = if test.should_halt { Duration::from_millis(100) } else { Duration::from_millis(1) }; - clock.advance(tick); + clock.advance_and_wait(tick, &pending_timer_actions); + last_progress = time::Instant::now(); let limit = if test.should_halt { let max_round = u32::try_from(MAX_ROUND).expect("MAX_ROUND fits u32"); let seconds = 1_u64 From 024bff78d7ae5c8397647dfb486097194e63895c Mon Sep 17 00:00:00 2001 From: Quang Le Date: Wed, 20 May 2026 16:34:48 +0700 Subject: [PATCH 28/34] fix: lint --- crates/core/src/qbft/fake_clock.rs | 8 +++--- crates/core/src/qbft/internal_test.rs | 36 ++++++++++++++++++--------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 160cae56..6bc32495 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -108,7 +108,8 @@ impl FakeClock { self.advance_inner(duration, Some(pending_timer_actions)) } - /// Shared advance path; optionally synchronizes timer delivery with the test harness. + /// Shared advance path; optionally synchronizes timer delivery with the + /// test harness. fn advance_inner( &self, duration: Duration, @@ -129,7 +130,7 @@ impl FakeClock { } } - for (id, _, _, _) in expired.iter() { + for (id, ..) in expired.iter() { inner.clients.remove(id); } @@ -182,7 +183,8 @@ impl FakeClock { } #[test] -/// Timers registered by different threads fire after fake time passes deadlines. +/// Timers registered by different threads fire after fake time passes +/// deadlines. fn multiple_threads_timers() { let clock = FakeClock::new(Instant::now()); let (done_tx, done_rx) = mpmc::bounded(2); diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 0973c924..167f2c95 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -93,7 +93,8 @@ struct Test { // Main QBFT simulation harness: // 1. build one fake clock, four node transports, and QBFT callbacks; // 2. spawn one thread per node running `qbft::run`; -// 3. route broadcasts through the in-memory network with optional drop/jitter/fuzz; +// 3. route broadcasts through the in-memory network with optional +// drop/jitter/fuzz; // 4. advance fake time only after pending compare/timer work is drained; // 5. collect all decisions and assert same value plus expected round/value. fn test_qbft(test: Test) { @@ -1027,7 +1028,8 @@ fn happy(instance: i64) { } // Tests prepared-value carryover when commits are suppressed in earlier rounds. -// Expect later rounds to decide the highest prepared value, not a new leader value. +// Expect later rounds to decide the highest prepared value, not a new leader +// value. #[test_case(1, None, 2, 1, false ; "prepare_round_1_decide_round_2")] #[test_case(2, Some(2), 3, 2, true ; "prepare_round_2_decide_round_3")] fn prepare_round( @@ -1065,7 +1067,8 @@ fn delayed_leader_start(const_period: bool) { } // Tests recovery when two nodes, including early leaders, start much later. -// Expect consensus after enough round changes, with exact round only when deterministic. +// Expect consensus after enough round changes, with exact round only when +// deterministic. #[test_case(3, false, 4, false ; "very_late_exp")] #[test_case(1, true, 0, true ; "very_late_const")] fn very_late_start(instance: i64, const_period: bool, decide_round: i32, random_round: bool) { @@ -1258,7 +1261,8 @@ fn formulas(n: i64, q: i64, f: i64) { } // Tests PRE-PREPARE justification with mixed ROUND_CHANGE and PREPARE evidence. -// Expect the proposal to be accepted when it carries a justified prepared value. +// Expect the proposal to be accepted when it carries a justified prepared +// value. #[test] fn is_justified_pre_prepare_mixed_round_change_prepare_fixture() { let preprepare = new_msg( @@ -1535,7 +1539,8 @@ fn classify_rules() { } // Tests ROUND_CHANGE quorum justification forms J1 and J2. -// Expect null-prepared and highest-prepared quorums to be accepted, invalid `pr` rejected. +// Expect null-prepared and highest-prepared quorums to be accepted, invalid +// `pr` rejected. #[test] fn justified_qrc_j1_and_j2() { let mut def = noop_definition(); @@ -1572,7 +1577,8 @@ fn justified_qrc_j1_and_j2() { } // Tests ROUND_CHANGE prepared-round bounds. -// Expect only null prepared round or strictly previous prepared rounds to be valid. +// Expect only null prepared round or strictly previous prepared rounds to be +// valid. #[test_case(2, -1, false ; "negative")] #[test_case(1, 0, true ; "null_at_round_one")] #[test_case(2, 1, true ; "previous_round")] @@ -1584,7 +1590,8 @@ fn valid_round_change_prepared_round_boundaries(round: i64, prepared_round: i64, } // Tests invalid prepared rounds at every justification call site. -// Expect invalid ROUND_CHANGE messages to be filtered while valid quorums survive. +// Expect invalid ROUND_CHANGE messages to be filtered while valid quorums +// survive. #[test_case(-1 ; "negative")] #[test_case(2 ; "current_round")] #[test_case(3 ; "future_round")] @@ -1636,7 +1643,8 @@ fn invalid_round_change_prepared_rounds_are_filtered_from_call_sites(invalid_pr: } // Tests null-prepared quorum filtering. -// Expect only `prepared_round = 0` and `prepared_value = 0` messages to form J1. +// Expect only `prepared_round = 0` and `prepared_value = 0` messages to form +// J1. #[test_case(1, -1 ; "negative")] #[test_case(1, 1 ; "current_round")] #[test_case(1, 2 ; "future_round")] @@ -1675,7 +1683,8 @@ fn filter_msgs_keeps_one_per_source() { } // Tests compare outcomes: success, error, cached value source, and timeout. -// Expect cached value sources to be preserved and timer expiry to return `TimeoutError`. +// Expect cached value sources to be preserved and timer expiry to return +// `TimeoutError`. #[test] fn compare_success_error_cached_value_source_and_timeout() { let cts = CancellationTokenSource::new(); @@ -1806,7 +1815,8 @@ fn compare_timeout_does_not_wait_for_blocked_callback() { } // Tests a compare callback that exits without sending status. -// Expect `compare` to wait for timer/cancel instead of treating disconnect as final. +// Expect `compare` to wait for timer/cancel instead of treating disconnect as +// final. #[test] fn compare_callback_exit_without_status_waits_for_timer() { let cts = CancellationTokenSource::new(); @@ -1845,7 +1855,8 @@ fn compare_callback_exit_without_status_waits_for_timer() { } // Tests parent cancellation propagation into the compare callback token. -// Expect `compare` to return `ContextCanceled` and the callback token to be canceled. +// Expect `compare` to return `ContextCanceled` and the callback token to be +// canceled. #[test] fn compare_parent_cancel_cancels_callback_token() { let cts = CancellationTokenSource::new(); @@ -1978,7 +1989,8 @@ struct ChainSplitTest { } // Tests value-source disagreement across nodes. -// Expect agreement when a quorum can compare equal values, or halt when no quorum can. +// Expect agreement when a quorum can compare equal values, or halt when no +// quorum can. #[test_case(1, 1, 1, 1, 1, 1, false ; "same_value")] #[test_case(1, 3, 1, 1, 1, 1, false ; "non_leader_peer_has_different_value")] #[test_case(3, 1, 1, 1, 2, 1, false ; "first_leader_has_different_value_second_leader_succeeds")] From 1b3664b98359db9ab9c19439609a59f070d042cb Mon Sep 17 00:00:00 2001 From: Quang Le Date: Tue, 19 May 2026 17:45:41 +0700 Subject: [PATCH 29/34] refactor: introduce QbftTypes --- crates/core/src/qbft/internal_test.rs | 74 +++--- crates/core/src/qbft/mod.rs | 348 ++++++++++---------------- 2 files changed, 174 insertions(+), 248 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 167f2c95..365abf78 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -37,7 +37,15 @@ const TEST_WAIT_TIMEOUT: Duration = Duration::from_secs(1); const TEST_STALL_TIMEOUT: Duration = Duration::from_secs(20); type RunOutcome = std::thread::Result>; -type TestMsgRef = Msg; +type TestMsgRef = Msg; + +struct TestQbft; + +impl QbftTypes for TestQbft { + type Instance = i64; + type Value = i64; + type Compare = i64; +} struct PendingCompareGuard { pending_compares: Arc, @@ -114,16 +122,11 @@ fn test_qbft(test: Test) { // Keep peer iteration deterministic. These fake-clock tests assert exact // rounds, and broadcast fanout order affects which node observes quorums // first when tests run in parallel. - let mut receives = BTreeMap::< - i64, - ( - mpmc::Sender>, - mpmc::Receiver>, - ), - >::new(); + let mut receives = + BTreeMap::>, mpmc::Receiver>)>::new(); let (broadcast_tx, broadcast_rx) = mpmc::unbounded::(); let (unjust_tx, unjust_rx) = mpmc::unbounded::(); - let (result_chan_tx, result_chan_rx) = mpmc::bounded::>>(N); + let (result_chan_tx, result_chan_rx) = mpmc::bounded::>>(N); let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N); let expected_initial_timers = N + test .value_delay @@ -223,7 +226,7 @@ fn test_qbft(test: Test) { thread::scope(|s| { for i in 1..=N as i64 { - let (sender, receiver) = mpmc::bounded::>(1000); + let (sender, receiver) = mpmc::bounded::>(1000); let broadcast_tx = broadcast_tx.clone(); receives.insert(i, (sender.clone(), receiver.clone())); @@ -403,7 +406,7 @@ fn test_qbft(test: Test) { } } - let mut results = BTreeMap::>::new(); + let mut results = BTreeMap::>::new(); let mut count = 0; let mut decided = false; let mut done = 0; @@ -739,8 +742,8 @@ fn new_msg( value_source: i64, pr: i64, pv: i64, - justify: Option<&Vec>>, -) -> Msg { + justify: Option<&Vec>>, +) -> Msg { let msgs = match justify { None => vec![], Some(justify) => justify @@ -790,7 +793,7 @@ fn new_round_change_quorum(round: i64, pr: i64, pv: i64) -> Vec { // messages. fn bcast( broadcast: mpmc::Sender, - msg: Msg, + msg: Msg, jitter_ms: i32, clock: FakeClock, trace: Trace, @@ -893,7 +896,7 @@ fn fanout_broadcast( broadcasts } -fn random_msg(instance: i64, peer_idx: i64, seed: u64, counter: u64) -> Msg { +fn random_msg(instance: i64, peer_idx: i64, seed: u64, counter: u64) -> Msg { let message_types = [ MSG_PRE_PREPARE, MSG_PREPARE, @@ -915,12 +918,12 @@ fn random_msg(instance: i64, peer_idx: i64, seed: u64, counter: u64) -> Msg, target: i64, stream_id: u64) -> f64 { +fn deterministic_unit(seed: u64, msg: &Msg, target: i64, stream_id: u64) -> f64 { let value = deterministic_msg_u64(seed, msg, target, stream_id) >> 11; value as f64 / ((1_u64 << 53) as f64) } -fn deterministic_msg_u64(seed: u64, msg: &Msg, target: i64, stream_id: u64) -> u64 { +fn deterministic_msg_u64(seed: u64, msg: &Msg, target: i64, stream_id: u64) -> u64 { let mut value = splitmix64(seed ^ stream_id); value = splitmix64(value ^ i64_to_u64(msg.type_().0)); value = splitmix64(value ^ i64_to_u64(msg.instance())); @@ -967,7 +970,7 @@ struct TestMsg { justify: Option>, } -impl SomeMsg for TestMsg { +impl SomeMsg for TestMsg { fn type_(&self) -> MessageType { self.msg_type } @@ -1000,12 +1003,12 @@ impl SomeMsg for TestMsg { self.pv } - fn justification(&self) -> Vec> { + fn justification(&self) -> Vec> { match self.justify { None => vec![], Some(ref j) => j .iter() - .map(|j| Arc::new(j.clone()) as Msg) + .map(|j| Arc::new(j.clone()) as Msg) .collect(), } } @@ -1206,7 +1209,7 @@ fn fuzzed(start_delay_secs: Option, decide_round: i32, random_round: bool) }); } -fn noop_definition() -> Definition { +fn noop_definition() -> Definition { Definition { is_leader: Box::new(|_, _, _| false), new_timer: Box::new(|_| (mpmc::never(), Box::new(|| {}))), @@ -1220,7 +1223,7 @@ fn noop_definition() -> Definition { } } -fn noop_transport() -> Transport { +fn noop_transport() -> Transport { Transport { broadcast: Box::new(|_, _, _, _, _, _, _, _, _| Ok(())), receive: mpmc::never(), @@ -1252,7 +1255,7 @@ fn noop_transport() -> Transport { #[test_case(21, 14, 6 ; "n21")] #[test_case(22, 15, 7 ; "n22")] fn formulas(n: i64, q: i64, f: i64) { - let d = Definition:: { + let d = Definition:: { nodes: n, ..noop_definition() }; @@ -1301,7 +1304,7 @@ fn duplicate_pre_prepare_rules() { const NO_LEADER: i64 = 1; const LEADER: i64 = 2; - let new_preprepare = |round: i64| -> Msg { + let new_preprepare = |round: i64| -> Msg { new_msg( MSG_PRE_PREPARE, 0, @@ -1345,7 +1348,7 @@ fn duplicate_pre_prepare_rules() { return_err.send(result).expect(WRITE_CHAN_ERR); }); - let (r_chan_tx, r_chan_rx) = mpmc::bounded::>(2); + let (r_chan_tx, r_chan_rx) = mpmc::bounded::>(2); r_chan_tx.send(new_preprepare(1)).expect(WRITE_CHAN_ERR); r_chan_tx.send(new_preprepare(2)).expect(WRITE_CHAN_ERR); @@ -1442,7 +1445,7 @@ fn run_cancels_under_hot_receive_stream() { def.nodes = 4; def.fifo_limit = 100; - let (receive_tx, receive_rx) = mpmc::bounded::>(1024); + let (receive_tx, receive_rx) = mpmc::bounded::>(1024); let transport = Transport { receive: receive_rx, ..noop_transport() @@ -1969,7 +1972,7 @@ fn run_parent_cancel_during_compare_does_not_prepare() { ); } -fn buffer_by_source(msgs: &[Msg]) -> HashMap>> { +fn buffer_by_source(msgs: &[Msg]) -> HashMap>> { let mut buffer = HashMap::new(); for msg in msgs { buffer @@ -2026,15 +2029,10 @@ fn test_qbft_chain_split(test: ChainSplitTest) { // Keep peer iteration deterministic. These fake-clock tests assert exact // rounds, and broadcast fanout order affects which node observes quorums // first when tests run in parallel. - let mut receives = BTreeMap::< - i64, - ( - mpmc::Sender>, - mpmc::Receiver>, - ), - >::new(); - let (broadcast_tx, broadcast_rx) = mpmc::unbounded::>(); - let (result_chan_tx, result_chan_rx) = mpmc::bounded::>>(N); + let mut receives = + BTreeMap::>, mpmc::Receiver>)>::new(); + let (broadcast_tx, broadcast_rx) = mpmc::unbounded::>(); + let (result_chan_tx, result_chan_rx) = mpmc::bounded::>>(N); let (run_chan_tx, run_chan_rx) = mpmc::bounded::<(i64, RunOutcome)>(N); let instance = 0; @@ -2136,7 +2134,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { thread::scope(|s| { for i in 1..=N as i64 { - let (sender, receiver) = mpmc::bounded::>(1000); + let (sender, receiver) = mpmc::bounded::>(1000); receives.insert(i, (sender.clone(), receiver.clone())); let broadcast_tx = broadcast_tx.clone(); let trace = trace.clone(); @@ -2208,7 +2206,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { } } - let mut results = BTreeMap::>::new(); + let mut results = BTreeMap::>::new(); let mut count = 0; let mut decided = false; let mut done = 0; diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 956f17e8..a22ae4b3 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -38,13 +38,19 @@ type Result = std::result::Result; // does not need sub-millisecond latency, and idle instances should stay cheap. const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(50); -type CompareFn = dyn Fn( +pub trait QbftTypes: 'static { + type Instance: Send + Sync + 'static; + type Value: Eq + Hash + Default + 'static; + type Compare: Clone + Send + Sync + Default + 'static; +} + +type CompareFn = dyn Fn( /* ct */ &CancellationToken, - /* qcommit */ &Msg, - /* input_value_source_ch */ &mpmc::Receiver, - /* input_value_source */ &C, + /* qcommit */ &Msg, + /* input_value_source_ch */ &mpmc::Receiver<::Compare>, + /* input_value_source */ &::Compare, /* return_err */ &mpmc::Sender>, - /* return_value */ &mpmc::Sender, + /* return_value */ &mpmc::Sender<::Compare>, ) + Send + Sync + 'static; @@ -80,10 +86,7 @@ pub enum QbftError { } /// Abstracts the transport layer between processes in the consensus system. -pub struct Transport -where - V: PartialEq, -{ +pub struct Transport { /// Broadcast sends a message with the provided fields to all other /// processes in the system (including this process). /// @@ -92,13 +95,13 @@ where dyn Fn( /* ct */ &CancellationToken, /* type_ */ MessageType, - /* instance */ &I, + /* instance */ &T::Instance, /* source */ i64, /* round */ i64, - /* value */ &V, + /* value */ &T::Value, /* pr */ i64, - /* pv */ &V, - /* justification */ Option<&Vec>>, + /* pv */ &T::Value, + /* justification */ Option<&Vec>>, ) -> Result<()> + Send + Sync, @@ -106,19 +109,23 @@ where /// Receive returns a stream of messages received /// from other processes in the system (including this process). - pub receive: mpmc::Receiver>, + pub receive: mpmc::Receiver>, } /// Defines the consensus system parameters that are external to the qbft /// algorithm. This remains constant across multiple instances of consensus /// (calls to `run`). -pub struct Definition -where - V: PartialEq, -{ +pub struct Definition { /// A deterministic leader election function. - pub is_leader: - Box bool + Send + Sync>, + pub is_leader: Box< + dyn Fn( + /* instance */ &T::Instance, + /* round */ i64, + /* process */ i64, + ) -> bool + + Send + + Sync, + >, /// Returns a new timer channel and stop function for the round pub new_timer: Box< @@ -130,15 +137,15 @@ where /// Charon parity hook called when the leader proposes a value. The core /// algorithm only runs this callback and reacts to its result; any /// value-source comparison policy belongs to the caller. - pub compare: sync::Arc>, + pub compare: sync::Arc>, /// Called when consensus has been reached on a value. pub decide: Box< dyn Fn( /* ct */ &CancellationToken, - /* instance */ &I, - /* value */ &V, - /* qcommit */ &Vec>, + /* instance */ &T::Instance, + /* value */ &T::Value, + /* qcommit */ &Vec>, ) + Send + Sync, >, @@ -147,10 +154,10 @@ where /// It includes the rule that triggered it and all received round messages. pub log_upon_rule: Box< dyn Fn( - /* instance */ &I, + /* instance */ &T::Instance, /* process */ i64, /* round */ i64, - /* msg */ &Msg, + /* msg */ &Msg, /* upon_rule */ UponRule, ) + Send + Sync, @@ -158,19 +165,19 @@ where /// Allows debug logging of round changes. pub log_round_change: Box< dyn Fn( - /* instance */ &I, + /* instance */ &T::Instance, /* process */ i64, /* round */ i64, /* new_round */ i64, /* upon_rule */ UponRule, - /* msgs */ &Vec>, + /* msgs */ &Vec>, ) + Send + Sync, >, /// Allows debug logging of unjust messages. pub log_unjust: - Box) + Send + Sync>, + Box) + Send + Sync>, /// Total number of nodes/processes participating in consensus. pub nodes: i64, @@ -179,10 +186,7 @@ where pub fifo_limit: i64, } -impl Definition -where - V: PartialEq, -{ +impl Definition { /// Quorum count for the system. /// See IBFT 2.0 paper for correct formula: pub fn quorum(&self) -> i64 { @@ -235,35 +239,32 @@ impl Display for MessageType { } /// Defines the inter process messages. -pub trait SomeMsg: Send + Sync + fmt::Debug -where - V: PartialEq, -{ +pub trait SomeMsg: Send + Sync + fmt::Debug { /// Type of the message. fn type_(&self) -> MessageType; /// Consensus instance. - fn instance(&self) -> I; + fn instance(&self) -> T::Instance; /// Process that sent the message. fn source(&self) -> i64; /// The round the message pertains to. fn round(&self) -> i64; /// The value being proposed, usually a hash. - fn value(&self) -> V; + fn value(&self) -> T::Value; /// Usually the value that was hashed and is returned in `value`. - fn value_source(&self) -> Result; + fn value_source(&self) -> Result; /// The justified prepared round. fn prepared_round(&self) -> i64; /// The justified prepared value. - fn prepared_value(&self) -> V; + fn prepared_value(&self) -> T::Value; /// Set of messages that explicitly justifies this message. - fn justification(&self) -> Vec>; + fn justification(&self) -> Vec>; /// Cast as `Any` to allow downcasting. fn as_any(&self) -> &dyn any::Any; } /// Alias for any `Msg` implementation tracked by reference counting. -pub type Msg = sync::Arc>; +pub type Msg = sync::Arc>; /// Defines the event based rules that are triggered when messages are received. #[derive(PartialEq, Eq, Hash, Clone, Copy)] @@ -311,36 +312,31 @@ struct DedupKey { /// remains active so it can answer later `ROUND_CHANGE` messages with `DECIDED` /// catch-up messages. /// -/// `I` identifies the consensus instance, `V` is the comparable proposed value, -/// and `C` is the application value used by `Definition::compare` to compare a +/// `T::Instance` identifies the consensus instance, `T::Value` is the comparable proposed value, +/// and `T::Compare` is the application value used by `Definition::compare` to compare a /// leader proposal with the local input source. -pub fn run( +pub fn run( ct: &CancellationToken, - d: &Definition, - t: &Transport, - instance: &I, + d: &Definition, + t: &Transport, + instance: &T::Instance, process: i64, - mut input_value_ch: mpmc::Receiver, - input_value_source_ch: mpmc::Receiver, -) -> Result<()> -where - I: Send + Sync + 'static, - V: PartialEq + Eq + Hash + Default + 'static, - C: Clone + Send + Sync + Default + 'static, -{ + mut input_value_ch: mpmc::Receiver, + input_value_source_ch: mpmc::Receiver, +) -> Result<()> { validate_definition(d)?; // === State === let round: Cell = Cell::new(1); - let input_value: RefCell = RefCell::new(Default::default()); - let mut input_value_source: C = Default::default(); - let ppj_cache: RefCell>>> = RefCell::new(None); // Cached pre-prepare justification for the current round (`None` value is unset). + let input_value: RefCell = RefCell::new(Default::default()); + let mut input_value_source: T::Compare = Default::default(); + let ppj_cache: RefCell>>> = RefCell::new(None); // Cached pre-prepare justification for the current round (`None` value is unset). let prepared_round: Cell = Cell::new(0); - let prepared_value: RefCell = RefCell::new(Default::default()); + let prepared_value: RefCell = RefCell::new(Default::default()); let mut compare_failure_round: i64 = 0; - let prepared_justification: RefCell>>> = RefCell::new(None); - let mut q_commit: Option>> = None; - let buffer: RefCell>>> = RefCell::new(HashMap::new()); + let prepared_justification: RefCell>>> = RefCell::new(None); + let mut q_commit: Option>> = None; + let buffer: RefCell>>> = RefCell::new(HashMap::new()); let dedup_rules: RefCell> = RefCell::new(HashMap::new()); let mut timer_chan: mpmc::Receiver; let mut stop_timer: Box; @@ -349,7 +345,7 @@ where // Broadcasts a non-ROUND-CHANGE message for current round. let broadcast_msg = - |type_: MessageType, value: &V, justification: Option<&Vec>>| { + |type_: MessageType, value: &T::Value, justification: Option<&Vec>>| { (t.broadcast)( ct, type_, @@ -380,7 +376,7 @@ where // Broadcasts a PRE-PREPARE message with current state // and our own input value if present, otherwise it caches the justification // to be used when the input value becomes available. - let broadcast_own_pre_prepare = |justification: Vec>| { + let broadcast_own_pre_prepare = |justification: Vec>| { if ppj_cache.borrow().is_some() { panic!("bug: justification cache must be none") } @@ -395,7 +391,7 @@ where }; // Adds a message to each process' FIFO queue - let buffer_msg = |msg: &Msg| { + let buffer_msg = |msg: &Msg| { let mut b = buffer.borrow_mut(); let fifo = b.entry(msg.source()).or_default(); @@ -623,10 +619,7 @@ where } } -fn validate_definition(d: &Definition) -> Result<()> -where - V: PartialEq, -{ +fn validate_definition(d: &Definition) -> Result<()> { if d.nodes <= 0 { return Err(QbftError::InvalidNodes { nodes: d.nodes }); } @@ -643,21 +636,16 @@ where /// The callback may cache the local input source and return success/failure. /// This helper only preserves that callback result and lets the round timer win /// if the callback blocks. -fn compare( +fn compare( ct: &CancellationToken, - d: &Definition, - msg: &Msg, - input_value_source_ch: &mpmc::Receiver, - input_value_source: C, + d: &Definition, + msg: &Msg, + input_value_source_ch: &mpmc::Receiver, + input_value_source: T::Compare, timer_chan: &mpmc::Receiver, -) -> (C, Result<()>) -where - I: Send + Sync + 'static, - V: PartialEq + 'static, - C: Clone + Send + Sync + 'static, -{ +) -> (T::Compare, Result<()>) { let (compare_err_tx, mut compare_err_rx) = mpmc::bounded::>(1); - let (compare_value_tx, mut compare_value_rx) = mpmc::bounded::(1); + let (compare_value_tx, mut compare_value_rx) = mpmc::bounded::(1); // d.Compare has 2 roles: // 1. Read from the `input_value_source_ch` (if `input_value_source` is empty). @@ -748,13 +736,10 @@ where } /// Returns all messages from the provided round. -fn extract_round_messages( - buffer: &HashMap>>, +fn extract_round_messages( + buffer: &HashMap>>, round: i64, -) -> Vec> -where - V: PartialEq, -{ +) -> Vec> { let mut resp = vec![]; for msgs in buffer.values() { @@ -770,17 +755,14 @@ where /// Returns the rule triggered upon receipt of the last message and its /// justifications. -fn classify( - d: &Definition, - instance: &I, +fn classify( + d: &Definition, + instance: &T::Instance, round: i64, process: i64, - buffer: &HashMap>>, - msg: &Msg, -) -> (UponRule, Option>>) -where - V: Eq + Hash + Default, -{ + buffer: &HashMap>>, + msg: &Msg, +) -> (UponRule, Option>>) { match msg.type_() { MSG_DECIDED => (UPON_JUSTIFIED_DECIDED, Some(msg.justification())), MSG_PRE_PREPARE => { @@ -861,10 +843,7 @@ where /// Implements algorithm 3:6 and returns the next minimum round from received /// round change messages. -fn next_min_round(d: &Definition, frc: &Vec>, round: i64) -> i64 -where - V: PartialEq, -{ +fn next_min_round(d: &Definition, frc: &Vec>, round: i64) -> i64 { // Get all RoundChange messages with round (rj) higher than current round (ri) if (frc.len() as i64) < d.faulty() + 1 { panic!("bug: Frc too short"); @@ -889,15 +868,12 @@ where } /// Returns true if message is justified or if it does not need justification. -fn is_justified( - d: &Definition, - instance: &I, - msg: &Msg, +fn is_justified( + d: &Definition, + instance: &T::Instance, + msg: &Msg, compare_failure_round: i64, -) -> bool -where - V: Eq + Hash + Default, -{ +) -> bool { match msg.type_() { MSG_PRE_PREPARE => is_justified_pre_prepare(d, instance, msg, compare_failure_round), MSG_PREPARE => true, @@ -910,10 +886,7 @@ where /// Returns true if the ROUND_CHANGE message's prepared round and value is /// justified. -fn is_justified_round_change(d: &Definition, msg: &Msg) -> bool -where - V: PartialEq + Default, -{ +fn is_justified_round_change(d: &Definition, msg: &Msg) -> bool { if msg.type_() != MSG_ROUND_CHANGE { panic!("bug: not a round change message"); } @@ -942,7 +915,7 @@ where return false; } - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source::(vec![]); for prepare in prepares { if !uniq(&prepare) { return false; @@ -964,20 +937,14 @@ where true } -fn valid_round_change_prepared_round(msg: &Msg) -> bool -where - V: PartialEq, -{ +fn valid_round_change_prepared_round(msg: &Msg) -> bool { let pr = msg.prepared_round(); pr >= 0 && pr < msg.round() } /// Returns true if the decided message is justified by quorum COMMIT messages /// of identical round and value. -fn is_justified_decided(d: &Definition, msg: &Msg) -> bool -where - V: PartialEq, -{ +fn is_justified_decided(d: &Definition, msg: &Msg) -> bool { if msg.type_() != MSG_DECIDED { panic!("bug: not a decided message"); } @@ -996,15 +963,12 @@ where } /// Returns true if the PRE-PREPARE message is justified. -fn is_justified_pre_prepare( - d: &Definition, - instance: &I, - msg: &Msg, +fn is_justified_pre_prepare( + d: &Definition, + instance: &T::Instance, + msg: &Msg, compare_failure_round: i64, -) -> bool -where - V: Eq + Hash + Default, -{ +) -> bool { if msg.type_() != MSG_PRE_PREPARE { panic!("bug: not a preprepare message"); } @@ -1032,14 +996,11 @@ where /// Implements algorithm 4:1 and returns true and pv if the messages contains a /// justified quorum ROUND_CHANGEs (Qrc). -fn contains_justified_qrc( - d: &Definition, - justification: &Vec>, +fn contains_justified_qrc( + d: &Definition, + justification: &Vec>, round: i64, -) -> Option -where - V: Eq + Hash + Default, -{ +) -> Option { let qrc = filter_round_change(justification, round) .into_iter() .filter(valid_round_change_prepared_round) @@ -1089,17 +1050,14 @@ where /// Extracts the single justified Pr and Pv from quorum PREPARES in list of /// messages. It expects only one possible combination. -fn get_single_justified_pr_pv( - d: &Definition, - msgs: &Vec>, -) -> Option<(i64, V)> -where - V: Eq + Hash + Default, -{ +fn get_single_justified_pr_pv( + d: &Definition, + msgs: &Vec>, +) -> Option<(i64, T::Value)> { let mut pr: i64 = 0; - let mut pv: V = Default::default(); + let mut pv: T::Value = Default::default(); let mut count: i64 = 0; - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source::(vec![]); for msg in msgs { if msg.type_() != MSG_PREPARE { @@ -1128,14 +1086,11 @@ where } /// Implements algorithm 4:1 and returns a justified quorum ROUND_CHANGEs (Qrc) -fn get_justified_qrc( - d: &Definition, - all: &Vec>, +fn get_justified_qrc( + d: &Definition, + all: &Vec>, round: i64, -) -> Option>> -where - V: Eq + Hash + Default, -{ +) -> Option>> { if let (qrc, true) = quorum_null_prepared(d, all, round) { // Return any quorum null pv ROUND_CHANGE messages as Qrc. return Some(qrc); @@ -1149,11 +1104,11 @@ where for prepares in get_prepare_quorums(d, all) { // See if we have quorum ROUND-CHANGE with HIGHEST_PREPARED(qrc) == // prepares.Round. - let mut qrc: Vec> = vec![]; + let mut qrc: Vec> = vec![]; let mut has_highest_prepared = false; let pr = prepares[0].round(); let pv = prepares[0].value(); - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source::(vec![]); for rc in round_changes.iter() { if rc.prepared_round() > pr { @@ -1183,15 +1138,12 @@ where /// Returns true and Faulty+1 ROUND-CHANGE messages (Frc) with the rounds higher /// than the provided round. It returns the highest round per process in order /// to jump furthest. -fn get_fplus1_round_changes( - d: &Definition, - all: &Vec>, +fn get_fplus1_round_changes( + d: &Definition, + all: &Vec>, round: i64, -) -> Option>> -where - V: PartialEq, -{ - let mut highest_by_source = HashMap::>::new(); +) -> Option>> { + let mut highest_by_source = HashMap::>::new(); for msg in all { if msg.type_() != MSG_ROUND_CHANGE { @@ -1236,14 +1188,8 @@ where /// Returns all unique-source PREPARE quorums grouped by identical round and /// value. -fn get_prepare_quorums( - d: &Definition, - all: &Vec>, -) -> Vec>> -where - V: Eq + Hash, -{ - let mut sets = HashMap::, HashMap>>::new(); +fn get_prepare_quorums(d: &Definition, all: &Vec>) -> Vec>> { + let mut sets = HashMap::, HashMap>>::new(); for msg in all { if msg.type_() != MSG_PREPARE { @@ -1281,14 +1227,11 @@ where /// Implements condition J1 and returns Qrc and true if a quorum /// of round changes messages (Qrc) for the round have null prepared round and /// value. -fn quorum_null_prepared( - d: &Definition, - all: &Vec>, +fn quorum_null_prepared( + d: &Definition, + all: &Vec>, round: i64, -) -> (Vec>, bool) -where - V: PartialEq + Default, -{ +) -> (Vec>, bool) { let null_pr = Default::default(); let null_pv = Some(&Default::default()); @@ -1301,41 +1244,32 @@ where } /// Returns the messages matching the type and value. -fn filter_by_round_and_value( - msgs: &Vec>, +fn filter_by_round_and_value( + msgs: &Vec>, message_type: MessageType, round: i64, - value: V, -) -> Vec> -where - V: PartialEq, -{ + value: T::Value, +) -> Vec> { filter_msgs(msgs, message_type, round, Some(&value), None, None) } /// Returns all round change messages for the provided round. -fn filter_round_change(msgs: &Vec>, round: i64) -> Vec> -where - V: PartialEq, -{ - filter_msgs::(msgs, MSG_ROUND_CHANGE, round, None, None, None) +fn filter_round_change(msgs: &Vec>, round: i64) -> Vec> { + filter_msgs::(msgs, MSG_ROUND_CHANGE, round, None, None, None) } /// Returns one message per process matching the provided type and round and /// optional value, pr, pv. -fn filter_msgs( - msgs: &Vec>, +fn filter_msgs( + msgs: &Vec>, message_type: MessageType, round: i64, - value: Option<&V>, + value: Option<&T::Value>, pr: Option, - pv: Option<&V>, -) -> Vec> -where - V: PartialEq, -{ + pv: Option<&T::Value>, +) -> Vec> { let mut resp = Vec::new(); - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source::(vec![]); for msg in msgs { if message_type != msg.type_() { @@ -1374,11 +1308,8 @@ where /// Produce a vector containing all the buffered messages as well as all their /// justifications. -fn flatten(buffer: &HashMap>>) -> Vec> -where - V: PartialEq, -{ - let mut resp: Vec> = Vec::new(); +fn flatten(buffer: &HashMap>>) -> Vec> { + let mut resp: Vec> = Vec::new(); for msgs in buffer.values() { for msg in msgs { @@ -1397,12 +1328,9 @@ where /// Construct a function that returns true if the message is from a unique /// source. -fn uniq_source(vec: Vec>) -> Box) -> bool> -where - V: PartialEq, -{ +fn uniq_source(vec: Vec>) -> Box) -> bool> { let mut s = vec.iter().map(|msg| msg.source()).collect::>(); - Box::new(move |msg: &Msg| { + Box::new(move |msg: &Msg| { let source = msg.source(); if s.contains(&source) { false From 132bc4f845e1593d3e11bf17846fc66f3d6645ed Mon Sep 17 00:00:00 2001 From: Quang Le Date: Wed, 20 May 2026 23:41:38 +0700 Subject: [PATCH 30/34] refactor: group QBFT callbacks into typed request/logger structs --- crates/core/src/qbft/internal_test.rs | 635 ++++++++++++++++---------- crates/core/src/qbft/mod.rs | 160 +++---- 2 files changed, 466 insertions(+), 329 deletions(-) diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 365abf78..66745340 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -160,67 +160,69 @@ fn test_qbft(test: Test) { }, compare: { let pending_compares = pending_compares.clone(); - Arc::new(move |_, _, _, _, return_err, _| { + Arc::new(move |req| { let _guard = PendingCompareGuard { pending_compares: pending_compares.clone(), }; - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + req.return_err.send(Ok(())).expect(WRITE_CHAN_ERR); }) }, nodes: N as i64, fifo_limit: FIFO_LIMIT as i64, - log_round_change: { - let clock = clock.clone(); - let trace = trace.clone(); - let pending_timer_actions = pending_timer_actions.clone(); - - Box::new(move |_, process, round, new_round, upon_rule, _| { - if upon_rule == UPON_ROUND_TIMEOUT { - complete_timer_action(&pending_timer_actions); - } + logger: QbftLogger { + round_change: { + let clock = clock.clone(); + let trace = trace.clone(); + let pending_timer_actions = pending_timer_actions.clone(); + + Box::new(move |_, process, round, new_round, upon_rule, _| { + if upon_rule == UPON_ROUND_TIMEOUT { + complete_timer_action(&pending_timer_actions); + } - trace.push(format!( - "{:?} - {}@{} change to {} ~= {}", - clock.elapsed(), - process, - round, - new_round, - upon_rule, - )); - }) - }, - log_unjust: { - let trace = trace.clone(); - let unjust_tx = unjust_tx.clone(); - let fuzz = test.fuzz; - Box::new(move |_, process, msg| { - let line = format!("Unjust: process={} msg={:?}", process, msg); - trace.push(line.clone()); - if !fuzz { - unjust_tx.send(line).expect(WRITE_CHAN_ERR); - } - }) - }, - log_upon_rule: { - let clock = clock.clone(); - let trace = trace.clone(); - let pending_compares = pending_compares.clone(); - Box::new(move |_, process, round, msg, upon_rule| { - if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { - pending_compares.fetch_add(1, Ordering::SeqCst); - } + trace.push(format!( + "{:?} - {}@{} change to {} ~= {}", + clock.elapsed(), + process, + round, + new_round, + upon_rule, + )); + }) + }, + unjust: { + let trace = trace.clone(); + let unjust_tx = unjust_tx.clone(); + let fuzz = test.fuzz; + Box::new(move |_, process, msg| { + let line = format!("Unjust: process={} msg={:?}", process, msg); + trace.push(line.clone()); + if !fuzz { + unjust_tx.send(line).expect(WRITE_CHAN_ERR); + } + }) + }, + upon_rule: { + let clock = clock.clone(); + let trace = trace.clone(); + let pending_compares = pending_compares.clone(); + Box::new(move |_, process, round, msg, upon_rule| { + if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { + pending_compares.fetch_add(1, Ordering::SeqCst); + } - trace.push(format!( - "{:?} {} => {}@{} -> {}@{} ~= {}", - clock.elapsed(), - msg.source(), - msg.type_(), - msg.round(), - process, - round, - upon_rule, - )); - }) + trace.push(format!( + "{:?} {} => {}@{} -> {}@{} ~= {}", + clock.elapsed(), + msg.source(), + msg.type_(), + msg.round(), + process, + round, + upon_rule, + )); + }) + }, }, }); @@ -235,55 +237,53 @@ fn test_qbft(test: Test) { let clock = clock.clone(); let trace = trace.clone(); - Box::new( - move |_, type_, instance, source, round, value, pr, pv, justification| { - if round > MAX_ROUND as i64 { - return Err(QbftError::MaxRoundReached); - } - - if type_ == MSG_COMMIT && round <= test.commits_after.into() { - trace.push(format!( - "{:?} {} dropping commit for round {}", - clock.elapsed(), - source, - round - )); - return Ok(()); - } + Box::new(move |req| { + if req.round > MAX_ROUND as i64 { + return Err(QbftError::MaxRoundReached); + } + if req.type_ == MSG_COMMIT && req.round <= test.commits_after.into() { trace.push(format!( - "{:?} {} => {}@{}", + "{:?} {} dropping commit for round {}", clock.elapsed(), - source, - type_, - round + req.source, + req.round )); + return Ok(()); + } - let msg = new_msg( - type_, - *instance, - source, - round, - *value, - *value, - pr, - *pv, - justification, - ); - sender.send(msg.clone()).expect(WRITE_CHAN_ERR); - - bcast( - broadcast_tx.clone(), - msg.clone(), - test.bcast_jitter_ms, - clock.clone(), - trace.clone(), - seed, - ); + trace.push(format!( + "{:?} {} => {}@{}", + clock.elapsed(), + req.source, + req.type_, + req.round + )); - Ok(()) - }, - ) + let msg = new_msg( + req.type_, + *req.instance, + req.source, + req.round, + *req.value, + *req.value, + req.prepared_round, + *req.prepared_value, + req.justification, + ); + sender.send(msg.clone()).expect(WRITE_CHAN_ERR); + + bcast( + broadcast_tx.clone(), + msg.clone(), + test.bcast_jitter_ms, + clock.clone(), + trace.clone(), + seed, + ); + + Ok(()) + }) }, receive: receiver.clone(), }; @@ -1214,22 +1214,166 @@ fn noop_definition() -> Definition { is_leader: Box::new(|_, _, _| false), new_timer: Box::new(|_| (mpmc::never(), Box::new(|| {}))), decide: Box::new(|_, _, _, _| {}), - compare: Arc::new(|_, _, _, _, _, _| {}), + compare: Arc::new(|_| {}), nodes: 0, fifo_limit: 0, - log_round_change: Box::new(|_, _, _, _, _, _| {}), - log_unjust: Box::new(|_, _, _| {}), - log_upon_rule: Box::new(|_, _, _, _, _| {}), + logger: QbftLogger { + round_change: Box::new(|_, _, _, _, _, _| {}), + unjust: Box::new(|_, _, _| {}), + upon_rule: Box::new(|_, _, _, _, _| {}), + }, } } fn noop_transport() -> Transport { Transport { - broadcast: Box::new(|_, _, _, _, _, _, _, _, _| Ok(())), + broadcast: Box::new(|_| Ok(())), receive: mpmc::never(), } } +#[derive(Debug, PartialEq, Eq)] +struct BroadcastRecord { + canceled: bool, + type_: MessageType, + instance: i64, + source: i64, + round: i64, + value: i64, + prepared_round: i64, + prepared_value: i64, + justification_len: usize, +} + +#[test] +fn broadcast_request_maps_protocol_fields() { + let (receive_tx, receive_rx) = mpmc::bounded::>(4); + receive_tx + .send(new_msg(MSG_PRE_PREPARE, 0, 1, 1, 7, 7, 0, 0, None)) + .expect(WRITE_CHAN_ERR); + for source in 1..=3 { + receive_tx + .send(new_msg(MSG_PREPARE, 0, source, 1, 7, 7, 0, 0, None)) + .expect(WRITE_CHAN_ERR); + } + + let cts = CancellationTokenSource::new(); + let token = cts.token().clone(); + let (record_tx, record_rx) = mpmc::unbounded(); + let transport = Transport { + broadcast: Box::new(move |req| { + record_tx + .send(BroadcastRecord { + canceled: req.ct.is_canceled(), + type_: req.type_, + instance: *req.instance, + source: req.source, + round: req.round, + value: *req.value, + prepared_round: req.prepared_round, + prepared_value: *req.prepared_value, + justification_len: req.justification.map_or(0, Vec::len), + }) + .expect(WRITE_CHAN_ERR); + if req.type_ == MSG_COMMIT { + cts.cancel(); + } + Ok(()) + }), + receive: receive_rx, + }; + let mut def = noop_definition(); + def.nodes = 4; + def.fifo_limit = 100; + def.is_leader = Box::new(|_, _, process| process == 1); + def.compare = Arc::new(|req| req.return_err.send(Ok(())).expect(WRITE_CHAN_ERR)); + + let (_input_tx, input_rx) = mpmc::bounded::(1); + let (_source_tx, source_rx) = mpmc::bounded::(1); + assert!(matches!( + qbft::run(&token, &def, &transport, &0, 2, input_rx, source_rx), + Err(QbftError::ContextCanceled) + )); + assert_eq!( + record_rx.try_iter().collect::>(), + vec![ + BroadcastRecord { + canceled: false, + type_: MSG_PREPARE, + instance: 0, + source: 2, + round: 1, + value: 7, + prepared_round: 0, + prepared_value: 0, + justification_len: 0, + }, + BroadcastRecord { + canceled: false, + type_: MSG_COMMIT, + instance: 0, + source: 2, + round: 1, + value: 7, + prepared_round: 0, + prepared_value: 0, + justification_len: 0, + }, + ] + ); + + let (timer_tx, timer_rx) = mpmc::bounded(1); + timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); + let cts = CancellationTokenSource::new(); + let token = cts.token().clone(); + let (record_tx, record_rx) = mpmc::unbounded(); + let transport = Transport { + broadcast: Box::new(move |req| { + record_tx + .send(BroadcastRecord { + canceled: req.ct.is_canceled(), + type_: req.type_, + instance: *req.instance, + source: req.source, + round: req.round, + value: *req.value, + prepared_round: req.prepared_round, + prepared_value: *req.prepared_value, + justification_len: req.justification.map_or(0, Vec::len), + }) + .expect(WRITE_CHAN_ERR); + cts.cancel(); + Ok(()) + }), + receive: mpmc::never(), + }; + let mut def = noop_definition(); + def.nodes = 4; + def.fifo_limit = 100; + def.new_timer = Box::new(move |_| (timer_rx.clone(), Box::new(|| {}))); + + let (_input_tx, input_rx) = mpmc::bounded::(1); + let (_source_tx, source_rx) = mpmc::bounded::(1); + assert!(matches!( + qbft::run(&token, &def, &transport, &0, 2, input_rx, source_rx), + Err(QbftError::ContextCanceled) + )); + assert_eq!( + record_rx.try_iter().collect::>(), + vec![BroadcastRecord { + canceled: false, + type_: MSG_ROUND_CHANGE, + instance: 0, + source: 2, + round: 2, + value: 0, + prepared_round: 0, + prepared_value: 0, + justification_len: 0, + }] + ); +} + // Tests quorum/faulty formulas across node counts. // Expect quorum and tolerated-fault counts to match the Charon formula. #[test_case(1, 1, 0 ; "n1")] @@ -1323,7 +1467,7 @@ fn duplicate_pre_prepare_rules() { def.nodes = 4; def.fifo_limit = 100; def.is_leader = Box::new(|_, _, process| process == LEADER); - def.log_upon_rule = Box::new(move |_, _, round, msg, upon_rule| { + def.logger.upon_rule = Box::new(move |_, _, round, msg, upon_rule| { println!("UponRule: rule={} round={} ", upon_rule, msg.round()); assert!(upon_rule == UPON_JUSTIFIED_PRE_PREPARE); @@ -1339,13 +1483,13 @@ fn duplicate_pre_prepare_rules() { panic!("unexpected round {}", round); }); - def.compare = Arc::new(|_, msg, _, _, return_err, _| { - let result = if msg.round() == 1 { + def.compare = Arc::new(|req| { + let result = if req.qcommit.round() == 1 { Err(QbftError::CompareError) } else { Ok(()) }; - return_err.send(result).expect(WRITE_CHAN_ERR); + req.return_err.send(result).expect(WRITE_CHAN_ERR); }); let (r_chan_tx, r_chan_rx) = mpmc::bounded::>(2); @@ -1695,8 +1839,8 @@ fn compare_success_error_cached_value_source_and_timeout() { let (_vs_tx, vs_rx) = mpmc::bounded::(1); let timer = mpmc::never(); let mut def = noop_definition(); - def.compare = Arc::new(|_, _, _, _, return_err, _| { - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + def.compare = Arc::new(|req| { + req.return_err.send(Ok(())).expect(WRITE_CHAN_ERR); }); assert!(matches!( compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), @@ -1704,8 +1848,8 @@ fn compare_success_error_cached_value_source_and_timeout() { )); let mut def = noop_definition(); - def.compare = Arc::new(|_, _, _, _, return_err, _| { - let return_err = return_err.clone(); + def.compare = Arc::new(|req| { + let return_err = req.return_err.clone(); thread::spawn(move || { thread::sleep(Duration::from_millis(10)); return_err.send(Ok(())).expect(WRITE_CHAN_ERR); @@ -1717,8 +1861,8 @@ fn compare_success_error_cached_value_source_and_timeout() { )); let mut def = noop_definition(); - def.compare = Arc::new(|_, _, _, _, return_err, _| { - return_err + def.compare = Arc::new(|req| { + req.return_err .send(Err(QbftError::CompareError)) .expect(WRITE_CHAN_ERR); }); @@ -1730,19 +1874,17 @@ fn compare_success_error_cached_value_source_and_timeout() { let (vs_tx, vs_rx) = mpmc::bounded::(1); vs_tx.send(42).expect(WRITE_CHAN_ERR); let mut def = noop_definition(); - def.compare = Arc::new( - |_, _, input_value_source_ch, input_value_source, return_err, return_value| { - let cached = if *input_value_source == 0 { - let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); - return_value.send(value).expect(WRITE_CHAN_ERR); - value - } else { - *input_value_source - }; - assert_eq!(42, cached); - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); - }, - ); + def.compare = Arc::new(|req| { + let cached = if *req.input_value_source == 0 { + let value = req.input_value_source_ch.recv().expect(READ_CHAN_ERR); + req.return_value.send(value).expect(WRITE_CHAN_ERR); + value + } else { + *req.input_value_source + }; + assert_eq!(42, cached); + req.return_err.send(Ok(())).expect(WRITE_CHAN_ERR); + }); assert!(matches!( compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), (42, Ok(())) @@ -1751,21 +1893,19 @@ fn compare_success_error_cached_value_source_and_timeout() { let (vs_tx, vs_rx) = mpmc::bounded::(1); vs_tx.send(43).expect(WRITE_CHAN_ERR); let mut def = noop_definition(); - def.compare = Arc::new( - |_, _, input_value_source_ch, input_value_source, return_err, return_value| { - let cached = if *input_value_source == 0 { - let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); - return_value.send(value).expect(WRITE_CHAN_ERR); - value - } else { - *input_value_source - }; - assert_eq!(43, cached); - return_err - .send(Err(QbftError::CompareError)) - .expect(WRITE_CHAN_ERR); - }, - ); + def.compare = Arc::new(|req| { + let cached = if *req.input_value_source == 0 { + let value = req.input_value_source_ch.recv().expect(READ_CHAN_ERR); + req.return_value.send(value).expect(WRITE_CHAN_ERR); + value + } else { + *req.input_value_source + }; + assert_eq!(43, cached); + req.return_err + .send(Err(QbftError::CompareError)) + .expect(WRITE_CHAN_ERR); + }); assert!(matches!( compare(cts.token(), &def, &msg, &vs_rx, 0, &timer), (43, Err(QbftError::CompareError)) @@ -1774,9 +1914,9 @@ fn compare_success_error_cached_value_source_and_timeout() { let (timer_tx, timer_rx) = mpmc::bounded(1); timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); let mut def = noop_definition(); - def.compare = Arc::new(|_, _, _, _, return_err, _| { + def.compare = Arc::new(|req| { thread::sleep(Duration::from_millis(20)); - let _ = return_err.send(Ok(())); + let _ = req.return_err.send(Ok(())); }); assert!(matches!( compare(cts.token(), &def, &msg, &vs_rx, 44, &timer_rx), @@ -1795,11 +1935,11 @@ fn compare_timeout_does_not_wait_for_blocked_callback() { timer_tx.send(time::Instant::now()).expect(WRITE_CHAN_ERR); let mut def = noop_definition(); - def.compare = Arc::new(|ct, _, _, _, return_err, _| { - while !ct.is_canceled() { + def.compare = Arc::new(|req| { + while !req.ct.is_canceled() { thread::sleep(Duration::from_millis(1)); } - let _ = return_err.send(Ok(())); + let _ = req.return_err.send(Ok(())); }); let (result_tx, result_rx) = mpmc::bounded(1); @@ -1829,7 +1969,7 @@ fn compare_callback_exit_without_status_waits_for_timer() { let (callback_done_tx, callback_done_rx) = mpmc::bounded(1); let mut def = noop_definition(); - def.compare = Arc::new(move |_, _, _, _, _, _| { + def.compare = Arc::new(move |_| { callback_done_tx.send(()).expect(WRITE_CHAN_ERR); }); @@ -1871,13 +2011,13 @@ fn compare_parent_cancel_cancels_callback_token() { let (token_cancelled_tx, token_cancelled_rx) = mpmc::bounded(1); let mut def = noop_definition(); - def.compare = Arc::new(move |ct, _, _, _, return_err, _| { + def.compare = Arc::new(move |req| { compare_started_tx.send(()).expect(WRITE_CHAN_ERR); - while !ct.is_canceled() { + while !req.ct.is_canceled() { thread::sleep(Duration::from_millis(1)); } token_cancelled_tx.send(()).expect(WRITE_CHAN_ERR); - let _ = return_err.send(Ok(())); + let _ = req.return_err.send(Ok(())); }); let (result_tx, result_rx) = mpmc::bounded(1); @@ -1923,19 +2063,19 @@ fn run_parent_cancel_during_compare_does_not_prepare() { def.nodes = 4; def.fifo_limit = 100; def.is_leader = Box::new(|_, _, process| process == LEADER); - def.compare = Arc::new(move |ct, _, _, _, return_err, _| { + def.compare = Arc::new(move |req| { compare_started_tx.send(()).expect(WRITE_CHAN_ERR); - while !ct.is_canceled() { + while !req.ct.is_canceled() { thread::sleep(Duration::from_millis(1)); } compare_cancelled_tx.send(()).expect(WRITE_CHAN_ERR); - let _ = return_err.send(Ok(())); + let _ = req.return_err.send(Ok(())); }); let (broadcast_tx, broadcast_rx) = mpmc::bounded(1); let transport = Transport { - broadcast: Box::new(move |_, type_, _, _, _, _, _, _, _| { - broadcast_tx.send(type_).expect(WRITE_CHAN_ERR); + broadcast: Box::new(move |req| { + broadcast_tx.send(req.type_).expect(WRITE_CHAN_ERR); Ok(()) }), receive: receive_rx, @@ -2052,83 +2192,78 @@ fn test_qbft_chain_split(test: ChainSplitTest) { }, compare: { let pending_compares = pending_compares.clone(); - Arc::new( - move |_, - qcommit, - input_value_source_ch, - input_value_source, - return_err, - return_value| { - let _guard = PendingCompareGuard { - pending_compares: pending_compares.clone(), - }; - let leader_value_source = qcommit.value_source().expect("value source"); - let local = if *input_value_source == 0 { - let value = input_value_source_ch.recv().expect(READ_CHAN_ERR); - return_value.send(value).expect(WRITE_CHAN_ERR); - value - } else { - *input_value_source - }; - - if leader_value_source != local { - return_err - .send(Err(QbftError::CompareError)) - .expect(WRITE_CHAN_ERR); - return; - } + Arc::new(move |req| { + let _guard = PendingCompareGuard { + pending_compares: pending_compares.clone(), + }; + let leader_value_source = req.qcommit.value_source().expect("value source"); + let local = if *req.input_value_source == 0 { + let value = req.input_value_source_ch.recv().expect(READ_CHAN_ERR); + req.return_value.send(value).expect(WRITE_CHAN_ERR); + value + } else { + *req.input_value_source + }; - return_err.send(Ok(())).expect(WRITE_CHAN_ERR); - }, - ) - }, - nodes: N as i64, - fifo_limit: FIFO_LIMIT, - log_round_change: { - let clock = clock.clone(); - let trace = trace.clone(); - let pending_timer_actions = pending_timer_actions.clone(); - Box::new(move |_, process, round, new_round, upon_rule, _| { - if upon_rule == UPON_ROUND_TIMEOUT { - complete_timer_action(&pending_timer_actions); + if leader_value_source != local { + req.return_err + .send(Err(QbftError::CompareError)) + .expect(WRITE_CHAN_ERR); + return; } - trace.push(format!( - "{:?} - {}@{} change to {} ~= {}", - clock.elapsed(), - process, - round, - new_round, - upon_rule - )); - }) - }, - log_unjust: { - let trace = trace.clone(); - Box::new(move |_, process, msg| { - trace.push(format!("Unjust: process={} msg={:?}", process, msg)) + req.return_err.send(Ok(())).expect(WRITE_CHAN_ERR); }) }, - log_upon_rule: { - let clock = clock.clone(); - let trace = trace.clone(); - let pending_compares = pending_compares.clone(); - Box::new(move |_, process, round, msg, upon_rule| { - if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { - pending_compares.fetch_add(1, Ordering::SeqCst); - } + nodes: N as i64, + fifo_limit: FIFO_LIMIT, + logger: QbftLogger { + round_change: { + let clock = clock.clone(); + let trace = trace.clone(); + let pending_timer_actions = pending_timer_actions.clone(); + Box::new(move |_, process, round, new_round, upon_rule, _| { + if upon_rule == UPON_ROUND_TIMEOUT { + complete_timer_action(&pending_timer_actions); + } - trace.push(format!( - "{:?} {} => {}@{} -> {}@{} ~= {}", - clock.elapsed(), - msg.source(), - msg.type_(), - msg.round(), - process, - round, - upon_rule - )); - }) + trace.push(format!( + "{:?} - {}@{} change to {} ~= {}", + clock.elapsed(), + process, + round, + new_round, + upon_rule + )); + }) + }, + unjust: { + let trace = trace.clone(); + Box::new(move |_, process, msg| { + trace.push(format!("Unjust: process={} msg={:?}", process, msg)) + }) + }, + upon_rule: { + let clock = clock.clone(); + let trace = trace.clone(); + let pending_compares = pending_compares.clone(); + Box::new(move |_, process, round, msg, upon_rule| { + if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { + pending_compares.fetch_add(1, Ordering::SeqCst); + } + + trace.push(format!( + "{:?} {} => {}@{} -> {}@{} ~= {}", + clock.elapsed(), + msg.source(), + msg.type_(), + msg.round(), + process, + round, + upon_rule + )); + }) + }, }, }); @@ -2141,35 +2276,33 @@ fn test_qbft_chain_split(test: ChainSplitTest) { let clock = clock.clone(); let transport = Transport { - broadcast: Box::new( - move |_, type_, instance, source, round, value, pr, pv, justification| { - if round > MAX_ROUND { - return Err(QbftError::MaxRoundReached); - } + broadcast: Box::new(move |req| { + if req.round > MAX_ROUND { + return Err(QbftError::MaxRoundReached); + } - trace.push(format!( - "{:?} {} => {}@{}", - clock.elapsed(), - source, - type_, - round - )); - let msg = new_msg( - type_, - *instance, - source, - round, - *value, - *value, - pr, - *pv, - justification, - ); - sender.send(msg.clone()).expect(WRITE_CHAN_ERR); - broadcast_tx.send(msg).expect(WRITE_CHAN_ERR); - Ok(()) - }, - ), + trace.push(format!( + "{:?} {} => {}@{}", + clock.elapsed(), + req.source, + req.type_, + req.round + )); + let msg = new_msg( + req.type_, + *req.instance, + req.source, + req.round, + *req.value, + *req.value, + req.prepared_round, + *req.prepared_value, + req.justification, + ); + sender.send(msg.clone()).expect(WRITE_CHAN_ERR); + broadcast_tx.send(msg).expect(WRITE_CHAN_ERR); + Ok(()) + }), receive: receiver, }; diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index a22ae4b3..3007cc14 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -44,16 +44,8 @@ pub trait QbftTypes: 'static { type Compare: Clone + Send + Sync + Default + 'static; } -type CompareFn = dyn Fn( - /* ct */ &CancellationToken, - /* qcommit */ &Msg, - /* input_value_source_ch */ &mpmc::Receiver<::Compare>, - /* input_value_source */ &::Compare, - /* return_err */ &mpmc::Sender>, - /* return_value */ &mpmc::Sender<::Compare>, - ) + Send - + Sync - + 'static; +type BroadcastFn = dyn for<'a> Fn(BroadcastRequest<'a, T>) -> Result<()> + Send + Sync; +type CompareFn = dyn for<'a> Fn(CompareRequest<'a, T>) + Send + Sync + 'static; #[derive(Debug, thiserror::Error)] pub enum QbftError { @@ -91,25 +83,61 @@ pub struct Transport { /// processes in the system (including this process). /// /// Note that an error exits the algorithm. - pub broadcast: Box< + pub broadcast: Box>, + + /// Receive returns a stream of messages received + /// from other processes in the system (including this process). + pub receive: mpmc::Receiver>, +} + +/// Input passed to `Transport::broadcast`. +pub struct BroadcastRequest<'a, T: QbftTypes> { + pub ct: &'a CancellationToken, + pub type_: MessageType, + pub instance: &'a T::Instance, + pub source: i64, + pub round: i64, + pub value: &'a T::Value, + pub prepared_round: i64, + pub prepared_value: &'a T::Value, + pub justification: Option<&'a Vec>>, +} + +/// Input passed to `Definition::compare`. +pub struct CompareRequest<'a, T: QbftTypes> { + pub ct: &'a CancellationToken, + pub qcommit: &'a Msg, + pub input_value_source_ch: &'a mpmc::Receiver, + pub input_value_source: &'a T::Compare, + pub return_err: &'a mpmc::Sender>, + pub return_value: &'a mpmc::Sender, +} + +/// Debug hooks for QBFT state transitions and rejected messages. +pub struct QbftLogger { + pub upon_rule: Box< dyn Fn( - /* ct */ &CancellationToken, - /* type_ */ MessageType, /* instance */ &T::Instance, - /* source */ i64, + /* process */ i64, /* round */ i64, - /* value */ &T::Value, - /* pr */ i64, - /* pv */ &T::Value, - /* justification */ Option<&Vec>>, - ) -> Result<()> - + Send + /* msg */ &Msg, + /* upon_rule */ UponRule, + ) + Send + Sync, >, - - /// Receive returns a stream of messages received - /// from other processes in the system (including this process). - pub receive: mpmc::Receiver>, + pub round_change: Box< + dyn Fn( + /* instance */ &T::Instance, + /* process */ i64, + /* round */ i64, + /* new_round */ i64, + /* upon_rule */ UponRule, + /* msgs */ &Vec>, + ) + Send + + Sync, + >, + pub unjust: + Box) + Send + Sync>, } /// Defines the consensus system parameters that are external to the qbft @@ -150,34 +178,8 @@ pub struct Definition { + Sync, >, - /// Allows debug logging of triggered upon rules on message receipt. - /// It includes the rule that triggered it and all received round messages. - pub log_upon_rule: Box< - dyn Fn( - /* instance */ &T::Instance, - /* process */ i64, - /* round */ i64, - /* msg */ &Msg, - /* upon_rule */ UponRule, - ) + Send - + Sync, - >, - /// Allows debug logging of round changes. - pub log_round_change: Box< - dyn Fn( - /* instance */ &T::Instance, - /* process */ i64, - /* round */ i64, - /* new_round */ i64, - /* upon_rule */ UponRule, - /* msgs */ &Vec>, - ) + Send - + Sync, - >, - - /// Allows debug logging of unjust messages. - pub log_unjust: - Box) + Send + Sync>, + /// Debug logging callbacks. + pub logger: QbftLogger, /// Total number of nodes/processes participating in consensus. pub nodes: i64, @@ -346,31 +348,33 @@ pub fn run( // Broadcasts a non-ROUND-CHANGE message for current round. let broadcast_msg = |type_: MessageType, value: &T::Value, justification: Option<&Vec>>| { - (t.broadcast)( + let default_value = T::Value::default(); + (t.broadcast)(BroadcastRequest { ct, type_, instance, - process, - round.get(), + source: process, + round: round.get(), value, - 0, - &Default::default(), + prepared_round: 0, + prepared_value: &default_value, justification, - ) + }) }; // Broadcasts a ROUND-CHANGE message with current state. let broadcast_round_change = || { - (t.broadcast)( + let default_value = T::Value::default(); + (t.broadcast)(BroadcastRequest { ct, - MSG_ROUND_CHANGE, + type_: MSG_ROUND_CHANGE, instance, - process, - round.get(), - &Default::default(), - prepared_round.get(), - &prepared_value.borrow(), - prepared_justification.borrow().as_ref(), - ) + source: process, + round: round.get(), + value: &default_value, + prepared_round: prepared_round.get(), + prepared_value: &prepared_value.borrow(), + justification: prepared_justification.borrow().as_ref(), + }) }; // Broadcasts a PRE-PREPARE message with current state @@ -414,7 +418,7 @@ pub fn run( return; } - (d.log_round_change)( + (d.logger.round_change)( instance, process, round.get(), @@ -477,7 +481,7 @@ pub fn run( // Drop unjust messages if !is_justified(d, instance, &msg, compare_failure_round) { - (d.log_unjust)(instance, process, msg); + (d.logger.unjust)(instance, process, msg); continue; } @@ -490,7 +494,7 @@ pub fn run( continue; } - (d.log_upon_rule)(instance, process, round.get(), &msg, rule); + (d.logger.upon_rule)(instance, process, round.get(), &msg, rule); match rule { // Algorithm 2:1 @@ -668,14 +672,14 @@ fn compare( // caller-provided compare callback ignores cancellation and never reports, // it may outlive this call. thread::spawn(move || { - (compare)( - &compare_ct, - &msg, - &input_value_source_ch, - &input_value_source, - &compare_err_tx, - &compare_value_tx, - ); + (compare)(CompareRequest { + ct: &compare_ct, + qcommit: &msg, + input_value_source_ch: &input_value_source_ch, + input_value_source: &input_value_source, + return_err: &compare_err_tx, + return_value: &compare_value_tx, + }); }); loop { From 1038890a938bc952e459d9c9bb87cd217f3d7f87 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Wed, 20 May 2026 23:48:14 +0700 Subject: [PATCH 31/34] refactor: add ClientRecord --- crates/core/src/qbft/fake_clock.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index 6bc32495..cfa15c76 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -28,7 +28,13 @@ struct FakeClockInner { now: Instant, last_id: usize, cancelled: bool, - clients: BTreeMap, Instant, TimerPriority)>, + clients: BTreeMap, +} + +struct ClientRecord { + sender: mpmc::Sender, + deadline: Instant, + priority: TimerPriority, } impl FakeClock { @@ -80,7 +86,14 @@ impl FakeClock { let deadline = inner.now + duration; inner.last_id += 1; - inner.clients.insert(id, (tx, deadline, priority)); + inner.clients.insert( + id, + ClientRecord { + sender: tx, + deadline, + priority, + }, + ); id }; @@ -124,9 +137,14 @@ impl FakeClock { inner.now += duration; let now = inner.now; - for (&id, (ch, deadline, priority)) in &inner.clients { - if *deadline <= now { - expired.push((id, *deadline, *priority, ch.clone())); + for (&id, record) in &inner.clients { + if record.deadline <= now { + expired.push(( + id, + record.deadline, + record.priority, + record.sender.clone(), + )); } } From 8500d010d3c1e986e9f8abbfcd651fc538501433 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Thu, 21 May 2026 10:50:50 +0700 Subject: [PATCH 32/34] refactor: simplify internal_tests --- crates/core/src/qbft/fake_clock.rs | 7 +- crates/core/src/qbft/internal_test.rs | 179 +++++++++++++------------- crates/core/src/qbft/mod.rs | 7 +- 3 files changed, 98 insertions(+), 95 deletions(-) diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index cfa15c76..bbeeefd2 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -139,12 +139,7 @@ impl FakeClock { for (&id, record) in &inner.clients { if record.deadline <= now { - expired.push(( - id, - record.deadline, - record.priority, - record.sender.clone(), - )); + expired.push((id, record.deadline, record.priority, record.sender.clone())); } } diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 66745340..8b1edf70 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -6,7 +6,7 @@ use crate::qbft::{ use cancellation::CancellationTokenSource; use crossbeam::channel as mpmc; use std::{ - collections::{BTreeMap, HashMap}, + collections::{BTreeMap, HashMap, VecDeque}, fmt::Write as _, panic::{self, AssertUnwindSafe}, sync::{ @@ -31,6 +31,7 @@ const TEST_STREAM_MSG_ROUND: u64 = 11; const TEST_STREAM_MSG_VALUE: u64 = 12; const TEST_STREAM_MSG_PREPARED_ROUND: u64 = 13; const TEST_STREAM_MSG_PREPARED_VALUE: u64 = 14; +const TRACE_DUMP_LIMIT: usize = 200; const TEST_WAIT_TIMEOUT: Duration = Duration::from_secs(1); // Wall-clock guard catches lack of harness progress. Fake time still controls // protocol progress, so slow-but-progressing parallel runs should not fail. @@ -42,9 +43,9 @@ type TestMsgRef = Msg; struct TestQbft; impl QbftTypes for TestQbft { + type Compare = i64; type Instance = i64; type Value = i64; - type Compare = i64; } struct PendingCompareGuard { @@ -661,7 +662,7 @@ fn test_qbft(test: Test) { } #[derive(Clone, Default)] -struct Trace(Arc>>); +struct Trace(Arc>>); impl Trace { fn new() -> Self { @@ -669,14 +670,17 @@ impl Trace { } fn push(&self, line: String) { - self.0.lock().unwrap().push(line); + let mut lines = self.0.lock().unwrap(); + if lines.len() == TRACE_DUMP_LIMIT { + lines.pop_front(); + } + lines.push_back(line); } fn dump(&self) -> String { let lines = self.0.lock().unwrap(); - let start = lines.len().saturating_sub(200); let mut out = String::new(); - for line in &lines[start..] { + for line in lines.iter() { let _ = writeln!(out, "{line}"); } out @@ -703,6 +707,10 @@ fn outcome_is_error(outcome: &RunOutcome, expected: fn(&QbftError) -> bool) -> b matches!(outcome, Ok(Err(err)) if expected(err)) } +fn assert_upon_rule(expected: UponRule, actual: UponRule) { + assert!(actual == expected, "want {expected}, got {actual}"); +} + fn test_seed(test: &Test) -> u64 { let mut seed = seed_from_label(TEST_SEED_LABEL); seed ^= test.instance as u64; @@ -840,10 +848,9 @@ fn deliver_ready_broadcasts( .iter() .take_while(|delayed| delayed.deliver_at <= clock.elapsed()) .count(); - let ready = pending.drain(..ready_count).collect::>(); - ready - .into_iter() + pending + .drain(..ready_count) .map(|delayed| fanout_broadcast(receives, drop_prob, seed, trace, clock, delayed.msg)) .sum() } @@ -1554,29 +1561,41 @@ fn idle_run_returns_when_cancelled() { )); } -// Tests definition validation at the `run` boundary. -// Expect invalid node count and FIFO limit to return typed errors. -#[test_case(0, 1, true ; "invalid_nodes")] -#[test_case(4, 0, false ; "invalid_fifo_limit")] -fn invalid_definition_rejected(nodes: i64, fifo_limit: i64, invalid_nodes: bool) { +fn run_with_definition(def: &Definition) -> Result<()> { let cts = CancellationTokenSource::new(); let transport = noop_transport(); let (_input_tx, input_rx) = mpmc::bounded::(1); let (_source_tx, source_rx) = mpmc::bounded::(1); + qbft::run(cts.token(), def, &transport, &0, 1, input_rx, source_rx) +} + +// Tests definition validation at the `run` boundary. +// Expect invalid node count to return a typed error. +#[test] +fn invalid_nodes_rejected() { let mut def = noop_definition(); - def.nodes = nodes; - def.fifo_limit = fifo_limit; - - let result = qbft::run(cts.token(), &def, &transport, &0, 1, input_rx, source_rx); - if invalid_nodes { - assert!(matches!(result, Err(QbftError::InvalidNodes { nodes: 0 }))); - } else { - assert!(matches!( - result, - Err(QbftError::InvalidFifoLimit { fifo_limit: 0 }) - )); - } + def.nodes = 0; + def.fifo_limit = 1; + + assert!(matches!( + run_with_definition(&def), + Err(QbftError::InvalidNodes { nodes: 0 }) + )); +} + +// Tests definition validation at the `run` boundary. +// Expect invalid FIFO limit to return a typed error. +#[test] +fn invalid_fifo_limit_rejected() { + let mut def = noop_definition(); + def.nodes = 4; + def.fifo_limit = 0; + + assert!(matches!( + run_with_definition(&def), + Err(QbftError::InvalidFifoLimit { fifo_limit: 0 }) + )); } // Tests cancellation under a continuously hot receive channel. @@ -1646,15 +1665,17 @@ fn classify_rules() { def.is_leader = Box::new(make_is_leader(4)); let preprepare = new_msg(MSG_PRE_PREPARE, 0, 1, 1, 1, 0, 0, 0, None); - assert!(classify(&def, &0, 1, 2, &HashMap::new(), &preprepare).0 == UPON_JUSTIFIED_PRE_PREPARE); + assert_upon_rule( + UPON_JUSTIFIED_PRE_PREPARE, + classify(&def, &0, 1, 2, &HashMap::new(), &preprepare).0, + ); - let prepares = vec![ - new_msg(MSG_PREPARE, 0, 1, 1, 2, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 2, 1, 2, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 3, 1, 2, 0, 0, 0, None), - ]; + let prepares = new_prepare_quorum(1, 2); let buffer = buffer_by_source(&prepares); - assert!(classify(&def, &0, 1, 2, &buffer, &prepares[2]).0 == UPON_QUORUM_PREPARES); + assert_upon_rule( + UPON_QUORUM_PREPARES, + classify(&def, &0, 1, 2, &buffer, &prepares[2]).0, + ); let commits = vec![ new_msg(MSG_COMMIT, 0, 1, 1, 2, 0, 0, 0, None), @@ -1662,7 +1683,10 @@ fn classify_rules() { new_msg(MSG_COMMIT, 0, 3, 1, 2, 0, 0, 0, None), ]; let buffer = buffer_by_source(&commits); - assert!(classify(&def, &0, 1, 2, &buffer, &commits[2]).0 == UPON_QUORUM_COMMITS); + assert_upon_rule( + UPON_QUORUM_COMMITS, + classify(&def, &0, 1, 2, &buffer, &commits[2]).0, + ); let future_round_changes = vec![ new_msg(MSG_ROUND_CHANGE, 0, 1, 3, 0, 0, 0, 0, None), @@ -1673,15 +1697,11 @@ fn classify_rules() { classify(&def, &0, 1, 2, &buffer, &future_round_changes[1]).0 == UPON_F_PLUS1_ROUND_CHANGES ); - let unjust_round_changes = vec![ - new_msg(MSG_ROUND_CHANGE, 0, 1, 1, 0, 0, 2, 9, None), - new_msg(MSG_ROUND_CHANGE, 0, 2, 1, 0, 0, 2, 9, None), - new_msg(MSG_ROUND_CHANGE, 0, 3, 1, 0, 0, 2, 9, None), - ]; + let unjust_round_changes = new_round_change_quorum(1, 2, 9); let buffer = buffer_by_source(&unjust_round_changes); - assert!( - classify(&def, &0, 1, 2, &buffer, &unjust_round_changes[2]).0 - == UPON_UNJUST_QUORUM_ROUND_CHANGES + assert_upon_rule( + UPON_UNJUST_QUORUM_ROUND_CHANGES, + classify(&def, &0, 1, 2, &buffer, &unjust_round_changes[2]).0, ); } @@ -1692,33 +1712,21 @@ fn classify_rules() { fn justified_qrc_j1_and_j2() { let mut def = noop_definition(); def.nodes = 4; - let j1 = vec![ - new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 0, 0, None), - new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 0, 0, None), - new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 0, 0, None), - ]; + let j1 = new_round_change_quorum(2, 0, 0); assert_eq!(Some(0), contains_justified_qrc(&def, &j1, 2)); assert_eq!(3, get_justified_qrc(&def, &j1, 2).unwrap().len()); - let j2 = vec![ + let mut j2 = vec![ new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 1, 7, None), new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 1, 7, None), new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 1, 1, 7, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 2, 1, 7, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 3, 1, 7, 0, 0, 0, None), ]; + j2.extend(new_prepare_quorum(1, 7)); assert_eq!(Some(7), contains_justified_qrc(&def, &j2, 2)); assert!(get_justified_qrc(&def, &j2, 2).unwrap().len() >= 6); - let invalid_pr = vec![ - new_msg(MSG_ROUND_CHANGE, 0, 1, 2, 0, 0, 2, 7, None), - new_msg(MSG_ROUND_CHANGE, 0, 2, 2, 0, 0, 2, 7, None), - new_msg(MSG_ROUND_CHANGE, 0, 3, 2, 0, 0, 2, 7, None), - new_msg(MSG_PREPARE, 0, 1, 2, 7, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 2, 2, 7, 0, 0, 0, None), - new_msg(MSG_PREPARE, 0, 3, 2, 7, 0, 0, 0, None), - ]; + let mut invalid_pr = new_round_change_quorum(2, 2, 7); + invalid_pr.extend(new_prepare_quorum(2, 7)); assert_eq!(None, contains_justified_qrc(&def, &invalid_pr, 2)); assert!(get_justified_qrc(&def, &invalid_pr, 2).is_none()); } @@ -2344,6 +2352,28 @@ fn test_qbft_chain_split(test: ChainSplitTest) { let mut decided = false; let mut done = 0; let mut last_progress = time::Instant::now(); + let chain_split_seed = seed_from_label(CHAIN_SPLIT_SEED_LABEL); + // The no-consensus halt case must reach round 11; using Go's 1ms tick + // here makes this Rust harness exceed its real-time guard, so only that + // halt path fast-forwards fake time. + let tick = if test.should_halt { + Duration::from_millis(100) + } else { + Duration::from_millis(1) + }; + let timeout_limit = if test.should_halt { + let max_round = u32::try_from(MAX_ROUND).expect("MAX_ROUND fits u32"); + let seconds = 1_u64 + .checked_shl( + max_round + .checked_add(1) + .expect("MAX_ROUND permits timeout limit"), + ) + .expect("MAX_ROUND permits timeout limit"); + Duration::from_secs(seconds) + } else { + Duration::from_secs(60) + }; loop { mpmc::select! { @@ -2355,13 +2385,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { continue; } out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); - if deterministic_unit( - seed_from_label(CHAIN_SPLIT_SEED_LABEL), - &msg, - *target, - TEST_STREAM_DUPLICATE, - ) < 0.1 - { + if deterministic_unit(chain_split_seed, &msg, *target, TEST_STREAM_DUPLICATE) < 0.1 { out_tx.send(msg.clone()).expect(WRITE_CHAN_ERR); } } @@ -2380,7 +2404,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { ); } - for commit in q_commit.clone() { + for commit in &q_commit { for previous in results.values() { if previous.value() != commit.value() { cts.cancel(); @@ -2420,7 +2444,7 @@ fn test_qbft_chain_split(test: ChainSplitTest) { ); } } - results.insert(commit.source(), commit); + results.insert(commit.source(), commit.clone()); } count += 1; if count == N { @@ -2478,26 +2502,9 @@ fn test_qbft_chain_split(test: ChainSplitTest) { // Matches the Go harness throttle; ordering correctness comes // from the pending-work barriers, not this duration. thread::sleep(Duration::from_micros(1)); - // The no-consensus halt case must reach round 11; using Go's - // 1ms tick here makes this Rust harness exceed its real-time - // guard, so only that halt path fast-forwards fake time. - let tick = if test.should_halt { - Duration::from_millis(100) - } else { - Duration::from_millis(1) - }; clock.advance_and_wait(tick, &pending_timer_actions); last_progress = time::Instant::now(); - let limit = if test.should_halt { - let max_round = u32::try_from(MAX_ROUND).expect("MAX_ROUND fits u32"); - let seconds = 1_u64 - .checked_shl(max_round.checked_add(1).expect("MAX_ROUND permits timeout limit")) - .expect("MAX_ROUND permits timeout limit"); - Duration::from_secs(seconds) - } else { - Duration::from_secs(60) - }; - if clock.elapsed() > limit { + if clock.elapsed() > timeout_limit { cts.cancel(); clock.cancel(); panic!("chain split hang: decided={decided} done={done} count={count} elapsed={:?}\n{}", clock.elapsed(), trace.dump()); diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 3007cc14..c6e6c768 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -314,9 +314,10 @@ struct DedupKey { /// remains active so it can answer later `ROUND_CHANGE` messages with `DECIDED` /// catch-up messages. /// -/// `T::Instance` identifies the consensus instance, `T::Value` is the comparable proposed value, -/// and `T::Compare` is the application value used by `Definition::compare` to compare a -/// leader proposal with the local input source. +/// `T::Instance` identifies the consensus instance, `T::Value` is the +/// comparable proposed value, and `T::Compare` is the application value used by +/// `Definition::compare` to compare a leader proposal with the local input +/// source. pub fn run( ct: &CancellationToken, d: &Definition, From 42a49131a57f0c1eef369878c1bf11a10d97508e Mon Sep 17 00:00:00 2001 From: Quang Le Date: Thu, 21 May 2026 10:59:49 +0700 Subject: [PATCH 33/34] refactor: simplify qbft mod.rs --- crates/core/src/qbft/mod.rs | 41 ++++++++++++------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index c6e6c768..4e9de637 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -340,7 +340,7 @@ pub fn run( let prepared_justification: RefCell>>> = RefCell::new(None); let mut q_commit: Option>> = None; let buffer: RefCell>>> = RefCell::new(HashMap::new()); - let dedup_rules: RefCell> = RefCell::new(HashMap::new()); + let dedup_rules: RefCell> = RefCell::new(HashSet::new()); let mut timer_chan: mpmc::Receiver; let mut stop_timer: Box; @@ -410,7 +410,7 @@ pub fn run( // change. let is_duplicated_rule = |upon_rule: UponRule, round: i64| { let k = DedupKey { upon_rule, round }; - dedup_rules.borrow_mut().insert(k, true).is_some() + !dedup_rules.borrow_mut().insert(k) }; // Updates round and clears the rule dedup state. @@ -429,7 +429,7 @@ pub fn run( ); round.set(new_round); - dedup_rules.replace(HashMap::new()); + dedup_rules.replace(HashSet::new()); ppj_cache.replace(None); }; @@ -920,7 +920,7 @@ fn is_justified_round_change(d: &Definition, msg: &Msg) -> b return false; } - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source(); for prepare in prepares { if !uniq(&prepare) { return false; @@ -1062,7 +1062,7 @@ fn get_single_justified_pr_pv( let mut pr: i64 = 0; let mut pv: T::Value = Default::default(); let mut count: i64 = 0; - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source(); for msg in msgs { if msg.type_() != MSG_PREPARE { @@ -1113,7 +1113,7 @@ fn get_justified_qrc( let mut has_highest_prepared = false; let pr = prepares[0].round(); let pv = prepares[0].value(); - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source(); for rc in round_changes.iter() { if rc.prepared_round() > pr { @@ -1218,12 +1218,7 @@ fn get_prepare_quorums(d: &Definition, all: &Vec>) -> Ve continue; } - let mut quorum = vec![]; - for (_, msg) in msgs { - quorum.push(msg); - } - - quorums.push(quorum); + quorums.push(msgs.into_values().collect()); } quorums @@ -1241,11 +1236,9 @@ fn quorum_null_prepared( let null_pv = Some(&Default::default()); let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv); + let has_quorum = justification.len() as i64 >= d.quorum(); - ( - justification.clone(), - justification.len() as i64 >= d.quorum(), - ) + (justification, has_quorum) } /// Returns the messages matching the type and value. @@ -1274,7 +1267,7 @@ fn filter_msgs( pv: Option<&T::Value>, ) -> Vec> { let mut resp = Vec::new(); - let mut uniq = uniq_source::(vec![]); + let mut uniq = uniq_source(); for msg in msgs { if message_type != msg.type_() { @@ -1333,17 +1326,9 @@ fn flatten(buffer: &HashMap>>) -> Vec> { /// Construct a function that returns true if the message is from a unique /// source. -fn uniq_source(vec: Vec>) -> Box) -> bool> { - let mut s = vec.iter().map(|msg| msg.source()).collect::>(); - Box::new(move |msg: &Msg| { - let source = msg.source(); - if s.contains(&source) { - false - } else { - s.insert(source); - true - } - }) +fn uniq_source() -> impl FnMut(&Msg) -> bool { + let mut sources = HashSet::new(); + move |msg: &Msg| sources.insert(msg.source()) } #[cfg(test)] From 70e3ce9a5b547770aba45d642b255019e89b445a Mon Sep 17 00:00:00 2001 From: Quang Le Date: Thu, 21 May 2026 13:29:40 +0700 Subject: [PATCH 34/34] refactor: remove skip clippy on qbft mod --- crates/core/src/qbft/callbacks.rs | 121 ++++++++++ crates/core/src/qbft/fake_clock.rs | 2 + crates/core/src/qbft/internal_test.rs | 152 ++++++++----- crates/core/src/qbft/mod.rs | 316 +++++++++++++++----------- 4 files changed, 396 insertions(+), 195 deletions(-) create mode 100644 crates/core/src/qbft/callbacks.rs diff --git a/crates/core/src/qbft/callbacks.rs b/crates/core/src/qbft/callbacks.rs new file mode 100644 index 00000000..5cba3815 --- /dev/null +++ b/crates/core/src/qbft/callbacks.rs @@ -0,0 +1,121 @@ +use super::{MessageType, Msg, QbftTypes, Result, UponRule}; +use cancellation::CancellationToken; +use crossbeam::channel as mpmc; +use std::time; + +pub(super) type BroadcastFn = + dyn for<'a> Fn(BroadcastRequest<'a, T>) -> Result<()> + Send + Sync; +pub(super) type CompareFn = dyn for<'a> Fn(CompareRequest<'a, T>) + Send + Sync + 'static; +pub(super) type UponRuleLoggerFn = dyn for<'a> Fn(UponRuleLog<'a, T>) + Send + Sync; +pub(super) type RoundChangeLoggerFn = dyn for<'a> Fn(RoundChangeLog<'a, T>) + Send + Sync; +pub(super) type UnjustLoggerFn = dyn for<'a> Fn(UnjustLog<'a, T>) + Send + Sync; +pub(super) type LeaderFn = dyn for<'a> Fn(LeaderRequest<'a, T>) -> bool + Send + Sync; +pub(super) type DecideFn = dyn for<'a> Fn(DecideRequest<'a, T>) + Send + Sync; + +/// Input passed to `Transport::broadcast`. +pub struct BroadcastRequest<'a, T: QbftTypes> { + /// Parent cancellation token. + pub ct: &'a CancellationToken, + /// Message type to broadcast. + pub type_: MessageType, + /// Consensus instance identifier. + pub instance: &'a T::Instance, + /// Sending process. + pub source: i64, + /// Message round. + pub round: i64, + /// Proposal value. + pub value: &'a T::Value, + /// Prepared round carried by ROUND-CHANGE messages. + pub prepared_round: i64, + /// Prepared value carried by ROUND-CHANGE messages. + pub prepared_value: &'a T::Value, + /// Optional justification piggybacked on the message. + pub justification: Option<&'a Vec>>, +} + +/// Input passed to `Definition::compare`. +pub struct CompareRequest<'a, T: QbftTypes> { + /// Compare-scoped cancellation token. + pub ct: &'a CancellationToken, + /// Proposed commit quorum message. + pub qcommit: &'a Msg, + /// Channel carrying the local compare value if it was not cached yet. + pub input_value_source_ch: &'a mpmc::Receiver, + /// Cached local compare value. + pub input_value_source: &'a T::Compare, + /// Channel used by the callback to return compare status. + pub return_err: &'a mpmc::Sender>, + /// Channel used by the callback to cache the local compare value. + pub return_value: &'a mpmc::Sender, +} + +/// Timer returned by `Definition::new_timer`. +pub struct Timer { + /// Channel that fires when the timer expires. + pub receive: mpmc::Receiver, + /// Stops the timer. + pub stop: Box, +} + +/// Input passed to `Definition::is_leader`. +pub struct LeaderRequest<'a, T: QbftTypes> { + /// Consensus instance identifier. + pub instance: &'a T::Instance, + /// Round being evaluated. + pub round: i64, + /// Process being evaluated. + pub process: i64, +} + +/// Input passed to `Definition::decide`. +pub struct DecideRequest<'a, T: QbftTypes> { + /// Parent cancellation token. + pub ct: &'a CancellationToken, + /// Consensus instance identifier. + pub instance: &'a T::Instance, + /// Decided value. + pub value: &'a T::Value, + /// Commit quorum justifying the decision. + pub qcommit: &'a Vec>, +} + +/// Input passed to `QbftLogger::upon_rule`. +pub struct UponRuleLog<'a, T: QbftTypes> { + /// Consensus instance identifier. + pub instance: &'a T::Instance, + /// Local process. + pub process: i64, + /// Current local round. + pub round: i64, + /// Message that triggered classification. + pub msg: &'a Msg, + /// Rule that fired. + pub upon_rule: UponRule, +} + +/// Input passed to `QbftLogger::round_change`. +pub struct RoundChangeLog<'a, T: QbftTypes> { + /// Consensus instance identifier. + pub instance: &'a T::Instance, + /// Local process. + pub process: i64, + /// Previous local round. + pub round: i64, + /// New local round. + pub new_round: i64, + /// Rule that caused the round change. + pub upon_rule: UponRule, + /// Messages from the previous round. + pub msgs: &'a Vec>, +} + +/// Input passed to `QbftLogger::unjust`. +pub struct UnjustLog<'a, T: QbftTypes> { + /// Consensus instance identifier. + pub instance: &'a T::Instance, + /// Local process. + pub process: i64, + /// Rejected message. + pub msg: Msg, +} diff --git a/crates/core/src/qbft/fake_clock.rs b/crates/core/src/qbft/fake_clock.rs index bbeeefd2..c1891fb5 100644 --- a/crates/core/src/qbft/fake_clock.rs +++ b/crates/core/src/qbft/fake_clock.rs @@ -1,3 +1,5 @@ +#![allow(clippy::arithmetic_side_effects)] + use crossbeam::channel as mpmc; use std::{ collections::BTreeMap, diff --git a/crates/core/src/qbft/internal_test.rs b/crates/core/src/qbft/internal_test.rs index 8b1edf70..a3113927 100644 --- a/crates/core/src/qbft/internal_test.rs +++ b/crates/core/src/qbft/internal_test.rs @@ -1,3 +1,12 @@ +#![allow( + clippy::arithmetic_side_effects, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::collapsible_if +)] + use crate::qbft::{ self, fake_clock::{FakeClock, TimerPriority}, @@ -150,13 +159,16 @@ fn test_qbft(test: Test) { Duration::from_secs(u64::pow(2, (round as u32) - 1)) }; - clock.new_timer(d) + let (receive, stop) = clock.new_timer(d); + Timer { receive, stop } }) }, decide: { let result_chan_tx = result_chan_tx.clone(); - Box::new(move |_, _, _, q_commit| { - result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); + Box::new(move |req| { + result_chan_tx + .send(req.qcommit.clone()) + .expect(WRITE_CHAN_ERR); }) }, compare: { @@ -176,18 +188,18 @@ fn test_qbft(test: Test) { let trace = trace.clone(); let pending_timer_actions = pending_timer_actions.clone(); - Box::new(move |_, process, round, new_round, upon_rule, _| { - if upon_rule == UPON_ROUND_TIMEOUT { + Box::new(move |req| { + if req.upon_rule == UPON_ROUND_TIMEOUT { complete_timer_action(&pending_timer_actions); } trace.push(format!( "{:?} - {}@{} change to {} ~= {}", clock.elapsed(), - process, - round, - new_round, - upon_rule, + req.process, + req.round, + req.new_round, + req.upon_rule, )); }) }, @@ -195,8 +207,8 @@ fn test_qbft(test: Test) { let trace = trace.clone(); let unjust_tx = unjust_tx.clone(); let fuzz = test.fuzz; - Box::new(move |_, process, msg| { - let line = format!("Unjust: process={} msg={:?}", process, msg); + Box::new(move |req| { + let line = format!("Unjust: process={} msg={:?}", req.process, req.msg); trace.push(line.clone()); if !fuzz { unjust_tx.send(line).expect(WRITE_CHAN_ERR); @@ -207,20 +219,20 @@ fn test_qbft(test: Test) { let clock = clock.clone(); let trace = trace.clone(); let pending_compares = pending_compares.clone(); - Box::new(move |_, process, round, msg, upon_rule| { - if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { + Box::new(move |req| { + if req.upon_rule == UPON_JUSTIFIED_PRE_PREPARE { pending_compares.fetch_add(1, Ordering::SeqCst); } trace.push(format!( "{:?} {} => {}@{} -> {}@{} ~= {}", clock.elapsed(), - msg.source(), - msg.type_(), - msg.round(), - process, - round, - upon_rule, + req.msg.source(), + req.msg.type_(), + req.msg.round(), + req.process, + req.round, + req.upon_rule, )); }) }, @@ -362,7 +374,11 @@ fn test_qbft(test: Test) { s.spawn(move || { _ = v_chan_tx_send.send(i); }); - } else if is_leader(&test.instance, 1, i) { + } else if is_leader(LeaderRequest { + instance: &test.instance, + round: 1, + process: i, + }) { let v_chan_tx_send = keep_value_sender .as_ref() .expect("value sender kept until run returns") @@ -545,7 +561,11 @@ fn test_qbft(test: Test) { ); } } else { // Otherwise check that leader value was used. - if !is_leader(&test.instance, commit.round(), commit.value()) { + if !is_leader(LeaderRequest { + instance: &test.instance, + round: commit.round(), + process: commit.value(), + }) { cts.cancel(); clock.cancel(); panic!( @@ -733,10 +753,8 @@ fn seed_from_label(label: &str) -> u64 { } /// Construct a leader election function. -fn make_is_leader(n: i64) -> impl Fn(&i64, i64, i64) -> bool + Clone { - move |instance: &i64, round: i64, process: i64| -> bool { - (instance + round).rem_euclid(n) == process - } +fn make_is_leader(n: i64) -> impl for<'a> Fn(LeaderRequest<'a, TestQbft>) -> bool + Clone { + move |req| (*req.instance + req.round).rem_euclid(n) == req.process } /// Returns a new message to be broadcast. @@ -1218,16 +1236,19 @@ fn fuzzed(start_delay_secs: Option, decide_round: i32, random_round: bool) fn noop_definition() -> Definition { Definition { - is_leader: Box::new(|_, _, _| false), - new_timer: Box::new(|_| (mpmc::never(), Box::new(|| {}))), - decide: Box::new(|_, _, _, _| {}), + is_leader: Box::new(|_| false), + new_timer: Box::new(|_| Timer { + receive: mpmc::never(), + stop: Box::new(|| {}), + }), + decide: Box::new(|_| {}), compare: Arc::new(|_| {}), nodes: 0, fifo_limit: 0, logger: QbftLogger { - round_change: Box::new(|_, _, _, _, _, _| {}), - unjust: Box::new(|_, _, _| {}), - upon_rule: Box::new(|_, _, _, _, _| {}), + round_change: Box::new(|_| {}), + unjust: Box::new(|_| {}), + upon_rule: Box::new(|_| {}), }, } } @@ -1292,7 +1313,7 @@ fn broadcast_request_maps_protocol_fields() { let mut def = noop_definition(); def.nodes = 4; def.fifo_limit = 100; - def.is_leader = Box::new(|_, _, process| process == 1); + def.is_leader = Box::new(|req| req.process == 1); def.compare = Arc::new(|req| req.return_err.send(Ok(())).expect(WRITE_CHAN_ERR)); let (_input_tx, input_rx) = mpmc::bounded::(1); @@ -1357,7 +1378,10 @@ fn broadcast_request_maps_protocol_fields() { let mut def = noop_definition(); def.nodes = 4; def.fifo_limit = 100; - def.new_timer = Box::new(move |_| (timer_rx.clone(), Box::new(|| {}))); + def.new_timer = Box::new(move |_| Timer { + receive: timer_rx.clone(), + stop: Box::new(|| {}), + }); let (_input_tx, input_rx) = mpmc::bounded::(1); let (_source_tx, source_rx) = mpmc::bounded::(1); @@ -1473,22 +1497,26 @@ fn duplicate_pre_prepare_rules() { let mut def = noop_definition(); def.nodes = 4; def.fifo_limit = 100; - def.is_leader = Box::new(|_, _, process| process == LEADER); - def.logger.upon_rule = Box::new(move |_, _, round, msg, upon_rule| { - println!("UponRule: rule={} round={} ", upon_rule, msg.round()); + def.is_leader = Box::new(|req| req.process == LEADER); + def.logger.upon_rule = Box::new(move |req| { + println!( + "UponRule: rule={} round={} ", + req.upon_rule, + req.msg.round() + ); - assert!(upon_rule == UPON_JUSTIFIED_PRE_PREPARE); + assert!(req.upon_rule == UPON_JUSTIFIED_PRE_PREPARE); - if msg.round() == 1 { + if req.msg.round() == 1 { return; } - if msg.round() == 2 { + if req.msg.round() == 2 { cts.cancel(); return; } - panic!("unexpected round {}", round); + panic!("unexpected round {}", req.round); }); def.compare = Arc::new(|req| { let result = if req.qcommit.round() == 1 { @@ -2070,7 +2098,7 @@ fn run_parent_cancel_during_compare_does_not_prepare() { let mut def = noop_definition(); def.nodes = 4; def.fifo_limit = 100; - def.is_leader = Box::new(|_, _, process| process == LEADER); + def.is_leader = Box::new(|req| req.process == LEADER); def.compare = Arc::new(move |req| { compare_started_tx.send(()).expect(WRITE_CHAN_ERR); while !req.ct.is_canceled() { @@ -2189,13 +2217,17 @@ fn test_qbft_chain_split(test: ChainSplitTest) { new_timer: { let clock = clock.clone(); Box::new(move |round| { - clock.new_timer(Duration::from_secs(u64::pow(2, (round as u32) - 1))) + let (receive, stop) = + clock.new_timer(Duration::from_secs(u64::pow(2, (round as u32) - 1))); + Timer { receive, stop } }) }, decide: { let result_chan_tx = result_chan_tx.clone(); - Box::new(move |_, _, _, q_commit| { - result_chan_tx.send(q_commit.clone()).expect(WRITE_CHAN_ERR); + Box::new(move |req| { + result_chan_tx + .send(req.qcommit.clone()) + .expect(WRITE_CHAN_ERR); }) }, compare: { @@ -2230,45 +2262,45 @@ fn test_qbft_chain_split(test: ChainSplitTest) { let clock = clock.clone(); let trace = trace.clone(); let pending_timer_actions = pending_timer_actions.clone(); - Box::new(move |_, process, round, new_round, upon_rule, _| { - if upon_rule == UPON_ROUND_TIMEOUT { + Box::new(move |req| { + if req.upon_rule == UPON_ROUND_TIMEOUT { complete_timer_action(&pending_timer_actions); } trace.push(format!( "{:?} - {}@{} change to {} ~= {}", clock.elapsed(), - process, - round, - new_round, - upon_rule + req.process, + req.round, + req.new_round, + req.upon_rule )); }) }, unjust: { let trace = trace.clone(); - Box::new(move |_, process, msg| { - trace.push(format!("Unjust: process={} msg={:?}", process, msg)) + Box::new(move |req| { + trace.push(format!("Unjust: process={} msg={:?}", req.process, req.msg)) }) }, upon_rule: { let clock = clock.clone(); let trace = trace.clone(); let pending_compares = pending_compares.clone(); - Box::new(move |_, process, round, msg, upon_rule| { - if upon_rule == UPON_JUSTIFIED_PRE_PREPARE { + Box::new(move |req| { + if req.upon_rule == UPON_JUSTIFIED_PRE_PREPARE { pending_compares.fetch_add(1, Ordering::SeqCst); } trace.push(format!( "{:?} {} => {}@{} -> {}@{} ~= {}", clock.elapsed(), - msg.source(), - msg.type_(), - msg.round(), - process, - round, - upon_rule + req.msg.source(), + req.msg.type_(), + req.msg.round(), + req.process, + req.round, + req.upon_rule )); }) }, diff --git a/crates/core/src/qbft/mod.rs b/crates/core/src/qbft/mod.rs index 4e9de637..554fcfb6 100644 --- a/crates/core/src/qbft/mod.rs +++ b/crates/core/src/qbft/mod.rs @@ -10,16 +10,6 @@ //! - No domain-specific dependencies. //! - Explicit justifications. -// TODO: Remove these checks -#![allow(missing_docs)] -#![allow(clippy::type_complexity)] -#![allow(clippy::collapsible_if)] -#![allow(clippy::cast_sign_loss)] -#![allow(clippy::cast_precision_loss)] -#![allow(clippy::cast_possible_wrap)] -#![allow(clippy::cast_possible_truncation)] -#![allow(clippy::arithmetic_side_effects)] - use cancellation::{CancellationToken, CancellationTokenSource}; use crossbeam::channel as mpmc; use std::{ @@ -31,6 +21,16 @@ use std::{ sync, thread, time, }; +mod callbacks; +use callbacks::{ + BroadcastFn, CompareFn, DecideFn, LeaderFn, RoundChangeLoggerFn, UnjustLoggerFn, + UponRuleLoggerFn, +}; +pub use callbacks::{ + BroadcastRequest, CompareRequest, DecideRequest, LeaderRequest, RoundChangeLog, Timer, + UnjustLog, UponRuleLog, +}; + type Result = std::result::Result; // The `cancellation` crate is callback-based, not channel-based, so it cannot @@ -38,41 +38,58 @@ type Result = std::result::Result; // does not need sub-millisecond latency, and idle instances should stay cheap. const CANCELLATION_POLL_INTERVAL: time::Duration = time::Duration::from_millis(50); +/// Associated types used by a QBFT instance. pub trait QbftTypes: 'static { + /// Consensus instance identifier. type Instance: Send + Sync + 'static; + /// Consensus value. type Value: Eq + Hash + Default + 'static; + /// Application value used by the compare callback. type Compare: Clone + Send + Sync + Default + 'static; } -type BroadcastFn = dyn for<'a> Fn(BroadcastRequest<'a, T>) -> Result<()> + Send + Sync; -type CompareFn = dyn for<'a> Fn(CompareRequest<'a, T>) + Send + Sync + 'static; - +/// Errors returned by the QBFT core. #[derive(Debug, thiserror::Error)] pub enum QbftError { + /// Round timer expired before compare completed. #[error("Timeout")] TimeoutError, + /// Leader proposal failed application-level comparison. #[error("Compare leader value with local value failed")] CompareError, + /// Compare returned an error variant that core does not expect. #[error("bug: expected only comparison or timeout error, got {0}")] UnexpectedCompareError(Box), + /// Parent cancellation token was canceled. #[error("context canceled")] ContextCanceled, + /// Test or caller configured maximum round was reached. #[error("Maximum round reached")] MaxRoundReached, + /// Own input value was the null/default value. #[error("Zero input value not supported")] ZeroInputValue, + /// Node count must be positive. #[error("invalid node count: must be greater than zero, got {nodes}")] - InvalidNodes { nodes: i64 }, + InvalidNodes { + /// Configured node count. + nodes: i64, + }, + /// Per-source FIFO limit must be positive. #[error("invalid FIFO limit: must be greater than zero, got {fifo_limit}")] - InvalidFifoLimit { fifo_limit: i64 }, + InvalidFifoLimit { + /// Configured FIFO limit. + fifo_limit: i64, + }, + /// Receive channel closed unexpectedly. #[error("Failed to read from channel: {0}")] ChannelError(#[from] mpmc::RecvError), } @@ -90,54 +107,14 @@ pub struct Transport { pub receive: mpmc::Receiver>, } -/// Input passed to `Transport::broadcast`. -pub struct BroadcastRequest<'a, T: QbftTypes> { - pub ct: &'a CancellationToken, - pub type_: MessageType, - pub instance: &'a T::Instance, - pub source: i64, - pub round: i64, - pub value: &'a T::Value, - pub prepared_round: i64, - pub prepared_value: &'a T::Value, - pub justification: Option<&'a Vec>>, -} - -/// Input passed to `Definition::compare`. -pub struct CompareRequest<'a, T: QbftTypes> { - pub ct: &'a CancellationToken, - pub qcommit: &'a Msg, - pub input_value_source_ch: &'a mpmc::Receiver, - pub input_value_source: &'a T::Compare, - pub return_err: &'a mpmc::Sender>, - pub return_value: &'a mpmc::Sender, -} - /// Debug hooks for QBFT state transitions and rejected messages. pub struct QbftLogger { - pub upon_rule: Box< - dyn Fn( - /* instance */ &T::Instance, - /* process */ i64, - /* round */ i64, - /* msg */ &Msg, - /* upon_rule */ UponRule, - ) + Send - + Sync, - >, - pub round_change: Box< - dyn Fn( - /* instance */ &T::Instance, - /* process */ i64, - /* round */ i64, - /* new_round */ i64, - /* upon_rule */ UponRule, - /* msgs */ &Vec>, - ) + Send - + Sync, - >, - pub unjust: - Box) + Send + Sync>, + /// Called when an upon-rule fires. + pub upon_rule: Box>, + /// Called when the local process changes round. + pub round_change: Box>, + /// Called when an unjustified message is rejected. + pub unjust: Box>, } /// Defines the consensus system parameters that are external to the qbft @@ -145,22 +122,10 @@ pub struct QbftLogger { /// (calls to `run`). pub struct Definition { /// A deterministic leader election function. - pub is_leader: Box< - dyn Fn( - /* instance */ &T::Instance, - /* round */ i64, - /* process */ i64, - ) -> bool - + Send - + Sync, - >, + pub is_leader: Box>, /// Returns a new timer channel and stop function for the round - pub new_timer: Box< - dyn Fn(/* round */ i64) -> (mpmc::Receiver, Box) - + Send - + Sync, - >, + pub new_timer: Box Timer + Send + Sync>, /// Charon parity hook called when the leader proposes a value. The core /// algorithm only runs this callback and reacts to its result; any @@ -168,15 +133,7 @@ pub struct Definition { pub compare: sync::Arc>, /// Called when consensus has been reached on a value. - pub decide: Box< - dyn Fn( - /* ct */ &CancellationToken, - /* instance */ &T::Instance, - /* value */ &T::Value, - /* qcommit */ &Vec>, - ) + Send - + Sync, - >, + pub decide: Box>, /// Debug logging callbacks. pub logger: QbftLogger, @@ -192,13 +149,32 @@ impl Definition { /// Quorum count for the system. /// See IBFT 2.0 paper for correct formula: pub fn quorum(&self) -> i64 { - (self.nodes as u64 * 2).div_ceil(3) as i64 + self.nodes + .checked_mul(2) + .and_then(|nodes| nodes.checked_add(2)) + .and_then(|nodes| nodes.checked_div(3)) + .expect("node count permits quorum calculation") } /// Maximum number of faulty/byzantine nodes supported in the system. /// See IBFT 2.0 paper for correct formula: pub fn faulty(&self) -> i64 { - (self.nodes - 1) / 3 + self.nodes + .checked_sub(1) + .and_then(|nodes| nodes.checked_div(3)) + .expect("node count permits faulty-node calculation") + } + + fn quorum_count(&self) -> usize { + usize::try_from(self.quorum()).expect("quorum fits usize") + } + + fn faulty_plus_one_count(&self) -> usize { + let threshold = self + .faulty() + .checked_add(1) + .expect("faulty-node count permits threshold calculation"); + usize::try_from(threshold).expect("faulty-node threshold fits usize") } } @@ -208,11 +184,17 @@ pub struct MessageType(i64); // NOTE: message type ordering MUST not change, since it breaks backwards // compatibility. +/// Unknown message type. pub const MSG_UNKNOWN: MessageType = MessageType(0); +/// PRE-PREPARE message type. pub const MSG_PRE_PREPARE: MessageType = MessageType(1); +/// PREPARE message type. pub const MSG_PREPARE: MessageType = MessageType(2); +/// COMMIT message type. pub const MSG_COMMIT: MessageType = MessageType(3); +/// ROUND-CHANGE message type. pub const MSG_ROUND_CHANGE: MessageType = MessageType(4); +/// DECIDED catch-up message type. pub const MSG_DECIDED: MessageType = MessageType(5); const MSG_SENTINEL: MessageType = MessageType(6); // intentionally not public @@ -272,14 +254,23 @@ pub type Msg = sync::Arc>; #[derive(PartialEq, Eq, Hash, Clone, Copy)] pub struct UponRule(i64); +/// No upon-rule fired. pub const UPON_NOTHING: UponRule = UponRule(0); +/// PRE-PREPARE was justified. pub const UPON_JUSTIFIED_PRE_PREPARE: UponRule = UponRule(1); +/// Quorum PREPARE messages was received. pub const UPON_QUORUM_PREPARES: UponRule = UponRule(2); +/// Quorum COMMIT messages was received. pub const UPON_QUORUM_COMMITS: UponRule = UponRule(3); +/// Quorum ROUND-CHANGE messages was received but not justified. pub const UPON_UNJUST_QUORUM_ROUND_CHANGES: UponRule = UponRule(4); +/// F+1 future ROUND-CHANGE messages was received. pub const UPON_F_PLUS1_ROUND_CHANGES: UponRule = UponRule(5); +/// Quorum ROUND-CHANGE messages was received. pub const UPON_QUORUM_ROUND_CHANGES: UponRule = UponRule(6); +/// DECIDED message was justified. pub const UPON_JUSTIFIED_DECIDED: UponRule = UponRule(7); +/// Round timer expired. pub const UPON_ROUND_TIMEOUT: UponRule = UponRule(8); // This is not triggered by a message, but by a timer. impl Display for UponRule { @@ -328,6 +319,7 @@ pub fn run( input_value_source_ch: mpmc::Receiver, ) -> Result<()> { validate_definition(d)?; + let fifo_limit = usize::try_from(d.fifo_limit).expect("validated FIFO limit fits usize"); // === State === let round: Cell = Cell::new(1); @@ -342,7 +334,7 @@ pub fn run( let buffer: RefCell>>> = RefCell::new(HashMap::new()); let dedup_rules: RefCell> = RefCell::new(HashSet::new()); let mut timer_chan: mpmc::Receiver; - let mut stop_timer: Box; + let mut stop_timer: Box; // === Helpers == @@ -401,8 +393,12 @@ pub fn run( let fifo = b.entry(msg.source()).or_default(); fifo.push(msg.clone()); - if fifo.len() as i64 > d.fifo_limit { - fifo.drain(0..(fifo.len() - d.fifo_limit as usize)); + if fifo.len() > fifo_limit { + let expired = fifo + .len() + .checked_sub(fifo_limit) + .expect("FIFO length exceeds limit"); + fifo.drain(0..expired); } }; @@ -419,14 +415,14 @@ pub fn run( return; } - (d.logger.round_change)( + (d.logger.round_change)(RoundChangeLog { instance, process, - round.get(), + round: round.get(), new_round, - rule, - &extract_round_messages(&buffer.borrow(), round.get()), - ); + upon_rule: rule, + msgs: &extract_round_messages(&buffer.borrow(), round.get()), + }); round.set(new_round); dedup_rules.replace(HashSet::new()); @@ -435,12 +431,18 @@ pub fn run( // Algorithm 1:11 { - if (d.is_leader)(instance, round.get(), process) { + if (d.is_leader)(LeaderRequest { + instance, + round: round.get(), + process, + }) { // Note round==1 at this point. broadcast_own_pre_prepare(vec![])?; // Empty justification since round==1 } - (timer_chan, stop_timer) = (d.new_timer)(round.get()); + let timer = (d.new_timer)(round.get()); + timer_chan = timer.receive; + stop_timer = timer.stop; } loop { @@ -469,20 +471,24 @@ pub fn run( recv(t.receive) -> result => { let msg = result?; - if let Some(v) = q_commit.as_ref() { - if !v.is_empty() { - if msg.source() != process && msg.type_() == MSG_ROUND_CHANGE { - // Algorithm 3:17 - broadcast_msg(MSG_DECIDED, &v[0].value(), Some(v))?; - } - - continue; + if let Some(v) = q_commit.as_ref() + && !v.is_empty() + { + if msg.source() != process && msg.type_() == MSG_ROUND_CHANGE { + // Algorithm 3:17 + broadcast_msg(MSG_DECIDED, &v[0].value(), Some(v))?; } + + continue; } // Drop unjust messages if !is_justified(d, instance, &msg, compare_failure_round) { - (d.logger.unjust)(instance, process, msg); + (d.logger.unjust)(UnjustLog { + instance, + process, + msg, + }); continue; } @@ -495,7 +501,13 @@ pub fn run( continue; } - (d.logger.upon_rule)(instance, process, round.get(), &msg, rule); + (d.logger.upon_rule)(UponRuleLog { + instance, + process, + round: round.get(), + msg: &msg, + upon_rule: rule, + }); match rule { // Algorithm 2:1 @@ -503,7 +515,9 @@ pub fn run( change_round(msg.round(), rule); stop_timer(); - (timer_chan, stop_timer) = (d.new_timer)(round.get()); + let timer = (d.new_timer)(round.get()); + timer_chan = timer.receive; + stop_timer = timer.stop; let (new_input_value_source, compare_result) = compare( ct, @@ -527,10 +541,16 @@ pub fn run( // might timeout in the meantime. If // this happens, we trigger round change. // Algorithm 3:1 - change_round(round.get() + 1, UPON_ROUND_TIMEOUT); + let next_round = round + .get() + .checked_add(1) + .expect("round permits increment"); + change_round(next_round, UPON_ROUND_TIMEOUT); stop_timer(); - (timer_chan, stop_timer) = (d.new_timer)(round.get()); + let timer = (d.new_timer)(round.get()); + timer_chan = timer.receive; + stop_timer = timer.stop; broadcast_round_change()?; } @@ -563,7 +583,12 @@ pub fn run( let justification = q_commit.as_ref() .expect("Rules `UPON_QUORUM_COMMITS` and `UPON_JUSTIFIED_DECIDED` always include a justification"); - (d.decide)(ct, instance, &msg.value(), justification); + (d.decide)(DecideRequest { + ct, + instance, + value: &msg.value(), + qcommit: justification, + }); } UPON_F_PLUS1_ROUND_CHANGES => { // Algorithm 3:5 @@ -579,7 +604,9 @@ pub fn run( ); stop_timer(); - (timer_chan, stop_timer) = (d.new_timer)(round.get()); + let timer = (d.new_timer)(round.get()); + timer_chan = timer.receive; + stop_timer = timer.stop; broadcast_round_change()?; } @@ -607,10 +634,16 @@ pub fn run( recv(timer_chan) -> result => { result?; - change_round(round.get() + 1, UPON_ROUND_TIMEOUT); + let next_round = round + .get() + .checked_add(1) + .expect("round permits increment"); + change_round(next_round, UPON_ROUND_TIMEOUT); stop_timer(); - (timer_chan, stop_timer) = (d.new_timer)(round.get()); + let timer = (d.new_timer)(round.get()); + timer_chan = timer.receive; + stop_timer = timer.stop; broadcast_round_change()?; } @@ -786,7 +819,7 @@ fn classify( let prepares = filter_by_round_and_value(&flatten(buffer), MSG_PREPARE, msg.round(), msg.value()); - if prepares.len() as i64 >= d.quorum() { + if prepares.len() >= d.quorum_count() { (UPON_QUORUM_PREPARES, Some(prepares)) } else { (UPON_NOTHING, None) @@ -800,7 +833,7 @@ fn classify( let commits = filter_by_round_and_value(&flatten(buffer), MSG_COMMIT, msg.round(), msg.value()); - if commits.len() as i64 >= d.quorum() { + if commits.len() >= d.quorum_count() { (UPON_QUORUM_COMMITS, Some(commits)) } else { (UPON_NOTHING, None) @@ -826,7 +859,7 @@ fn classify( /* else msg.round() == round */ let qrc = filter_round_change(&all, msg.round()); - if (qrc.len() as i64) < d.quorum() { + if qrc.len() < d.quorum_count() { return (UPON_NOTHING, None); } @@ -834,7 +867,11 @@ fn classify( return (UPON_UNJUST_QUORUM_ROUND_CHANGES, None); }; - if !(d.is_leader)(instance, msg.round(), process) { + if !(d.is_leader)(LeaderRequest { + instance, + round: msg.round(), + process, + }) { return (UPON_NOTHING, None); } @@ -850,7 +887,7 @@ fn classify( /// round change messages. fn next_min_round(d: &Definition, frc: &Vec>, round: i64) -> i64 { // Get all RoundChange messages with round (rj) higher than current round (ri) - if (frc.len() as i64) < d.faulty() + 1 { + if frc.len() < d.faulty_plus_one_count() { panic!("bug: Frc too short"); } @@ -916,7 +953,7 @@ fn is_justified_round_change(d: &Definition, msg: &Msg) -> b // No need to check for all possible combinations, since justified should only // contain a one. - if (prepares.len() as i64) < d.quorum() { + if prepares.len() < d.quorum_count() { return false; } @@ -964,7 +1001,7 @@ fn is_justified_decided(d: &Definition, msg: &Msg) -> bool { None, ); - (commits.len() as i64) >= d.quorum() + commits.len() >= d.quorum_count() } /// Returns true if the PRE-PREPARE message is justified. @@ -978,13 +1015,20 @@ fn is_justified_pre_prepare( panic!("bug: not a preprepare message"); } - if !(d.is_leader)(instance, msg.round(), msg.source()) { + if !(d.is_leader)(LeaderRequest { + instance, + round: msg.round(), + process: msg.source(), + }) { return false; } // Justified if PrePrepare is the first round OR if comparison failed previous // round. - if msg.round() == 1 || (msg.round() == compare_failure_round + 1) { + let next_compare_round = compare_failure_round + .checked_add(1) + .expect("compare failure round permits increment"); + if msg.round() == 1 || (msg.round() == next_compare_round) { return true; } @@ -1010,7 +1054,7 @@ fn contains_justified_qrc( .into_iter() .filter(valid_round_change_prepared_round) .collect::>(); - if (qrc.len() as i64) < d.quorum() { + if qrc.len() < d.quorum_count() { return None; } @@ -1061,7 +1105,7 @@ fn get_single_justified_pr_pv( ) -> Option<(i64, T::Value)> { let mut pr: i64 = 0; let mut pv: T::Value = Default::default(); - let mut count: i64 = 0; + let mut count: usize = 0; let mut uniq = uniq_source(); for msg in msgs { @@ -1080,10 +1124,12 @@ fn get_single_justified_pr_pv( return None; } - count += 1; + count = count + .checked_add(1) + .expect("prepare count permits increment"); } - if count >= d.quorum() { + if count >= d.quorum_count() { Some((pr, pv)) } else { None @@ -1131,7 +1177,7 @@ fn get_justified_qrc( qrc.push(rc.clone()); } - if (qrc.len() as i64) >= d.quorum() && has_highest_prepared { + if qrc.len() >= d.quorum_count() && has_highest_prepared { qrc.extend(prepares); return Some(qrc); } @@ -1159,20 +1205,20 @@ fn get_fplus1_round_changes( continue; } - if let Some(highest) = highest_by_source.get(&msg.source()) { - if highest.round() > msg.round() { - continue; - } + if let Some(highest) = highest_by_source.get(&msg.source()) + && highest.round() > msg.round() + { + continue; } highest_by_source.insert(msg.source(), msg.clone()); - if (highest_by_source.len() as i64) == d.faulty() + 1 { + if highest_by_source.len() == d.faulty_plus_one_count() { break; } } - if (highest_by_source.len() as i64) < d.faulty() + 1 { + if highest_by_source.len() < d.faulty_plus_one_count() { return None; } @@ -1214,7 +1260,7 @@ fn get_prepare_quorums(d: &Definition, all: &Vec>) -> Ve let mut quorums = vec![]; for (_, msgs) in sets { - if (msgs.len() as i64) < d.quorum() { + if msgs.len() < d.quorum_count() { continue; } @@ -1236,7 +1282,7 @@ fn quorum_null_prepared( let null_pv = Some(&Default::default()); let justification = filter_msgs(all, MSG_ROUND_CHANGE, round, None, Some(null_pr), null_pv); - let has_quorum = justification.len() as i64 >= d.quorum(); + let has_quorum = justification.len() >= d.quorum_count(); (justification, has_quorum) }