From f6f2724d0c234fd30e21e96f99ed922b00beb028 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 13 Mar 2026 02:01:53 +0000 Subject: [PATCH 1/2] nvme: deadlock enabling doorbell buffer while doing I/O 9a3a93a introduced a bug in the previously-okay SubQueue::pop; before, acc_mem.access() would go through MemAccessor, acquire a reference on the underlying MemCtx (an Arc refcount bump), drop the lock, and continue. immediately after, SubQueue::pop would lock the SubQueue's state and actually perform the pop. this ordering is backwards from set_db_buf, which locks the SubQueue's state *then* conditionally performs acc_mem.access() if the buffer is set up as part of an import. after 9a3a93a, SubQueue::pop uses access_borrowed() to avoid contention on the refcount for MemCtx, the cost of retaining the lock on the SubQueue's MemAccessor node while taking the lock on the same queue's SubQueueState. at this point a concurrent SubQueue::pop and SubQueue::set_db_buf could deadlock; one holds the queue state lock, the other holds the accessor node lock, and both will try to take the other. resolve this tension by picking a consistent ordering for the queue state and acc_mem locks: SubQueueState first, then acc_mem.{access,_borrow}(). this was already the ordering in CompQueue::push and both set_db_buf, so pop() gets the trivial change. --- lib/propolis/src/hw/nvme/queue.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/propolis/src/hw/nvme/queue.rs b/lib/propolis/src/hw/nvme/queue.rs index ebe5b80c3..bb697aa19 100644 --- a/lib/propolis/src/hw/nvme/queue.rs +++ b/lib/propolis/src/hw/nvme/queue.rs @@ -649,12 +649,16 @@ impl SubQueue { pub fn pop( self: &Arc, ) -> Option<(GuestData, Permit, u16)> { + // Lock the SubQueueState early to order consistently with MemAccessor; + // in all cases we acquire the queue state lock before working with + // memory. + let mut state = self.state.lock(); + let Some(mem) = self.state.acc_mem.access_borrow() else { return None }; let mem = mem.view(); // Attempt to reserve an entry on the Completion Queue let permit = self.cq.reserve_entry(&self, &mem)?; - let mut state = self.state.lock(); // Check for last-minute updates to the tail via any configured doorbell // page, prior to attempting the pop itself. From aa8373f4b0f185e9dc54434a8b6494e346a8439d Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 13 Mar 2026 19:38:03 +0000 Subject: [PATCH 2/2] better docs --- lib/propolis/src/hw/nvme/queue.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/propolis/src/hw/nvme/queue.rs b/lib/propolis/src/hw/nvme/queue.rs index bb697aa19..b977f3427 100644 --- a/lib/propolis/src/hw/nvme/queue.rs +++ b/lib/propolis/src/hw/nvme/queue.rs @@ -113,7 +113,13 @@ struct QueueState { /// a `SubQueueState` for a Submission Queue. inner: Mutex>, - pub acc_mem: MemAccessor, + /// This queue's memory accessor node. + /// + /// Be careful about lock ordering when using this accessor; access_borrow() + /// holds this node's lock. If a user of this queue state requires both + /// `access_borrow()` and `QueueInner`, the protocol is to lock queue + /// state first and this accessor second. + acc_mem: MemAccessor, } impl QueueState { fn new(size: u32, acc_mem: MemAccessor, inner: QS) -> Self { @@ -649,9 +655,8 @@ impl SubQueue { pub fn pop( self: &Arc, ) -> Option<(GuestData, Permit, u16)> { - // Lock the SubQueueState early to order consistently with MemAccessor; - // in all cases we acquire the queue state lock before working with - // memory. + // Lock the SubQueueState early to conform to lock ordering requirement; + // see docs on QueueState::acc_mem. let mut state = self.state.lock(); let Some(mem) = self.state.acc_mem.access_borrow() else { return None };