From bbd3a956feb197f15abfa1451cbd16329553771e Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Apr 2026 11:26:45 -0700 Subject: [PATCH 1/6] [spr] changes to main this commit is based on Created using spr 1.3.6-beta.1 [skip ci] --- crates/iddqd/src/bi_hash_map/entry_indexes.rs | 16 +- crates/iddqd/src/bi_hash_map/imp.rs | 30 ++-- crates/iddqd/src/bi_hash_map/iter.rs | 7 +- crates/iddqd/src/id_hash_map/entry.rs | 5 +- crates/iddqd/src/id_hash_map/imp.rs | 20 ++- crates/iddqd/src/id_hash_map/iter.rs | 7 +- crates/iddqd/src/id_ord_map/entry.rs | 6 +- crates/iddqd/src/id_ord_map/imp.rs | 69 +++++---- crates/iddqd/src/support/btree_table.rs | 46 +++--- crates/iddqd/src/support/hash_table.rs | 35 ++--- crates/iddqd/src/support/item_index.rs | 79 ++++++++++ crates/iddqd/src/support/item_set.rs | 138 +++++++++++++----- crates/iddqd/src/support/mod.rs | 3 + crates/iddqd/src/tri_hash_map/imp.rs | 22 +-- crates/iddqd/src/tri_hash_map/iter.rs | 7 +- 15 files changed, 336 insertions(+), 154 deletions(-) create mode 100644 crates/iddqd/src/support/item_index.rs diff --git a/crates/iddqd/src/bi_hash_map/entry_indexes.rs b/crates/iddqd/src/bi_hash_map/entry_indexes.rs index a3336592..2ac61aca 100644 --- a/crates/iddqd/src/bi_hash_map/entry_indexes.rs +++ b/crates/iddqd/src/bi_hash_map/entry_indexes.rs @@ -1,11 +1,13 @@ +use crate::support::ItemIndex; + #[derive(Clone, Copy, Debug)] pub(super) enum EntryIndexes { - Unique(usize), + Unique(ItemIndex), NonUnique { // Invariant: at least one index is Some, and indexes are different from // each other. - index1: Option, - index2: Option, + index1: Option, + index2: Option, }, } @@ -43,8 +45,8 @@ impl EntryIndexes { } pub(super) enum DisjointKeys<'a> { - Unique(usize), - Key1(usize), - Key2(usize), - Key12([&'a usize; 2]), + Unique(ItemIndex), + Key1(ItemIndex), + Key2(ItemIndex), + Key12([&'a ItemIndex; 2]), } diff --git a/crates/iddqd/src/bi_hash_map/imp.rs b/crates/iddqd/src/bi_hash_map/imp.rs index e529ef29..ca8e5782 100644 --- a/crates/iddqd/src/bi_hash_map/imp.rs +++ b/crates/iddqd/src/bi_hash_map/imp.rs @@ -10,6 +10,7 @@ use crate::{ errors::DuplicateItem, internal::{ValidateCompact, ValidationError}, support::{ + ItemIndex, alloc::{AllocWrapper, Allocator, Global, global_alloc}, borrow::DormantMutRef, fmt_utils::StrDisplayAsDebug, @@ -79,7 +80,7 @@ use hashbrown::hash_table; #[derive(Clone)] pub struct BiHashMap { pub(super) items: ItemSet, - // Invariant: the values (usize) in these tables are valid indexes into + // Invariant: the values (ItemIndex) in these tables are valid indexes into // `items`, and are a 1:1 mapping. pub(super) tables: BiHashMapTables, } @@ -1822,12 +1823,12 @@ impl BiHashMap { let (index1, index2) = { // index1 and index2 are explicitly typed to show that it has a // trivial Drop impl that doesn't capture anything from map. - let index1: Option = map.tables.k1_to_item.find_index( + let index1: Option = map.tables.k1_to_item.find_index( &map.tables.state, &key1, |index| map.items[index].key1(), ); - let index2: Option = map.tables.k2_to_item.find_index( + let index2: Option = map.tables.k2_to_item.find_index( &map.tables.state, &key2, |index| map.items[index].key2(), @@ -1997,7 +1998,7 @@ impl BiHashMap { self.find1_index(k).map(|ix| &self.items[ix]) } - fn find1_index<'a, Q>(&'a self, k: &Q) -> Option + fn find1_index<'a, Q>(&'a self, k: &Q) -> Option where Q: Hash + Equivalent> + ?Sized, { @@ -2013,7 +2014,7 @@ impl BiHashMap { self.find2_index(k).map(|ix| &self.items[ix]) } - fn find2_index<'a, Q>(&'a self, k: &Q) -> Option + fn find2_index<'a, Q>(&'a self, k: &Q) -> Option where Q: Hash + Equivalent> + ?Sized, { @@ -2094,7 +2095,7 @@ impl BiHashMap { pub(super) fn get_by_index_mut( &mut self, - index: usize, + index: ItemIndex, ) -> Option> { let borrowed = self.items.get_mut(index)?; let state = self.tables.state.clone(); @@ -2107,7 +2108,7 @@ impl BiHashMap { pub(super) fn insert_unique_impl( &mut self, value: T, - ) -> Result> { + ) -> Result> { let mut duplicates = BTreeSet::new(); // Check for duplicates *before* inserting the new item, because we @@ -2140,7 +2141,7 @@ impl BiHashMap { )); } - let next_index = self.items.insert_at_next_index(value); + let next_index = self.items.assert_can_grow().insert(value); // e1 and e2 are all Some because if they were None, duplicates // would be non-empty, and we'd have bailed out earlier. e1.unwrap().insert(next_index); @@ -2178,7 +2179,10 @@ impl BiHashMap { } } - pub(super) fn remove_by_index(&mut self, remove_index: usize) -> Option { + pub(super) fn remove_by_index( + &mut self, + remove_index: ItemIndex, + ) -> Option { // For panic safety, look up both table entries while `self.items` still // holds the value, then remove from both tables and items in sequence. // hashbrown's `find_entry` is panic-safe under user-`Hash`/`Eq` panics @@ -2218,7 +2222,7 @@ impl BiHashMap { &mut self, indexes: EntryIndexes, value: T, - ) -> (usize, Vec) { + ) -> (ItemIndex, Vec) { match indexes { EntryIndexes::Unique(index) => { let old_item = &self.items[index]; @@ -2426,9 +2430,9 @@ impl Eq } fn detect_dup_or_insert<'a, A: Allocator>( - item: hash_table::Entry<'a, usize, AllocWrapper>, - duplicates: &mut BTreeSet, -) -> Option>> { + item: hash_table::Entry<'a, ItemIndex, AllocWrapper>, + duplicates: &mut BTreeSet, +) -> Option>> { match item { hash_table::Entry::Vacant(slot) => Some(slot), hash_table::Entry::Occupied(slot) => { diff --git a/crates/iddqd/src/bi_hash_map/iter.rs b/crates/iddqd/src/bi_hash_map/iter.rs index 9d80090d..f2e4c182 100644 --- a/crates/iddqd/src/bi_hash_map/iter.rs +++ b/crates/iddqd/src/bi_hash_map/iter.rs @@ -2,6 +2,7 @@ use super::{RefMut, tables::BiHashMapTables}; use crate::{ BiHashItem, DefaultHashBuilder, support::{ + ItemIndex, alloc::{AllocWrapper, Allocator, Global}, item_set::ItemSet, }, @@ -20,7 +21,7 @@ use hashbrown::hash_map; /// [`HashMap`]: std::collections::HashMap #[derive(Clone, Debug, Default)] pub struct Iter<'a, T: BiHashItem> { - inner: hash_map::Values<'a, usize, T>, + inner: hash_map::Values<'a, ItemIndex, T>, } impl<'a, T: BiHashItem> Iter<'a, T> { @@ -67,7 +68,7 @@ pub struct IterMut< A: Allocator = Global, > { tables: &'a BiHashMapTables, - inner: hash_map::ValuesMut<'a, usize, T>, + inner: hash_map::ValuesMut<'a, ItemIndex, T>, } impl<'a, T: BiHashItem, S: Clone + BuildHasher, A: Allocator> @@ -120,7 +121,7 @@ impl FusedIterator /// [`HashMap`]: std::collections::HashMap #[derive(Debug)] pub struct IntoIter { - inner: hash_map::IntoValues>, + inner: hash_map::IntoValues>, } impl IntoIter { diff --git a/crates/iddqd/src/id_hash_map/entry.rs b/crates/iddqd/src/id_hash_map/entry.rs index 23cb679b..a4e16337 100644 --- a/crates/iddqd/src/id_hash_map/entry.rs +++ b/crates/iddqd/src/id_hash_map/entry.rs @@ -2,6 +2,7 @@ use super::{IdHashItem, IdHashMap, RefMut}; use crate::{ DefaultHashBuilder, support::{ + ItemIndex, alloc::{Allocator, Global}, borrow::DormantMutRef, map_hash::MapHash, @@ -165,7 +166,7 @@ pub struct OccupiedEntry< > { map: DormantMutRef<'a, IdHashMap>, // index is a valid index into the map's internal hash table. - index: usize, + index: ItemIndex, } impl<'a, T: IdHashItem, S, A: Allocator> fmt::Debug @@ -187,7 +188,7 @@ impl<'a, T: IdHashItem, S: Clone + BuildHasher, A: Allocator> /// `DormantMutRef::new` must not be used. pub(super) unsafe fn new( map: DormantMutRef<'a, IdHashMap>, - index: usize, + index: ItemIndex, ) -> Self { OccupiedEntry { map, index } } diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index 49a87e9c..eecde817 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -7,6 +7,7 @@ use crate::{ errors::DuplicateItem, internal::{ValidateCompact, ValidationError}, support::{ + ItemIndex, alloc::{Allocator, Global, global_alloc}, borrow::DormantMutRef, item_set::ItemSet, @@ -1238,7 +1239,7 @@ impl IdHashMap { { // index is explicitly typed to show that it has a trivial Drop impl // that doesn't capture anything from map. - let index: Option = map.tables.key_to_item.find_index( + let index: Option = map.tables.key_to_item.find_index( &map.tables.state, &key, |index| map.items[index].key(), @@ -1349,7 +1350,7 @@ impl IdHashMap { }); } - fn find_index<'a, Q>(&'a self, k: &Q) -> Option + fn find_index<'a, Q>(&'a self, k: &Q) -> Option where Q: Hash + Equivalent> + ?Sized, { @@ -1366,13 +1367,13 @@ impl IdHashMap { self.tables.make_key_hash::(key) } - pub(super) fn get_by_index(&self, index: usize) -> Option<&T> { + pub(super) fn get_by_index(&self, index: ItemIndex) -> Option<&T> { self.items.get(index) } pub(super) fn get_by_index_mut( &mut self, - index: usize, + index: ItemIndex, ) -> Option> { let state = self.tables.state.clone(); let hashes = self.make_hash(&self.items[index]); @@ -1383,7 +1384,7 @@ impl IdHashMap { pub(super) fn insert_unique_impl( &mut self, value: T, - ) -> Result> { + ) -> Result> { let mut duplicates = BTreeSet::new(); // Check for duplicates *before* inserting the new item, because we @@ -1411,13 +1412,16 @@ impl IdHashMap { )); } - let next_index = self.items.insert_at_next_index(value); + let next_index = self.items.assert_can_grow().insert(value); entry.unwrap().insert(next_index); Ok(next_index) } - pub(super) fn remove_by_index(&mut self, remove_index: usize) -> Option { + pub(super) fn remove_by_index( + &mut self, + remove_index: ItemIndex, + ) -> Option { // For panic safety, look up the table entry while `self.items` still // holds the value, then remove from the table and items in sequence. // hashbrown's `find_entry` is panic-safe under user-`Hash`/`Eq` panics @@ -1443,7 +1447,7 @@ impl IdHashMap { ) } - pub(super) fn replace_at_index(&mut self, index: usize, value: T) -> T { + pub(super) fn replace_at_index(&mut self, index: ItemIndex, value: T) -> T { // We check the key before removing it, to avoid leaving the map in an // inconsistent state. let old_key = diff --git a/crates/iddqd/src/id_hash_map/iter.rs b/crates/iddqd/src/id_hash_map/iter.rs index d785ab6b..1e293f50 100644 --- a/crates/iddqd/src/id_hash_map/iter.rs +++ b/crates/iddqd/src/id_hash_map/iter.rs @@ -2,6 +2,7 @@ use super::{RefMut, tables::IdHashMapTables}; use crate::{ DefaultHashBuilder, IdHashItem, support::{ + ItemIndex, alloc::{AllocWrapper, Allocator, Global}, item_set::ItemSet, }, @@ -20,7 +21,7 @@ use hashbrown::hash_map; /// [`HashMap`]: std::collections::HashMap #[derive(Clone, Debug, Default)] pub struct Iter<'a, T: IdHashItem> { - inner: hash_map::Values<'a, usize, T>, + inner: hash_map::Values<'a, ItemIndex, T>, } impl<'a, T: IdHashItem> Iter<'a, T> { @@ -67,7 +68,7 @@ pub struct IterMut< A: Allocator = Global, > { tables: &'a IdHashMapTables, - inner: hash_map::ValuesMut<'a, usize, T>, + inner: hash_map::ValuesMut<'a, ItemIndex, T>, } impl<'a, T: IdHashItem, S: BuildHasher, A: Allocator> IterMut<'a, T, S, A> { @@ -118,7 +119,7 @@ impl FusedIterator /// [`HashMap`]: std::collections::HashMap #[derive(Debug)] pub struct IntoIter { - inner: hash_map::IntoValues>, + inner: hash_map::IntoValues>, } impl IntoIter { diff --git a/crates/iddqd/src/id_ord_map/entry.rs b/crates/iddqd/src/id_ord_map/entry.rs index 19d9660c..956a96e3 100644 --- a/crates/iddqd/src/id_ord_map/entry.rs +++ b/crates/iddqd/src/id_ord_map/entry.rs @@ -1,5 +1,5 @@ use super::{IdOrdItem, IdOrdMap, RefMut}; -use crate::support::borrow::DormantMutRef; +use crate::support::{ItemIndex, borrow::DormantMutRef}; use core::{fmt, hash::Hash}; /// An implementation of the Entry API for [`IdOrdMap`]. @@ -208,7 +208,7 @@ impl<'a, T: IdOrdItem> VacantEntry<'a, T> { pub struct OccupiedEntry<'a, T: IdOrdItem> { map: DormantMutRef<'a, IdOrdMap>, // index is a valid index into the map's internal hash table. - index: usize, + index: ItemIndex, } impl<'a, T: IdOrdItem> fmt::Debug for OccupiedEntry<'a, T> { @@ -226,7 +226,7 @@ impl<'a, T: IdOrdItem> OccupiedEntry<'a, T> { /// `DormantMutRef::new` must not be used. pub(super) unsafe fn new( map: DormantMutRef<'a, IdOrdMap>, - index: usize, + index: ItemIndex, ) -> Self { OccupiedEntry { map, index } } diff --git a/crates/iddqd/src/id_ord_map/imp.rs b/crates/iddqd/src/id_ord_map/imp.rs index 44d6927d..e13a7d7c 100644 --- a/crates/iddqd/src/id_ord_map/imp.rs +++ b/crates/iddqd/src/id_ord_map/imp.rs @@ -6,6 +6,7 @@ use crate::{ errors::DuplicateItem, internal::{ValidateChaos, ValidateCompact, ValidationError}, support::{ + ItemIndex, alloc::{Global, global_alloc}, borrow::DormantMutRef, item_set::ItemSet, @@ -65,7 +66,7 @@ pub struct IdOrdMap { // We don't expose an allocator trait here because it isn't stable with // std's BTreeMap. pub(super) items: ItemSet, - // Invariant: the values (usize) in these tables are valid indexes into + // Invariant: the values (ItemIndex) in these tables are valid indexes into // `items`, and are a 1:1 mapping. pub(super) tables: IdOrdMapTables, } @@ -967,7 +968,7 @@ impl IdOrdMap { { // index is explicitly typed to show that it has a trivial Drop impl // that doesn't capture anything from map. - let index: Option = map + let index: Option = map .tables .key_to_item .find_index(&key, |index| map.items[index].key()); @@ -1357,7 +1358,7 @@ impl IdOrdMap { self.find_index(k).map(|ix| &self.items[ix]) } - fn linear_search_index<'a, Q>(&'a self, k: &Q) -> Option + fn linear_search_index<'a, Q>(&'a self, k: &Q) -> Option where Q: ?Sized + Ord + Equivalent>, { @@ -1366,20 +1367,20 @@ impl IdOrdMap { }) } - fn find_index<'a, Q>(&'a self, k: &Q) -> Option + fn find_index<'a, Q>(&'a self, k: &Q) -> Option where Q: ?Sized + Comparable>, { self.tables.key_to_item.find_index(k, |index| self.items[index].key()) } - pub(super) fn get_by_index(&self, index: usize) -> Option<&T> { + pub(super) fn get_by_index(&self, index: ItemIndex) -> Option<&T> { self.items.get(index) } pub(super) fn get_by_index_mut<'a>( &'a mut self, - index: usize, + index: ItemIndex, ) -> Option> where T::Key<'a>: Hash, @@ -1400,41 +1401,57 @@ impl IdOrdMap { pub(super) fn insert_unique_impl( &mut self, value: T, - ) -> Result> { + ) -> Result> { let mut duplicates = BTreeSet::new(); // Check for duplicates *before* inserting the new item, because we // don't want to partially insert the new item and then have to roll // back. - let key = value.key(); - - if let Some(index) = self - .tables - .key_to_item - .find_index(&key, |index| self.items[index].key()) + // + // Scope this `key` to avoid lifetime issues. { - duplicates.insert(index); - } + let key = value.key(); + if let Some(index) = self + .tables + .key_to_item + .find_index(&key, |index| self.items[index].key()) + { + duplicates.insert(index); + } - if !duplicates.is_empty() { - drop(key); - return Err(DuplicateItem::__internal_new( - value, - duplicates.iter().map(|ix| &self.items[*ix]).collect(), - )); + if !duplicates.is_empty() { + drop(key); + return Err(DuplicateItem::__internal_new( + value, + duplicates.iter().map(|ix| &self.items[*ix]).collect(), + )); + } } - let next_index = self.items.next_index(); + // Take the `GrowHandle` after the read-only duplicate check but before + // the B-tree mutation. With this approach, a panic from + // `assert_can_grow` (which means that the map is full) cannot leave the + // B-tree referencing an index that was never assigned to an item. + // + // The handle holds `&mut self.items` and is consumed by + // `GrowHandle::insert`, so the type system enforces that we cannot + // reach the push without the cap check. + let grow_handle = self.items.assert_can_grow(); + let next_index = grow_handle.next_index(); + let key = value.key(); self.tables .key_to_item - .insert(next_index, &key, |index| self.items[index].key()); + .insert(next_index, &key, |index| grow_handle[index].key()); drop(key); - self.items.insert_at_next_index(value); + grow_handle.insert(value); Ok(next_index) } - pub(super) fn remove_by_index(&mut self, remove_index: usize) -> Option { + pub(super) fn remove_by_index( + &mut self, + remove_index: ItemIndex, + ) -> Option { // For panic safety, read the key while self.items still holds the slot, // then remove from the B-tree *before* mutating self.items. // @@ -1456,7 +1473,7 @@ impl IdOrdMap { ) } - pub(super) fn replace_at_index(&mut self, index: usize, value: T) -> T { + pub(super) fn replace_at_index(&mut self, index: ItemIndex, value: T) -> T { // We check the key before removing it, to avoid leaving the map in an // inconsistent state. let old_key = diff --git a/crates/iddqd/src/support/btree_table.rs b/crates/iddqd/src/support/btree_table.rs index 4c4ac1e4..9ef8eb94 100644 --- a/crates/iddqd/src/support/btree_table.rs +++ b/crates/iddqd/src/support/btree_table.rs @@ -4,7 +4,7 @@ //! integers (that are indexes corresponding to items), but use an external //! comparator. -use super::map_hash::MapHash; +use super::{ItemIndex, map_hash::MapHash}; use crate::internal::{TableValidationError, ValidateCompact}; use alloc::{ collections::{BTreeSet, btree_set}, @@ -123,7 +123,7 @@ impl MapBTreeTable { } indexes.sort_unstable(); for (i, index) in indexes.iter().enumerate() { - if *index != i { + if index.as_u32() as usize != i { return Err(TableValidationError::new(format!( "value at index {i} should be {i}, was {index}", ))); @@ -134,7 +134,7 @@ impl MapBTreeTable { // There should be no duplicates, and the sentinel value // should not be stored. let indexes: Vec<_> = self.items.iter().copied().collect(); - let index_set: BTreeSet = + let index_set: BTreeSet = indexes.iter().map(|ix| ix.0).collect(); if index_set.len() != indexes.len() { return Err(TableValidationError::new(format!( @@ -156,12 +156,12 @@ impl MapBTreeTable { } #[inline] - pub(crate) fn first(&self) -> Option { + pub(crate) fn first(&self) -> Option { self.items.first().map(|ix| ix.0) } #[inline] - pub(crate) fn last(&self) -> Option { + pub(crate) fn last(&self) -> Option { self.items.last().map(|ix| ix.0) } @@ -169,11 +169,11 @@ impl MapBTreeTable { &self, key: &Q, lookup: F, - ) -> Option + ) -> Option where K: Ord, Q: ?Sized + Comparable, - F: Fn(usize) -> K, + F: Fn(ItemIndex) -> K, { let f = find_cmp(key, lookup); @@ -195,11 +195,15 @@ impl MapBTreeTable { ret } - pub(crate) fn insert(&mut self, index: usize, key: &Q, lookup: F) - where + pub(crate) fn insert( + &mut self, + index: ItemIndex, + key: &Q, + lookup: F, + ) where K: Ord, Q: ?Sized + Comparable, - F: Fn(usize) -> K, + F: Fn(ItemIndex) -> K, { let f = insert_cmp(index, key, lookup); let guard = CmpDropGuard::new(&f); @@ -210,9 +214,9 @@ impl MapBTreeTable { drop(guard); } - pub(crate) fn remove(&mut self, index: usize, key: K, lookup: F) + pub(crate) fn remove(&mut self, index: ItemIndex, key: K, lookup: F) where - F: Fn(usize) -> K, + F: Fn(ItemIndex) -> K, K: Ord, { let f = insert_cmp(index, &key, lookup); @@ -226,7 +230,7 @@ impl MapBTreeTable { pub(crate) fn retain(&mut self, mut f: F) where - F: FnMut(usize) -> bool, + F: FnMut(ItemIndex) -> bool, { // We don't need to set up a comparator in the environment because // `retain` doesn't do any comparisons as part of its operation. @@ -272,7 +276,7 @@ impl<'a> Iter<'a> { } impl<'a> Iterator for Iter<'a> { - type Item = usize; + type Item = ItemIndex; fn next(&mut self) -> Option { self.inner.next().map(|index| index.0) @@ -291,7 +295,7 @@ impl IntoIter { } impl Iterator for IntoIter { - type Item = usize; + type Item = ItemIndex; fn next(&mut self) -> Option { self.inner.next().map(|index| index.0) @@ -304,7 +308,7 @@ fn find_cmp<'a, K, Q, F>( ) -> impl Fn(Index, Index) -> Ordering + 'a where Q: ?Sized + Comparable, - F: 'a + Fn(usize) -> K, + F: 'a + Fn(ItemIndex) -> K, K: Ord, { move |a: Index, b: Index| { @@ -326,13 +330,13 @@ where } fn insert_cmp<'a, K, Q, F>( - index: usize, + index: ItemIndex, key: &'a Q, lookup: F, ) -> impl Fn(Index, Index) -> Ordering + 'a where Q: ?Sized + Comparable, - F: 'a + Fn(usize) -> K, + F: 'a + Fn(ItemIndex) -> K, K: Ord, { move |a: Index, b: Index| { @@ -389,14 +393,14 @@ impl Drop for CmpDropGuard<'_> { } #[derive(Clone, Copy, Debug)] -struct Index(usize); +struct Index(ItemIndex); impl Index { - const SENTINEL_VALUE: usize = usize::MAX; + const SENTINEL_VALUE: ItemIndex = ItemIndex::SENTINEL; const SENTINEL: Self = Self(Self::SENTINEL_VALUE); #[inline] - fn new(value: usize) -> Self { + fn new(value: ItemIndex) -> Self { if value == Self::SENTINEL_VALUE { panic!("btree map overflow, index with value {value:?} was added") } diff --git a/crates/iddqd/src/support/hash_table.rs b/crates/iddqd/src/support/hash_table.rs index 0341e84d..9d5a3ad5 100644 --- a/crates/iddqd/src/support/hash_table.rs +++ b/crates/iddqd/src/support/hash_table.rs @@ -1,6 +1,7 @@ //! A wrapper around a hash table with some random state. use super::{ + ItemIndex, alloc::{AllocWrapper, Allocator}, map_hash::MapHash, }; @@ -19,7 +20,7 @@ use hashbrown::{ #[derive(Clone, Default)] pub(crate) struct MapHashTable { - pub(super) items: HashTable>, + pub(super) items: HashTable>, } impl fmt::Debug for MapHashTable { @@ -62,7 +63,7 @@ impl MapHashTable { let mut values: Vec<_> = self.items.iter().copied().collect(); values.sort_unstable(); for (i, value) in values.iter().enumerate() { - if *value != i { + if value.as_u32() as usize != i { return Err(TableValidationError::new(format!( "expected value at index {i} to be {i}, was {value}" ))); @@ -101,9 +102,9 @@ impl MapHashTable { state: &S, key: &Q, lookup: F, - ) -> Option + ) -> Option where - F: Fn(usize) -> K, + F: Fn(ItemIndex) -> K, Q: ?Sized + Hash + Equivalent, { let hash = state.hash_one(key); @@ -115,9 +116,9 @@ impl MapHashTable { state: &S, key: K, lookup: F, - ) -> Entry<'_, usize, AllocWrapper> + ) -> Entry<'_, ItemIndex, AllocWrapper> where - F: Fn(usize) -> K, + F: Fn(ItemIndex) -> K, { let hash = state.hash_one(&key); self.items.entry( @@ -133,11 +134,11 @@ impl MapHashTable { key: &Q, lookup: F, ) -> Result< - OccupiedEntry<'_, usize, AllocWrapper>, - AbsentEntry<'_, usize, AllocWrapper>, + OccupiedEntry<'_, ItemIndex, AllocWrapper>, + AbsentEntry<'_, ItemIndex, AllocWrapper>, > where - F: Fn(usize) -> K, + F: Fn(ItemIndex) -> K, K: Hash + Eq + Borrow, Q: ?Sized + Hash + Eq, { @@ -150,18 +151,18 @@ impl MapHashTable { hash: u64, mut f: F, ) -> Result< - OccupiedEntry<'_, usize, AllocWrapper>, - AbsentEntry<'_, usize, AllocWrapper>, + OccupiedEntry<'_, ItemIndex, AllocWrapper>, + AbsentEntry<'_, ItemIndex, AllocWrapper>, > where - F: FnMut(usize) -> bool, + F: FnMut(ItemIndex) -> bool, { self.items.find_entry(hash, |index| f(*index)) } pub(crate) fn retain(&mut self, mut f: F) where - F: FnMut(usize) -> bool, + F: FnMut(ItemIndex) -> bool, { self.items.retain(|index| f(*index)); } @@ -183,7 +184,7 @@ impl MapHashTable { pub(crate) fn reserve( &mut self, additional: usize, - hasher: impl Fn(&usize) -> u64, + hasher: impl Fn(&ItemIndex) -> u64, ) { self.items.reserve(additional, hasher); } @@ -192,7 +193,7 @@ impl MapHashTable { /// /// See [`Self::reserve`] for the contract `hasher` must satisfy. #[inline] - pub(crate) fn shrink_to_fit(&mut self, hasher: impl Fn(&usize) -> u64) { + pub(crate) fn shrink_to_fit(&mut self, hasher: impl Fn(&ItemIndex) -> u64) { self.items.shrink_to_fit(hasher); } @@ -203,7 +204,7 @@ impl MapHashTable { pub(crate) fn shrink_to( &mut self, min_capacity: usize, - hasher: impl Fn(&usize) -> u64, + hasher: impl Fn(&ItemIndex) -> u64, ) { self.items.shrink_to(min_capacity, hasher); } @@ -215,7 +216,7 @@ impl MapHashTable { pub(crate) fn try_reserve( &mut self, additional: usize, - hasher: impl Fn(&usize) -> u64, + hasher: impl Fn(&ItemIndex) -> u64, ) -> Result<(), hashbrown::TryReserveError> { self.items.try_reserve(additional, hasher) } diff --git a/crates/iddqd/src/support/item_index.rs b/crates/iddqd/src/support/item_index.rs new file mode 100644 index 00000000..8e507306 --- /dev/null +++ b/crates/iddqd/src/support/item_index.rs @@ -0,0 +1,79 @@ +//! A newtype identifying items within an `ItemSet`. + +use core::fmt; + +/// An index identifying an item within an +/// [`ItemSet`](super::item_set::ItemSet). +/// +/// We use a `u32` and not a `usize` for these indexes because the increased +/// density leads to meaningful performance improvements on 64-bit targets. This +/// does mean that the maximum number of items is limited to 2^32 - 1 (we +/// reserve `u32::MAX` for the sentinel), but we consider that to be a +/// reasonable tradeoff. The limit is enforced within +/// [`ItemSet::assert_can_grow`]; see [`Self::MAX_VALID`]. +/// +/// [`ItemSet::assert_can_grow`]: super::item_set::ItemSet::assert_can_grow +#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct ItemIndex(u32); + +impl ItemIndex { + /// The smallest possible index. + pub(crate) const ZERO: Self = Self(0); + + /// The largest index that may be assigned to an item. + /// + /// One below `u32::MAX`, which is reserved as [`Self::SENTINEL`]. + /// Equivalently, the maximum number of items that may ever be inserted + /// into a single map (across the map's lifetime, since indexes are never + /// reused other than through `last_index`) is `u32::MAX`. + pub(crate) const MAX_VALID: Self = Self(u32::MAX - 1); + + /// Reserved sentinel value marking the root/empty slot. Never assigned to + /// an item. + #[cfg_attr(not(feature = "std"), expect(dead_code))] + pub(crate) const SENTINEL: Self = Self(u32::MAX); + + /// Wraps a raw `u32`. + #[inline] + pub(crate) const fn new(value: u32) -> Self { + Self(value) + } + + /// Returns the underlying `u32`. + #[inline] + pub(crate) const fn as_u32(self) -> u32 { + self.0 + } + + /// Returns this index plus one, panicking on overflow. + /// + /// Used by [`ItemSet`](super::item_set::ItemSet) to advance + /// `next_index` after an insert. + #[inline] + pub(crate) fn next(self) -> Self { + Self(self.0.checked_add(1).expect("ItemIndex did not overflow")) + } + + /// Returns this index minus one, panicking on underflow. + /// + /// Used by [`ItemSet`](super::item_set::ItemSet) to roll back + /// `next_index` when removing the highest-index item. + #[inline] + pub(crate) fn prev(self) -> Self { + Self(self.0.checked_sub(1).expect("ItemIndex did not underflow")) + } +} + +impl fmt::Debug for ItemIndex { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl fmt::Display for ItemIndex { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} diff --git a/crates/iddqd/src/support/item_set.rs b/crates/iddqd/src/support/item_set.rs index 8f804680..eda24794 100644 --- a/crates/iddqd/src/support/item_set.rs +++ b/crates/iddqd/src/support/item_set.rs @@ -1,4 +1,4 @@ -use super::alloc::AllocWrapper; +use super::{ItemIndex, alloc::AllocWrapper}; use crate::{ internal::{ValidateCompact, ValidationError}, support::alloc::{Allocator, Global, global_alloc}, @@ -14,12 +14,13 @@ use rustc_hash::FxBuildHasher; #[derive(Clone)] pub(crate) struct ItemSet { // rustc-hash's FxHashMap is custom-designed for compact-ish integer keys. - items: HashMap>, - // The next index to use. This only ever goes up, not down. + items: HashMap>, + // The next index to use. This only ever goes up, not down (modulo the + // small `remove`-of-tail optimization below). // // An alternative might be to use a free list of indexes, but that's // unnecessarily complex. - next_index: usize, + next_index: ItemIndex, } impl fmt::Debug for ItemSet { @@ -38,12 +39,60 @@ impl ItemSet { } } +/// A typestate that proves there's space within the item set to grow the set by +/// exactly one slot. +/// +/// This handle is created by [`ItemSet::assert_can_grow`] and consumed by +/// [`GrowHandle::insert`]. The handle holds a `&mut ItemSet`. +/// +/// Splitting the assertion from the insertion lets callers fail the cap check +/// before indexes are mutated. During this interval, if callers need access to +/// the individual items, they can use the `Index` impl below. More +/// functionality can be added to this handle as necessary. +#[must_use = "must be consumed by GrowHandle::insert"] +pub(crate) struct GrowHandle<'a, T, A: Allocator> { + items: &'a mut ItemSet, +} + +impl Index for GrowHandle<'_, T, A> { + type Output = T; + + #[inline] + fn index(&self, index: ItemIndex) -> &T { + &self.items[index] + } +} + +impl<'a, T, A: Allocator> GrowHandle<'a, T, A> { + /// Returns the index that [`Self::insert`] will assign. + #[cfg_attr(not(feature = "std"), expect(dead_code))] + #[inline] + pub(crate) fn next_index(&self) -> ItemIndex { + self.items.next_index + } + + /// Inserts `value` at [`Self::next_index`] and returns the chosen + /// index, consuming the handle. + /// + /// This is the only way to grow an [`ItemSet`]. + #[inline] + pub(crate) fn insert(self, value: T) -> ItemIndex { + let index = self.items.next_index; + self.items.items.insert(index, value); + // `assert_can_grow` guarantees `next_index <= ItemIndex::MAX_VALID` + // here, so the post-insert bump is at most `ItemIndex::SENTINEL` — + // no overflow. + self.items.next_index = self.items.next_index.next(); + index + } +} + impl ItemSet { #[inline] pub(crate) const fn new_in(alloc: A) -> Self { Self { items: HashMap::with_hasher_in(FxBuildHasher, AllocWrapper(alloc)), - next_index: 0, + next_index: ItemIndex::ZERO, } } @@ -54,7 +103,7 @@ impl ItemSet { Default::default(), AllocWrapper(alloc), ), - next_index: 0, + next_index: ItemIndex::ZERO, } } @@ -71,10 +120,11 @@ impl ItemSet { // between 0 and next_index are present. match compactness { ValidateCompact::Compact => { - for i in 0..self.next_index { - if !self.items.contains_key(&i) { + for i in 0..self.next_index.as_u32() { + let idx = ItemIndex::new(i); + if !self.items.contains_key(&idx) { return Err(ValidationError::General(format!( - "ItemSet is not compact: missing index {i}" + "ItemSet is not compact: missing index {idx}" ))); } } @@ -102,70 +152,77 @@ impl ItemSet { } #[inline] - pub(crate) fn iter(&self) -> hash_map::Iter<'_, usize, T> { + pub(crate) fn iter(&self) -> hash_map::Iter<'_, ItemIndex, T> { self.items.iter() } #[inline] #[expect(dead_code)] - pub(crate) fn iter_mut(&mut self) -> hash_map::IterMut<'_, usize, T> { + pub(crate) fn iter_mut(&mut self) -> hash_map::IterMut<'_, ItemIndex, T> { self.items.iter_mut() } #[inline] - pub(crate) fn values(&self) -> hash_map::Values<'_, usize, T> { + pub(crate) fn values(&self) -> hash_map::Values<'_, ItemIndex, T> { self.items.values() } #[inline] - pub(crate) fn values_mut(&mut self) -> hash_map::ValuesMut<'_, usize, T> { + pub(crate) fn values_mut( + &mut self, + ) -> hash_map::ValuesMut<'_, ItemIndex, T> { self.items.values_mut() } #[inline] pub(crate) fn into_values( self, - ) -> hash_map::IntoValues> { + ) -> hash_map::IntoValues> { self.items.into_values() } #[inline] - pub(crate) fn get(&self, index: usize) -> Option<&T> { + pub(crate) fn get(&self, index: ItemIndex) -> Option<&T> { self.items.get(&index) } #[inline] - pub(crate) fn get_mut(&mut self, index: usize) -> Option<&mut T> { + pub(crate) fn get_mut(&mut self, index: ItemIndex) -> Option<&mut T> { self.items.get_mut(&index) } #[inline] pub(crate) fn get_disjoint_mut( &mut self, - indexes: [&usize; N], + indexes: [&ItemIndex; N], ) -> [Option<&mut T>; N] { self.items.get_many_mut(indexes) } - // This is only used by IdOrdMap. - #[cfg_attr(not(feature = "std"), expect(dead_code))] - #[inline] - pub(crate) fn next_index(&self) -> usize { - self.next_index - } - + /// Returns a [`GrowHandle`] that grants exclusive access to grow the set by + /// exactly one slot, panicking if the set is already full. + /// + /// The returned handle is the only way to grow an [`ItemSet`], so the + /// capacity check cannot be skipped. Because the handle holds a `&mut + /// ItemSet`, the item set cannot be mutated in between. #[inline] - pub(crate) fn insert_at_next_index(&mut self, value: T) -> usize { - let index = self.next_index; - self.items.insert(index, value); - self.next_index += 1; - index + #[must_use = "GrowHandle must be passed to GrowHandle::insert"] + pub(crate) fn assert_can_grow(&mut self) -> GrowHandle<'_, T, A> { + // `MAX_VALID` is the largest assignable index. After that final insert, + // `next_index` is bumped to `SENTINEL` and the next `assert_can_grow` + // will panic. + assert!( + self.next_index <= ItemIndex::MAX_VALID, + "ItemSet index exceeds maximum index {}", + ItemIndex::MAX_VALID, + ); + GrowHandle { items: self } } #[inline] - pub(crate) fn remove(&mut self, index: usize) -> Option { + pub(crate) fn remove(&mut self, index: ItemIndex) -> Option { let entry = self.items.remove(&index); - if entry.is_some() && index == self.next_index - 1 { + if entry.is_some() && index == self.next_index.prev() { // If we removed the last entry, decrement next_index. Not strictly // necessary but a nice optimization. // @@ -180,7 +237,10 @@ impl ItemSet { // // Compactness would require a heap acting as a free list. But that // seems generally unnecessary. - self.next_index -= 1; + // + // `entry.is_some()` implies an item lived at `index`, so + // `next_index > index >= 0` and `prev()` cannot underflow. + self.next_index = self.next_index.prev(); } entry } @@ -189,16 +249,16 @@ impl ItemSet { #[inline] pub(crate) fn clear(&mut self) { self.items.clear(); - self.next_index = 0; + self.next_index = ItemIndex::ZERO; } // This method assumes that value has the same ID. It also asserts that // `index` is valid (and panics if it isn't). #[inline] - pub(crate) fn replace(&mut self, index: usize, value: T) -> T { + pub(crate) fn replace(&mut self, index: ItemIndex, value: T) -> T { self.items .insert(index, value) - .unwrap_or_else(|| panic!("EntrySet index not found: {index}")) + .unwrap_or_else(|| panic!("ItemSet index not found: {index}")) } /// Reserves capacity for at least `additional` more items. @@ -247,20 +307,20 @@ mod serde_impls { } } -impl Index for ItemSet { +impl Index for ItemSet { type Output = T; #[inline] - fn index(&self, index: usize) -> &Self::Output { + fn index(&self, index: ItemIndex) -> &Self::Output { self.items .get(&index) .unwrap_or_else(|| panic!("ItemSet index not found: {index}")) } } -impl IndexMut for ItemSet { +impl IndexMut for ItemSet { #[inline] - fn index_mut(&mut self, index: usize) -> &mut Self::Output { + fn index_mut(&mut self, index: ItemIndex) -> &mut Self::Output { self.items .get_mut(&index) .unwrap_or_else(|| panic!("ItemSet index not found: {index}")) diff --git a/crates/iddqd/src/support/mod.rs b/crates/iddqd/src/support/mod.rs index 2f2ff930..be97610f 100644 --- a/crates/iddqd/src/support/mod.rs +++ b/crates/iddqd/src/support/mod.rs @@ -7,7 +7,10 @@ pub(crate) mod daft_utils; pub(crate) mod fmt_utils; pub(crate) mod hash_builder; pub(crate) mod hash_table; +pub(crate) mod item_index; pub(crate) mod item_set; pub(crate) mod map_hash; #[cfg(feature = "schemars08")] pub(crate) mod schemars_utils; + +pub(crate) use item_index::ItemIndex; diff --git a/crates/iddqd/src/tri_hash_map/imp.rs b/crates/iddqd/src/tri_hash_map/imp.rs index 7979dd8a..9ce1171f 100644 --- a/crates/iddqd/src/tri_hash_map/imp.rs +++ b/crates/iddqd/src/tri_hash_map/imp.rs @@ -4,6 +4,7 @@ use crate::{ errors::DuplicateItem, internal::ValidationError, support::{ + ItemIndex, alloc::{AllocWrapper, Allocator, Global, global_alloc}, borrow::DormantMutRef, fmt_utils::StrDisplayAsDebug, @@ -85,7 +86,7 @@ use hashbrown::hash_table::{Entry, VacantEntry}; #[derive(Clone)] pub struct TriHashMap { pub(super) items: ItemSet, - // Invariant: the values (usize) in these tables are valid indexes into + // Invariant: the values (ItemIndex) in these tables are valid indexes into // `items`, and are a 1:1 mapping. tables: TriHashMapTables, } @@ -1465,7 +1466,7 @@ impl TriHashMap { )); } - let next_index = self.items.insert_at_next_index(value); + let next_index = self.items.assert_can_grow().insert(value); // e1, e2 and e3 are all Some because if they were None, duplicates // would be non-empty, and we'd have bailed out earlier. e1.unwrap().insert(next_index); @@ -2676,7 +2677,7 @@ impl TriHashMap { self.find1_index(k).map(|ix| &self.items[ix]) } - fn find1_index<'a, Q>(&'a self, k: &Q) -> Option + fn find1_index<'a, Q>(&'a self, k: &Q) -> Option where Q: Hash + Equivalent> + ?Sized, { @@ -2692,7 +2693,7 @@ impl TriHashMap { self.find2_index(k).map(|ix| &self.items[ix]) } - fn find2_index<'a, Q>(&'a self, k: &Q) -> Option + fn find2_index<'a, Q>(&'a self, k: &Q) -> Option where Q: Hash + Equivalent> + ?Sized, { @@ -2708,7 +2709,7 @@ impl TriHashMap { self.find3_index(k).map(|ix| &self.items[ix]) } - fn find3_index<'a, Q>(&'a self, k: &Q) -> Option + fn find3_index<'a, Q>(&'a self, k: &Q) -> Option where Q: Hash + Equivalent> + ?Sized, { @@ -2717,7 +2718,10 @@ impl TriHashMap { .find_index(&self.tables.state, k, |index| self.items[index].key3()) } - pub(super) fn remove_by_index(&mut self, remove_index: usize) -> Option { + pub(super) fn remove_by_index( + &mut self, + remove_index: ItemIndex, + ) -> Option { // For panic safety, look up all three table entries while `self.items` // still holds the value, then remove from all three tables and items // in sequence. hashbrown's `find_entry` is panic-safe under @@ -2911,9 +2915,9 @@ impl Extend } fn detect_dup_or_insert<'a, A: Allocator>( - item: Entry<'a, usize, AllocWrapper>, - duplicates: &mut BTreeSet, -) -> Option>> { + item: Entry<'a, ItemIndex, AllocWrapper>, + duplicates: &mut BTreeSet, +) -> Option>> { match item { Entry::Vacant(slot) => Some(slot), Entry::Occupied(slot) => { diff --git a/crates/iddqd/src/tri_hash_map/iter.rs b/crates/iddqd/src/tri_hash_map/iter.rs index 8ebe6ff4..8c472d2a 100644 --- a/crates/iddqd/src/tri_hash_map/iter.rs +++ b/crates/iddqd/src/tri_hash_map/iter.rs @@ -2,6 +2,7 @@ use super::{RefMut, tables::TriHashMapTables}; use crate::{ DefaultHashBuilder, TriHashItem, support::{ + ItemIndex, alloc::{AllocWrapper, Allocator, Global}, item_set::ItemSet, }, @@ -20,7 +21,7 @@ use hashbrown::hash_map; /// [`HashMap`]: std::collections::HashMap #[derive(Clone, Debug, Default)] pub struct Iter<'a, T: TriHashItem> { - inner: hash_map::Values<'a, usize, T>, + inner: hash_map::Values<'a, ItemIndex, T>, } impl<'a, T: TriHashItem> Iter<'a, T> { @@ -67,7 +68,7 @@ pub struct IterMut< A: Allocator = Global, > { tables: &'a TriHashMapTables, - inner: hash_map::ValuesMut<'a, usize, T>, + inner: hash_map::ValuesMut<'a, ItemIndex, T>, } impl<'a, T: TriHashItem, S: Clone + BuildHasher, A: Allocator> @@ -120,7 +121,7 @@ impl FusedIterator /// [`HashMap`]: std::collections::HashMap #[derive(Debug)] pub struct IntoIter { - inner: hash_map::IntoValues>, + inner: hash_map::IntoValues>, } impl IntoIter { From 8c5b053e93be2d41cb00c252a8e4bcbdace63a92 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Apr 2026 11:40:11 -0700 Subject: [PATCH 2/6] simplify comments Created using spr 1.3.6-beta.1 --- crates/iddqd/src/support/item_set.rs | 55 ++++++++-------------------- 1 file changed, 15 insertions(+), 40 deletions(-) diff --git a/crates/iddqd/src/support/item_set.rs b/crates/iddqd/src/support/item_set.rs index bfcb123f..557d387c 100644 --- a/crates/iddqd/src/support/item_set.rs +++ b/crates/iddqd/src/support/item_set.rs @@ -168,9 +168,8 @@ impl<'a, T, A: Allocator> GrowHandle<'a, T, A> { idx } else { let idx = self.items.free_head; - // Replace the `Vacant { next }` at `idx` with `Occupied` - // and splice `idx` out of the chain by advancing - // `free_head` past it. + // Replace the `Vacant { next }` at `idx` with `Occupied`, + // and advance `free_head` to `next`. let slot = &mut self.items.items[idx.as_u32() as usize]; let next = match slot { SlabEntry::Occupied(_) => { @@ -193,17 +192,12 @@ impl<'a, T, A: Allocator> GrowHandle<'a, T, A> { /// type. All other interaction with slots goes through `ItemSet`'s safe /// methods. pub(crate) enum SlabEntry { - /// Slot holds a live value. + /// The slot holds a live value. Occupied(T), - /// Slot is free; `next` is the index of the next free slot, or - /// [`SENTINEL`] if this is the end of the chain. + /// The slot is free. /// - /// `ItemIndex` is a `u32` newtype, so `SlabEntry` stays - /// minimal-overhead for small `T`: when `size_of::() < - /// size_of::()`, this saves four bytes per slot compared to - /// a `usize` (and may shrink the enum's overall size once - /// alignment is taken into account). For larger `T` the variant - /// size is dominated by `T` and there is no layout difference. + /// `next` is the index of the next free slot, or [`ItemIndex::SENTINEL`] if + /// this is the end of the chain. Vacant { next: ItemIndex }, } @@ -259,18 +253,13 @@ impl fmt::Debug for SlabEntry { /// See the [module-level docs](self) for the design and tradeoffs. pub(crate) struct ItemSet { items: Vec, AllocWrapper>, - /// LIFO head of the embedded free-list chain, or [`SENTINEL`] - /// when no slots are free. Stored as [`ItemIndex`] for symmetry - /// with [`SlabEntry::Vacant::next`] — they're the same conceptual - /// value (a slot index along the free chain). + /// LIFO head of the embedded free chain, or [`ItemIndex::SENTINEL`] when no + /// slots are free. free_head: ItemIndex, /// Count of `Occupied` slots, maintained by insert/remove. - /// Stored as `u32` because [`ItemIndex::MAX_VALID`] caps the - /// assignable slot count at `u32::MAX`, so `len` always fits. The - /// `as usize` - /// casts at boundaries with `Vec::len()` and capacity arguments - /// document where slab-internal indexing meets the wider `usize` - /// world. + /// + /// (ItemIndex is a u32, as is len, so the struct can be more tightly packed + /// than if both were usizes.) len: u32, } @@ -328,25 +317,13 @@ impl ItemSet { } /// Returns a raw pointer to the backing slot buffer. - /// - /// Intended for iterator types that need to hand out disjoint - /// `&mut T` across iterations without reborrowing `&mut ItemSet` - /// each time. The `SlabEntry` element type is exposed so callers can - /// pattern-match occupancy via [`SlabEntry::as_mut`]. - /// - /// Pair with [`Self::slot_count`] to get the in-bounds upper limit - /// for `add(index)` arithmetic. #[inline] #[cfg_attr(not(feature = "std"), expect(dead_code))] pub(crate) fn as_mut_ptr(&mut self) -> *mut SlabEntry { self.items.as_mut_ptr() } - /// Returns the number of slots in the backing buffer (occupied + - /// vacant). - /// - /// This is the in-bounds upper limit for any [`ItemIndex`] used - /// against the pointer returned by [`Self::as_mut_ptr`]. + /// Returns the number of slots in the backing buffer. #[inline] #[cfg_attr(not(feature = "std"), expect(dead_code))] pub(crate) fn slot_count(&self) -> usize { @@ -643,11 +620,9 @@ impl ItemSet { return IndexRemap::Identity; } - // Two-pointer compaction: forward scan, writing each `Occupied` - // into the next write position. As we go, build a direct position - // array `new_pos[old] = new` so callers can rewrite their stored - // indexes in O(1) per entry, instead of doing a binary_search - // over a holes list. + // Do a forward scan, writing each `Occupied` into the next write + // position. As we go, build a `new_pos[old] = new` index so callers can + // rewrite their stored indexes. assert!( pre_len <= ItemIndex::MAX_VALID.as_u32() as usize, "compact: items.len() {pre_len} exceeds MAX_VALID {}", From 27929bb3570ec06e4226b45f9e4f9472cbcaa71e Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Apr 2026 16:46:24 -0700 Subject: [PATCH 3/6] some simplifiations Created using spr 1.3.6-beta.1 --- crates/iddqd/src/id_ord_map/iter.rs | 8 +-- crates/iddqd/src/support/btree_table.rs | 1 - crates/iddqd/src/support/item_set.rs | 56 ++++++------------- crates/iddqd/tests/integration/id_hash_map.rs | 13 ++--- 4 files changed, 25 insertions(+), 53 deletions(-) diff --git a/crates/iddqd/src/id_ord_map/iter.rs b/crates/iddqd/src/id_ord_map/iter.rs index b29c5cc7..10640855 100644 --- a/crates/iddqd/src/id_ord_map/iter.rs +++ b/crates/iddqd/src/id_ord_map/iter.rs @@ -117,8 +117,6 @@ where } } -// `Send` and `Sync` are auto-derived for IterMut based on ItemSetPtr. - impl<'a, T: IdOrdItem + 'a> Iterator for IterMut<'a, T> where T::Key<'a>: Hash, @@ -141,9 +139,9 @@ where self.items.slot_count, ); - // SAFETY: The big invariants to uphold here are that: + // SAFETY: We need to show: // - // * self.items.ptr.add(raw_index) points at valid memory. + // * `self.items.ptr.add(raw_index)` points at valid memory. // * There are no overlapping mutable borrows of the same memory. // // This is shown by the following observations: @@ -163,7 +161,7 @@ where let item: &'a mut T = unsafe { (*self.items.ptr.add(raw_index)) .as_mut() - .expect("btree index points at a Some slot in ItemSet") + .expect("btree index points at an Occupied slot in ItemSet") }; let (hash, dormant) = { diff --git a/crates/iddqd/src/support/btree_table.rs b/crates/iddqd/src/support/btree_table.rs index ac7f6b2b..30b4c147 100644 --- a/crates/iddqd/src/support/btree_table.rs +++ b/crates/iddqd/src/support/btree_table.rs @@ -262,7 +262,6 @@ impl MapBTreeTable { /// [`ItemSet::shrink_to_fit`]: super::item_set::ItemSet::shrink_to_fit /// [`ItemSet::shrink_to`]: super::item_set::ItemSet::shrink_to pub(crate) fn remap_indexes(&mut self, remap: &IndexRemap) { - // Note that this is an in-place rewrite of `self.items` values. for idx in self.items.iter() { let new = remap.remap(idx.value()); idx.set_value(new); diff --git a/crates/iddqd/src/support/item_set.rs b/crates/iddqd/src/support/item_set.rs index 557d387c..73f84115 100644 --- a/crates/iddqd/src/support/item_set.rs +++ b/crates/iddqd/src/support/item_set.rs @@ -57,7 +57,6 @@ use allocator_api2::vec::Vec; use core::{ fmt, iter::FusedIterator, - marker::PhantomData, ops::{Index, IndexMut}, }; @@ -191,6 +190,7 @@ impl<'a, T, A: Allocator> GrowHandle<'a, T, A> { /// raw pointer into the `items` buffer; callers need to name the element /// type. All other interaction with slots goes through `ItemSet`'s safe /// methods. +#[derive(Clone, Debug)] pub(crate) enum SlabEntry { /// The slot holds a live value. Occupied(T), @@ -222,28 +222,9 @@ impl SlabEntry { #[inline] fn is_occupied(&self) -> bool { - matches!(self, SlabEntry::Occupied(_)) - } -} - -impl Clone for SlabEntry { - fn clone(&self) -> Self { match self { - SlabEntry::Occupied(v) => SlabEntry::Occupied(v.clone()), - SlabEntry::Vacant { next } => SlabEntry::Vacant { next: *next }, - } - } -} - -impl fmt::Debug for SlabEntry { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - SlabEntry::Occupied(v) => { - f.debug_tuple("Occupied").field(v).finish() - } - SlabEntry::Vacant { next } => { - f.debug_struct("Vacant").field("next", next).finish() - } + SlabEntry::Occupied(_) => true, + SlabEntry::Vacant { .. } => false, } } } @@ -573,17 +554,19 @@ impl ItemSet { /// that `index` is valid (and panics if it isn't). #[inline] pub(crate) fn replace(&mut self, index: ItemIndex, value: T) -> T { - match self.items.get_mut(index.as_u32() as usize) { - Some(slot @ SlabEntry::Occupied(_)) => { - match core::mem::replace(slot, SlabEntry::Occupied(value)) { - SlabEntry::Occupied(old) => old, - SlabEntry::Vacant { .. } => { - unreachable!("slot was just matched as Occupied") - } - } - } - _ => panic!("ItemSet index not found: {index}"), - } + let Some(slot) = self + .items + .get_mut(index.as_u32() as usize) + .filter(|s| s.is_occupied()) + else { + panic!("ItemSet index not found: {index}") + }; + let SlabEntry::Occupied(old) = + core::mem::replace(slot, SlabEntry::Occupied(value)) + else { + unreachable!("slot was just matched as Occupied") + }; + old } #[inline] @@ -942,18 +925,13 @@ impl FusedIterator for ValuesMut<'_, T> {} pub(crate) struct IntoValues { inner: allocator_api2::vec::IntoIter, AllocWrapper>, remaining: usize, - _marker: PhantomData, } impl IntoValues { fn new(set: ItemSet) -> Self { let remaining = set.len(); let consuming = set.into_consuming(); - Self { - inner: consuming.items.into_iter(), - remaining, - _marker: PhantomData, - } + Self { inner: consuming.items.into_iter(), remaining } } } diff --git a/crates/iddqd/tests/integration/id_hash_map.rs b/crates/iddqd/tests/integration/id_hash_map.rs index 5149daa8..275caa41 100644 --- a/crates/iddqd/tests/integration/id_hash_map.rs +++ b/crates/iddqd/tests/integration/id_hash_map.rs @@ -242,9 +242,9 @@ impl Operation { | Operation::RetainValueContains(_, _) | Operation::RetainModulo(_, _, _) | Operation::Extend(_) => CompactnessChange::NoLongerCompact, - // Clear always makes the map compact (empty). Shrink - // operations fully compact the backing store, restoring - // the `Compact` invariant. + // Clear empties the map, so it is de-facto compact. Shrink + // operations fully compact the backing store, restoring the + // `Compact` invariant. Operation::Clear | Operation::ShrinkToFit | Operation::ShrinkTo(_) => CompactnessChange::BecomesCompact, @@ -360,15 +360,12 @@ fn proptest_ops( } Operation::ShrinkToFit => { map.shrink_to_fit(); - // The naive map has no shrink operation; contents stay - // unchanged. Compactness is validated below via the - // standard `map.validate(compactness)` call, which — - // thanks to the compactness-change tracker — now - // expects `Compact`. + // The naive map has no shrink operation. map.validate(compactness).expect("map should be valid"); } Operation::ShrinkTo(min_capacity) => { map.shrink_to(min_capacity); + // The naive map has no shrink operation. map.validate(compactness).expect("map should be valid"); } } From 37a4a9870ed713fa8c748374723fa6df69e00282 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Apr 2026 16:52:16 -0700 Subject: [PATCH 4/6] more simplification Created using spr 1.3.6-beta.1 --- crates/iddqd/src/id_ord_map/iter.rs | 6 ++--- crates/iddqd/src/support/item_set.rs | 36 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/crates/iddqd/src/id_ord_map/iter.rs b/crates/iddqd/src/id_ord_map/iter.rs index 10640855..4e85ea9d 100644 --- a/crates/iddqd/src/id_ord_map/iter.rs +++ b/crates/iddqd/src/id_ord_map/iter.rs @@ -223,10 +223,8 @@ impl Iterator for IntoIter { #[inline] fn next(&mut self) -> Option { let index = self.iter.next()?; - // We own `self.items` and the btree's indexes are never revisited, - // so take directly from the consuming view (O(1), no free-list - // allocation) rather than `ItemSet::remove`, which would push to - // the free list per call. + // We own `self.items` and the B-tree's indexes are never revisited, so + // we can take directly from the consuming view. let next = self .items .take(index) diff --git a/crates/iddqd/src/support/item_set.rs b/crates/iddqd/src/support/item_set.rs index 73f84115..855138d8 100644 --- a/crates/iddqd/src/support/item_set.rs +++ b/crates/iddqd/src/support/item_set.rs @@ -2,14 +2,13 @@ //! //! # Design //! -//! Each slot is an `SlabEntry` that is either `Occupied(T)` or `Vacant { next -//! }`. The free list is threaded inline through the vacant slots with -//! `free_head` as its LIFO top and [`ItemIndex::SENTINEL`] as the -//! end-of-list sentinel. +//! Each slot is a `SlabEntry` that is either `Occupied(T)` or `Vacant { next +//! }`. The free chain consists of vacant slots that are linked together via +//! `next` pointers, with `free_head` as its LIFO top and +//! [`ItemIndex::SENTINEL`] as the end-of-list sentinel. //! -//! Removed slots are recycled by the next [`GrowHandle::insert`] at no -//! memory cost, so a churn workload stabilizes at the high-water mark of -//! simultaneously-live items rather than the cumulative insertion count. +//! Removed slots are recycled by the next [`GrowHandle::insert`], so a churn +//! workload stabilizes at the high-water mark of simultaneously-live items. //! //! The container maintains a single allocation (`items`) and uses two `u32`s //! of stack footprint beyond it (the `free_head` and the current `len`). @@ -25,12 +24,12 @@ //! the disjoint-indexes trick in [`get_disjoint_mut`], which any slot-based //! container needs regardless of backend. //! -//! This does mean that `SlabEntry` carries a discriminant, so slots are at -//! least `max(size_of::(), size_of::()) + align_of::>()` -//! regardless of whether `T` has a niche. For types with a niche (including -//! structs where a field has a niche), this is one word larger per slot than -//! `Option` would be. Benchmarking indicates that overall this is a wash. -//! Based on that, we choose the implementation with less unsafe code. +//! The tradeoff is that `SlabEntry` carries a discriminant, so slots are at +//! least `max(size_of::(), size_of::()) + align_of::>()`. +//! For types with a niche (including structs where a field has a niche), this +//! is one word larger per slot than `Option` would be. Benchmarking +//! indicates that overall this is a wash. Based on that, we choose the +//! implementation with less unsafe code. //! //! # Invariants //! @@ -41,9 +40,10 @@ //! contains no in-bounds index that refers to an `Occupied` slot. //! 3. `len == items.iter().filter(|e| matches!(e, Occupied(_))).count()`. //! -//! Under these invariants the live item count is `self.len`, and -//! `items.len() - self.len` equals the number of vacant slots (and -//! therefore the length of the embedded free-list chain). +//! Under these invariants: +//! +//! * The number of occupied slots is `self.len`. +//! * The number of vacant slots is `items.len() - self.len`. use super::{ ItemIndex, @@ -589,7 +589,7 @@ impl ItemSet { } /// Moves every live slot down to fill `Vacant` holes, truncates - /// `items` to its new length, and clears the free-list chain. + /// `items` to its new length, and clears the free chain. fn compact(&mut self) -> IndexRemap { let pre_len = self.items.len(); if pre_len == self.len as usize { @@ -1006,7 +1006,7 @@ impl ConsumingItemSet { return None; } // The free chain is no longer maintained in this view, so any - // `next` value is fine — `SENTINEL` is a natural choice. + // `next` value is fine. `SENTINEL` is a natural choice. let SlabEntry::Occupied(v) = core::mem::replace( slot, SlabEntry::Vacant { next: ItemIndex::SENTINEL }, From c2a08475cc28e4b868d7602f6359cc2b02578167 Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 4 May 2026 13:12:18 -0700 Subject: [PATCH 5/6] fix build Created using spr 1.3.6-beta.1 --- crates/iddqd/src/support/hash_builder.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/iddqd/src/support/hash_builder.rs b/crates/iddqd/src/support/hash_builder.rs index 129f69d2..324d1ac5 100644 --- a/crates/iddqd/src/support/hash_builder.rs +++ b/crates/iddqd/src/support/hash_builder.rs @@ -18,17 +18,17 @@ mod dummy { type Hasher = Self; fn build_hasher(&self) -> Self::Hasher { - match self {} + unreachable!("this is an empty enum so self cannot exist") } } impl Hasher for DefaultHashBuilder { fn write(&mut self, _bytes: &[u8]) { - match self {} + unreachable!("this is an empty enum so self cannot exist") } fn finish(&self) -> u64 { - match self {} + unreachable!("this is an empty enum so self cannot exist") } } } From 88508b4e16ea557a82927d1d2f45c122a8f79b81 Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 4 May 2026 13:14:45 -0700 Subject: [PATCH 6/6] rustdoc fixes Created using spr 1.3.6-beta.1 --- crates/iddqd/src/support/item_set.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/iddqd/src/support/item_set.rs b/crates/iddqd/src/support/item_set.rs index af2c6bc8..dc0d393e 100644 --- a/crates/iddqd/src/support/item_set.rs +++ b/crates/iddqd/src/support/item_set.rs @@ -21,7 +21,7 @@ //! hand-rolled unsafe allocator to manage the secondary allocation (i.e., north //! of 350 lines of layout-math, lifetime, and `Send`/`Sync` reasoning). The //! slot-based layout eliminates that module entirely: the only unsafe in this -//! file is the disjoint-indexes trick in [`get_disjoint_mut`], which any +//! file is the disjoint-indexes trick in `get_disjoint_mut`, which any //! slot-based container needs regardless of backend. //! //! The tradeoff is that `ItemSlot` carries a discriminant, so slots are at @@ -36,7 +36,7 @@ //! 1. For every `i < items.len()`: `items[i]` is `Occupied` iff `i` is //! not reachable from `free_head` via the `Vacant::next` chain. //! 2. The `Vacant::next` chain starting at `free_head` terminates at -//! [`SENTINEL`], visits every vacant slot exactly once, and +//! `SENTINEL`, visits every vacant slot exactly once, and //! contains no in-bounds index that refers to an `Occupied` slot. //! 3. `len == items.iter().filter(|e| matches!(e, Occupied(_))).count()`. //! @@ -186,7 +186,7 @@ impl<'a, T, A: Allocator> GrowHandle<'a, T, A> { /// A single slot in an [`ItemSet`]. /// -/// Exposed at `pub(crate)` because [`ItemSet::as_mut_ptr`] hands out a +/// Exposed at `pub(crate)` because `ItemSet::as_mut_ptr` hands out a /// raw pointer into the `items` buffer; callers need to name the element /// type. All other interaction with slots goes through `ItemSet`'s safe /// methods.