@@ -15,7 +15,7 @@ SparseSketch::SparseSketch(vec_t vector_len, uint64_t seed, size_t _samples, siz
1515 // plus 1, deterministic bucket
1616 num_buckets = num_columns * num_dense_rows + sparse_data_size + 1 ;
1717 buckets = new Bucket[num_buckets];
18- sparse_buckets = (SparseBucket *) &buckets[num_columns * num_dense_rows + 2 ];
18+ sparse_buckets = (SparseBucket *) &buckets[num_columns * num_dense_rows + 1 ];
1919
2020 // initialize bucket values
2121 for (size_t i = 0 ; i < num_buckets; ++i) {
@@ -57,71 +57,80 @@ SparseSketch::~SparseSketch() { delete[] buckets; }
5757
5858// Helper functions for interfacing with SparseBuckets
5959void SparseSketch::reallocate_if_needed () {
60+ if (num_dense_rows <= min_num_dense_rows) return ; // do not reallocate
6061 if (number_of_sparse_buckets > num_columns && number_of_sparse_buckets < sparse_capacity)
6162 return ; // do not reallocate
62- else {
63- const size_t old_buckets = num_buckets;
64- Bucket *new_buckets;
65-
66- if (number_of_sparse_buckets < num_columns && num_dense_rows > min_num_dense_rows) {
67- // shrink dense region by 1 row
68- // 1. Scan over deepest row of dense region and add all those buckets to sparse
69- size_t depth = num_dense_rows - 1 ;
70- for (size_t c = 0 ; c < num_columns; c++) {
71- Bucket bkt = bucket (c, depth);
72- if (!Bucket_Boruvka::is_empty (bkt)) {
73- uint16_t sparse_position = (c << 8 ) + depth;
74- update_sparse (sparse_position, bkt.alpha , bkt.gamma );
75- }
76- }
7763
78- // 2. Allocate new memory
79- --num_dense_rows;
80- num_buckets = num_columns * num_dense_rows + sparse_data_size + 1 ;
81- new_buckets = new Bucket[num_buckets];
82- } else {
83- // grow dense region by 1 row
84- // 1. Allocate new memory
85- ++num_dense_rows;
86- num_buckets = num_columns * num_dense_rows + sparse_data_size + 1 ;
87- new_buckets = new Bucket[num_buckets];
88-
89- // 2. Skip
64+ // we are performing a reallocation
65+ std::cout << " Reallocating!" << std::endl;
66+ std::cout << " num_sparse: " << number_of_sparse_buckets << std::endl;
67+ std::cout << " capacity: " << sparse_capacity << std::endl;
68+ const size_t old_buckets = num_buckets;
69+ const size_t old_rows = num_dense_rows;
70+ SparseBucket *old_sparse_pointer = sparse_buckets;
71+ Bucket *new_buckets;
72+
73+ if (number_of_sparse_buckets < num_columns) {
74+ // shrink dense region by 1 row
75+ // Scan over deepest row of dense region and add all those buckets to sparse
76+ size_t depth = num_dense_rows - 1 ;
77+ for (size_t c = 0 ; c < num_columns; c++) {
78+ Bucket bkt = bucket (c, depth);
79+ if (!Bucket_Boruvka::is_empty (bkt)) {
80+ uint16_t sparse_position = (c << 8 ) + depth;
81+ update_sparse (sparse_position, bkt.alpha , bkt.gamma );
82+ }
9083 }
9184
92- // 3. Copy over content
93- size_t dense_buckets = num_columns * num_dense_rows + 1 ;
94- for (size_t i = 0 ; i < dense_buckets; i++) {
95- new_buckets[i] = buckets[i];
96- }
97- for (size_t i = 0 ; i < sparse_capacity; i++) {
98- new_buckets[num_buckets - i] = buckets[old_buckets - i];
85+ // Allocate new memory
86+ --num_dense_rows;
87+ num_buckets = num_columns * num_dense_rows + sparse_data_size + 1 ;
88+ new_buckets = new Bucket[num_buckets];
89+ } else {
90+ // grow dense region by 1 row
91+ // Allocate new memory
92+ ++num_dense_rows;
93+ num_buckets = num_columns * num_dense_rows + sparse_data_size + 1 ;
94+ new_buckets = new Bucket[num_buckets];
95+ }
96+ sparse_buckets = (SparseBucket *) &new_buckets[num_columns * num_dense_rows + 1 ];
97+
98+ // Copy dense content
99+ for (size_t c = 0 ; c < num_columns; c++) {
100+ for (size_t r = 0 ; r < std::min (num_dense_rows, old_rows); r++) {
101+ new_buckets[position_func (c, r, num_dense_rows)] = buckets[position_func (c, r, old_rows)];
99102 }
103+ }
104+ // sparse contents
105+ memcpy (sparse_buckets, old_sparse_pointer, sparse_capacity * sizeof (SparseBucket));
100106
101- if (num_buckets > old_buckets) {
102- // 3.5. Scan sparse buckets and move all updates of depth num_dense_rows-1
103- // to the new dense row
104- uint16_t depth_mask = 0xFFFF ;
105- for (size_t i = 0 ; i < sparse_capacity; i++) {
106- if ((sparse_buckets[i].position & depth_mask) == num_dense_rows - 1 ) {
107- size_t column = sparse_buckets[i].position >> 8 ;
108- bucket (column, num_dense_rows - 1 ) = sparse_buckets[i].bkt ;
109- sparse_buckets[i].position = uint16_t (-1 ); // tombstone
110- }
107+
108+ if (num_buckets > old_buckets) {
109+ // We are growing the dense region (the new allocation is larger than the old one)
110+ // Scan sparse buckets and move all updates of depth num_dense_rows-1
111+ // to the new dense row
112+ uint16_t depth_mask = 0xFFFF ;
113+ for (size_t i = 0 ; i < sparse_capacity; i++) {
114+ if ((sparse_buckets[i].position & depth_mask) == num_dense_rows - 1 ) {
115+ size_t column = sparse_buckets[i].position >> 8 ;
116+ bucket (column, num_dense_rows - 1 ) = sparse_buckets[i].bkt ;
117+ sparse_buckets[i].position = uint16_t (-1 ); // tombstone
118+ number_of_sparse_buckets -= 1 ;
111119 }
112120 }
113-
114- // 4. Clean up
115- std::swap (buckets, new_buckets);
116- delete[] new_buckets;
117121 }
122+
123+ // Clean up
124+ std::swap (buckets, new_buckets);
125+ delete[] new_buckets;
118126}
119127
120128// Update a bucket value
121- // Returns 1 if we added a new bucket value
122- // 0 if the bucket was found and update (but not cleared)
123- // -1 if the bucket was found and cleared of all content
124- int SparseSketch::update_sparse (uint16_t pos, vec_t update_idx, vec_hash_t checksum) {
129+ // Changes number_of_sparse_buckets as follows:
130+ // +1 if we added a new bucket value
131+ // 0 if the bucket was found and updated (but not cleared)
132+ // -1 if the bucket was found and cleared of all content
133+ void SparseSketch::update_sparse (uint16_t pos, vec_t update_idx, vec_hash_t checksum) {
125134 SparseBucket *tombstone = nullptr ;
126135 uint16_t tombstone_pos = uint16_t (-1 );
127136 for (size_t i = 0 ; i < num_buckets; i++) {
@@ -135,8 +144,9 @@ int SparseSketch::update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t check
135144 // did we clear it out?
136145 if (Bucket_Boruvka::is_empty (sparse_bucket.bkt )) {
137146 sparse_bucket.position = tombstone_pos; // mark it as tombstone
138- return - 1 ;
147+ number_of_sparse_buckets -= 1 ;
139148 }
149+ return ;
140150 } else {
141151 if (tombstone != nullptr ) {
142152 // use the tombstone
@@ -148,22 +158,28 @@ int SparseSketch::update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t check
148158 }
149159
150160 // we created a new sparse bucket
151- return 1 ;
161+ number_of_sparse_buckets += 1 ;
162+ return ;
152163 }
153164 } else if (sparse_bucket.position == tombstone_pos && tombstone == nullptr ) {
154165 tombstone = &sparse_bucket;
166+ number_of_sparse_buckets += 1 ;
167+ return ;
155168 }
156169 }
157170 // this is an error!
171+ std::cout << " num_sparse: " << number_of_sparse_buckets << std::endl;
172+ std::cout << " capacity: " << sparse_capacity << std::endl;
158173 throw std::runtime_error (" update_sparse(): Failed to find update location!" );
159174}
160175
161176// sample a good bucket from the sparse region if one exists.
162177// Additionally, specify the column to query from
163178// TODO: Do we want to include this column thing?
164- SketchSample SparseSketch::sample_sparse (size_t column ) {
179+ SketchSample SparseSketch::sample_sparse (size_t first_col, size_t end_col ) {
165180 for (size_t i = 0 ; i < sparse_capacity; i++) {
166- if (size_t (sparse_buckets[i].position >> 8 ) == column &&
181+ if (size_t (sparse_buckets[i].position >> 8 ) >= first_col &&
182+ size_t (sparse_buckets[i].position >> 8 ) < end_col &&
167183 Bucket_Boruvka::is_good (sparse_buckets[i].bkt , checksum_seed ())) {
168184 return {sparse_buckets[i].bkt .alpha , GOOD};
169185 }
@@ -187,7 +203,7 @@ void SparseSketch::update(const vec_t update_idx) {
187203 likely_if (depth < num_dense_rows) {
188204 Bucket_Boruvka::update (bucket (i, depth), update_idx, checksum);
189205 } else {
190- number_of_sparse_buckets += update_sparse ((i << 8 ) | depth, update_idx, checksum);
206+ update_sparse ((i << 8 ) | depth, update_idx, checksum);
191207
192208 // based upon this update to sparse matrix, check if we need to reallocate dense region
193209 reallocate_if_needed ();
@@ -229,8 +245,8 @@ SketchSample SparseSketch::sample() {
229245 }
230246 }
231247
232- // TODO: Sample sparse region!
233- return { 0 , FAIL} ;
248+ // Sample sparse region
249+ return sample_sparse (first_column, first_column + cols_per_sample) ;
234250}
235251
236252ExhaustiveSketchSample SparseSketch::exhaustive_sample () {
@@ -258,7 +274,11 @@ ExhaustiveSketchSample SparseSketch::exhaustive_sample() {
258274 }
259275 }
260276
261- // TODO: Implement this with sparse!
277+ // TODO: How do we do exhaustive sampling properly here?
278+ SketchSample sample = sample_sparse (first_column, first_column + cols_per_sample);
279+ if (sample.result == GOOD) {
280+ ret.insert (sample.idx );
281+ }
262282
263283 unlikely_if (ret.size () == 0 )
264284 return {ret, FAIL};
@@ -298,6 +318,8 @@ void SparseSketch::range_merge(const SparseSketch &other, size_t start_sample, s
298318 bucket (i, j).gamma ^= other.bucket (i, j).gamma ;
299319 }
300320 }
321+
322+ // TODO: Handle sparse!
301323}
302324
303325void SparseSketch::merge_raw_bucket_buffer (const Bucket *raw_buckets) {
0 commit comments