Skip to content

Commit 2b9f78c

Browse files
committed
more progress
1 parent 50d39ee commit 2b9f78c

3 files changed

Lines changed: 105 additions & 65 deletions

File tree

include/bucket.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,20 @@ struct Bucket {
1212
struct SparseBucket {
1313
uint16_t position; // (col << 8) | row
1414
Bucket bkt;
15+
16+
// TODO: Use these functions and also maybe optimize
17+
inline uint16_t col() const {
18+
return position >> 8;
19+
}
20+
inline uint16_t row() const {
21+
return position & 0xFFFF;
22+
}
23+
inline void set_col(uint16_t col) {
24+
position = (col << 8) + row();
25+
}
26+
inline void set_row(uint16_t row) {
27+
position = (col() << 8) + row;
28+
}
1529
};
1630
#pragma pack(pop)
1731

include/sparse_sketch.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ class SparseSketch {
8888
// that will be using that space
8989
size_t sparse_data_size = ceil(double(sparse_capacity) * sizeof(SparseBucket) / sizeof(Bucket));
9090

91-
int update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t checksum);
92-
SketchSample sample_sparse(size_t column);
91+
void update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t checksum);
92+
SketchSample sample_sparse(size_t first_col, size_t end_col);
9393

9494
inline Bucket& deterministic_bucket() {
9595
return buckets[0];
@@ -98,14 +98,18 @@ class SparseSketch {
9898
return buckets[0];
9999
}
100100

101+
// Translate a (col, row) cell into an index of the bucket array, for a dense
// region laid out column by column with `num_rows` buckets per column.
// The trailing +1 steps over slot 0 of the array.
inline size_t position_func(size_t col, size_t row, size_t num_rows) const {
  const size_t cell_offset = col * num_rows + row;
  return cell_offset + 1;
}
104+
101105
// return the bucket at a particular index in bucket array
102106
inline Bucket& bucket(size_t col, size_t row) {
103107
assert(row < num_dense_rows);
104-
return buckets[col * num_dense_rows + row + 1];
108+
return buckets[position_func(col, row, num_dense_rows)];
105109
}
106110
// Read-only lookup of the dense bucket stored for cell (col, row).
// `row` must lie inside the dense region; this is asserted.
inline const Bucket& bucket(size_t col, size_t row) const {
  assert(row < num_dense_rows);
  const size_t idx = position_func(col, row, num_dense_rows);
  return buckets[idx];
}
110114

111115
public:

src/sparse_sketch.cpp

Lines changed: 83 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ SparseSketch::SparseSketch(vec_t vector_len, uint64_t seed, size_t _samples, siz
1515
// plus 1, deterministic bucket
1616
num_buckets = num_columns * num_dense_rows + sparse_data_size + 1;
1717
buckets = new Bucket[num_buckets];
18-
sparse_buckets = (SparseBucket *) &buckets[num_columns * num_dense_rows + 2];
18+
sparse_buckets = (SparseBucket *) &buckets[num_columns * num_dense_rows + 1];
1919

2020
// initialize bucket values
2121
for (size_t i = 0; i < num_buckets; ++i) {
@@ -57,71 +57,80 @@ SparseSketch::~SparseSketch() { delete[] buckets; }
5757

5858
// Helper functions for interfacing with SparseBuckets
5959
void SparseSketch::reallocate_if_needed() {
60+
if (num_dense_rows <= min_num_dense_rows) return; // do not reallocate
6061
if (number_of_sparse_buckets > num_columns && number_of_sparse_buckets < sparse_capacity)
6162
return; // do not reallocate
62-
else {
63-
const size_t old_buckets = num_buckets;
64-
Bucket *new_buckets;
65-
66-
if (number_of_sparse_buckets < num_columns && num_dense_rows > min_num_dense_rows) {
67-
// shrink dense region by 1 row
68-
// 1. Scan over deepest row of dense region and add all those buckets to sparse
69-
size_t depth = num_dense_rows - 1;
70-
for (size_t c = 0; c < num_columns; c++) {
71-
Bucket bkt = bucket(c, depth);
72-
if (!Bucket_Boruvka::is_empty(bkt)) {
73-
uint16_t sparse_position = (c << 8) + depth;
74-
update_sparse(sparse_position, bkt.alpha, bkt.gamma);
75-
}
76-
}
7763

78-
// 2. Allocate new memory
79-
--num_dense_rows;
80-
num_buckets = num_columns * num_dense_rows + sparse_data_size + 1;
81-
new_buckets = new Bucket[num_buckets];
82-
} else {
83-
// grow dense region by 1 row
84-
// 1. Allocate new memory
85-
++num_dense_rows;
86-
num_buckets = num_columns * num_dense_rows + sparse_data_size + 1;
87-
new_buckets = new Bucket[num_buckets];
88-
89-
// 2. Skip
64+
// we are performing a reallocation
65+
std::cout << "Reallocating!" << std::endl;
66+
std::cout << "num_sparse: " << number_of_sparse_buckets << std::endl;
67+
std::cout << "capacity: " << sparse_capacity << std::endl;
68+
const size_t old_buckets = num_buckets;
69+
const size_t old_rows = num_dense_rows;
70+
SparseBucket *old_sparse_pointer = sparse_buckets;
71+
Bucket *new_buckets;
72+
73+
if (number_of_sparse_buckets < num_columns) {
74+
// shrink dense region by 1 row
75+
// Scan over deepest row of dense region and add all those buckets to sparse
76+
size_t depth = num_dense_rows - 1;
77+
for (size_t c = 0; c < num_columns; c++) {
78+
Bucket bkt = bucket(c, depth);
79+
if (!Bucket_Boruvka::is_empty(bkt)) {
80+
uint16_t sparse_position = (c << 8) + depth;
81+
update_sparse(sparse_position, bkt.alpha, bkt.gamma);
82+
}
9083
}
9184

92-
// 3. Copy over content
93-
size_t dense_buckets = num_columns * num_dense_rows + 1;
94-
for (size_t i = 0; i < dense_buckets; i++) {
95-
new_buckets[i] = buckets[i];
96-
}
97-
for (size_t i = 0; i < sparse_capacity; i++) {
98-
new_buckets[num_buckets - i] = buckets[old_buckets - i];
85+
// Allocate new memory
86+
--num_dense_rows;
87+
num_buckets = num_columns * num_dense_rows + sparse_data_size + 1;
88+
new_buckets = new Bucket[num_buckets];
89+
} else {
90+
// grow dense region by 1 row
91+
// Allocate new memory
92+
++num_dense_rows;
93+
num_buckets = num_columns * num_dense_rows + sparse_data_size + 1;
94+
new_buckets = new Bucket[num_buckets];
95+
}
96+
sparse_buckets = (SparseBucket *) &new_buckets[num_columns * num_dense_rows + 1];
97+
98+
// Copy dense content
99+
for (size_t c = 0; c < num_columns; c++) {
100+
for (size_t r = 0; r < std::min(num_dense_rows, old_rows); r++) {
101+
new_buckets[position_func(c, r, num_dense_rows)] = buckets[position_func(c, r, old_rows)];
99102
}
103+
}
104+
// sparse contents
105+
memcpy(sparse_buckets, old_sparse_pointer, sparse_capacity * sizeof(SparseBucket));
100106

101-
if (num_buckets > old_buckets) {
102-
// 3.5. Scan sparse buckets and move all updates of depth num_dense_rows-1
103-
// to the new dense row
104-
uint16_t depth_mask = 0xFFFF;
105-
for (size_t i = 0; i < sparse_capacity; i++) {
106-
if ((sparse_buckets[i].position & depth_mask) == num_dense_rows - 1) {
107-
size_t column = sparse_buckets[i].position >> 8;
108-
bucket(column, num_dense_rows - 1) = sparse_buckets[i].bkt;
109-
sparse_buckets[i].position = uint16_t(-1); // tombstone
110-
}
107+
108+
if (num_buckets > old_buckets) {
109+
// We shrinking
110+
// Scan sparse buckets and move all updates of depth num_dense_rows-1
111+
// to the new dense row
112+
uint16_t depth_mask = 0xFFFF;
113+
for (size_t i = 0; i < sparse_capacity; i++) {
114+
if ((sparse_buckets[i].position & depth_mask) == num_dense_rows - 1) {
115+
size_t column = sparse_buckets[i].position >> 8;
116+
bucket(column, num_dense_rows - 1) = sparse_buckets[i].bkt;
117+
sparse_buckets[i].position = uint16_t(-1); // tombstone
118+
number_of_sparse_buckets -= 1;
111119
}
112120
}
113-
114-
// 4. Clean up
115-
std::swap(buckets, new_buckets);
116-
delete[] new_buckets;
117121
}
122+
123+
// 4. Clean up
124+
std::swap(buckets, new_buckets);
125+
delete[] new_buckets;
118126
}
119127

120128
// Update a bucket value
121-
// Returns 1 if we added a new bucket value
122-
// 0 if the bucket was found and update (but not cleared)
123-
// -1 if the bucket was found and cleared of all content
124-
int SparseSketch::update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t checksum) {
129+
// Changes number_of_sparse_buckets as follows:
130+
// +1 if we added a new bucket value
131+
// 0 if the bucket was found and updated (but not cleared)
132+
// -1 if the bucket was found and cleared of all content
133+
void SparseSketch::update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t checksum) {
125134
SparseBucket *tombstone = nullptr;
126135
uint16_t tombstone_pos = uint16_t(-1);
127136
for (size_t i = 0; i < num_buckets; i++) {
@@ -135,8 +144,9 @@ int SparseSketch::update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t check
135144
// did we clear it out?
136145
if (Bucket_Boruvka::is_empty(sparse_bucket.bkt)) {
137146
sparse_bucket.position = tombstone_pos; // mark it as tombstone
138-
return -1;
147+
number_of_sparse_buckets -= 1;
139148
}
149+
return;
140150
} else {
141151
if (tombstone != nullptr) {
142152
// use the tombstone
@@ -148,22 +158,28 @@ int SparseSketch::update_sparse(uint16_t pos, vec_t update_idx, vec_hash_t check
148158
}
149159

150160
// we created a new sparse bucket
151-
return 1;
161+
number_of_sparse_buckets += 1;
162+
return;
152163
}
153164
} else if (sparse_bucket.position == tombstone_pos && tombstone == nullptr) {
154165
tombstone = &sparse_bucket;
166+
number_of_sparse_buckets += 1;
167+
return;
155168
}
156169
}
157170
// this is an error!
171+
std::cout << "num_sparse: " << number_of_sparse_buckets << std::endl;
172+
std::cout << "capacity: " << sparse_capacity << std::endl;
158173
throw std::runtime_error("update_sparse(): Failed to find update location!");
159174
}
160175

161176
// sample a good bucket from the sparse region if one exists.
162177
// Additionally, specify the half-open column range [first_col, end_col) to query from
163178
// TODO: Do we want to include this column thing?
164-
SketchSample SparseSketch::sample_sparse(size_t column) {
179+
SketchSample SparseSketch::sample_sparse(size_t first_col, size_t end_col) {
165180
for (size_t i = 0; i < sparse_capacity; i++) {
166-
if (size_t(sparse_buckets[i].position >> 8) == column &&
181+
if (size_t(sparse_buckets[i].position >> 8) >= first_col &&
182+
size_t(sparse_buckets[i].position >> 8) < end_col &&
167183
Bucket_Boruvka::is_good(sparse_buckets[i].bkt, checksum_seed())) {
168184
return {sparse_buckets[i].bkt.alpha, GOOD};
169185
}
@@ -187,7 +203,7 @@ void SparseSketch::update(const vec_t update_idx) {
187203
likely_if(depth < num_dense_rows) {
188204
Bucket_Boruvka::update(bucket(i, depth), update_idx, checksum);
189205
} else {
190-
number_of_sparse_buckets += update_sparse((i << 8) | depth, update_idx, checksum);
206+
update_sparse((i << 8) | depth, update_idx, checksum);
191207

192208
// based upon this update to sparse matrix, check if we need to reallocate dense region
193209
reallocate_if_needed();
@@ -229,8 +245,8 @@ SketchSample SparseSketch::sample() {
229245
}
230246
}
231247

232-
// TODO: Sample sparse region!
233-
return {0, FAIL};
248+
// Sample sparse region
249+
return sample_sparse(first_column, first_column + cols_per_sample);
234250
}
235251

236252
ExhaustiveSketchSample SparseSketch::exhaustive_sample() {
@@ -258,7 +274,11 @@ ExhaustiveSketchSample SparseSketch::exhaustive_sample() {
258274
}
259275
}
260276

261-
// TODO: Implement this with sparse!
277+
// TODO: How do we do exhaustive sampling properly here?
278+
SketchSample sample = sample_sparse(first_column, first_column + cols_per_sample);
279+
if (sample.result == GOOD) {
280+
ret.insert(sample.idx);
281+
}
262282

263283
unlikely_if (ret.size() == 0)
264284
return {ret, FAIL};
@@ -298,6 +318,8 @@ void SparseSketch::range_merge(const SparseSketch &other, size_t start_sample, s
298318
bucket(i, j).gamma ^= other.bucket(i, j).gamma;
299319
}
300320
}
321+
322+
// TODO: Handle sparse!
301323
}
302324

303325
void SparseSketch::merge_raw_bucket_buffer(const Bucket *raw_buckets) {

0 commit comments

Comments
 (0)