Skip to content

Commit 0797c98

Browse files
committed
ksparse recovery sketch (ba dum tss)
1 parent 678b393 commit 0797c98

1 file changed

Lines changed: 96 additions & 0 deletions

File tree

include/recovery.h

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#include "bucket.h"
2+
#include "sketch.h"
3+
4+
class SparseRecovery {
5+
private:
6+
size_t universe_size;
7+
size_t max_recovery_size;
8+
size_t cleanup_sketch_support;
9+
static constexpr double reduction_factor = 0.82;
10+
// approx 1-1/2e. TODO - can do better. closer to 1-1/e with right
11+
// bounding parameters
12+
// TODO - rewrite this for better locality
13+
// should just be a single array, maybe with a lookup set of pointers for the start of each
14+
std::vector<std::vector<Bucket>> recovery_buckets;
15+
// TODO - see if we want to continue maintaining the deterministic bucket
16+
Bucket deterministic_bucket;
17+
Sketch cleanup_sketch;
18+
public:
19+
SparseRecovery(size_t universe_size, size_t max_recovery_size, double cleanup_sketch_support_factor, uint64_t seed):
20+
// TODO - ugly constructor
21+
cleanup_sketch(universe_size, seed, ceil(cleanup_sketch_support_factor * log2(universe_size)) * 2, 1)
22+
{
23+
universe_size = universe_size;
24+
max_recovery_size = max_recovery_size;
25+
cleanup_sketch_support = ceil(cleanup_sketch_support_factor * log2(universe_size));
26+
size_t current_cfr_size = max_recovery_size;
27+
while (current_cfr_size > cleanup_sketch_support) {
28+
// doing it this way also deals with zero-initialization
29+
recovery_buckets.push_back(std::vector<Bucket>(current_cfr_size));
30+
current_cfr_size = ceil(current_cfr_size * reduction_factor);
31+
}
32+
};
33+
void update(const vec_t update) {
34+
// TODO - checksum seed agreement.
35+
vec_hash_t checksum = Bucket_Boruvka::get_index_hash(update,0);
36+
for (size_t cfr_idx=0; cfr_idx < recovery_buckets.size(); cfr_idx++) {
37+
// TODO - get this with an actual function
38+
size_t hash_index = Bucket_Boruvka::get_index_hash(update, cfr_idx * 1231) % recovery_buckets[cfr_idx].size();
39+
// recovery_buckets[cfr_idx][hash_index] ^= update;
40+
Bucket_Boruvka::update(recovery_buckets[cfr_idx][hash_index], update, checksum);
41+
}
42+
cleanup_sketch.update(update);
43+
}
44+
void reset() {
45+
// zero contents of the CFRs
46+
cleanup_sketch.zero_contents();
47+
};
48+
// NOTE THAT THIS IS A DESTRUCTIVE OPERATION AT THE MOMENT.
49+
std::vector<Bucket> recover() {
50+
std::vector<Bucket> recovered_indices;
51+
for (size_t cfr_idx=0; cfr_idx < recovery_buckets.size(); cfr_idx++) {
52+
// first, remove all the already recovered indices
53+
for (auto recov: recovered_indices) {
54+
size_t hash_index = Bucket_Boruvka::get_index_hash(recov.alpha, cfr_idx * 1231) % recovery_buckets[cfr_idx].size();
55+
recovery_buckets[cfr_idx][hash_index] ^= recov;
56+
}
57+
// now go hunting for good buckets
58+
for (size_t bucket_idx=0; bucket_idx < recovery_buckets[cfr_idx].size(); bucket_idx++) {
59+
Bucket &bucket = recovery_buckets[cfr_idx][bucket_idx];
60+
if (Bucket_Boruvka::is_good(bucket, 0)) {
61+
recovered_indices.push_back(bucket);
62+
}
63+
}
64+
// ... repeat until we cleared all the cfrs
65+
}
66+
// now, recover from the sketches
67+
for (auto recov: recovered_indices) {
68+
cleanup_sketch.update(recov.alpha);
69+
}
70+
size_t i=0;
71+
for (; i < cleanup_sketch.get_num_samples(); i++) {
72+
ExhaustiveSketchSample sample = cleanup_sketch.exhaustive_sample();
73+
if (sample.result == ZERO) {
74+
break;
75+
}
76+
for (auto idx: sample.idxs) {
77+
// todo - checksum stuff. tihs is bad code writing but whatever, anything
78+
// to get out of writing psuedocode...
79+
recovered_indices.push_back({idx, Bucket_Boruvka::get_index_hash(idx, 0)});
80+
// todo - this is inefficient. we are recalculating the bucket hash
81+
// for literally no reason
82+
cleanup_sketch.update(idx);
83+
}
84+
}
85+
if (i == cleanup_sketch.get_num_samples()) {
86+
// we ran out of samples
87+
// TODO - UNDO YOUR RECOVERY!!!
88+
}
89+
return recovered_indices;
90+
};
91+
void merge(const SparseRecovery &other) {
92+
// TODO - xor together all the CFRs
93+
cleanup_sketch.merge(other.cleanup_sketch);
94+
};
95+
~SparseRecovery();
96+
};

0 commit comments

Comments
 (0)