#include <cmath>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

#include "bucket.h"
#include "sketch.h"

4+ class SparseRecovery {
5+ private:
6+ size_t universe_size;
7+ size_t max_recovery_size;
8+ size_t cleanup_sketch_support;
9+ static constexpr double reduction_factor = 0.82 ;
10+ // approx 1-1/2e. TODO - can do better. closer to 1-1/e with right
11+ // bounding parameters
12+ // TODO - rewrite this for better locality
13+ // should just be a single array, maybe with a lookup set of pointers for the start of each
14+ std::vector<std::vector<Bucket>> recovery_buckets;
15+ // TODO - see if we want to continue maintaining the deterministic bucket
16+ Bucket deterministic_bucket;
17+ Sketch cleanup_sketch;
18+ public:
19+ SparseRecovery (size_t universe_size, size_t max_recovery_size, double cleanup_sketch_support_factor, uint64_t seed):
20+ // TODO - ugly constructor
21+ cleanup_sketch (universe_size, seed, ceil(cleanup_sketch_support_factor * log2 (universe_size)) * 2, 1)
22+ {
23+ universe_size = universe_size;
24+ max_recovery_size = max_recovery_size;
25+ cleanup_sketch_support = ceil (cleanup_sketch_support_factor * log2 (universe_size));
26+ size_t current_cfr_size = max_recovery_size;
27+ while (current_cfr_size > cleanup_sketch_support) {
28+ // doing it this way also deals with zero-initialization
29+ recovery_buckets.push_back (std::vector<Bucket>(current_cfr_size));
30+ current_cfr_size = ceil (current_cfr_size * reduction_factor);
31+ }
32+ };
33+ void update (const vec_t update) {
34+ // TODO - checksum seed agreement.
35+ vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update,0 );
36+ for (size_t cfr_idx=0 ; cfr_idx < recovery_buckets.size (); cfr_idx++) {
37+ // TODO - get this with an actual function
38+ size_t hash_index = Bucket_Boruvka::get_index_hash (update, cfr_idx * 1231 ) % recovery_buckets[cfr_idx].size ();
39+ // recovery_buckets[cfr_idx][hash_index] ^= update;
40+ Bucket_Boruvka::update (recovery_buckets[cfr_idx][hash_index], update, checksum);
41+ }
42+ cleanup_sketch.update (update);
43+ }
44+ void reset () {
45+ // zero contents of the CFRs
46+ cleanup_sketch.zero_contents ();
47+ };
48+ // NOTE THAT THIS IS A DESTRUCTIVE OPERATION AT THE MOMENT.
49+ std::vector<Bucket> recover () {
50+ std::vector<Bucket> recovered_indices;
51+ for (size_t cfr_idx=0 ; cfr_idx < recovery_buckets.size (); cfr_idx++) {
52+ // first, remove all the already recovered indices
53+ for (auto recov: recovered_indices) {
54+ size_t hash_index = Bucket_Boruvka::get_index_hash (recov.alpha , cfr_idx * 1231 ) % recovery_buckets[cfr_idx].size ();
55+ recovery_buckets[cfr_idx][hash_index] ^= recov;
56+ }
57+ // now go hunting for good buckets
58+ for (size_t bucket_idx=0 ; bucket_idx < recovery_buckets[cfr_idx].size (); bucket_idx++) {
59+ Bucket &bucket = recovery_buckets[cfr_idx][bucket_idx];
60+ if (Bucket_Boruvka::is_good (bucket, 0 )) {
61+ recovered_indices.push_back (bucket);
62+ }
63+ }
64+ // ... repeat until we cleared all the cfrs
65+ }
66+ // now, recover from the sketches
67+ for (auto recov: recovered_indices) {
68+ cleanup_sketch.update (recov.alpha );
69+ }
70+ size_t i=0 ;
71+ for (; i < cleanup_sketch.get_num_samples (); i++) {
72+ ExhaustiveSketchSample sample = cleanup_sketch.exhaustive_sample ();
73+ if (sample.result == ZERO) {
74+ break ;
75+ }
76+ for (auto idx: sample.idxs ) {
77+ // todo - checksum stuff. tihs is bad code writing but whatever, anything
78+ // to get out of writing psuedocode...
79+ recovered_indices.push_back ({idx, Bucket_Boruvka::get_index_hash (idx, 0 )});
80+ // todo - this is inefficient. we are recalculating the bucket hash
81+ // for literally no reason
82+ cleanup_sketch.update (idx);
83+ }
84+ }
85+ if (i == cleanup_sketch.get_num_samples ()) {
86+ // we ran out of samples
87+ // TODO - UNDO YOUR RECOVERY!!!
88+ }
89+ return recovered_indices;
90+ };
91+ void merge (const SparseRecovery &other) {
92+ // TODO - xor together all the CFRs
93+ cleanup_sketch.merge (other.cleanup_sketch );
94+ };
95+ ~SparseRecovery ();
96+ };
0 commit comments