Skip to content

Commit 0936d78

Browse files
committed
optimizations, still debugging
1 parent 6931393 commit 0936d78

6 files changed

Lines changed: 262 additions & 90 deletions

File tree

include/sketch_types.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
#pragma once
2+
3+
#include <exception>
4+
#include <vector>
5+
6+
#include "types.h"
27
// enum SerialType {
38
// FULL,
49
// RANGE,
@@ -17,19 +22,18 @@ struct SketchSample {
1722
};
1823

1924
// Return type of exhaustive_sample(): `idxs` collects every value the sampler
// pushed (DenseSketch pushes the `alpha` field of each bucket that passes
// Bucket_Boruvka::is_good), and `result` is the accompanying SampleResult code
// (DenseSketch returns ZERO or GOOD alongside it).
// This diff switches `idxs` from std::unordered_set to std::vector, so the
// container itself no longer collapses duplicate values.
struct ExhaustiveSketchSample {
20-
std::unordered_set<vec_t> idxs;
25+
std::vector<vec_t> idxs;
2126
SampleResult result;
2227
};
2328

2429
/**
 * Exception reporting that a sketch has no samples left to give.
 * DenseSketch::exhaustive_sample() throws it when sample_idx >= num_samples.
 *
 * The human-readable message is built once in the constructor and owned by the
 * exception object, so the pointer returned by what() stays valid for as long
 * as the exception (or a copy of it) is alive.
 */
class OutOfSamplesException : public std::exception {
 private:
  std::string err_msg;  // fully formatted message handed out by what()

 public:
  /**
   * @param seed         seed of the sketch that ran out of samples
   * @param num_samples  maximum number of samples the sketch supports
   * @param sample_idx   sample index that was requested
   */
  OutOfSamplesException(size_t seed, size_t num_samples, size_t sample_idx)
      : err_msg("This sketch (seed=" + std::to_string(seed) +
                ", max samples=" + std::to_string(num_samples) +
                ") cannot be sampled more times (cur idx=" + std::to_string(sample_idx) + ")!") {}

  // `noexcept override` replaces the deprecated `throw()` specification (removed
  // in C++20) and lets the compiler verify this really overrides
  // std::exception::what().
  const char* what() const noexcept override { return err_msg.c_str(); }
};

include/sparse_sketch.h

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class SparseSketch {
5656
// Allocated buckets
5757
Bucket* buckets;
5858

59-
static constexpr size_t min_num_dense_rows = 4;
59+
static constexpr size_t min_num_dense_rows = 5;
6060
size_t num_dense_rows = min_num_dense_rows;
6161

6262
// Variables for sparse representation of lower levels of bucket Matrix
@@ -81,6 +81,47 @@ class SparseSketch {
8181
void update_sparse(uint8_t col, SparseBucket to_add, bool realloc_if_needed = true);
8282
SketchSample sample_sparse(size_t first_col, size_t end_col);
8383

84+
// Detaches the first entry of the list whose head index lives in
// ll_metadata[col]: the head is advanced to the old head's `next` link and the
// old head index is returned.
// No emptiness check is done here; callers are expected to know the list is
// non-empty (claim_free_bucket asserts it before calling).
inline uint8_t remove_ll_head(size_t col) {
85+
uint8_t temp = ll_metadata[col];
86+
ll_metadata[col] = sparse_buckets[ll_metadata[col]].next;
87+
return temp;
88+
}
89+
// Pops and returns the head of the list stored in the extra metadata slot
// ll_metadata[num_columns]. free_bucket() pushes released buckets onto that
// same slot, so it serves as the free list.
// The assert rejects a head of uint8_t(-1) -- presumably the "empty list"
// marker; confirm against the initialisation code. With NDEBUG the check
// disappears and an exhausted free list is not detected here.
inline uint8_t claim_free_bucket() {
90+
assert(ll_metadata[num_columns] != uint8_t(-1));
91+
return remove_ll_head(num_columns);
92+
}
93+
// Pushes sparse bucket `add_idx` onto the front of the list for `col`: the
// bucket's `next` link takes the current head, then the head in
// ll_metadata[col] is replaced by `add_idx`.
inline void insert_to_ll_head(size_t col, uint8_t add_idx) {
94+
sparse_buckets[add_idx].next = ll_metadata[col];
95+
ll_metadata[col] = add_idx;
96+
}
97+
// Releases sparse bucket `bkt_idx`: zeroes its `row` and its bucket contents,
// then pushes it onto the list kept in slot ll_metadata[num_columns] (the list
// claim_free_bucket() pops from).
// The bucket is NOT unlinked from any column list here; merge_sparse_bkt
// unlinks it first before calling this.
inline void free_bucket(uint8_t bkt_idx) {
98+
sparse_buckets[bkt_idx].row = 0;
99+
sparse_buckets[bkt_idx].bkt = {0, 0};
100+
insert_to_ll_head(num_columns, bkt_idx);
101+
}
102+
// Links sparse bucket `add_idx` directly after `prev`: the new bucket inherits
// prev's old successor, and prev.next then points at `add_idx`.
inline void insert_to_ll(uint8_t add_idx, SparseBucket &prev) {
103+
sparse_buckets[add_idx].next = prev.next;
104+
prev.next = add_idx;
105+
}
106+
// Unlinks `bkt_to_remove` by making `prev` point past it. Only prev.next is
// written; bkt_to_remove.next is left as it was.
// NOTE(review): assumes `prev` is the immediate predecessor of `bkt_to_remove`
// -- nothing here checks that.
inline void remove_from_ll(SparseBucket& bkt_to_remove, SparseBucket &prev) {
107+
prev.next = bkt_to_remove.next;
108+
}
109+
// XORs the alpha/gamma of `oth` into our sparse bucket `our_idx`. If the
// merged bucket is empty according to Bucket_Boruvka::is_empty, the bucket is
// unlinked from column `col`'s list and returned to the free list.
//
// our_idx   index into sparse_buckets of the bucket being merged into
// oth       the other sketch's bucket (only its bkt.alpha / bkt.gamma are read)
// prev_idx  index of our bucket's predecessor in the column list, or
//           uint8_t(-1) to indicate there is none
// col       column whose list `our_idx` belongs to
// returns   true if our bucket became empty and was freed, false otherwise
inline bool merge_sparse_bkt(uint8_t our_idx, SparseBucket& oth, uint8_t prev_idx, size_t col) {
110+
SparseBucket &ours = sparse_buckets[our_idx];
111+
ours.bkt.alpha ^= oth.bkt.alpha;
112+
ours.bkt.gamma ^= oth.bkt.gamma;
113+
if (Bucket_Boruvka::is_empty(ours.bkt)) {
114+
// No predecessor: drop the column's head.
// NOTE(review): this relies on our_idx being the current head of col's list.
if (prev_idx == uint8_t(-1))
115+
remove_ll_head(col);
116+
else
117+
remove_from_ll(ours, sparse_buckets[prev_idx]);
118+
119+
// Unlinking must happen before this call: free_bucket overwrites `next`.
free_bucket(our_idx);
120+
return true;
121+
}
122+
return false;
123+
}
124+
84125
// Returns a mutable reference to the first allocated bucket, buckets[0].
inline Bucket& deterministic_bucket() {
85126
return buckets[0];
86127
}
@@ -119,6 +160,11 @@ class SparseSketch {
119160
ll_metadata = (uint8_t *) &buckets[calc_metadata_index(num_dense_rows)];
120161
}
121162

163+
// given another SparseSketch column, merge it into ours
164+
void merge_sparse_column(SparseBucket *oth_sparse_buckets, uint8_t *oth_ll_metadata, size_t col);
165+
166+
void validate();
167+
122168
public:
123169
/**
124170
* The below constructors use vector length as their input. However, in graph sketching our input

src/cc_sketch_alg.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ inline void CCSketchAlg::create_merge_instructions(std::vector<MergeInstr> &merg
466466
}
467467

468468
void CCSketchAlg::boruvka_emulation() {
469-
// auto start = std::chrono::steady_clock::now();
469+
auto start = std::chrono::steady_clock::now();
470470
update_locked = true;
471471

472472
cc_alg_start = std::chrono::steady_clock::now();
@@ -486,27 +486,27 @@ void CCSketchAlg::boruvka_emulation() {
486486
}
487487
size_t round_num = 0;
488488
bool modified = true;
489-
// std::cout << std::endl;
490-
// std::cout << " pre boruvka processing = "
491-
// << std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
492-
// << std::endl;
489+
std::cout << std::endl;
490+
std::cout << " pre boruvka processing = "
491+
<< std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
492+
<< std::endl;
493493

494494
while (true) {
495-
// std::cout << " Round: " << round_num << std::endl;
496-
// start = std::chrono::steady_clock::now();
495+
std::cout << " Round: " << round_num << std::endl;
496+
start = std::chrono::steady_clock::now();
497497
modified = perform_boruvka_round(round_num, merge_instr, global_merges);
498-
// std::cout << " perform_boruvka_round = "
499-
// << std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
500-
// << std::endl;
498+
std::cout << " perform_boruvka_round = "
499+
<< std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
500+
<< std::endl;
501501

502502
if (!modified) break;
503503

504504
// calculate updated merge instructions for next round
505-
// start = std::chrono::steady_clock::now();
505+
start = std::chrono::steady_clock::now();
506506
create_merge_instructions(merge_instr);
507-
// std::cout << " create_merge_instructions = "
508-
// << std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
509-
// << std::endl;
507+
std::cout << " create_merge_instructions = "
508+
<< std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
509+
<< std::endl;
510510
++round_num;
511511
}
512512
last_query_rounds = round_num;
@@ -534,11 +534,11 @@ ConnectedComponents CCSketchAlg::connected_components() {
534534
bool except = false;
535535
std::exception_ptr err;
536536
try {
537-
// auto start = std::chrono::steady_clock::now();
537+
auto start = std::chrono::steady_clock::now();
538538
boruvka_emulation();
539-
// std::cout << " boruvka's algorithm = "
540-
// << std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
541-
// << std::endl;
539+
std::cout << " boruvka's algorithm = "
540+
<< std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count()
541+
<< std::endl;
542542
} catch (...) {
543543
except = true;
544544
err = std::current_exception();

src/dense_sketch.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ ExhaustiveSketchSample DenseSketch::exhaustive_sample() {
124124
if (sample_idx >= num_samples) {
125125
throw OutOfSamplesException(seed, num_samples, sample_idx);
126126
}
127-
std::unordered_set<vec_t> ret;
127+
std::vector<vec_t> ret;
128128

129129
size_t idx = sample_idx++;
130130
size_t first_column = idx * cols_per_sample;
@@ -133,14 +133,14 @@ ExhaustiveSketchSample DenseSketch::exhaustive_sample() {
133133
return {ret, ZERO}; // the "first" bucket is deterministic so if zero then no edges to return
134134

135135
unlikely_if (Bucket_Boruvka::is_good(deterministic_bucket(), checksum_seed())) {
136-
ret.insert(deterministic_bucket().alpha);
136+
ret.push_back(deterministic_bucket().alpha);
137137
return {ret, GOOD};
138138
}
139139

140140
for (size_t i = 0; i < cols_per_sample; ++i) {
141141
for (size_t j = 0; j < bkt_per_col; ++j) {
142142
unlikely_if (Bucket_Boruvka::is_good(bucket(i + first_column, j), checksum_seed())) {
143-
ret.insert(bucket(i + first_column, j).alpha);
143+
ret.push_back(bucket(i + first_column, j).alpha);
144144
}
145145
}
146146
}

0 commit comments

Comments
 (0)