diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index c0d76c1393b..00624b5e032 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -8,6 +8,7 @@ include(SouffleTests) souffle_add_binary_test(binary_relation_test src SOUFFLE_HEADERS_ONLY) souffle_add_binary_test(brie_test src SOUFFLE_HEADERS_ONLY) +souffle_add_binary_test(btree_delete_test src SOUFFLE_HEADERS_ONLY) souffle_add_binary_test(btree_multiset_test src SOUFFLE_HEADERS_ONLY) souffle_add_binary_test(btree_set_test src SOUFFLE_HEADERS_ONLY) souffle_add_binary_test(compiled_tuple_test src SOUFFLE_HEADERS_ONLY) diff --git a/src/tests/btree_delete_test.cpp b/src/tests/btree_delete_test.cpp new file mode 100644 index 00000000000..338060f57ab --- /dev/null +++ b/src/tests/btree_delete_test.cpp @@ -0,0 +1,929 @@ +/* + * Souffle - A Datalog Compiler + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved + * Licensed under the Universal Permissive License v 1.0 as shown at: + * - https://opensource.org/licenses/UPL + * - /licenses/SOUFFLE-UPL.txt + */ + +/************************************************************************ +* +* @file btree_delete_test.cpp +* +* A test case testing the B-trees utilization as sets. +* +***********************************************************************/ + +#include "tests/test.h" + +#include "souffle/datastructure/BTreeDelete.h" +#include "souffle/utility/ContainerUtil.h" +#include "souffle/utility/StreamUtil.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _OPENMP +#include +#endif + +namespace std { + +template +struct hash> { + std::size_t operator()(const tuple& t) const { + auto a = std::hash()(get<0>(t)); + auto b = std::hash()(get<1>(t)); + // from http://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html + return a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2)); + } +}; + +template +std::ostream& operator<<(std::ostream& out, const tuple& t) { + return out << "[" << get<0>(t) << "," << get<1>(t) << "]"; +} +} // namespace std + +namespace souffle::test { + +TEST(BTreeDelete, Basic) { + const bool DEBUG = false; + + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + EXPECT_EQ(3, test_set::max_keys_per_node); + + // check initial conditions + EXPECT_EQ(0u, t.size()); + EXPECT_FALSE(t.contains(10)); + EXPECT_FALSE(t.contains(12)); + EXPECT_FALSE(t.contains(14)); + EXPECT_EQ(0, t.getDepth()); + EXPECT_EQ(0, t.getNumNodes()); + + if (DEBUG) t.printTree(); + + // add an element + + t.insert(12); + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } + + EXPECT_EQ(1u, t.size()); + EXPECT_FALSE(t.contains(10)); + EXPECT_TRUE(t.contains(12)); + EXPECT_FALSE(t.contains(14)); + EXPECT_EQ(1, t.getDepth()); + EXPECT_EQ(1, t.getNumNodes()); + + // add a larger element + + t.insert(14); + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } + EXPECT_EQ(2u, t.size()); + EXPECT_FALSE(t.contains(10)); + EXPECT_TRUE(t.contains(12)); + EXPECT_TRUE(t.contains(14)); + EXPECT_EQ(1, t.getDepth()); + EXPECT_EQ(1, t.getNumNodes()); + + // add a smaller element + + t.insert(10); + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } + EXPECT_EQ(3u, t.size()); + EXPECT_TRUE(t.contains(10)); + EXPECT_TRUE(t.contains(12)); + EXPECT_TRUE(t.contains(14)); + EXPECT_EQ(1, t.getDepth()); + EXPECT_EQ(1, t.getNumNodes()); + + // cause a split + t.insert(11); + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } + EXPECT_EQ(4u, t.size()); + EXPECT_TRUE(t.contains(10)); + EXPECT_TRUE(t.contains(11)); + EXPECT_TRUE(t.contains(12)); + EXPECT_TRUE(t.contains(14)); + + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } + + EXPECT_EQ(4u, t.size()); + t.insert(12); + EXPECT_EQ(4u, t.size()); + t.insert(12); + EXPECT_EQ(4u, t.size()); + + t.insert(10); + EXPECT_EQ(4u, t.size()); + + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } + + t.insert(15); + EXPECT_EQ(5u, t.size()); + EXPECT_EQ(2, t.getDepth()); + EXPECT_EQ(3, t.getNumNodes()); + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } + + t.insert(16); + EXPECT_EQ(6u, t.size()); + if (DEBUG) { + t.printTree(); + std::cout << "\n"; + } +} + +TEST(BTreeDelete, Duplicates) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + for (int i = 0; i < 10; i++) { + t.insert(0); + } + + EXPECT_EQ(1, t.size()); + + EXPECT_EQ(0, *t.begin()); + + // t.printTree(); +} + +TEST(BTreeDelete, Incremental) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + int N = 1000; + + for (int i = 0; i < N; i++) { + // std::cout << "\nBefore:\n"; + // t.printTree(); + // std::cout << "\n"; + t.insert(i); + // std::cout << "\nAfter:\n"; + // std::cout << "\n"; + // t.printTree(); + // std::cout << "\n"; + // std::cout << "\n\n"; + + for (int j = 0; j < N; j++) { + EXPECT_EQ(j <= i, t.contains(j)) << "i=" << i << ", j=" << j; + } + } + + t.printStats(); +} + +TEST(BTreeDelete, Decremental) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + int N = 1000; + + for (int i = N; i >= 0; i--) { + // std::cout << "\nBefore:\n"; + // t.printTree(); + // std::cout << "\n"; + t.insert(i); + // std::cout << "\nAfter:\n"; + // t.printTree(); + // std::cout << "\n"; + // std::cout << "\n\n"; + + for (int j = 0; j < N; j++) { + EXPECT_EQ(j >= i, t.contains(j)) << "i=" << i << ", j=" << j; + } + } +} + +TEST(BTreeDelete, Shuffled) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + int N = 10000; + + std::vector data; + for (int i = 0; i < N; i++) { + data.push_back(i); + } + std::random_device rd; + std::mt19937 generator(rd()); + + shuffle(data.begin(), data.end(), generator); + + for (int i = 0; i < N; i++) { + t.insert(data[i]); + } + + // t.printTree(); + + for (int i = 0; i < N; i++) { + EXPECT_TRUE(t.contains(i)) << "i=" << i; + } +} + +TEST(BTreeDelete, Copy) { + using test_set = btree_delete_set; + + test_set t; + + int N = 100000; + + std::vector data; + for (int i = 0; i < N; i++) { + data.push_back(i); + } + std::random_device rd; + std::mt19937 generator(rd()); + + shuffle(data.begin(), data.end(), generator); + + for (int i = 0; i < N; i++) { + t.insert(data[i]); + } + + EXPECT_EQ((std::size_t)N, t.size()); + + for (int i = 0; i < N; i++) { + EXPECT_TRUE(t.find(i) != t.end()) << "i=" << i; + } + + test_set t2; + EXPECT_EQ((std::size_t)N, t.size()); + EXPECT_EQ(0, t2.size()); + + t2 = t; + + EXPECT_EQ((std::size_t)N, t.size()); + EXPECT_EQ((std::size_t)N, t2.size()); + + for (int i = 0; i < N; i++) { + EXPECT_TRUE(t.find(i) != t.end()) << "i=" << i; + } + for (int i = 0; i < N; i++) { + EXPECT_TRUE(t2.find(i) != t2.end()) << "i=" << i; + } + + EXPECT_FALSE(t.find(N + 1) != t.end()); + EXPECT_FALSE(t2.find(N + 1) != t2.end()); + t2.insert(N + 1); + EXPECT_FALSE(t.find(N + 1) != t.end()); + EXPECT_TRUE(t2.find(N + 1) != t2.end()); + + for (int i = 0; i < N; i++) { + EXPECT_NE(&*t.find(i), &*t2.find(i)) << "i=" << i; + } +} + +TEST(BTreeDelete, EraseEmpty) { + using test_set = btree_delete_set, std::allocator, 2>; + test_set t; + + t.erase(1); + t.erase(2); + t.erase(3); + + EXPECT_TRUE(t.empty()); +} + +TEST(BTreeDelete, EraseBasic) { + using test_set = btree_delete_set, std::allocator, 2>; + test_set t; + + int N = 10; + + for (int i = 0; i <= N; i++) { + t.insert(i); + } + + for (int i = 0; i <= N; i++) { + EXPECT_EQ((std::size_t)(N + 1 - i), t.size()); + t.erase(i); + } + + EXPECT_TRUE(t.empty()); +} + +TEST(BTreeDelete, EraseStress) { + using test_set = btree_delete_set, std::allocator, 2>; + + test_set t; + + int N = 1000; + + std::vector data; + std::set expected_set; + for (int i = 0; i < N; i++) { + data.push_back(i); + expected_set.insert(i); + t.insert(i); + } + std::random_device rd; + std::mt19937 generator(rd()); + + shuffle(data.begin(), data.end(), generator); + + for (int i = 0; i < N; i++) { + EXPECT_EQ(expected_set.size(), t.size()); + + expected_set.erase(data[i]); + t.erase(data[i]); + + EXPECT_TRUE(std::equal(expected_set.begin(), expected_set.end(), t.begin())); + } +} + +TEST(BTreeDelete, Merge) { + using test_set = btree_delete_set; + + test_set a; + test_set b; + + a.insert(1); + a.insert(2); + a.insert(3); + a.insert(4); + + b.insert(2); + b.insert(4); + b.insert(6); + + EXPECT_NE(a, b); + + test_set c = a; + test_set d = b; + + EXPECT_NE(c, d); +} + +TEST(BTreeDelete, IteratorEmpty) { + using test_set = btree_delete_set, std::allocator, 16>; + test_set t; + + EXPECT_EQ(t.begin(), t.end()); +} + +TEST(BTreeDelete, IteratorBasic) { + using test_set = btree_delete_set, std::allocator, 16>; + test_set t; + + int N = 10; + + for (int i = 0; i <= N; i++) { + t.insert(i); + } + + // t.printTree(); + + auto it = t.begin(); + auto e = t.end(); + + EXPECT_NE(it, e); + + int last = -1; + for (int i : t) { + EXPECT_EQ(last + 1, i); + last = i; + } + EXPECT_EQ(last, N); +} + +TEST(BTreeDelete, IteratorStress) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + int N = 1000; + + std::vector data; + for (int i = 0; i < N; i++) { + data.push_back(i); + } + std::random_device rd; + std::mt19937 generator(rd()); + + shuffle(data.begin(), data.end(), generator); + + int max = -1; + for (int i = 0; i < N; i++) { + EXPECT_EQ((std::size_t)i, t.size()); + + int last = -1; + for (int i : t) { + EXPECT_LT(last, i); + last = i; + } + EXPECT_EQ(last, max); + + t.insert(data[i]); + max = (data[i] > max) ? data[i] : max; + } +} + +TEST(BTreeDelete, BoundaryTest) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + for (int i = 0; i < 10; i++) { + t.insert(i); + } + + // t.printTree(); + + auto a = t.lower_bound(5); + EXPECT_EQ(5, *a); + + auto b = t.upper_bound(5); + EXPECT_EQ(6, *b); + + // add duplicates + + t.insert(5); + t.insert(5); + t.insert(5); + + // t.printTree(); std::cout << "\n"; + + // test again .. + a = t.lower_bound(5); + EXPECT_EQ(5, *a); + + b = t.upper_bound(5); + EXPECT_EQ(6, *b); + + // check the distance + EXPECT_EQ(++a, b); +} + +TEST(BTreeDelete, BoundaryEmpty) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + EXPECT_EQ(t.end(), t.lower_bound(5)); + EXPECT_EQ(t.end(), t.upper_bound(5)); + + t.insert(4); + + EXPECT_EQ(t.lower_bound(3), t.upper_bound(3)); + EXPECT_EQ(t.lower_bound(5), t.upper_bound(5)); + + t.insert(6); + EXPECT_EQ(t.lower_bound(3), t.upper_bound(3)); + EXPECT_EQ(t.lower_bound(5), t.upper_bound(5)); + + t.insert(5); + + EXPECT_EQ(t.lower_bound(3), t.upper_bound(3)); + EXPECT_NE(t.lower_bound(5), t.upper_bound(5)); +} + +TEST(BTreeDelete, Load) { + using test_set = btree_delete_set, std::allocator, 16>; + + for (int N = 0; N < 100; N++) { + // generate some ordered data + std::vector data; + + for (int i = 0; i < N; i++) { + data.push_back(i); + } + + auto t = test_set::load(data.begin(), data.end()); + // t.printTree(); + + EXPECT_EQ(data.size(), t.size()); + EXPECT_TRUE(t.check()); + + int last = -1; + for (int c : t) { + EXPECT_EQ(last + 1, c); + last = c; + } + EXPECT_EQ(last, N - 1); + } +} + +TEST(BTreeDelete, Clear) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + EXPECT_TRUE(t.empty()); + + t.insert(5); + + EXPECT_FALSE(t.empty()); + t.clear(); + EXPECT_TRUE(t.empty()); + + t.clear(); + EXPECT_TRUE(t.empty()); +} + +TEST(BTreeDelete, ChunkSplit) { + using test_set = btree_delete_set, std::allocator, 16>; + + test_set t; + + // TODO: test for empty + + for (int i = 0; i < 100; i++) { + t.insert(i); + } + + // split chunks + auto chunks = t.getChunks(20); + + // EXPECT_EQ(20, chunks.size()); + + for (const auto& cur : chunks) { + for (int i : cur) { + std::cout << i << ", "; + } + std::cout << "\n"; + } + + int last = -1; + for (const auto& chunk : chunks) { + for (auto current : chunk) { + EXPECT_EQ(last + 1, current); + last = current; + } + } +} + +TEST(BTreeDelete, ChunkSplitStress) { + using test_set = btree_delete_set, std::allocator, 16>; + + for (int i = 0; i < 1000; i++) { + // generate random sequence + std::vector data; + for (int j = 0; j < i; j++) { + data.push_back(j); + } + std::random_device rd; + std::mt19937 generator(rd()); + + shuffle(data.begin(), data.end(), generator); + + // fill tree + test_set t; + + for (int x : data) { + t.insert(x); + } + + for (int j = 1; j < 100; j++) { + auto chunks = t.getChunks(j); + // EXPECT_LT(chunks.size(), j * 2); + + if (chunks.empty()) continue; + + // check covered range + EXPECT_EQ(0, *chunks.front().begin()); + + int last = -1; + for (const auto& chunk : chunks) { + for (auto current : chunk) { + EXPECT_EQ(last + 1, current); + last = current; + } + } + + EXPECT_EQ(i - 1, last); + } + } +} + +using Entry = std::tuple; + +std::vector getData(unsigned numEntries) { + std::vector res(numEntries); + int k = 0; + for (unsigned i = 0; i < numEntries; i++) { + res[k++] = Entry(i / 100, i % 100); + } + std::random_device rd; + std::mt19937 generator(rd()); + + shuffle(res.begin(), res.end(), generator); + return res; +} + +using time_point = std::chrono::high_resolution_clock::time_point; + +time_point now() { + return std::chrono::high_resolution_clock::now(); +} + +long duration(const time_point& start, const time_point& end) { + return std::chrono::duration_cast(end - start).count(); +} + +template +long time(const std::string& name, const Op& operation) { + std::cout << "\t" << std::setw(30) << std::setiosflags(std::ios::left) << name + << std::resetiosflags(std::ios::left) << " ... " << std::flush; + auto a = now(); + operation(); + auto b = now(); + long time = duration(a, b); + std::cout << " done [" << std::setw(5) << time << "ms]\n"; + return time; +} + +template +struct reserver { + void operator()(C&, unsigned) const { + // default: no action + } +}; + +template +struct reserver> { + void operator()(std::unordered_set& set, unsigned size) const { + set.reserve(size); + } +}; + +#define checkPerformance(set_type, name, in, out) \ + { \ + std::cout << "Testing: " << name << " ..\n"; \ + set_type set; \ + time("filling set", [&]() { \ + reserver()(set, in.size()); \ + for (const auto& cur : in) { \ + set.insert(cur); \ + } \ + }); \ + EXPECT_EQ(in.size(), set.size()); \ + int counter = 0; \ + time("full scan", [&]() { \ + for (auto it = set.begin(); it != set.end(); ++it) { \ + counter++; \ + } \ + }); \ + EXPECT_EQ(in.size(), (std::size_t)counter); \ + bool allPresent = true; \ + time("membership in", [&]() { \ + for (const auto& cur : in) { \ + allPresent = (set.find(cur) != set.end()) && allPresent; \ + } \ + }); \ + EXPECT_TRUE(allPresent); \ + bool allMissing = true; \ + time("membership out", [&]() { \ + for (const auto& cur : out) { \ + allMissing = (set.find(cur) == set.end()) && allMissing; \ + } \ + }); \ + EXPECT_TRUE(allMissing); \ + bool allFound = true; \ + time("lower_boundaries", [&]() { \ + for (const auto& cur : in) { \ + allFound = (set.lower_bound(cur) == set.find(cur)) && allFound; \ + } \ + }); \ + EXPECT_TRUE(allFound); \ + allFound = true; \ + time("upper_boundaries", [&]() { \ + for (const auto& cur : in) { \ + allFound = (set.upper_bound(cur) == (++set.find(cur))) && allFound; \ + } \ + }); \ + EXPECT_TRUE(allFound); \ + allFound = true; \ + time("boundaries on missing elements", [&]() { \ + for (const auto& cur : out) { \ + allFound = (set.lower_bound(cur) == set.upper_bound(cur)) && allFound; \ + } \ + }); \ + EXPECT_TRUE(allFound); \ + set_type a(in.begin(), in.end()); \ + set_type b(out.begin(), out.end()); \ + time("merge two sets", [&]() { a.insert(b.begin(), b.end()); }); \ + std::cout << "\tDone!\n\n"; \ + } + +TEST(Performance, Basic) { + // int N = 1<<22; + int N = 1 << 18; + + // get list of tuples to be inserted + std::cout << "Generating Test-Data ...\n"; + std::vector in; + std::vector out; + time("generating data", [&]() { + auto data = getData(2 * N); + for (std::size_t i = 0; i < data.size(); i += 2) { + in.push_back(data[i]); + out.push_back(data[i + 1]); + } + }); + + using t1 = std::set; + checkPerformance(t1, " -- warm up -- ", in, out); + + using t2 = btree_delete_set, std::allocator, 256, detail::linear_search>; + checkPerformance(t2, "souffle btree_delete_set - 256 - linear", in, out); + + using t3 = btree_delete_set, std::allocator, 256, detail::binary_search>; + checkPerformance(t3, "souffle btree_delete_set - 256 - binary", in, out); +} + +TEST(Performance, Load) { + // int N = 1<<24; + int N = 1 << 20; + + std::vector data; + for (int i = 0; i < N; i++) { + data.push_back(i); + } + + // take time for conventional load + time("conventional load", [&]() { btree_delete_set t(data.begin(), data.end()); }); + + // take time for structured load + time("bulk-load", [&]() { auto t = btree_delete_set::load(data.begin(), data.end()); }); +} + +TEST(BTreeDelete, Parallel) { + // const int N = 600000000; + // const int N = 100000; + // const int N = 10000; + const int N = 1000; + // const int N = 300; + // const int N = 100; + + // get a unordered list of test data + using entry_t = int; + std::vector list; + btree_delete_set filter; + + for (int i = 0; i < N; i++) { + list.push_back(i); + } + + // the number of times duplicates show up in the input set + for (int dup = 1; dup < 4; dup++) { + // now duplicate this list + std::vector full; + for (int i = 0; i < dup; i++) { + for (const auto& cur : list) { + full.push_back(cur); + } + } + + // shuffle data + std::random_device rd; + std::mt19937 generator(rd()); + + std::shuffle(full.begin(), full.end(), generator); + + // now insert all those values into a new set - in parallel + btree_delete_set res; +#ifdef _OPENMP +#pragma omp parallel for // schedule(static,1) +#endif + for (auto it = full.begin(); it < full.end(); ++it) { + res.insert(*it); + } + + EXPECT_TRUE(res.check()); + + // check resulting values + EXPECT_EQ(N, res.size()); + + std::set should(full.begin(), full.end()); + std::set is(res.begin(), res.end()); + + for (const auto& cur : should) { + EXPECT_TRUE(res.contains(cur)) << "Missing: " << cur << "\n"; + } + + for (const auto& cur : res) { + EXPECT_TRUE(should.find(cur) != should.end()) << "Additional: " << cur << "\n" + << "Contained: " << res.contains(cur) << "\n"; + } + + std::vector extra; + for (const auto& cur : is) { + if (should.find(cur) == should.end()) extra.push_back(cur); + } + EXPECT_TRUE(extra.empty()) << "Extra elments: " << extra << "\n"; + + std::vector missing; + for (const auto& cur : should) { + if (is.find(cur) == is.end()) missing.push_back(cur); + } + EXPECT_TRUE(missing.empty()) << "Missing elments: " << missing << "\n"; + // << "All Elements: " << should << "\n"; + + EXPECT_EQ(N, should.size()); + EXPECT_EQ(N, is.size()); + EXPECT_EQ(should, is); + } +} + +#ifdef _OPENMP + +TEST(BTreeDelete, ParallelScaling) { + using test_set = btree_delete_set; + using op_context_type = test_set::operation_hints; + + // const int N = 60000000; // real benchmark + // const int N = 1000000; // to not run to long for unit testing + // const int N = 100000; // to not run to long for unit testing + const int N = 1000; // to not run to long for unit testing + + // create some random data + std::vector data; + for (int i = 0; i < N; i++) { + data.push_back(i); + } + std::vector data2 = data; + std::random_device rd; + std::mt19937 generator(rd()); + + std::shuffle(data.begin(), data.end(), generator); + std::shuffle(data2.begin(), data2.end(), generator); + + for (int i = 1; i <= 8; i++) { + test_set t; + + omp_set_num_threads(i); + + double start = omp_get_wtime(); + +#pragma omp parallel + { + op_context_type ctxt; + +#pragma omp for + for (int i = 0; i < N; i++) { + t.insert(data[i], ctxt); + t.insert(data2[i], ctxt); + } + } + + double end = omp_get_wtime(); + + std::cout << "Number of threads: " << i << "[" << (end - start) << "ms]\n"; + + // t.printTree(); + + EXPECT_EQ(N, t.size()); + int count = 0; + int last = -1; + for (int i : t) { + EXPECT_EQ(last + 1, i); + last = i; + count++; + } + EXPECT_EQ(N - 1, last); + + EXPECT_EQ(N, count); + } +} + +#endif +} // namespace souffle::test