From 673f3e5a5c895c465f38357f0b568589c7101ec7 Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Sun, 5 Oct 2025 14:38:34 +0200 Subject: [PATCH 01/12] Support any number of threads --- include/builder/parallel_sort.hpp | 4 +++- src/build.cpp | 3 --- tools/build.cpp | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/builder/parallel_sort.hpp b/include/builder/parallel_sort.hpp index 66668e0..a12848a 100644 --- a/include/builder/parallel_sort.hpp +++ b/include/builder/parallel_sort.hpp @@ -65,7 +65,6 @@ void parallel_sort(std::vector& data, const uint64_t num_threads, Compare com return; } - assert((num_threads & (num_threads - 1)) == 0); const uint64_t data_size = data.size(); const uint64_t chunk_size = (data_size + num_threads - 1) / num_threads; const uint64_t sequential_merge_threshold = data_size / uint64_t(std::log2(num_threads)); @@ -114,6 +113,9 @@ void parallel_sort(std::vector& data, const uint64_t num_threads, Compare com auto merged_begin = output_iterator; auto merged_end = merged_begin + output_size; next_ranges.push_back({merged_begin, merged_end}); + } else { + // Odd range out: carry it forward to the next iteration + next_ranges.push_back(ranges[i]); } } ranges.swap(next_ranges); diff --git a/src/build.cpp b/src/build.cpp index 759326b..15ab344 100644 --- a/src/build.cpp +++ b/src/build.cpp @@ -28,9 +28,6 @@ void dictionary::build(std::string const& filename, if (build_config.l >= constants::max_l) { throw std::runtime_error("l must be < " + std::to_string(constants::max_l)); } - if ((build_config.num_threads & (build_config.num_threads - 1)) != 0) { - throw std::runtime_error("number of threads must be a power of 2"); - } m_k = build_config.k; m_m = build_config.m; diff --git a/tools/build.cpp b/tools/build.cpp index cb38f14..9c9d738 100644 --- a/tools/build.cpp +++ b/tools/build.cpp @@ -21,7 +21,7 @@ int build(int argc, char** argv) { parser.add("seed", "Seed for construction (default is " + std::to_string(constants::seed) + ").", "-s", false); - parser.add("t", "Number of threads (default is 1). Must be a power of 2.", "-t", false); + parser.add("t", "Number of threads (default is 1).", "-t", false); parser.add("l", "A (integer) constant that controls the space/time trade-off of the dictionary. " "A reasonable values lies in [2.." + From 9a6ae807979a9de848863d5f6cd23be76aef388a Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Sun, 5 Oct 2025 15:32:58 +0200 Subject: [PATCH 02/12] Fix parallel_sort.hpp --- include/builder/parallel_sort.hpp | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/builder/parallel_sort.hpp b/include/builder/parallel_sort.hpp index a12848a..c1f531e 100644 --- a/include/builder/parallel_sort.hpp +++ b/include/builder/parallel_sort.hpp @@ -55,17 +55,18 @@ void parallel_merge(Iterator A_begin, Iterator A_end, // working memory, which must be of same size as data. 
*/ template -void parallel_sort(std::vector& data, const uint64_t num_threads, Compare comp) // +void parallel_sort(std::vector& data, uint64_t num_threads, Compare comp) // { std::vector temp_data; temp_data.resize(data.size()); + const uint64_t data_size = data.size(); + num_threads = std::min(num_threads, data_size); if (num_threads <= 1) { std::sort(data.begin(), data.end(), comp); return; } - const uint64_t data_size = data.size(); const uint64_t chunk_size = (data_size + num_threads - 1) / num_threads; const uint64_t sequential_merge_threshold = data_size / uint64_t(std::log2(num_threads)); assert(sequential_merge_threshold > 0); @@ -93,30 +94,29 @@ void parallel_sort(std::vector& data, const uint64_t num_threads, Compare com while (ranges.size() != 1) // { next_ranges.clear(); - for (uint64_t i = 0; i != ranges.size(); i += 2) { + for (uint64_t i = 0; i < ranges.size(); i += 2) { + auto [begin1, end1] = ranges[i]; + + auto input = data.begin(); + auto output = temp_data.begin(); + if (swap) std::swap(input, output); + uint64_t offset = std::distance(input, begin1); + auto output_iterator = output + offset; + assert(offset <= data_size); + uint64_t output_size = end1 - begin1; if (i + 1 < ranges.size()) { - auto [begin1, end1] = ranges[i]; auto [begin2, end2] = ranges[i + 1]; - uint64_t output_size = (end1 - begin1) + (end2 - begin2); - - auto input = data.begin(); - auto output = temp_data.begin(); - if (swap) std::swap(input, output); - uint64_t offset = std::distance(input, begin1); - auto output_iterator = output + offset; - assert(offset <= data_size); + output_size += end2 - begin2; parallel_merge(begin1, end1, begin2, end2, output_iterator, comp, sequential_merge_threshold); assert(std::is_sorted(output_iterator, output_iterator + output_size, comp)); - - auto merged_begin = output_iterator; - auto merged_end = merged_begin + output_size; - next_ranges.push_back({merged_begin, merged_end}); } else { - // Odd range out: carry it forward to the next iteration - next_ranges.push_back(ranges[i]); + std::move(begin1, end1, output_iterator); } + auto merged_begin = output_iterator; + auto merged_end = merged_begin + output_size; + next_ranges.push_back({merged_begin, merged_end}); } ranges.swap(next_ranges); swap = !swap; From 1218e83b7470ded9c2bd1d8dd719e70582c67813 Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Sun, 5 Oct 2025 19:03:10 +0200 Subject: [PATCH 03/12] Multithreading fixes --- include/builder/build_sparse_index.hpp | 15 ++++++++------- include/builder/parallel_sort.hpp | 9 +++++---- include/builder/parse_file.hpp | 4 ++-- src/build.cpp | 6 +++--- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/include/builder/build_sparse_index.hpp b/include/builder/build_sparse_index.hpp index 5db9084..fe9ccdb 100644 --- a/include/builder/build_sparse_index.hpp +++ b/include/builder/build_sparse_index.hpp @@ -82,9 +82,9 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& } } offsets.push_back(num_super_kmers); - assert(offsets.size() == num_threads + 1); - std::vector threads_buckets_stats(num_threads); + std::vector threads_buckets_stats; + threads_buckets_stats.reserve(num_threads); auto exe = [&](const uint64_t thread_id) { assert(thread_id + 1 < offsets.size()); @@ -115,11 +115,12 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& } }; - std::vector threads(num_threads); - for (uint64_t thread_id = 0; thread_id != num_threads; ++thread_id) { - threads_buckets_stats[thread_id] = - buckets_statistics(num_buckets, num_kmers, 
num_minimizer_positions); - threads[thread_id] = std::thread(exe, thread_id); + std::vector threads; + threads.reserve(num_threads); + assert(offsets.size() <= num_threads + 1); + for (uint64_t thread_id = 0; thread_id + 1 < size(offsets); ++thread_id) { + threads_buckets_stats.emplace_back(num_buckets, num_kmers, num_minimizer_positions); + threads.emplace_back(exe, thread_id); } for (auto& t : threads) { if (t.joinable()) t.join(); diff --git a/include/builder/parallel_sort.hpp b/include/builder/parallel_sort.hpp index c1f531e..9b6c63c 100644 --- a/include/builder/parallel_sort.hpp +++ b/include/builder/parallel_sort.hpp @@ -75,12 +75,13 @@ void parallel_sort(std::vector& data, uint64_t num_threads, Compare comp) // threads.reserve(num_threads); using iterator_t = typename std::vector::iterator; - std::vector> ranges(num_threads); + std::vector> ranges; + ranges.reserve(num_threads); - for (uint64_t i = 0; i != num_threads; ++i) { + for (uint64_t i = 0; i * chunk_size < data_size; ++i) { uint64_t begin = i * chunk_size; - uint64_t end = (i == num_threads - 1) ? data_size : begin + chunk_size; - ranges[i] = {data.begin() + begin, data.begin() + end}; + uint64_t end = std::min(data_size, begin + chunk_size); + ranges.emplace_back(data.begin() + begin, data.begin() + end); threads.emplace_back( [&, begin, end]() { std::sort(data.begin() + begin, data.begin() + end, comp); }); } diff --git a/include/builder/parse_file.hpp b/include/builder/parse_file.hpp index 1e71bb0..f269788 100644 --- a/include/builder/parse_file.hpp +++ b/include/builder/parse_file.hpp @@ -214,7 +214,7 @@ void parse_file(std::istream& is, parse_data& data, std::vector threads; threads.reserve(num_threads); - for (uint64_t t = 0; t != num_threads; ++t) // + for (uint64_t t = 0; t * num_sequences_per_thread < num_sequences; ++t) // { threads.emplace_back([&, t] { std::vector buffer; @@ -249,7 +249,7 @@ void parse_file(std::istream& is, parse_data& data, minimizer_iterator minimizer_it(k, m, hasher); minimizer_iterator_rc minimizer_it_rc(k, m, hasher); - for (uint64_t i = index_begin; i != index_end; ++i) // + for (uint64_t i = index_begin; i < index_end; ++i) // { const uint64_t begin = data.pieces[i]; const uint64_t end = data.pieces[i + 1]; diff --git a/src/build.cpp b/src/build.cpp index 15ab344..117f2d7 100644 --- a/src/build.cpp +++ b/src/build.cpp @@ -123,11 +123,11 @@ void dictionary::build(std::string const& filename, input.read(reinterpret_cast(buffer.data()), buffer.size() * sizeof(minimizer_tuple)); const uint64_t chunk_size = (n + num_threads - 1) / num_threads; - for (uint64_t t = 0; t != num_threads; ++t) { + for (uint64_t t = 0; t * chunk_size < n; ++t) { uint64_t begin = t * chunk_size; - uint64_t end = (t == num_threads - 1) ? 
n : begin + chunk_size; + uint64_t end = std::min(n, begin + chunk_size); threads.emplace_back([begin, end, &buffer, &f]() { - for (uint64_t i = begin; i != end; ++i) { + for (uint64_t i = begin; i < end; ++i) { buffer[i].minimizer = f.lookup(buffer[i].minimizer); } }); From 4b3781d3a095e384f2357d2458d7aeff28ba52fe Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Mon, 6 Oct 2025 00:11:39 +0200 Subject: [PATCH 04/12] Update pthash --- external/pthash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/pthash b/external/pthash index 725846c..c6f11b1 160000 --- a/external/pthash +++ b/external/pthash @@ -1 +1 @@ -Subproject commit 725846c18709efb4d2b55170c70ea174d73d62dc +Subproject commit c6f11b15816f9c6fb0896ed1936495d53fa24a7b From 6457e5d45e3b2120b898d3a5e31586ffe4211d71 Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Mon, 6 Oct 2025 00:17:38 +0200 Subject: [PATCH 05/12] update pthash --- external/pthash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/pthash b/external/pthash index c6f11b1..53b682e 160000 --- a/external/pthash +++ b/external/pthash @@ -1 +1 @@ -Subproject commit c6f11b15816f9c6fb0896ed1936495d53fa24a7b +Subproject commit 53b682e1396936040fb15489b6307a3a94a27093 From 7f4f9a580c739c78a4403394f89c2b6289182e8a Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Mon, 6 Oct 2025 17:26:33 +0200 Subject: [PATCH 06/12] Try safe-guarding offsets_builder.set with mutex --- include/builder/build_sparse_index.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/builder/build_sparse_index.hpp b/include/builder/build_sparse_index.hpp index fe9ccdb..17beff9 100644 --- a/include/builder/build_sparse_index.hpp +++ b/include/builder/build_sparse_index.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include "include/buckets_statistics.hpp" namespace sshash { @@ -86,6 +87,11 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& std::vector threads_buckets_stats; threads_buckets_stats.reserve(num_threads); + // Mutex to protect concurrent writes to offsets_builder + // Multiple threads can write to the same bucket positions, and compact_vector + // packs values into 64-bit words, so nearby writes can race on the same word + std::mutex offsets_mutex; + auto exe = [&](const uint64_t thread_id) { assert(thread_id + 1 < offsets.size()); const uint64_t offset_begin = offsets[thread_id]; @@ -106,6 +112,9 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& uint64_t prev_pos_in_seq = constants::invalid_uint64; for (auto mt : bucket) { if (mt.pos_in_seq != prev_pos_in_seq) { + // Protect compact_vector writes - multiple threads can write to + // positions that share the same 64-bit word + std::lock_guard lock(offsets_mutex); offsets_builder.set(begin + pos++, mt.pos_in_seq); prev_pos_in_seq = mt.pos_in_seq; } From 20d7d81639e37c24df4f1f5d4c5fc91e3f44ef36 Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Mon, 6 Oct 2025 18:57:33 +0200 Subject: [PATCH 07/12] fix kmer_t processing for uint_kmer_bits > 64 --- include/builder/parse_file.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/builder/parse_file.hpp b/include/builder/parse_file.hpp index f269788..ef13e01 100644 --- a/include/builder/parse_file.hpp +++ b/include/builder/parse_file.hpp @@ -173,7 +173,10 @@ void parse_file(std::istream& is, parse_data& data, /* Push a final sentinel (dummy) value to avoid bounds' checking in kmer_iterator::fill_buff(). 
*/ - bvb_strings.append_bits(0, kmer_t::uint_kmer_bits); + for (int i = 0; i < kmer_t::uint_kmer_bits / 64; i++) { + bvb_strings.append_bits(0, 64); + } + bvb_strings.append_bits(0, kmer_t::uint_kmer_bits % 64); bvb_strings.build(data.strings); From c35f3040e40dd6805885605883bed43f7cb558ca Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Mon, 6 Oct 2025 18:57:33 +0200 Subject: [PATCH 08/12] update pthash --- external/pthash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/pthash b/external/pthash index 53b682e..521cc01 160000 --- a/external/pthash +++ b/external/pthash @@ -1 +1 @@ -Subproject commit 53b682e1396936040fb15489b6307a3a94a27093 +Subproject commit 521cc018932ea73d63b84e9ac066fe94020edfd2 From 06557f6b85fd8715613137f45d3932ef1c584ebd Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Tue, 7 Oct 2025 18:17:23 +0200 Subject: [PATCH 09/12] Single-threaded build_sparse_index --- include/builder/build_sparse_index.hpp | 90 ++++++-------------------- 1 file changed, 18 insertions(+), 72 deletions(-) diff --git a/include/builder/build_sparse_index.hpp b/include/builder/build_sparse_index.hpp index 17beff9..32f32f6 100644 --- a/include/builder/build_sparse_index.hpp +++ b/include/builder/build_sparse_index.hpp @@ -29,17 +29,12 @@ struct bucket_size_iterator { template buckets_statistics build_sparse_index(parse_data& data, buckets& m_buckets, - build_configuration const& build_config) // + build_configuration const& /*build_config*/) // { const uint64_t num_kmers = data.num_kmers; const uint64_t num_minimizer_positions = data.minimizers.num_minimizer_positions(); - const uint64_t num_super_kmers = data.minimizers.num_super_kmers(); const uint64_t num_buckets = data.minimizers.num_minimizers(); - const uint64_t num_threads = build_config.num_threads; - bits::compact_vector::builder offsets_builder; - offsets_builder.resize(num_minimizer_positions, - std::ceil(std::log2(data.strings.num_bits() / kmer_t::bits_per_char))); std::cout << "bits_per_offset = ceil(log2(" << data.strings.num_bits() / kmer_t::bits_per_char << ")) = " << std::ceil(std::log2(data.strings.num_bits() / kmer_t::bits_per_char)) @@ -67,74 +62,25 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& buckets_statistics buckets_stats(num_buckets, num_kmers, num_minimizer_positions); timer.start(); - const uint64_t block_size = (num_super_kmers + num_threads - 1) / num_threads; - std::vector offsets; - offsets.reserve(num_threads + 1); - for (uint64_t offset = -1; offset != num_super_kmers;) { - offsets.push_back(offset + 1); - offset = std::min((offset + 1) + block_size, num_super_kmers); - minimizer_tuple const* b = begin + offset; - uint64_t curr_minimizer = (*b).minimizer; - while (b + 1 < end) { // adjust offset - uint64_t next_minimizer = (*(b + 1)).minimizer; - if (curr_minimizer != next_minimizer) break; - b += 1; - offset += 1; + + bits::compact_vector::builder offsets_builder; + offsets_builder.resize(num_minimizer_positions, + std::ceil(std::log2(data.strings.num_bits() / kmer_t::bits_per_char))); + uint64_t prev_minimizer = constants::invalid_uint64, prev_pos_in_seq = constants::invalid_uint64, bucket_size = 0; + for (auto mt : input) { + if (mt.minimizer != prev_minimizer) { + auto [bucket_begin, bucket_end] = m_buckets.locate_bucket(mt.minimizer); + bucket_size = bucket_end - bucket_begin; + buckets_stats.add_bucket_size(bucket_size); + prev_minimizer = mt.minimizer; + prev_pos_in_seq = constants::invalid_uint64; } - } - offsets.push_back(num_super_kmers); - - 
std::vector threads_buckets_stats; - threads_buckets_stats.reserve(num_threads); - - // Mutex to protect concurrent writes to offsets_builder - // Multiple threads can write to the same bucket positions, and compact_vector - // packs values into 64-bit words, so nearby writes can race on the same word - std::mutex offsets_mutex; - - auto exe = [&](const uint64_t thread_id) { - assert(thread_id + 1 < offsets.size()); - const uint64_t offset_begin = offsets[thread_id]; - const uint64_t offset_end = offsets[thread_id + 1]; - auto& tbs = threads_buckets_stats[thread_id]; - for (minimizers_tuples_iterator it(begin + offset_begin, begin + offset_end); // - it.has_next(); // - it.next()) // - { - const uint64_t bucket_id = it.minimizer(); - const auto [begin, end] = m_buckets.locate_bucket(bucket_id); - assert(end > begin); - const uint64_t bucket_size = end - begin; - assert(bucket_size == it.bucket().size()); - tbs.add_bucket_size(bucket_size); - uint64_t pos = 0; - auto bucket = it.bucket(); - uint64_t prev_pos_in_seq = constants::invalid_uint64; - for (auto mt : bucket) { - if (mt.pos_in_seq != prev_pos_in_seq) { - // Protect compact_vector writes - multiple threads can write to - // positions that share the same 64-bit word - std::lock_guard lock(offsets_mutex); - offsets_builder.set(begin + pos++, mt.pos_in_seq); - prev_pos_in_seq = mt.pos_in_seq; - } - tbs.add_num_kmers_in_super_kmer(bucket_size, mt.num_kmers_in_super_kmer); - } - assert(pos == bucket_size); + buckets_stats.add_num_kmers_in_super_kmer(bucket_size, mt.num_kmers_in_super_kmer); + if (mt.pos_in_seq != prev_pos_in_seq) { + offsets_builder.push_back(mt.pos_in_seq); + prev_pos_in_seq = mt.pos_in_seq; } - }; - - std::vector threads; - threads.reserve(num_threads); - assert(offsets.size() <= num_threads + 1); - for (uint64_t thread_id = 0; thread_id + 1 < size(offsets); ++thread_id) { - threads_buckets_stats.emplace_back(num_buckets, num_kmers, num_minimizer_positions); - threads.emplace_back(exe, thread_id); - } - for (auto& t : threads) { - if (t.joinable()) t.join(); } - for (auto const& tbs : threads_buckets_stats) buckets_stats += tbs; input.close(); timer.stop(); @@ -154,4 +100,4 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& return buckets_stats; } -} // namespace sshash \ No newline at end of file +} // namespace sshash From 64d88946017053d466cc4295d06b8fcf8bf5eb66 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Wed, 14 Jan 2026 15:56:18 +0100 Subject: [PATCH 10/12] make compiling executables optional --- CMakeLists.txt | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c8bed87..5384ce0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,11 +48,13 @@ else() set(CONDA_BUILD FALSE) endif() +option(SSHASH_BUILD_EXECUTABLES "Build sshash executables" ON) MESSAGE(STATUS "Build type: ${CMAKE_BUILD_TYPE}") MESSAGE(STATUS "Conda build: ${CONDA_BUILD}") MESSAGE(STATUS "Installation prefix: ${CMAKE_INSTALL_PREFIX}") MESSAGE(STATUS "Compiling for processor: ${CMAKE_SYSTEM_PROCESSOR}") MESSAGE(STATUS "Compiling with flags:${CMAKE_CXX_FLAGS}") +MESSAGE(STATUS "Build executables: ${SSHASH_BUILD_EXECUTABLES}") set(Z_LIB_SOURCES external/gz/zip_stream.cpp @@ -85,27 +87,29 @@ add_library(sshash_static STATIC target_include_directories(sshash_static PUBLIC ${SSHASH_INCLUDE_DIRS}) -add_executable(sshash tools/sshash.cpp) -target_include_directories(sshash PUBLIC ${SSHASH_INCLUDE_DIRS}) 
-target_link_libraries(sshash - z -) +if(SSHASH_BUILD_EXECUTABLES) + add_executable(sshash tools/sshash.cpp) + target_include_directories(sshash PUBLIC ${SSHASH_INCLUDE_DIRS}) + target_link_libraries(sshash + z + ) -# tests: + # tests: -add_executable(test_alphabet test/test_alphabet.cpp) -target_link_libraries(test_alphabet - sshash_static -) - -add_executable(check test/check.cpp) -target_link_libraries(check - sshash_static - z -) + add_executable(test_alphabet test/test_alphabet.cpp) + target_link_libraries(test_alphabet + sshash_static + ) -if (CONDA_BUILD) - install(TARGETS sshash - RUNTIME DESTINATION bin + add_executable(check test/check.cpp) + target_link_libraries(check + sshash_static + z ) + + if (CONDA_BUILD) + install(TARGETS sshash + RUNTIME DESTINATION bin + ) + endif() endif() From 88153605d5cbfffa90eb5a12c5ad2ddd896375fe Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Mon, 16 Feb 2026 14:02:35 +0100 Subject: [PATCH 11/12] Wrap cityhash in namespace and add accessor methods for metagraph compatibility Changes for metagraph integration: 1. Wrap cityhash in namespace to avoid typedef pollution - cityhash.hpp and cityhash.cpp: Wrapped in 'cityhash' namespace - Prevents conflicts with rollinghashcpp uint64 typedef - hash_util.hpp: Use cityhash::CityHash128WithSeed instead of static CityMurmur - tools/query.cpp: Changed uint64 to uint64_t (standard type) 2. Add public accessor methods to dictionary - Added kmer_type typedef for easier template parameter extraction - Added strings() accessor returning m_spss.strings bit vector - Added strings_offsets() accessor returning m_spss.strings_offsets - Moved strings_offsets() next to string_offsets() for better organization 3. Build system changes - CMakeLists.txt: Added cityhash.cpp to SSHASH_SOURCES - CMakeLists.txt: Link sshash executable against sshash_static (includes cityhash) 4. 
Update pthash submodule - Now tracking ratschlab/pthash master with compute_empirical_entropy fix - .gitmodules: Updated URL to point to ratschlab/pthash --- .gitmodules | 2 +- CMakeLists.txt | 6 ++++++ external/cityhash/cityhash.cpp | 6 +++++- external/cityhash/cityhash.hpp | 4 ++++ external/pthash | 2 +- include/dictionary.hpp | 6 ++++++ include/hash_util.hpp | 6 +++--- tools/query.cpp | 2 +- 8 files changed, 27 insertions(+), 7 deletions(-) diff --git a/.gitmodules b/.gitmodules index a45b147..9f3579f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "external/pthash"] path = external/pthash - url = https://github.com/jermp/pthash.git + url = https://github.com/ratschlab/pthash.git diff --git a/CMakeLists.txt b/CMakeLists.txt index ed43720..f917ba9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,10 @@ set(Z_LIB_SOURCES external/gz/zip_stream.cpp ) +set(CITYHASH_SOURCES + external/cityhash/cityhash.cpp +) + set(SSHASH_SOURCES src/build.cpp src/dictionary.cpp @@ -81,6 +85,7 @@ set(SSHASH_INCLUDE_DIRS # Create a static lib add_library(sshash_static STATIC ${Z_LIB_SOURCES} + ${CITYHASH_SOURCES} ${SSHASH_SOURCES} ) @@ -90,6 +95,7 @@ if(SSHASH_BUILD_EXECUTABLES) add_executable(sshash tools/sshash.cpp) target_include_directories(sshash PUBLIC ${SSHASH_INCLUDE_DIRS}) target_link_libraries(sshash + sshash_static z ) diff --git a/external/cityhash/cityhash.cpp b/external/cityhash/cityhash.cpp index 50cafc5..7e81ded 100644 --- a/external/cityhash/cityhash.cpp +++ b/external/cityhash/cityhash.cpp @@ -34,6 +34,8 @@ #include #include // for memcpy and memset +namespace cityhash { + using namespace std; static uint64 UNALIGNED_LOAD64(const char* p) { @@ -442,4 +444,6 @@ uint128 CityHashCrc128(const char* s, size_t len) { } } -#endif \ No newline at end of file +#endif + +} // namespace cityhash \ No newline at end of file diff --git a/external/cityhash/cityhash.hpp b/external/cityhash/cityhash.hpp index 23fb877..801f919 100644 --- a/external/cityhash/cityhash.hpp +++ b/external/cityhash/cityhash.hpp @@ -48,6 +48,8 @@ #include // for size_t. #include +namespace cityhash { + // Microsoft Visual Studio may not have stdint.h. #if defined(_MSC_VER) && (_MSC_VER < 1600) typedef unsigned char uint8_t; @@ -112,4 +114,6 @@ void CityHashCrc256(const char* s, size_t len, uint64* result); #endif // __SSE4_2__ +} // namespace cityhash + #endif // CITY_HASH_H_ \ No newline at end of file diff --git a/external/pthash b/external/pthash index cdeee6e..ee5ced4 160000 --- a/external/pthash +++ b/external/pthash @@ -1 +1 @@ -Subproject commit cdeee6ef1cc97d7cdb022e5d2d0013981335ee3e +Subproject commit ee5ced44af81b8a5b01762252363b2b9d9287c6e diff --git a/include/dictionary.hpp b/include/dictionary.hpp index d0fa3db..a30b8c4 100644 --- a/include/dictionary.hpp +++ b/include/dictionary.hpp @@ -68,6 +68,9 @@ struct dictionary // /* Return the string of the kmer whose id is kmer_id. */ void access(uint64_t kmer_id, char* string_kmer) const; + /* Accessor for internal bit vector */ + bits::bit_vector const& strings() const { return m_spss.strings; } + /* Membership queries. 
*/ bool is_member(char const* string_kmer, bool check_reverse_complement = true) const; bool is_member(Kmer uint_kmer, bool check_reverse_complement = true) const; @@ -104,6 +107,9 @@ struct dictionary // return m_spss.string_offsets(string_id); } + /* Accessor for internal offsets structure */ + Offsets const& strings_offsets() const { return m_spss.strings_offsets; } + iterator at_string_id(const uint64_t string_id) const { assert(string_id < num_strings()); auto [begin, end] = string_offsets(string_id); diff --git a/include/hash_util.hpp b/include/hash_util.hpp index d6b4fbc..dddb0c2 100644 --- a/include/hash_util.hpp +++ b/include/hash_util.hpp @@ -1,7 +1,7 @@ #pragma once #include "external/pthash/include/pthash.hpp" -#include "external/cityhash/cityhash.cpp" +#include "external/cityhash/cityhash.hpp" #include "constants.hpp" namespace sshash { @@ -10,7 +10,7 @@ struct minimizers_city_hasher_128 { typedef pthash::hash128 hash_type; static inline pthash::hash128 hash(uint64_t const minimizer, uint64_t seed) { - auto ret = CityMurmur(reinterpret_cast(&minimizer), // + auto ret = cityhash::CityHash128WithSeed(reinterpret_cast(&minimizer), // sizeof(minimizer), {seed, ~seed}); return {ret.first, ret.second}; } @@ -60,7 +60,7 @@ struct kmers_city_hasher_128 { typedef pthash::hash128 hash_type; static inline pthash::hash128 hash(Kmer const x, uint64_t seed) { - auto ret = CityMurmur(reinterpret_cast(&(x.bits)), // + auto ret = cityhash::CityHash128WithSeed(reinterpret_cast(&(x.bits)), // sizeof(x.bits), {seed, ~seed}); return {ret.first, ret.second}; } diff --git a/tools/query.cpp b/tools/query.cpp index 67eec13..0ae64ff 100644 --- a/tools/query.cpp +++ b/tools/query.cpp @@ -41,7 +41,7 @@ int query(int argc, char** argv) { query_stats.add("num_invalid_kmers", report.num_invalid_kmers); query_stats.add("num_searches", report.num_searches); query_stats.add("num_extensions", report.num_extensions); - query_stats.add("elapsed_millisec", uint64(t.elapsed())); + query_stats.add("elapsed_millisec", uint64_t(t.elapsed())); std::cout << "==== query report:\n"; std::cout << "num_kmers = " << report.num_kmers << std::endl; From 964d891b0d95838ef937e7e8c6e33bcc1496931f Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Mon, 16 Feb 2026 20:14:33 +0100 Subject: [PATCH 12/12] Fix sparse index bit budget calculation and verbose flag handling Fix two critical issues discovered during metagraph integration: 1. Sparse index encoding bug: - The sparse index encoder uses list_id in control codewords for buckets of size 2-64, but was allocating bits based on offset size (num_bits_per_offset + 1) instead of list_id range. - list_id counts buckets within each size category and can reach the total number of minimizers in worst case (all same size). - Fixed by tracking max_buckets_per_size during statistics collection and using: max(num_bits_per_offset + 1, 2 + 6 + ceil(log2(max_buckets_per_size + 1))) - This provides tight bound with minimal overhead (~2-3% worst case). 2. Verbose flag handling: - build_stats.print() was called unconditionally in dictionary_builder.hpp, ignoring build_config.verbose flag. - Fixed by making print() conditional: if (build_config.verbose) - This respects the library's own configuration mechanism. 
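Worked example for the bit budget in (1), with purely illustrative numbers:
   num_bits_per_offset = 40, constants::min_l = 6, max_buckets_per_size = 1,000,000
   => max(40 + 1, 2 + 6 + ceil(log2(1,000,001))) = max(41, 28) = 41 bits per codeword.
   In this hypothetical case the offset term still dominates, so the tighter
   bound costs nothing extra; additional bits are paid only when the list_id
   term grows past num_bits_per_offset + 1.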
Implementation: - Added m_max_buckets_per_size member to buckets_statistics class - Modified add_bucket_size() to track maximum incrementally - Zero time overhead (uses std::max with existing data) - Clean encapsulation within statistics class --- include/buckets_statistics.hpp | 18 +++-- .../builder/build_sparse_and_skew_index.cpp | 65 ++++++++++++------- include/builder/dictionary_builder.hpp | 2 +- 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/include/buckets_statistics.hpp b/include/buckets_statistics.hpp index 05e7a3e..da77124 100644 --- a/include/buckets_statistics.hpp +++ b/include/buckets_statistics.hpp @@ -11,14 +11,16 @@ struct buckets_statistics { , m_num_kmers(0) , m_num_minimizer_positions(0) , m_max_num_kmers_in_super_kmer(0) - , m_max_bucket_size(0) {} + , m_max_bucket_size(0) + , m_max_buckets_per_size(0) {} buckets_statistics(uint64_t num_buckets, uint64_t num_kmers, uint64_t num_minimizer_positions) : m_num_buckets(num_buckets) , m_num_kmers(num_kmers) , m_num_minimizer_positions(num_minimizer_positions) , m_max_num_kmers_in_super_kmer(0) - , m_max_bucket_size(0) // + , m_max_bucket_size(0) + , m_max_buckets_per_size(0) // { m_bucket_sizes.resize(MAX_BUCKET_SIZE + 1, 0); m_total_kmers.resize(MAX_BUCKET_SIZE + 1, 0); @@ -26,8 +28,11 @@ struct buckets_statistics { } void add_bucket_size(uint64_t bucket_size) { - if (bucket_size < MAX_BUCKET_SIZE + 1) { m_bucket_sizes[bucket_size] += 1; } - if (bucket_size > m_max_bucket_size) { m_max_bucket_size = bucket_size; } + if (bucket_size < MAX_BUCKET_SIZE + 1) { + m_bucket_sizes[bucket_size] += 1; + m_max_buckets_per_size = std::max(m_max_buckets_per_size, m_bucket_sizes[bucket_size]); + } + m_max_bucket_size = std::max(m_max_bucket_size, bucket_size); } void add_num_kmers_in_super_kmer(uint64_t bucket_size, @@ -49,6 +54,7 @@ struct buckets_statistics { uint64_t num_minimizer_positions() const { return m_num_minimizer_positions; } uint64_t max_num_kmers_in_super_kmer() const { return m_max_num_kmers_in_super_kmer; } uint64_t max_bucket_size() const { return m_max_bucket_size; } + uint64_t max_buckets_per_size() const { return m_max_buckets_per_size; } void print_full() const { std::cout << "=== bucket statistics (full) === \n"; @@ -138,6 +144,9 @@ struct buckets_statistics { if (rhs.max_bucket_size() > m_max_bucket_size) { m_max_bucket_size = rhs.max_bucket_size(); } + if (rhs.max_buckets_per_size() > m_max_buckets_per_size) { + m_max_buckets_per_size = rhs.max_buckets_per_size(); + } assert(m_bucket_sizes.size() == rhs.m_bucket_sizes.size()); for (uint64_t i = 0; i != m_bucket_sizes.size(); ++i) { @@ -160,6 +169,7 @@ struct buckets_statistics { uint64_t m_max_num_kmers_in_super_kmer; uint64_t m_max_bucket_size; + uint64_t m_max_buckets_per_size; std::vector m_bucket_sizes; std::vector m_total_kmers; diff --git a/include/builder/build_sparse_and_skew_index.cpp b/include/builder/build_sparse_and_skew_index.cpp index 9e9f1ce..26ee343 100644 --- a/include/builder/build_sparse_and_skew_index.cpp +++ b/include/builder/build_sparse_and_skew_index.cpp @@ -15,13 +15,6 @@ void dictionary_builder::build_sparse_and_skew_index( const uint64_t min_size = 1ULL << constants::min_l; const uint64_t num_bits_per_offset = strings_offsets_builder.num_bits_per_offset(); - if (build_config.verbose) { - std::cout << "num_bits_per_offset = " << num_bits_per_offset << std::endl; - } - - bits::compact_vector::builder control_codewords_builder; - control_codewords_builder.resize(num_minimizers, num_bits_per_offset + 1); - mm::file_source 
input(minimizers.get_minimizers_filename(), mm::advice::sequential); @@ -33,11 +26,10 @@ void dictionary_builder::build_sparse_and_skew_index( uint64_t num_minimizer_positions_of_buckets_larger_than_1 = 0; uint64_t num_minimizer_positions_of_buckets_in_skew_index = 0; + // First pass: collect bucket statistics to compute tighter bound for (minimizers_tuples_iterator it(input.data(), input.data() + input.size()); // it.has_next(); it.next()) // { - const uint64_t bucket_id = it.minimizer(); - assert(bucket_id < num_minimizers); auto bucket = it.bucket(); const uint64_t bucket_size = bucket.size(); buckets_stats.add_bucket_size(bucket_size); @@ -53,24 +45,31 @@ void dictionary_builder::build_sparse_and_skew_index( num_super_kmers_in_buckets_larger_than_1 += bucket.num_super_kmers(); } - uint64_t prev_pos_in_seq = constants::invalid_uint64; for (auto mt : bucket) { - if (bucket_size == 1 and mt.pos_in_seq != prev_pos_in_seq) { - /* - For minimizers occurring once, store a (log(N)+1)-bit - code, as follows: |offset|0|, i.e., the LSB is 0. - */ - uint64_t code = mt.pos_in_seq << 1; // first LS bit encodes status code: 0 - assert(code < (uint64_t(1) << (num_bits_per_offset + 1))); - control_codewords_builder.set(bucket_id, code); - prev_pos_in_seq = mt.pos_in_seq; - } buckets_stats.add_num_kmers_in_super_kmer(bucket_size, mt.num_kmers_in_super_kmer); } } assert(buckets_stats.num_buckets() == num_minimizers); + // Calculate bits needed for control codewords encoding: + // Encoding format: ((list_id << min_l) | (bucket_size - 2)) << 2 | status_code + // We need: 2 bits (status) + min_l bits (bucket_size) + bits for list_id + // list_id is bounded by the maximum number of buckets sharing the same size + const uint64_t bits_for_list_id = std::ceil(std::log2(buckets_stats.max_buckets_per_size() + 1)); + const uint64_t num_bits_for_control = std::max(num_bits_per_offset + 1, + 2 + constants::min_l + bits_for_list_id); + + if (build_config.verbose) { + std::cout << "num_bits_per_offset = " << num_bits_per_offset << std::endl; + std::cout << "max_list_id = " << buckets_stats.max_buckets_per_size() << std::endl; + std::cout << "bits_for_list_id = " << bits_for_list_id << std::endl; + std::cout << "num_bits_for_control = " << num_bits_for_control << std::endl; + } + + bits::compact_vector::builder control_codewords_builder; + control_codewords_builder.resize(num_minimizers, num_bits_for_control); + strings_offsets_builder.build(d.m_spss.strings_offsets); strings_builder.build(d.m_spss.strings); @@ -100,11 +99,31 @@ void dictionary_builder::build_sparse_and_skew_index( std::vector tuples; // backed memory tuples.reserve(num_super_kmers_in_buckets_larger_than_1); + // Second pass: collect buckets > 1 for sorting AND handle size-1 buckets for (minimizers_tuples_iterator it(input.data(), input.data() + input.size()); // it.has_next(); it.next()) // { + const uint64_t bucket_id = it.minimizer(); auto bucket = it.bucket(); - if (bucket.size() > 1) { + const uint64_t bucket_size = bucket.size(); + + if (bucket_size == 1) { + // Handle size-1 buckets: encode directly into control codewords + uint64_t prev_pos_in_seq = constants::invalid_uint64; + for (auto mt : bucket) { + if (mt.pos_in_seq != prev_pos_in_seq) { + /* + For minimizers occurring once, store a (log(N)+1)-bit + code, as follows: |offset|0|, i.e., the LSB is 0. 
+ */ + uint64_t code = mt.pos_in_seq << 1; // first LS bit encodes status code: 0 + assert(code < (uint64_t(1) << num_bits_for_control)); + control_codewords_builder.set(bucket_id, code); + prev_pos_in_seq = mt.pos_in_seq; + } + } + } else { + // Collect buckets > 1 for later processing minimizer_tuple const* begin = tuples.data() + tuples.size(); std::copy(bucket.begin_ptr(), bucket.end_ptr(), std::back_inserter(tuples)); minimizer_tuple const* end = tuples.data() + tuples.size(); @@ -187,7 +206,7 @@ void dictionary_builder::build_sparse_and_skew_index( if (prev_pos_in_seq == constants::invalid_uint64) { // only once uint64_t p = (list_id << constants::min_l) | (curr_bucket_size - 2); uint64_t code = (p << 2) | 1; // first two LS bits encode status code: 01 - assert(code < (uint64_t(1) << (num_bits_per_offset + 1))); + assert(code < (uint64_t(1) << num_bits_for_control)); control_codewords_builder.set(mt.minimizer, code); } if (mt.pos_in_seq != prev_pos_in_seq) { @@ -204,7 +223,7 @@ void dictionary_builder::build_sparse_and_skew_index( assert(partition_id < 8); uint64_t p = (heavy_load_buckets_size << 3) | partition_id; uint64_t code = (p << 2) | 3; // first two LS bits encode status code: 11 - assert(code < (uint64_t(1) << (num_bits_per_offset + 1))); + assert(code < (uint64_t(1) << num_bits_for_control)); control_codewords_builder.set(mt.minimizer, code); } if (mt.pos_in_seq != prev_pos_in_seq) { diff --git a/include/builder/dictionary_builder.hpp b/include/builder/dictionary_builder.hpp index 65703c0..efc2230 100644 --- a/include/builder/dictionary_builder.hpp +++ b/include/builder/dictionary_builder.hpp @@ -75,7 +75,7 @@ struct dictionary_builder // build_stats.add("index_size_in_bytes", (d.num_bits() + 7) / 8); build_stats.add("num_kmers", d.num_kmers()); - build_stats.print(); + if (build_config.verbose) build_stats.print(); } build_configuration build_config;
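Usage sketch (illustrative only): with the power-of-two restriction removed in
patches 01-02, parallel_sort accepts any thread count. The snippet below is a
hypothetical example and not part of any patch above; it assumes parallel_sort
is declared in namespace sshash in include/builder/parallel_sort.hpp and that
the program is compiled with -pthread. The data and the comparator are invented
for illustration.

    #include <cstdint>
    #include <functional>
    #include <random>
    #include <vector>

    #include "include/builder/parallel_sort.hpp"

    int main() {
        // Invented test data: one million pseudo-random 64-bit keys.
        std::vector<uint64_t> keys(1'000'000);
        std::mt19937_64 rng(42);
        for (auto& k : keys) k = rng();

        // Six threads -- not a power of two -- now work: each thread sorts one
        // chunk, chunks are merged pairwise, and an odd leftover range is
        // carried forward to the next merge round.
        sshash::parallel_sort(keys, 6, std::less<uint64_t>());

        return 0;
    }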