From d45a532f06dad71482037d80632cde53588b7026 Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Mon, 16 Mar 2026 19:03:57 +0100 Subject: [PATCH 1/9] more logs --- .../l4/k31/canon-bench.json | 6 + .../l4/k31/canon-bench.log | 30 + .../l4/k31/canon-build.json | 2 + .../l4/k31/canon-build.log | 301 ++ .../l4/k31/canon-build.time.log | 46 + .../l4/k31/regular-bench.json | 6 + .../l4/k31/regular-bench.log | 30 + .../l4/k31/regular-build.json | 2 + .../l4/k31/regular-build.log | 286 ++ .../l4/k31/regular-build.time.log | 46 + .../l4/k63/canon-bench.json | 6 + .../l4/k63/canon-bench.log | 30 + .../l4/k63/canon-build.json | 2 + .../l4/k63/canon-build.log | 272 ++ .../l4/k63/canon-build.time.log | 46 + .../l4/k63/regular-bench.json | 6 + .../l4/k63/regular-bench.log | 30 + .../l4/k63/regular-build.json | 2 + .../l4/k63/regular-build.log | 272 ++ .../l4/k63/regular-build.time.log | 46 + .../l5/k31/canon-bench.json | 6 + .../l5/k31/canon-bench.log | 30 + .../l5/k31/canon-build.json | 2 + .../l5/k31/canon-build.log | 301 ++ .../l5/k31/canon-build.time.log | 46 + .../l5/k31/regular-bench.json | 6 + .../l5/k31/regular-bench.log | 30 + .../l5/k31/regular-build.json | 2 + .../l5/k31/regular-build.log | 286 ++ .../l5/k31/regular-build.time.log | 46 + .../l5/k63/canon-bench.json | 6 + .../l5/k63/canon-bench.log | 30 + .../l5/k63/canon-build.json | 2 + .../l5/k63/canon-build.log | 272 ++ .../l5/k63/canon-build.time.log | 46 + .../l5/k63/regular-bench.json | 6 + .../l5/k63/regular-bench.log | 30 + .../l5/k63/regular-build.json | 2 + .../l5/k63/regular-build.log | 272 ++ .../l5/k63/regular-build.time.log | 46 + .../l6/k31/canon-bench.json | 6 + .../l6/k31/canon-bench.log | 30 + .../l6/k31/canon-build.json | 2 + .../l6/k31/canon-build.log | 301 ++ .../l6/k31/canon-build.time.log | 46 + .../l6/k31/regular-bench.json | 6 + .../l6/k31/regular-bench.log | 30 + .../l6/k31/regular-build.json | 2 + .../l6/k31/regular-build.log | 286 ++ .../l6/k31/regular-build.time.log | 46 + .../l6/k63/canon-bench.json | 6 + .../l6/k63/canon-bench.log | 30 + .../l6/k63/canon-build.json | 2 + .../l6/k63/canon-build.log | 272 ++ .../l6/k63/canon-build.time.log | 46 + .../l6/k63/regular-bench.json | 6 + .../l6/k63/regular-bench.log | 30 + .../l6/k63/regular-build.json | 2 + .../l6/k63/regular-build.log | 272 ++ .../l6/k63/regular-build.time.log | 46 + .../l7/k31/canon-bench.json | 6 + .../l7/k31/canon-bench.log | 30 + .../l7/k31/canon-build.json | 2 + .../l7/k31/canon-build.log | 301 ++ .../l7/k31/canon-build.time.log | 46 + .../l7/k31/regular-bench.json | 6 + .../l7/k31/regular-bench.log | 30 + .../l7/k31/regular-build.json | 2 + .../l7/k31/regular-build.log | 286 ++ .../l7/k31/regular-build.time.log | 46 + .../l7/k63/canon-bench.json | 6 + .../l7/k63/canon-bench.log | 30 + .../l7/k63/canon-build.json | 2 + .../l7/k63/canon-build.log | 272 ++ .../l7/k63/canon-build.time.log | 46 + .../l7/k63/regular-bench.json | 6 + .../l7/k63/regular-bench.log | 30 + .../l7/k63/regular-build.json | 2 + .../l7/k63/regular-build.log | 272 ++ .../l7/k63/regular-build.time.log | 46 + .../l8/k31/canon-bench.json | 6 + .../l8/k31/canon-bench.log | 30 + .../l8/k31/canon-build.json | 2 + .../l8/k31/canon-build.log | 291 ++ .../l8/k31/canon-build.time.log | 46 + .../l8/k31/regular-bench.json | 6 + .../l8/k31/regular-bench.log | 30 + .../l8/k31/regular-build.json | 2 + .../l8/k31/regular-build.log | 281 ++ .../l8/k31/regular-build.time.log | 46 + .../l8/k63/canon-bench.json | 6 + .../l8/k63/canon-bench.log | 30 + .../l8/k63/canon-build.json | 2 + .../l8/k63/canon-build.log | 272 ++ .../l8/k63/canon-build.time.log | 46 + .../l8/k63/regular-bench.json | 6 + .../l8/k63/regular-bench.log | 30 + .../l8/k63/regular-build.json | 2 + .../l8/k63/regular-build.log | 272 ++ .../l8/k63/regular-build.time.log | 46 + .../m17/k31/canon-bench.json | 6 + .../m17/k31/canon-bench.log | 30 + .../m17/k31/canon-build.json | 2 + .../m17/k31/canon-build.log | 285 ++ .../m17/k31/canon-build.time.log | 46 + .../m17/k31/regular-bench.json | 6 + .../m17/k31/regular-bench.log | 30 + .../m17/k31/regular-build.json | 2 + .../m17/k31/regular-build.log | 283 ++ .../m17/k31/regular-build.time.log | 46 + .../m19/k31/canon-bench.json | 6 + .../m19/k31/canon-bench.log | 30 + .../m19/k31/canon-build.json | 2 + .../m19/k31/canon-build.log | 287 ++ .../m19/k31/canon-build.time.log | 46 + .../m19/k31/regular-bench.json | 6 + .../m19/k31/regular-bench.log | 30 + .../m19/k31/regular-build.json | 2 + .../m19/k31/regular-build.log | 284 ++ .../m19/k31/regular-build.time.log | 46 + .../m21/k31/canon-bench.json | 6 + .../m21/k31/canon-bench.log | 30 + .../m21/k31/canon-build.json | 2 + .../m21/k31/canon-build.log | 301 ++ .../m21/k31/canon-build.time.log | 46 + .../m21/k31/regular-bench.json | 6 + .../m21/k31/regular-bench.log | 30 + .../m21/k31/regular-build.json | 2 + .../m21/k31/regular-build.log | 286 ++ .../m21/k31/regular-build.time.log | 46 + .../m21/k63/canon-bench.json | 3 + .../m21/k63/canon-bench.log | 15 + .../m21/k63/canon-build.json | 1 + .../m21/k63/canon-build.log | 129 + .../m21/k63/canon-build.time.log | 23 + .../m21/k63/regular-bench.json | 3 + .../m21/k63/regular-bench.log | 15 + .../m21/k63/regular-build.json | 1 + .../m21/k63/regular-build.log | 129 + .../m21/k63/regular-build.time.log | 23 + .../m23/k31/canon-bench.json | 6 + .../m23/k31/canon-bench.log | 30 + .../m23/k31/canon-build.json | 2 + .../m23/k31/canon-build.log | 312 ++ .../m23/k31/canon-build.time.log | 46 + .../m23/k31/regular-bench.json | 6 + .../m23/k31/regular-bench.log | 30 + .../m23/k31/regular-build.json | 2 + .../m23/k31/regular-build.log | 292 ++ .../m23/k31/regular-build.time.log | 46 + .../m23/k63/canon-bench.json | 6 + .../m23/k63/canon-bench.log | 30 + .../m23/k63/canon-build.json | 2 + .../m23/k63/canon-build.log | 272 ++ .../m23/k63/canon-build.time.log | 46 + .../m23/k63/regular-bench.json | 6 + .../m23/k63/regular-bench.log | 30 + .../m23/k63/regular-build.json | 2 + .../m23/k63/regular-build.log | 271 ++ .../m23/k63/regular-build.time.log | 46 + .../m25/k31/canon-bench.json | 6 + .../m25/k31/canon-bench.log | 30 + .../m25/k31/canon-build.json | 2 + .../m25/k31/canon-build.log | 300 ++ .../m25/k31/canon-build.time.log | 46 + .../m25/k31/regular-bench.json | 6 + .../m25/k31/regular-bench.log | 30 + .../m25/k31/regular-build.json | 2 + .../m25/k31/regular-build.log | 292 ++ .../m25/k31/regular-build.time.log | 46 + .../m25/k63/canon-bench.json | 6 + .../m25/k63/canon-bench.log | 30 + .../m25/k63/canon-build.json | 2 + .../m25/k63/canon-build.log | 272 ++ .../m25/k63/canon-build.time.log | 46 + .../m25/k63/regular-bench.json | 6 + .../m25/k63/regular-bench.log | 30 + .../m25/k63/regular-build.json | 2 + .../m25/k63/regular-build.log | 271 ++ .../m25/k63/regular-build.time.log | 46 + .../m27/k63/canon-bench.json | 6 + .../m27/k63/canon-bench.log | 30 + .../m27/k63/canon-build.json | 2 + .../m27/k63/canon-build.log | 272 ++ .../m27/k63/canon-build.time.log | 46 + .../m27/k63/regular-bench.json | 6 + .../m27/k63/regular-bench.log | 30 + .../m27/k63/regular-build.json | 2 + .../m27/k63/regular-build.log | 271 ++ .../m27/k63/regular-build.time.log | 46 + .../m29/k63/canon-bench.json | 6 + .../m29/k63/canon-bench.log | 30 + .../m29/k63/canon-build.json | 2 + .../m29/k63/canon-build.log | 273 ++ .../m29/k63/canon-build.time.log | 46 + .../m29/k63/regular-bench.json | 6 + .../m29/k63/regular-bench.log | 30 + .../m29/k63/regular-build.json | 2 + .../m29/k63/regular-build.log | 271 ++ .../m29/k63/regular-build.time.log | 46 + .../m31/k63/canon-bench.json | 3 + .../m31/k63/canon-bench.log | 15 + .../m31/k63/canon-build.json | 1 + .../m31/k63/canon-build.log | 143 + .../m31/k63/canon-build.time.log | 23 + .../m31/k63/regular-bench.json | 3 + .../m31/k63/regular-bench.log | 15 + .../m31/k63/regular-build.json | 1 + .../m31/k63/regular-build.log | 143 + .../m31/k63/regular-build.time.log | 23 + benchmarks/rss_anon.k31.log | 2527 +++++++++++++++++ benchmarks/rss_anon.k63.log | 2331 +++++++++++++++ script/rss_anon.py | 63 + script/sweep-m.py | 159 ++ script/sweep-min-l.py | 192 ++ 215 files changed, 19911 insertions(+) create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.json create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.log create mode 100644 benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.time.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.json create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.log create mode 100644 benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.time.log create mode 100644 benchmarks/rss_anon.k31.log create mode 100644 benchmarks/rss_anon.k63.log create mode 100644 script/rss_anon.py create mode 100644 script/sweep-m.py create mode 100644 script/sweep-min-l.py diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.json new file mode 100644 index 0000000..311e192 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "544.248915", "negative lookup (avg_nanosec_per_kmer)": "420.775146", "access (avg_nanosec_per_kmer)": "350.000089", "iterator (avg_nanosec_per_kmer)": "2.497983"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "550.859348", "negative lookup (avg_nanosec_per_kmer)": "418.309105", "access (avg_nanosec_per_kmer)": "353.610897", "iterator (avg_nanosec_per_kmer)": "2.467604"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "551.242788", "negative lookup (avg_nanosec_per_kmer)": "420.143889", "access (avg_nanosec_per_kmer)": "348.932060", "iterator (avg_nanosec_per_kmer)": "2.576926"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "590.035675", "negative lookup (avg_nanosec_per_kmer)": "402.789329", "access (avg_nanosec_per_kmer)": "361.099896", "iterator (avg_nanosec_per_kmer)": "2.418812"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "596.526311", "negative lookup (avg_nanosec_per_kmer)": "399.537171", "access (avg_nanosec_per_kmer)": "358.647995", "iterator (avg_nanosec_per_kmer)": "2.393446"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "591.752320", "negative lookup (avg_nanosec_per_kmer)": "400.940378", "access (avg_nanosec_per_kmer)": "359.092055", "iterator (avg_nanosec_per_kmer)": "2.437187"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.log new file mode 100644 index 0000000..c55b5fb --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 544.249 +negative lookup (avg_nanosec_per_kmer) 420.775 +access (avg_nanosec_per_kmer) = 350 +iterator (avg_nanosec_per_kmer) = 2.49798 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 550.859 +negative lookup (avg_nanosec_per_kmer) 418.309 +access (avg_nanosec_per_kmer) = 353.611 +iterator (avg_nanosec_per_kmer) = 2.4676 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 551.243 +negative lookup (avg_nanosec_per_kmer) 420.144 +access (avg_nanosec_per_kmer) = 348.932 +iterator (avg_nanosec_per_kmer) = 2.57693 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 590.036 +negative lookup (avg_nanosec_per_kmer) 402.789 +access (avg_nanosec_per_kmer) = 361.1 +iterator (avg_nanosec_per_kmer) = 2.41881 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 596.526 +negative lookup (avg_nanosec_per_kmer) 399.537 +access (avg_nanosec_per_kmer) = 358.648 +iterator (avg_nanosec_per_kmer) = 2.39345 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 591.752 +negative lookup (avg_nanosec_per_kmer) 400.94 +access (avg_nanosec_per_kmer) = 359.092 +iterator (avg_nanosec_per_kmer) = 2.43719 diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.json new file mode 100644 index 0000000..65316b5 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7236066", "step 2 (compute minimizer tuples)": "5084102", "step 3 (merging minimizer tuples)": "20234739", "step 4 (build mphf)": "22814429", "step 5 (replacing minimizer values with MPHF hashes)": "18813199", "step 6 (merging minimizers tuples)": "70856698", "step 7.1 (build sparse index)": "6611169", "step 7.2 (build skew index)": "13822636", "step 7 (build sparse and skew index)": "21688206", "total_build_time_in_microsec": "166727439", "index_size_in_bytes": "3181693112", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4568469", "step 2 (compute minimizer tuples)": "2055860", "step 3 (merging minimizer tuples)": "10220969", "step 4 (build mphf)": "7407015", "step 5 (replacing minimizer values with MPHF hashes)": "6738639", "step 6 (merging minimizers tuples)": "29782597", "step 7.1 (build sparse index)": "4082947", "step 7.2 (build skew index)": "9266938", "step 7 (build sparse and skew index)": "13855607", "total_build_time_in_microsec": "74629156", "index_size_in_bytes": "1331419748", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.log new file mode 100644 index 0000000..d49e321 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.log @@ -0,0 +1,301 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash --canonical +2026-03-12 16:39:54: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23607 [sec] (2.88787 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.24.bin'... +=== step 2 (compute minimizer tuples): 5.0841 [sec] (2.02903 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 20.2347 [sec] (8.07555 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 16 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 22.8144 [sec] (9.10509 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.8132 [sec] (7.50822 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329994834943844.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 70.8567 [sec] (28.2784 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8750986 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 13916247/462224926 (3.01071%) +num_buckets_in_skew_index 410816/462224926 (0.0888779%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 43136923/511201278 (8.43834%) +num_minimizer_positions_of_buckets_in_skew_index 20166492/511201278 (3.94492%) +=== step 7.1 (build sparse index): 6.61117 [sec] (2.63847 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 25473200 + partition = 1: num kmers in buckets of size > 32 and <= 64: 19782579 + partition = 2: num kmers in buckets of size > 64 and <= 128: 14957205 + partition = 3: num kmers in buckets of size > 128 and <= 256: 10906495 + partition = 4: num kmers in buckets of size > 256 and <= 512: 7473094 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 7: num kmers in buckets of size > 2048 and <= 22085: 2829179 +num kmers in skew index = 88834374 (3.54532%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 25473200 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 25473200 kmers; bits/key = 2.53676 + built positions[0] for 25473200 kmers; bits/key = 5.00001 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 19782579 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[1] for 19782579 kmers; bits/key = 2.53997 + built positions[1] for 19782579 kmers; bits/key = 6.00002 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 14957205 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 14957205 kmers; bits/key = 2.56583 + built positions[2] for 14957205 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10906495 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[3] for 10906495 kmers; bits/key = 2.61744 + built positions[3] for 10906495 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 7473094 kmers; bits/key = 2.65359 + built positions[4] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 4774535 kmers; bits/key = 2.75085 + built positions[5] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[6] for 2638087 kmers; bits/key = 2.55989 + built positions[6] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 2829179 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2829179)... + built mphs[7] for 2829179 kmers; bits/key = 2.55984 + built positions[7] for 2829179 kmers; bits/key = 15.0001 +=== step 7.2 (build skew index): 13.8226 [sec] (5.51652 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 21.6882 [sec] (8.65562 [ns/kmer]) +=== total time: 166.727 [sec] (66.5398 [ns/kmer]) +total index size: 3181693112 [B] -- 3181.69 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.1508% + strings_offsets: 0.153147 [bits/kmer] -- 1.50759% + control_codewords: 6.08754 [bits/kmer] -- 59.9265% + mid_load_buckets: 0.550901 [bits/kmer] -- 5.42314% + begin_buckets_of_size: 2.42649e-07 [bits/kmer] -- 2.38867e-06% + strings: 2.24545 [bits/kmer] -- 22.1045% + skew_index: 0.598064 [bits/kmer] -- 5.88742% + weights: 5.87466e-07 [bits/kmer] -- 5.78308e-06% + -------------- + total: 10.1583 [bits/kmer] +2026-03-12 16:42:41: saving data structure to disk... +2026-03-12 16:42:43: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash --canonical +2026-03-12 16:42:43: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.56847 [sec] (5.10837 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.05586 [sec] (2.29882 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.221 [sec] (11.4289 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 16 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.40702 [sec] (8.28238 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330163294764243.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.73864 [sec] (7.53501 [ns/kmer]) +=== step 6 (merging minimizers tuples): 29.7826 [sec] (33.3023 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10738776 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 17751201/143418843 (12.3772%) +num_buckets_in_skew_index 387939/143418843 (0.270494%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 56575932/193511241 (29.2365%) +num_minimizer_positions_of_buckets_in_skew_index 11655606/193511241 (6.02322%) +=== step 7.1 (build sparse index): 4.08295 [sec] (4.56547 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 34586058 + partition = 1: num kmers in buckets of size > 32 and <= 64: 10649499 + partition = 2: num kmers in buckets of size > 64 and <= 128: 4240400 + partition = 3: num kmers in buckets of size > 128 and <= 256: 1914946 + partition = 4: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 7: num kmers in buckets of size > 2048 and <= 30655: 1323734 +num kmers in skew index = 55155195 (6.16735%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 34586058 + building MPHF with 16 threads and 12 partitions (avg. partition size = 3000000)... + built mphs[0] for 34586058 kmers; bits/key = 2.5358 + built positions[0] for 34586058 kmers; bits/key = 5.00001 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 10649499 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[1] for 10649499 kmers; bits/key = 2.67056 + built positions[1] for 10649499 kmers; bits/key = 6.00003 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4240400 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 4240400 kmers; bits/key = 2.84282 + built positions[2] for 4240400 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1914946 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1914946)... + built mphs[3] for 1914946 kmers; bits/key = 2.56017 + built positions[3] for 1914946 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1106165)... + built mphs[4] for 1106165 kmers; bits/key = 2.56093 + built positions[4] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 16 threads and 1 partitions (avg. partition size = 771672)... + built mphs[5] for 771672 kmers; bits/key = 2.41893 + built positions[5] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 16 threads and 1 partitions (avg. partition size = 562721)... + built mphs[6] for 562721 kmers; bits/key = 2.56295 + built positions[6] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 1323734 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1323734)... + built mphs[7] for 1323734 kmers; bits/key = 2.56067 + built positions[7] for 1323734 kmers; bits/key = 15.0003 +=== step 7.2 (build skew index): 9.26694 [sec] (10.3621 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 13.8556 [sec] (15.4931 [ns/kmer]) +=== total time: 74.6292 [sec] (83.4489 [ns/kmer]) +total index size: 1331419748 [B] -- 1331.42 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.8059% + strings_offsets: 0.333373 [bits/kmer] -- 2.79907% + control_codewords: 5.13178 [bits/kmer] -- 43.0875% + mid_load_buckets: 1.96113 [bits/kmer] -- 16.466% + begin_buckets_of_size: 6.79854e-07 [bits/kmer] -- 5.70819e-06% + strings: 3.10303 [bits/kmer] -- 26.0537% + skew_index: 0.927538 [bits/kmer] -- 7.7878% + weights: 1.64596e-06 [bits/kmer] -- 1.38198e-05% + -------------- + total: 11.9101 [bits/kmer] +2026-03-12 16:43:57: saving data structure to disk... +2026-03-12 16:43:58: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.time.log new file mode 100644 index 0000000..1f636dc --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.canon.sshash --canonical" + User time (seconds): 395.85 + System time (seconds): 60.38 + Percent of CPU this job got: 270% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:48.45 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18259272 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 16626964 + Voluntary context switches: 42624 + Involuntary context switches: 24759 + Swaps: 0 + File system inputs: 480 + File system outputs: 97272352 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.canon.sshash --canonical" + User time (seconds): 120.14 + System time (seconds): 22.10 + Percent of CPU this job got: 188% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:15.36 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7638680 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7269482 + Voluntary context switches: 2894 + Involuntary context switches: 2628 + Swaps: 0 + File system inputs: 256 + File system outputs: 28808120 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.json new file mode 100644 index 0000000..4bd667a --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "738.019789", "negative lookup (avg_nanosec_per_kmer)": "793.194880", "access (avg_nanosec_per_kmer)": "353.161620", "iterator (avg_nanosec_per_kmer)": "2.485179"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "744.240779", "negative lookup (avg_nanosec_per_kmer)": "798.799254", "access (avg_nanosec_per_kmer)": "351.167180", "iterator (avg_nanosec_per_kmer)": "2.487887"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "743.594349", "negative lookup (avg_nanosec_per_kmer)": "790.752239", "access (avg_nanosec_per_kmer)": "350.291935", "iterator (avg_nanosec_per_kmer)": "2.491264"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "769.631096", "negative lookup (avg_nanosec_per_kmer)": "737.246644", "access (avg_nanosec_per_kmer)": "357.384788", "iterator (avg_nanosec_per_kmer)": "2.429336"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "770.150031", "negative lookup (avg_nanosec_per_kmer)": "735.562334", "access (avg_nanosec_per_kmer)": "357.316455", "iterator (avg_nanosec_per_kmer)": "2.422943"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "773.492057", "negative lookup (avg_nanosec_per_kmer)": "738.653443", "access (avg_nanosec_per_kmer)": "361.692426", "iterator (avg_nanosec_per_kmer)": "2.390429"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.log new file mode 100644 index 0000000..2edef2f --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 738.02 +negative lookup (avg_nanosec_per_kmer) 793.195 +access (avg_nanosec_per_kmer) = 353.162 +iterator (avg_nanosec_per_kmer) = 2.48518 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 744.241 +negative lookup (avg_nanosec_per_kmer) 798.799 +access (avg_nanosec_per_kmer) = 351.167 +iterator (avg_nanosec_per_kmer) = 2.48789 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 743.594 +negative lookup (avg_nanosec_per_kmer) 790.752 +access (avg_nanosec_per_kmer) = 350.292 +iterator (avg_nanosec_per_kmer) = 2.49126 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 769.631 +negative lookup (avg_nanosec_per_kmer) 737.247 +access (avg_nanosec_per_kmer) = 357.385 +iterator (avg_nanosec_per_kmer) = 2.42934 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 770.15 +negative lookup (avg_nanosec_per_kmer) 735.562 +access (avg_nanosec_per_kmer) = 357.316 +iterator (avg_nanosec_per_kmer) = 2.42294 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 773.492 +negative lookup (avg_nanosec_per_kmer) 738.653 +access (avg_nanosec_per_kmer) = 361.692 +iterator (avg_nanosec_per_kmer) = 2.39043 diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.json new file mode 100644 index 0000000..d4ba361 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7641656", "step 2 (compute minimizer tuples)": "3617230", "step 3 (merging minimizer tuples)": "15582913", "step 4 (build mphf)": "19029895", "step 5 (replacing minimizer values with MPHF hashes)": "15119306", "step 6 (merging minimizers tuples)": "49127857", "step 7.1 (build sparse index)": "4938415", "step 7.2 (build skew index)": "10858442", "step 7 (build sparse and skew index)": "16705466", "total_build_time_in_microsec": "126824323", "index_size_in_bytes": "2754389812", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4764795", "step 2 (compute minimizer tuples)": "1365260", "step 3 (merging minimizer tuples)": "8089134", "step 4 (build mphf)": "6253166", "step 5 (replacing minimizer values with MPHF hashes)": "5419789", "step 6 (merging minimizers tuples)": "16378184", "step 7.1 (build sparse index)": "3122361", "step 7.2 (build skew index)": "6534672", "step 7 (build sparse and skew index)": "10050564", "total_build_time_in_microsec": "52320892", "index_size_in_bytes": "1156305346", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.log new file mode 100644 index 0000000..9e264ed --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.log @@ -0,0 +1,286 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash +2026-03-12 16:35:46: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.64166 [sec] (3.04974 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.18.bin'... +=== step 2 (compute minimizer tuples): 3.61723 [sec] (1.44361 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 15.5829 [sec] (6.21904 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 16 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 19.0299 [sec] (7.59471 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329746685340395.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 15.1193 [sec] (6.03402 [ns/kmer]) +=== step 6 (merging minimizers tuples): 49.1279 [sec] (19.6066 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6542948 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10558142/386687326 (2.73041%) +num_buckets_in_skew_index 300982/386687326 (0.077836%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 33036809/423023926 (7.80968%) +num_minimizer_positions_of_buckets_in_skew_index 14158915/423023926 (3.34707%) +=== step 7.1 (build sparse index): 4.93841 [sec] (1.97089 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 21296467 + partition = 1: num kmers in buckets of size > 32 and <= 64: 16103300 + partition = 2: num kmers in buckets of size > 64 and <= 128: 11807213 + partition = 3: num kmers in buckets of size > 128 and <= 256: 8389556 + partition = 4: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 7: num kmers in buckets of size > 2048 and <= 22972: 1591458 +num kmers in skew index = 69463513 (2.77224%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 21296467 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[0] for 21296467 kmers; bits/key = 2.67082 + built positions[0] for 21296467 kmers; bits/key = 5.00002 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 16103300 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 16103300 kmers; bits/key = 2.65249 + built positions[1] for 16103300 kmers; bits/key = 6.00002 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 11807213 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[2] for 11807213 kmers; bits/key = 2.55841 + built positions[2] for 11807213 kmers; bits/key = 7.00003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8389556 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 8389556 kmers; bits/key = 2.61352 + built positions[3] for 8389556 kmers; bits/key = 8.00004 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 5343660 kmers; bits/key = 2.66261 + built positions[4] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3076413 kmers; bits/key = 3.34298 + built positions[5] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[6] for 1855446 kmers; bits/key = 2.56022 + built positions[6] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 1591458 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1591458)... + built mphs[7] for 1591458 kmers; bits/key = 2.56042 + built positions[7] for 1591458 kmers; bits/key = 15.0002 +=== step 7.2 (build skew index): 10.8584 [sec] (4.33353 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 16.7055 [sec] (6.66704 [ns/kmer]) +=== total time: 126.824 [sec] (50.6148 [ns/kmer]) +total index size: 2754389812 [B] -- 2754.39 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 4.96014% + strings_offsets: 0.153147 [bits/kmer] -- 1.74147% + control_codewords: 5.0927 [bits/kmer] -- 57.9107% + mid_load_buckets: 0.421913 [bits/kmer] -- 4.7977% + begin_buckets_of_size: 2.42649e-07 [bits/kmer] -- 2.75923e-06% + strings: 2.24545 [bits/kmer] -- 25.5337% + skew_index: 0.444654 [bits/kmer] -- 5.0563% + weights: 5.87466e-07 [bits/kmer] -- 6.68025e-06% + -------------- + total: 8.79407 [bits/kmer] +2026-03-12 16:37:53: saving data structure to disk... +2026-03-12 16:37:54: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash +2026-03-12 16:37:54: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.7648 [sec] (5.3279 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.36526 [sec] (1.52661 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.08913 [sec] (9.04511 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 16 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.25317 [sec] (6.99217 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773329874997673192.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.41979 [sec] (6.0603 [ns/kmer]) +=== step 6 (merging minimizers tuples): 16.3782 [sec] (18.3138 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8383084 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 13915814/126246665 (11.0227%) +num_buckets_in_skew_index 151720/126246665 (0.120177%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 44567509/162006751 (27.5097%) +num_minimizer_positions_of_buckets_in_skew_index 5260111/162006751 (3.24685%) +=== step 7.1 (build sparse index): 3.12236 [sec] (3.49136 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 14047334 + partition = 1: num kmers in buckets of size > 32 and <= 64: 5296071 + partition = 2: num kmers in buckets of size > 64 and <= 128: 2254325 + partition = 3: num kmers in buckets of size > 128 and <= 256: 1183762 + partition = 4: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 7: num kmers in buckets of size > 2048 and <= 36894: 1100639 +num kmers in skew index = 25810173 (2.88604%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 14047334 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[0] for 14047334 kmers; bits/key = 2.55253 + built positions[0] for 14047334 kmers; bits/key = 5.00002 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 5296071 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 5296071 kmers; bits/key = 2.60187 + built positions[1] for 5296071 kmers; bits/key = 6.00006 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2254325 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2254325)... + built mphs[2] for 2254325 kmers; bits/key = 2.56001 + built positions[2] for 2254325 kmers; bits/key = 7.00015 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1183762 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1183762)... + built mphs[3] for 1183762 kmers; bits/key = 2.56081 + built positions[3] for 1183762 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 16 threads and 1 partitions (avg. partition size = 885561)... + built mphs[4] for 885561 kmers; bits/key = 2.56147 + built positions[4] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 16 threads and 1 partitions (avg. partition size = 591648)... + built mphs[5] for 591648 kmers; bits/key = 2.56263 + built positions[5] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 16 threads and 1 partitions (avg. partition size = 450833)... + built mphs[6] for 450833 kmers; bits/key = 2.42098 + built positions[6] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 1100639 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1100639)... + built mphs[7] for 1100639 kmers; bits/key = 2.56101 + built positions[7] for 1100639 kmers; bits/key = 16.0003 +=== step 7.2 (build skew index): 6.53467 [sec] (7.30694 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 10.0506 [sec] (11.2383 [ns/kmer]) +=== total time: 52.3209 [sec] (58.5042 [ns/kmer]) +total index size: 1156305346 [B] -- 1156.31 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.92222% + strings_offsets: 0.333373 [bits/kmer] -- 3.22297% + control_codewords: 4.51733 [bits/kmer] -- 43.6724% + mid_load_buckets: 1.54487 [bits/kmer] -- 14.9354% + begin_buckets_of_size: 6.79854e-07 [bits/kmer] -- 6.57266e-06% + strings: 3.10303 [bits/kmer] -- 29.9994% + skew_index: 0.439355 [bits/kmer] -- 4.24757% + weights: 1.64596e-06 [bits/kmer] -- 1.59128e-05% + -------------- + total: 10.3437 [bits/kmer] +2026-03-12 16:38:47: saving data structure to disk... +2026-03-12 16:38:47: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.time.log new file mode 100644 index 0000000..bb85eef --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l4.sshash" + User time (seconds): 291.43 + System time (seconds): 46.43 + Percent of CPU this job got: 263% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:08.30 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 16072988 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 14000641 + Voluntary context switches: 48098 + Involuntary context switches: 8744 + Swaps: 0 + File system inputs: 1725528 + File system outputs: 63675600 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l4.sshash" + User time (seconds): 85.96 + System time (seconds): 17.71 + Percent of CPU this job got: 195% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:52.93 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6411896 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5669476 + Voluntary context switches: 2812 + Involuntary context switches: 2052 + Swaps: 0 + File system inputs: 996928 + File system outputs: 23819792 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.json new file mode 100644 index 0000000..45ba3e1 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "635.610818", "negative lookup (avg_nanosec_per_kmer)": "520.930918", "access (avg_nanosec_per_kmer)": "362.061009", "iterator (avg_nanosec_per_kmer)": "2.782965"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "638.942368", "negative lookup (avg_nanosec_per_kmer)": "515.385966", "access (avg_nanosec_per_kmer)": "358.662701", "iterator (avg_nanosec_per_kmer)": "2.727004"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "626.027419", "negative lookup (avg_nanosec_per_kmer)": "514.044951", "access (avg_nanosec_per_kmer)": "356.754167", "iterator (avg_nanosec_per_kmer)": "2.736238"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "812.841547", "negative lookup (avg_nanosec_per_kmer)": "497.692178", "access (avg_nanosec_per_kmer)": "408.706052", "iterator (avg_nanosec_per_kmer)": "2.793953"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "818.078727", "negative lookup (avg_nanosec_per_kmer)": "497.792027", "access (avg_nanosec_per_kmer)": "406.788476", "iterator (avg_nanosec_per_kmer)": "2.755769"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "815.640033", "negative lookup (avg_nanosec_per_kmer)": "500.082052", "access (avg_nanosec_per_kmer)": "406.233349", "iterator (avg_nanosec_per_kmer)": "2.754554"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.log new file mode 100644 index 0000000..53ec7f4 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 635.611 +negative lookup (avg_nanosec_per_kmer) 520.931 +access (avg_nanosec_per_kmer) = 362.061 +iterator (avg_nanosec_per_kmer) = 2.78296 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 638.942 +negative lookup (avg_nanosec_per_kmer) 515.386 +access (avg_nanosec_per_kmer) = 358.663 +iterator (avg_nanosec_per_kmer) = 2.727 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 626.027 +negative lookup (avg_nanosec_per_kmer) 514.045 +access (avg_nanosec_per_kmer) = 356.754 +iterator (avg_nanosec_per_kmer) = 2.73624 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 812.842 +negative lookup (avg_nanosec_per_kmer) 497.692 +access (avg_nanosec_per_kmer) = 408.706 +iterator (avg_nanosec_per_kmer) = 2.79395 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 818.079 +negative lookup (avg_nanosec_per_kmer) 497.792 +access (avg_nanosec_per_kmer) = 406.788 +iterator (avg_nanosec_per_kmer) = 2.75577 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 815.64 +negative lookup (avg_nanosec_per_kmer) 500.082 +access (avg_nanosec_per_kmer) = 406.233 +iterator (avg_nanosec_per_kmer) = 2.75455 diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.json new file mode 100644 index 0000000..826f5d3 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6788822", "step 2 (compute minimizer tuples)": "3518715", "step 3 (merging minimizer tuples)": "12730452", "step 4 (build mphf)": "7767442", "step 5 (replacing minimizer values with MPHF hashes)": "6153910", "step 6 (merging minimizers tuples)": "21696588", "step 7.1 (build sparse index)": "2211342", "step 7.2 (build skew index)": "29622642", "step 7 (build sparse and skew index)": "32224776", "total_build_time_in_microsec": "90880705", "index_size_in_bytes": "1923401352", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7221476", "step 2 (compute minimizer tuples)": "2527353", "step 3 (merging minimizer tuples)": "6917726", "step 4 (build mphf)": "3803078", "step 5 (replacing minimizer values with MPHF hashes)": "4190748", "step 6 (merging minimizers tuples)": "10346915", "step 7.1 (build sparse index)": "3137730", "step 7.2 (build skew index)": "54032737", "step 7 (build sparse and skew index)": "57530018", "total_build_time_in_microsec": "92537314", "index_size_in_bytes": "1691798182", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.log new file mode 100644 index 0000000..378a457 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash --canonical +2026-03-12 16:48:48: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.78882 [sec] (2.44967 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.51871 [sec] (1.26969 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.7305 [sec] (4.59365 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 16 threads and 50 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.76744 [sec] (2.8028 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330528880078465.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.15391 [sec] (2.22057 [ns/kmer]) +=== step 6 (merging minimizers tuples): 21.6966 [sec] (7.82898 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2392820 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3951815/149769567 (2.6386%) +num_buckets_in_skew_index 166715/149769567 (0.111314%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 12598880/173272792 (7.27112%) +num_minimizer_positions_of_buckets_in_skew_index 15022875/173272792 (8.67007%) +=== step 7.1 (build sparse index): 2.21134 [sec] (0.797939 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 31484487 + partition = 1: num kmers in buckets of size > 32 and <= 64: 29536289 + partition = 2: num kmers in buckets of size > 64 and <= 128: 26742724 + partition = 3: num kmers in buckets of size > 128 and <= 256: 24475836 + partition = 4: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 7: num kmers in buckets of size > 2048 and <= 284250: 56449362 +num kmers in skew index = 227051224 (8.1929%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 31484487 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 31484487 kmers; bits/key = 2.54036 + built positions[0] for 31484487 kmers; bits/key = 5.00001 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 29536289 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[1] for 29536289 kmers; bits/key = 2.56433 + built positions[1] for 29536289 kmers; bits/key = 6.00001 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26742724 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[2] for 26742724 kmers; bits/key = 2.56429 + built positions[2] for 26742724 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 24475836 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[3] for 24475836 kmers; bits/key = 2.62316 + built positions[3] for 24475836 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[4] for 21113117 kmers; bits/key = 2.6904 + built positions[4] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[5] for 19260150 kmers; bits/key = 2.59757 + built positions[5] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[6] for 17989259 kmers; bits/key = 2.53715 + built positions[6] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 56449362 + building MPHF with 16 threads and 19 partitions (avg. partition size = 3000000)... + built mphs[7] for 56449362 kmers; bits/key = 2.55023 + built positions[7] for 56449362 kmers; bits/key = 19 +=== step 7.2 (build skew index): 29.6226 [sec] (10.689 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 32.2248 [sec] (11.628 [ns/kmer]) +=== total time: 90.8807 [sec] (32.7933 [ns/kmer]) +total index size: 1923401352 [B] -- 1923.4 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.7492% + strings_offsets: 0.11255 [bits/kmer] -- 2.02709% + control_codewords: 1.78341 [bits/kmer] -- 32.1202% + mid_load_buckets: 0.145478 [bits/kmer] -- 2.62013% + begin_buckets_of_size: 2.1939e-07 [bits/kmer] -- 3.95133e-06% + strings: 2.11826 [bits/kmer] -- 38.1509% + skew_index: 1.23997 [bits/kmer] -- 22.3325% + weights: 5.31156e-07 [bits/kmer] -- 9.56639e-06% + -------------- + total: 5.55231 [bits/kmer] +2026-03-12 16:50:19: saving data structure to disk... +2026-03-12 16:50:20: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash --canonical +2026-03-12 16:50:20: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.22148 [sec] (4.73569 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.52735 [sec] (1.65738 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.91773 [sec] (4.5365 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 16 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.80308 [sec] (2.49398 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330620825644408.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.19075 [sec] (2.7482 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.3469 [sec] (6.78529 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6384394 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 11613161/69577229 (16.691%) +num_buckets_in_skew_index 664147/69577229 (0.954546%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 39511380/126350163 (31.2713%) +num_minimizer_positions_of_buckets_in_skew_index 29538862/126350163 (23.3786%) +=== step 7.1 (build sparse index): 3.13773 [sec] (2.05766 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 85561796 + partition = 1: num kmers in buckets of size > 32 and <= 64: 118193461 + partition = 2: num kmers in buckets of size > 64 and <= 128: 77399356 + partition = 3: num kmers in buckets of size > 128 and <= 256: 26776297 + partition = 4: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 7: num kmers in buckets of size > 2048 and <= 245177: 15704475 +num kmers in skew index = 347378041 (22.7803%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 85561796 + building MPHF with 16 threads and 29 partitions (avg. partition size = 3000000)... + built mphs[0] for 85561796 kmers; bits/key = 2.55564 + built positions[0] for 85561796 kmers; bits/key = 5 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 118193461 + building MPHF with 16 threads and 40 partitions (avg. partition size = 3000000)... + built mphs[1] for 118193461 kmers; bits/key = 2.56344 + built positions[1] for 118193461 kmers; bits/key = 6 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 77399356 + building MPHF with 16 threads and 26 partitions (avg. partition size = 3000000)... + built mphs[2] for 77399356 kmers; bits/key = 2.54863 + built positions[2] for 77399356 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26776297 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[3] for 26776297 kmers; bits/key = 2.54559 + built positions[3] for 26776297 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[4] for 13974034 kmers; bits/key = 2.56375 + built positions[4] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[5] for 6504754 kmers; bits/key = 2.78902 + built positions[5] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 3263868 kmers; bits/key = 3.30619 + built positions[6] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 15704475 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[7] for 15704475 kmers; bits/key = 2.62741 + built positions[7] for 15704475 kmers; bits/key = 18 +=== step 7.2 (build skew index): 54.0327 [sec] (35.4335 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 57.53 [sec] (37.727 [ns/kmer]) +=== total time: 92.5373 [sec] (60.684 [ns/kmer]) +total index size: 1691798182 [B] -- 1691.8 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.49103% + strings_offsets: 0.274587 [bits/kmer] -- 3.09374% + control_codewords: 1.5057 [bits/kmer] -- 16.9646% + mid_load_buckets: 0.829144 [bits/kmer] -- 9.34187% + begin_buckets_of_size: 3.98714e-07 [bits/kmer] -- 4.49226e-06% + strings: 3.35283 [bits/kmer] -- 37.7759% + skew_index: 2.78097 [bits/kmer] -- 31.3329% + weights: 9.65307e-07 [bits/kmer] -- 1.0876e-05% + -------------- + total: 8.87556 [bits/kmer] +2026-03-12 16:51:53: saving data structure to disk... +2026-03-12 16:51:54: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.time.log new file mode 100644 index 0000000..b68bd8d --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.canon.sshash --canonical" + User time (seconds): 232.95 + System time (seconds): 24.01 + Percent of CPU this job got: 279% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:31.94 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7393664 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 9159337 + Voluntary context switches: 2606 + Involuntary context switches: 3425 + Swaps: 0 + File system inputs: 216 + File system outputs: 28310696 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.canon.sshash --canonical" + User time (seconds): 276.48 + System time (seconds): 23.06 + Percent of CPU this job got: 320% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:33.50 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 12830608 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 10337728 + Voluntary context switches: 4217 + Involuntary context switches: 3173 + Swaps: 0 + File system inputs: 152 + File system outputs: 19691728 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.json new file mode 100644 index 0000000..21760db --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "814.016788", "negative lookup (avg_nanosec_per_kmer)": "861.851985", "access (avg_nanosec_per_kmer)": "358.255342", "iterator (avg_nanosec_per_kmer)": "2.717304"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "813.221598", "negative lookup (avg_nanosec_per_kmer)": "861.891856", "access (avg_nanosec_per_kmer)": "362.755346", "iterator (avg_nanosec_per_kmer)": "2.717924"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "817.601476", "negative lookup (avg_nanosec_per_kmer)": "859.679079", "access (avg_nanosec_per_kmer)": "358.837136", "iterator (avg_nanosec_per_kmer)": "2.718424"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "995.580232", "negative lookup (avg_nanosec_per_kmer)": "839.554332", "access (avg_nanosec_per_kmer)": "410.885903", "iterator (avg_nanosec_per_kmer)": "2.762757"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "993.441180", "negative lookup (avg_nanosec_per_kmer)": "851.107553", "access (avg_nanosec_per_kmer)": "405.772719", "iterator (avg_nanosec_per_kmer)": "2.754868"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "992.694265", "negative lookup (avg_nanosec_per_kmer)": "841.393509", "access (avg_nanosec_per_kmer)": "405.068348", "iterator (avg_nanosec_per_kmer)": "2.761216"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.log new file mode 100644 index 0000000..da7b15a --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 814.017 +negative lookup (avg_nanosec_per_kmer) 861.852 +access (avg_nanosec_per_kmer) = 358.255 +iterator (avg_nanosec_per_kmer) = 2.7173 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 813.222 +negative lookup (avg_nanosec_per_kmer) 861.892 +access (avg_nanosec_per_kmer) = 362.755 +iterator (avg_nanosec_per_kmer) = 2.71792 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 817.601 +negative lookup (avg_nanosec_per_kmer) 859.679 +access (avg_nanosec_per_kmer) = 358.837 +iterator (avg_nanosec_per_kmer) = 2.71842 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 995.58 +negative lookup (avg_nanosec_per_kmer) 839.554 +access (avg_nanosec_per_kmer) = 410.886 +iterator (avg_nanosec_per_kmer) = 2.76276 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 993.441 +negative lookup (avg_nanosec_per_kmer) 851.108 +access (avg_nanosec_per_kmer) = 405.773 +iterator (avg_nanosec_per_kmer) = 2.75487 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash +positive lookup (avg_nanosec_per_kmer) = 992.694 +negative lookup (avg_nanosec_per_kmer) 841.394 +access (avg_nanosec_per_kmer) = 405.068 +iterator (avg_nanosec_per_kmer) = 2.76122 diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.json new file mode 100644 index 0000000..4b7f7c3 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7244980", "step 2 (compute minimizer tuples)": "2053307", "step 3 (merging minimizer tuples)": "6313395", "step 4 (build mphf)": "6224635", "step 5 (replacing minimizer values with MPHF hashes)": "4777537", "step 6 (merging minimizers tuples)": "11129921", "step 7.1 (build sparse index)": "1633149", "step 7.2 (build skew index)": "23008708", "step 7 (build sparse and skew index)": "24941097", "total_build_time_in_microsec": "62684872", "index_size_in_bytes": "1722229050", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7573828", "step 2 (compute minimizer tuples)": "1491258", "step 3 (merging minimizer tuples)": "5462627", "step 4 (build mphf)": "3495417", "step 5 (replacing minimizer values with MPHF hashes)": "3447553", "step 6 (merging minimizers tuples)": "16989207", "step 7.1 (build sparse index)": "2414758", "step 7.2 (build skew index)": "44474740", "step 7 (build sparse and skew index)": "47162560", "total_build_time_in_microsec": "85622450", "index_size_in_bytes": "1513549540", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.log new file mode 100644 index 0000000..e6eb6eb --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash +2026-03-12 16:45:03: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 7.24498 [sec] (2.61427 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.05331 [sec] (0.740914 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.31339 [sec] (2.27812 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 16 threads and 41 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.22464 [sec] (2.24609 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330303437333265.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.77754 [sec] (1.72392 [ns/kmer]) +=== step 6 (merging minimizers tuples): 11.1299 [sec] (4.01611 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1804032 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 2993104/122838669 (2.43661%) +num_buckets_in_skew_index 132289/122838669 (0.107693%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 9641142/140756047 (6.84954%) +num_minimizer_positions_of_buckets_in_skew_index 11401629/140756047 (8.10028%) +=== step 7.1 (build sparse index): 1.63315 [sec] (0.589304 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 30228065 + partition = 1: num kmers in buckets of size > 32 and <= 64: 27528004 + partition = 2: num kmers in buckets of size > 64 and <= 128: 25196923 + partition = 3: num kmers in buckets of size > 128 and <= 256: 21919654 + partition = 4: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 7: num kmers in buckets of size > 2048 and <= 147936: 43637094 +num kmers in skew index = 203214197 (7.33277%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 30228065 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 30228065 kmers; bits/key = 2.60031 + built positions[0] for 30228065 kmers; bits/key = 5.00001 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 27528004 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[1] for 27528004 kmers; bits/key = 2.5965 + built positions[1] for 27528004 kmers; bits/key = 6.00001 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 25196923 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[2] for 25196923 kmers; bits/key = 2.56001 + built positions[2] for 25196923 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21919654 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[3] for 21919654 kmers; bits/key = 2.60671 + built positions[3] for 21919654 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[4] for 19634878 kmers; bits/key = 2.55594 + built positions[4] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[5] for 18051454 kmers; bits/key = 2.60116 + built positions[5] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[6] for 17018125 kmers; bits/key = 2.58264 + built positions[6] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 43637094 + building MPHF with 16 threads and 15 partitions (avg. partition size = 3000000)... + built mphs[7] for 43637094 kmers; bits/key = 2.53825 + built positions[7] for 43637094 kmers; bits/key = 18 +=== step 7.2 (build skew index): 23.0087 [sec] (8.30245 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 24.9411 [sec] (8.99973 [ns/kmer]) +=== total time: 62.6849 [sec] (22.6192 [ns/kmer]) +total index size: 1722229050 [B] -- 1722.23 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.51854% + strings_offsets: 0.11255 [bits/kmer] -- 2.26387% + control_codewords: 1.46273 [bits/kmer] -- 29.4217% + mid_load_buckets: 0.111325 [bits/kmer] -- 2.23923% + begin_buckets_of_size: 2.1939e-07 [bits/kmer] -- 4.41289e-06% + strings: 2.11826 [bits/kmer] -- 42.6072% + skew_index: 1.04152 [bits/kmer] -- 20.9494% + weights: 5.31156e-07 [bits/kmer] -- 1.06838e-05% + -------------- + total: 4.97158 [bits/kmer] +2026-03-12 16:46:06: saving data structure to disk... +2026-03-12 16:46:06: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash +2026-03-12 16:46:07: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.57383 [sec] (4.96676 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.49126 [sec] (0.977936 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.46263 [sec] (3.58228 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 16 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.49542 [sec] (2.29222 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330367069255034.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.44755 [sec] (2.26083 [ns/kmer]) +=== step 6 (merging minimizers tuples): 16.9892 [sec] (11.1412 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5263874 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9471927/61951224 (15.2893%) +num_buckets_in_skew_index 564326/61951224 (0.91092%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 32602433/105337248 (30.9505%) +num_minimizer_positions_of_buckets_in_skew_index 20819844/105337248 (19.7649%) +=== step 7.1 (build sparse index): 2.41476 [sec] (1.58355 [ns/kmer]) + partition = 0: num kmers in buckets of size > 16 and <= 32: 119883146 + partition = 1: num kmers in buckets of size > 32 and <= 64: 95113795 + partition = 2: num kmers in buckets of size > 64 and <= 128: 32078766 + partition = 3: num kmers in buckets of size > 128 and <= 256: 16092632 + partition = 4: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 5: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 6: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 7: num kmers in buckets of size > 2048 and <= 144478: 13215365 +num kmers in skew index = 290793628 (19.0696%) + lower = 16; upper = 32; num_bits_per_pos = 5; num_kmers_in_partition = 119883146 + building MPHF with 16 threads and 40 partitions (avg. partition size = 3000000)... + built mphs[0] for 119883146 kmers; bits/key = 2.55462 + built positions[0] for 119883146 kmers; bits/key = 5 + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 95113795 + building MPHF with 16 threads and 32 partitions (avg. partition size = 3000000)... + built mphs[1] for 95113795 kmers; bits/key = 2.53911 + built positions[1] for 95113795 kmers; bits/key = 6 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 32078766 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[2] for 32078766 kmers; bits/key = 2.51437 + built positions[2] for 32078766 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16092632 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 16092632 kmers; bits/key = 2.65398 + built positions[3] for 16092632 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 8174536 kmers; bits/key = 2.61889 + built positions[4] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3454318 kmers; bits/key = 3.14686 + built positions[5] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[6] for 2781070 kmers; bits/key = 2.55987 + built positions[6] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 13215365 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[7] for 13215365 kmers; bits/key = 2.68701 + built positions[7] for 13215365 kmers; bits/key = 18 +=== step 7.2 (build skew index): 44.4747 [sec] (29.1656 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 47.1626 [sec] (30.9282 [ns/kmer]) +=== total time: 85.6225 [sec] (56.1494 [ns/kmer]) +total index size: 1513549540 [B] -- 1513.55 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.46198% + strings_offsets: 0.274587 [bits/kmer] -- 3.45809% + control_codewords: 1.34067 [bits/kmer] -- 16.8841% + mid_load_buckets: 0.68416 [bits/kmer] -- 8.61616% + begin_buckets_of_size: 3.98714e-07 [bits/kmer] -- 5.02131e-06% + strings: 3.35283 [bits/kmer] -- 42.2247% + skew_index: 2.1721 [bits/kmer] -- 27.355% + weights: 9.65307e-07 [bits/kmer] -- 1.21569e-05% + -------------- + total: 7.94043 [bits/kmer] +2026-03-12 16:47:32: saving data structure to disk... +2026-03-12 16:47:33: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.time.log new file mode 100644 index 0000000..fc1502b --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l4/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l4.sshash" + User time (seconds): 178.23 + System time (seconds): 19.32 + Percent of CPU this job got: 310% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:03.62 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6209252 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7237873 + Voluntary context switches: 2187 + Involuntary context switches: 2872 + Swaps: 0 + File system inputs: 1667432 + File system outputs: 22555200 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l4.sshash" + User time (seconds): 227.18 + System time (seconds): 18.47 + Percent of CPU this job got: 284% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:26.48 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 10486796 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7993662 + Voluntary context switches: 2745 + Involuntary context switches: 3061 + Swaps: 0 + File system inputs: 1679872 + File system outputs: 16260880 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.json new file mode 100644 index 0000000..0bfc089 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "548.060515", "negative lookup (avg_nanosec_per_kmer)": "420.762877", "access (avg_nanosec_per_kmer)": "349.545751", "iterator (avg_nanosec_per_kmer)": "2.512821"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "554.913650", "negative lookup (avg_nanosec_per_kmer)": "419.157099", "access (avg_nanosec_per_kmer)": "348.897306", "iterator (avg_nanosec_per_kmer)": "2.472259"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "560.958894", "negative lookup (avg_nanosec_per_kmer)": "423.772273", "access (avg_nanosec_per_kmer)": "348.639435", "iterator (avg_nanosec_per_kmer)": "2.514347"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "600.500893", "negative lookup (avg_nanosec_per_kmer)": "397.610404", "access (avg_nanosec_per_kmer)": "360.331386", "iterator (avg_nanosec_per_kmer)": "2.435022"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "605.805132", "negative lookup (avg_nanosec_per_kmer)": "399.587061", "access (avg_nanosec_per_kmer)": "359.167831", "iterator (avg_nanosec_per_kmer)": "2.420246"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "599.172732", "negative lookup (avg_nanosec_per_kmer)": "401.301704", "access (avg_nanosec_per_kmer)": "362.165754", "iterator (avg_nanosec_per_kmer)": "2.470805"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.log new file mode 100644 index 0000000..bdf30d3 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 548.061 +negative lookup (avg_nanosec_per_kmer) 420.763 +access (avg_nanosec_per_kmer) = 349.546 +iterator (avg_nanosec_per_kmer) = 2.51282 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 554.914 +negative lookup (avg_nanosec_per_kmer) 419.157 +access (avg_nanosec_per_kmer) = 348.897 +iterator (avg_nanosec_per_kmer) = 2.47226 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 560.959 +negative lookup (avg_nanosec_per_kmer) 423.772 +access (avg_nanosec_per_kmer) = 348.639 +iterator (avg_nanosec_per_kmer) = 2.51435 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 600.501 +negative lookup (avg_nanosec_per_kmer) 397.61 +access (avg_nanosec_per_kmer) = 360.331 +iterator (avg_nanosec_per_kmer) = 2.43502 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 605.805 +negative lookup (avg_nanosec_per_kmer) 399.587 +access (avg_nanosec_per_kmer) = 359.168 +iterator (avg_nanosec_per_kmer) = 2.42025 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 599.173 +negative lookup (avg_nanosec_per_kmer) 401.302 +access (avg_nanosec_per_kmer) = 362.166 +iterator (avg_nanosec_per_kmer) = 2.47081 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.json new file mode 100644 index 0000000..36d4509 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7223312", "step 2 (compute minimizer tuples)": "5230309", "step 3 (merging minimizer tuples)": "27623872", "step 4 (build mphf)": "22908814", "step 5 (replacing minimizer values with MPHF hashes)": "18853647", "step 6 (merging minimizers tuples)": "69788822", "step 7.1 (build sparse index)": "6480327", "step 7.2 (build skew index)": "11054491", "step 7 (build sparse and skew index)": "18698185", "total_build_time_in_microsec": "170326961", "index_size_in_bytes": "3157075724", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4554952", "step 2 (compute minimizer tuples)": "2051092", "step 3 (merging minimizer tuples)": "10183202", "step 4 (build mphf)": "7056092", "step 5 (replacing minimizer values with MPHF hashes)": "6717591", "step 6 (merging minimizers tuples)": "26146418", "step 7.1 (build sparse index)": "4125195", "step 7.2 (build skew index)": "5218831", "step 7 (build sparse and skew index)": "9851084", "total_build_time_in_microsec": "66560431", "index_size_in_bytes": "1298647366", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.log new file mode 100644 index 0000000..af889a8 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.log @@ -0,0 +1,301 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash --canonical +2026-03-12 16:57:19: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.22331 [sec] (2.88278 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.24.bin'... +=== step 2 (compute minimizer tuples): 5.23031 [sec] (2.08738 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 27.6239 [sec] (11.0245 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 16 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 22.9088 [sec] (9.14276 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.8536 [sec] (7.52437 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331039145806158.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 69.7888 [sec] (27.8523 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8750986 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 14166677/462224926 (3.06489%) +num_buckets_in_skew_index 160386/462224926 (0.0346987%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 48754830/511201278 (9.53731%) +num_minimizer_positions_of_buckets_in_skew_index 14548585/511201278 (2.84596%) +=== step 7.1 (build sparse index): 6.48033 [sec] (2.58626 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 19782579 + partition = 1: num kmers in buckets of size > 64 and <= 128: 14957205 + partition = 2: num kmers in buckets of size > 128 and <= 256: 10906495 + partition = 3: num kmers in buckets of size > 256 and <= 512: 7473094 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 1593261 + partition = 7: num kmers in buckets of size > 4096 and <= 22085: 1235918 +num kmers in skew index = 63361174 (2.5287%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 19782579 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[0] for 19782579 kmers; bits/key = 2.53997 + built positions[0] for 19782579 kmers; bits/key = 6.00002 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 14957205 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[1] for 14957205 kmers; bits/key = 2.56583 + built positions[1] for 14957205 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10906495 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[2] for 10906495 kmers; bits/key = 2.61744 + built positions[2] for 10906495 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 7473094 kmers; bits/key = 2.65359 + built positions[3] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4774535 kmers; bits/key = 2.75085 + built positions[4] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[5] for 2638087 kmers; bits/key = 2.55989 + built positions[5] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1593261 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1593261)... + built mphs[6] for 1593261 kmers; bits/key = 2.56041 + built positions[6] for 1593261 kmers; bits/key = 12.0002 + lower = 4096; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 1235918 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1235918)... + built mphs[7] for 1235918 kmers; bits/key = 2.41795 + built positions[7] for 1235918 kmers; bits/key = 15.0003 +=== step 7.2 (build skew index): 11.0545 [sec] (4.41178 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 18.6982 [sec] (7.46232 [ns/kmer]) +=== total time: 170.327 [sec] (67.9764 [ns/kmer]) +total index size: 3157075724 [B] -- 3157.08 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.19096% + strings_offsets: 0.153147 [bits/kmer] -- 1.51935% + control_codewords: 6.08754 [bits/kmer] -- 60.3938% + mid_load_buckets: 0.622648 [bits/kmer] -- 6.17722% + begin_buckets_of_size: 4.46985e-07 [bits/kmer] -- 4.43448e-06% + strings: 2.24545 [bits/kmer] -- 22.2769% + skew_index: 0.44772 [bits/kmer] -- 4.44178% + weights: 5.87466e-07 [bits/kmer] -- 5.82818e-06% + -------------- + total: 10.0797 [bits/kmer] +2026-03-12 17:00:09: saving data structure to disk... +2026-03-12 17:00:10: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash --canonical +2026-03-12 17:00:11: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.55495 [sec] (5.09326 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.05109 [sec] (2.29349 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.1832 [sec] (11.3867 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 16 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.05609 [sec] (7.88998 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331211216483397.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.71759 [sec] (7.51148 [ns/kmer]) +=== step 6 (merging minimizers tuples): 26.1464 [sec] (29.2364 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10738776 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 18072184/143418843 (12.601%) +num_buckets_in_skew_index 66956/143418843 (0.0466856%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 63456916/193511241 (32.7924%) +num_minimizer_positions_of_buckets_in_skew_index 4774622/193511241 (2.46736%) +=== step 7.1 (build sparse index): 4.12519 [sec] (4.61271 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 10649499 + partition = 1: num kmers in buckets of size > 64 and <= 128: 4240400 + partition = 2: num kmers in buckets of size > 128 and <= 256: 1914946 + partition = 3: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 475654 + partition = 7: num kmers in buckets of size > 4096 and <= 30655: 848080 +num kmers in skew index = 20569137 (2.3%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 10649499 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[0] for 10649499 kmers; bits/key = 2.67056 + built positions[0] for 10649499 kmers; bits/key = 6.00003 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4240400 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 4240400 kmers; bits/key = 2.84282 + built positions[1] for 4240400 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1914946 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1914946)... + built mphs[2] for 1914946 kmers; bits/key = 2.56017 + built positions[2] for 1914946 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1106165)... + built mphs[3] for 1106165 kmers; bits/key = 2.56093 + built positions[3] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 16 threads and 1 partitions (avg. partition size = 771672)... + built mphs[4] for 771672 kmers; bits/key = 2.41893 + built positions[4] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 16 threads and 1 partitions (avg. partition size = 562721)... + built mphs[5] for 562721 kmers; bits/key = 2.56295 + built positions[5] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 475654 + building MPHF with 16 threads and 1 partitions (avg. partition size = 475654)... + built mphs[6] for 475654 kmers; bits/key = 2.56365 + built positions[6] for 475654 kmers; bits/key = 12.0008 + lower = 4096; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 848080 + building MPHF with 16 threads and 1 partitions (avg. partition size = 848080)... + built mphs[7] for 848080 kmers; bits/key = 2.41872 + built positions[7] for 848080 kmers; bits/key = 15.0004 +=== step 7.2 (build skew index): 5.21883 [sec] (5.83559 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 9.85108 [sec] (11.0153 [ns/kmer]) +=== total time: 66.5604 [sec] (74.4266 [ns/kmer]) +total index size: 1298647366 [B] -- 1298.65 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.90194% + strings_offsets: 0.333373 [bits/kmer] -- 2.8697% + control_codewords: 5.13178 [bits/kmer] -- 44.1748% + mid_load_buckets: 2.19965 [bits/kmer] -- 18.9347% + begin_buckets_of_size: 1.25236e-06 [bits/kmer] -- 1.07804e-05% + strings: 3.10303 [bits/kmer] -- 26.7112% + skew_index: 0.395855 [bits/kmer] -- 3.40755% + weights: 1.64596e-06 [bits/kmer] -- 1.41686e-05% + -------------- + total: 11.617 [bits/kmer] +2026-03-12 17:01:17: saving data structure to disk... +2026-03-12 17:01:18: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.time.log new file mode 100644 index 0000000..733eda4 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.canon.sshash --canonical" + User time (seconds): 385.23 + System time (seconds): 59.04 + Percent of CPU this job got: 258% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:52.06 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18402996 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 16332406 + Voluntary context switches: 31034 + Involuntary context switches: 21766 + Swaps: 0 + File system inputs: 200 + File system outputs: 97224280 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.canon.sshash --canonical" + User time (seconds): 100.19 + System time (seconds): 21.03 + Percent of CPU this job got: 180% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:07.27 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7640904 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6607673 + Voluntary context switches: 2917 + Involuntary context switches: 2311 + Swaps: 0 + File system inputs: 80 + File system outputs: 28744112 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.json new file mode 100644 index 0000000..acabc38 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "740.167893", "negative lookup (avg_nanosec_per_kmer)": "788.267148", "access (avg_nanosec_per_kmer)": "347.947111", "iterator (avg_nanosec_per_kmer)": "2.487946"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "744.548056", "negative lookup (avg_nanosec_per_kmer)": "783.108166", "access (avg_nanosec_per_kmer)": "352.164466", "iterator (avg_nanosec_per_kmer)": "2.484300"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "748.413816", "negative lookup (avg_nanosec_per_kmer)": "788.583234", "access (avg_nanosec_per_kmer)": "352.089947", "iterator (avg_nanosec_per_kmer)": "2.497555"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "770.003032", "negative lookup (avg_nanosec_per_kmer)": "737.381879", "access (avg_nanosec_per_kmer)": "356.142629", "iterator (avg_nanosec_per_kmer)": "2.401925"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "772.012866", "negative lookup (avg_nanosec_per_kmer)": "743.466146", "access (avg_nanosec_per_kmer)": "372.470911", "iterator (avg_nanosec_per_kmer)": "2.448382"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "765.836811", "negative lookup (avg_nanosec_per_kmer)": "743.956163", "access (avg_nanosec_per_kmer)": "359.902573", "iterator (avg_nanosec_per_kmer)": "2.521846"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.log new file mode 100644 index 0000000..734b421 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 740.168 +negative lookup (avg_nanosec_per_kmer) 788.267 +access (avg_nanosec_per_kmer) = 347.947 +iterator (avg_nanosec_per_kmer) = 2.48795 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 744.548 +negative lookup (avg_nanosec_per_kmer) 783.108 +access (avg_nanosec_per_kmer) = 352.164 +iterator (avg_nanosec_per_kmer) = 2.4843 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 748.414 +negative lookup (avg_nanosec_per_kmer) 788.583 +access (avg_nanosec_per_kmer) = 352.09 +iterator (avg_nanosec_per_kmer) = 2.49755 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 770.003 +negative lookup (avg_nanosec_per_kmer) 737.382 +access (avg_nanosec_per_kmer) = 356.143 +iterator (avg_nanosec_per_kmer) = 2.40192 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 772.013 +negative lookup (avg_nanosec_per_kmer) 743.466 +access (avg_nanosec_per_kmer) = 372.471 +iterator (avg_nanosec_per_kmer) = 2.44838 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 765.837 +negative lookup (avg_nanosec_per_kmer) 743.956 +access (avg_nanosec_per_kmer) = 359.903 +iterator (avg_nanosec_per_kmer) = 2.52185 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.json new file mode 100644 index 0000000..1b000fa --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7245450", "step 2 (compute minimizer tuples)": "3624032", "step 3 (merging minimizer tuples)": "14726038", "step 4 (build mphf)": "18904458", "step 5 (replacing minimizer values with MPHF hashes)": "15023352", "step 6 (merging minimizers tuples)": "49792462", "step 7.1 (build sparse index)": "4951482", "step 7.2 (build skew index)": "8711874", "step 7 (build sparse and skew index)": "14570958", "total_build_time_in_microsec": "123886750", "index_size_in_bytes": "2733573892", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4547674", "step 2 (compute minimizer tuples)": "1364826", "step 3 (merging minimizer tuples)": "8076903", "step 4 (build mphf)": "6281809", "step 5 (replacing minimizer values with MPHF hashes)": "5521421", "step 6 (merging minimizers tuples)": "24327903", "step 7.1 (build sparse index)": "3170963", "step 7.2 (build skew index)": "4677189", "step 7 (build sparse and skew index)": "8261706", "total_build_time_in_microsec": "58382242", "index_size_in_bytes": "1142844198", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.log new file mode 100644 index 0000000..5f5cf1f --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.log @@ -0,0 +1,286 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash +2026-03-12 16:53:07: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.24545 [sec] (2.89161 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.18.bin'... +=== step 2 (compute minimizer tuples): 3.62403 [sec] (1.44633 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 14.726 [sec] (5.87707 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 16 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 18.9045 [sec] (7.54465 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330787612989557.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 15.0234 [sec] (5.99572 [ns/kmer]) +=== step 6 (merging minimizers tuples): 49.7925 [sec] (19.8718 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6542948 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10743740/386687326 (2.77841%) +num_buckets_in_skew_index 115384/386687326 (0.0298391%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 37193394/423023926 (8.79227%) +num_minimizer_positions_of_buckets_in_skew_index 10002330/423023926 (2.36448%) +=== step 7.1 (build sparse index): 4.95148 [sec] (1.9761 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 16103300 + partition = 1: num kmers in buckets of size > 64 and <= 128: 11807213 + partition = 2: num kmers in buckets of size > 128 and <= 256: 8389556 + partition = 3: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 1008178 + partition = 7: num kmers in buckets of size > 4096 and <= 22972: 583280 +num kmers in skew index = 48167046 (1.92232%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 16103300 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[0] for 16103300 kmers; bits/key = 2.65249 + built positions[0] for 16103300 kmers; bits/key = 6.00002 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 11807213 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[1] for 11807213 kmers; bits/key = 2.55841 + built positions[1] for 11807213 kmers; bits/key = 7.00003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8389556 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 8389556 kmers; bits/key = 2.61352 + built positions[2] for 8389556 kmers; bits/key = 8.00004 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 5343660 kmers; bits/key = 2.66261 + built positions[3] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 3076413 kmers; bits/key = 3.34298 + built positions[4] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[5] for 1855446 kmers; bits/key = 2.56022 + built positions[5] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1008178 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1008178)... + built mphs[6] for 1008178 kmers; bits/key = 2.41829 + built positions[6] for 1008178 kmers; bits/key = 12.0004 + lower = 4096; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 583280 + building MPHF with 16 threads and 1 partitions (avg. partition size = 583280)... + built mphs[7] for 583280 kmers; bits/key = 2.56272 + built positions[7] for 583280 kmers; bits/key = 15.0006 +=== step 7.2 (build skew index): 8.71187 [sec] (3.47685 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 14.571 [sec] (5.81517 [ns/kmer]) +=== total time: 123.887 [sec] (49.4424 [ns/kmer]) +total index size: 2733573892 [B] -- 2733.57 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 4.99791% + strings_offsets: 0.153147 [bits/kmer] -- 1.75474% + control_codewords: 5.0927 [bits/kmer] -- 58.3516% + mid_load_buckets: 0.474997 [bits/kmer] -- 5.44246% + begin_buckets_of_size: 4.46985e-07 [bits/kmer] -- 5.1215e-06% + strings: 2.24545 [bits/kmer] -- 25.7282% + skew_index: 0.32511 [bits/kmer] -- 3.72508% + weights: 5.87466e-07 [bits/kmer] -- 6.73111e-06% + -------------- + total: 8.72761 [bits/kmer] +2026-03-12 16:55:11: saving data structure to disk... +2026-03-12 16:55:12: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash +2026-03-12 16:55:12: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.54767 [sec] (5.08512 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.36483 [sec] (1.52612 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.0769 [sec] (9.03143 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 16 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.28181 [sec] (7.0242 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773330912976431325.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.52142 [sec] (6.17394 [ns/kmer]) +=== step 6 (merging minimizers tuples): 24.3279 [sec] (27.203 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8383084 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 14034979/126246665 (11.1171%) +num_buckets_in_skew_index 32555/126246665 (0.0257868%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 47120970/162006751 (29.0858%) +num_minimizer_positions_of_buckets_in_skew_index 2706650/162006751 (1.6707%) +=== step 7.1 (build sparse index): 3.17096 [sec] (3.54571 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 5296071 + partition = 1: num kmers in buckets of size > 64 and <= 128: 2254325 + partition = 2: num kmers in buckets of size > 128 and <= 256: 1183762 + partition = 3: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 373731 + partition = 7: num kmers in buckets of size > 4096 and <= 36894: 726908 +num kmers in skew index = 11762839 (1.3153%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 5296071 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 5296071 kmers; bits/key = 2.60187 + built positions[0] for 5296071 kmers; bits/key = 6.00006 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2254325 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2254325)... + built mphs[1] for 2254325 kmers; bits/key = 2.56001 + built positions[1] for 2254325 kmers; bits/key = 7.00015 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1183762 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1183762)... + built mphs[2] for 1183762 kmers; bits/key = 2.56081 + built positions[2] for 1183762 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 16 threads and 1 partitions (avg. partition size = 885561)... + built mphs[3] for 885561 kmers; bits/key = 2.56147 + built positions[3] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 16 threads and 1 partitions (avg. partition size = 591648)... + built mphs[4] for 591648 kmers; bits/key = 2.56263 + built positions[4] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 16 threads and 1 partitions (avg. partition size = 450833)... + built mphs[5] for 450833 kmers; bits/key = 2.42098 + built positions[5] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 373731 + building MPHF with 16 threads and 1 partitions (avg. partition size = 373731)... + built mphs[6] for 373731 kmers; bits/key = 2.56484 + built positions[6] for 373731 kmers; bits/key = 12.0009 + lower = 4096; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 726908 + building MPHF with 16 threads and 1 partitions (avg. partition size = 726908)... + built mphs[7] for 726908 kmers; bits/key = 2.41906 + built positions[7] for 726908 kmers; bits/key = 16.0004 +=== step 7.2 (build skew index): 4.67719 [sec] (5.22994 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 8.26171 [sec] (9.23808 [ns/kmer]) +=== total time: 58.3822 [sec] (65.2819 [ns/kmer]) +total index size: 1142844198 [B] -- 1142.84 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.96842% + strings_offsets: 0.333373 [bits/kmer] -- 3.26093% + control_codewords: 4.51733 [bits/kmer] -- 44.1868% + mid_load_buckets: 1.63338 [bits/kmer] -- 15.9771% + begin_buckets_of_size: 1.25236e-06 [bits/kmer] -- 1.22501e-05% + strings: 3.10303 [bits/kmer] -- 30.3527% + skew_index: 0.230426 [bits/kmer] -- 2.25394% + weights: 1.64596e-06 [bits/kmer] -- 1.61002e-05% + -------------- + total: 10.2232 [bits/kmer] +2026-03-12 16:56:11: saving data structure to disk... +2026-03-12 16:56:11: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.time.log new file mode 100644 index 0000000..a79ed10 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l5.sshash" + User time (seconds): 284.74 + System time (seconds): 44.70 + Percent of CPU this job got: 262% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:05.36 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 16072064 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 13770397 + Voluntary context switches: 39462 + Involuntary context switches: 7794 + Swaps: 0 + File system inputs: 368 + File system outputs: 63634952 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l5.sshash" + User time (seconds): 80.23 + System time (seconds): 17.57 + Percent of CPU this job got: 165% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:59.00 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6406064 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5398832 + Voluntary context switches: 2675 + Involuntary context switches: 2110 + Swaps: 0 + File system inputs: 56 + File system outputs: 23793488 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.json new file mode 100644 index 0000000..9d7370d --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "633.090757", "negative lookup (avg_nanosec_per_kmer)": "515.587492", "access (avg_nanosec_per_kmer)": "357.124169", "iterator (avg_nanosec_per_kmer)": "2.785771"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "644.604158", "negative lookup (avg_nanosec_per_kmer)": "520.243355", "access (avg_nanosec_per_kmer)": "356.645603", "iterator (avg_nanosec_per_kmer)": "2.713720"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "630.020732", "negative lookup (avg_nanosec_per_kmer)": "516.525946", "access (avg_nanosec_per_kmer)": "358.020857", "iterator (avg_nanosec_per_kmer)": "2.722192"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "848.373956", "negative lookup (avg_nanosec_per_kmer)": "503.612178", "access (avg_nanosec_per_kmer)": "404.335205", "iterator (avg_nanosec_per_kmer)": "2.756626"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "852.210113", "negative lookup (avg_nanosec_per_kmer)": "505.162584", "access (avg_nanosec_per_kmer)": "405.141205", "iterator (avg_nanosec_per_kmer)": "2.756046"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "865.426219", "negative lookup (avg_nanosec_per_kmer)": "498.824128", "access (avg_nanosec_per_kmer)": "406.199599", "iterator (avg_nanosec_per_kmer)": "2.779943"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.log new file mode 100644 index 0000000..6c302c9 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 633.091 +negative lookup (avg_nanosec_per_kmer) 515.587 +access (avg_nanosec_per_kmer) = 357.124 +iterator (avg_nanosec_per_kmer) = 2.78577 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 644.604 +negative lookup (avg_nanosec_per_kmer) 520.243 +access (avg_nanosec_per_kmer) = 356.646 +iterator (avg_nanosec_per_kmer) = 2.71372 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 630.021 +negative lookup (avg_nanosec_per_kmer) 516.526 +access (avg_nanosec_per_kmer) = 358.021 +iterator (avg_nanosec_per_kmer) = 2.72219 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 848.374 +negative lookup (avg_nanosec_per_kmer) 503.612 +access (avg_nanosec_per_kmer) = 404.335 +iterator (avg_nanosec_per_kmer) = 2.75663 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 852.21 +negative lookup (avg_nanosec_per_kmer) 505.163 +access (avg_nanosec_per_kmer) = 405.141 +iterator (avg_nanosec_per_kmer) = 2.75605 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 865.426 +negative lookup (avg_nanosec_per_kmer) 498.824 +access (avg_nanosec_per_kmer) = 406.2 +iterator (avg_nanosec_per_kmer) = 2.77994 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.json new file mode 100644 index 0000000..c0732f6 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6818181", "step 2 (compute minimizer tuples)": "3499190", "step 3 (merging minimizer tuples)": "12872722", "step 4 (build mphf)": "7963073", "step 5 (replacing minimizer values with MPHF hashes)": "6219187", "step 6 (merging minimizers tuples)": "14228994", "step 7.1 (build sparse index)": "2246382", "step 7.2 (build skew index)": "24209208", "step 7 (build sparse and skew index)": "26842947", "total_build_time_in_microsec": "78444294", "index_size_in_bytes": "1880427400", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7250052", "step 2 (compute minimizer tuples)": "2552759", "step 3 (merging minimizer tuples)": "6938050", "step 4 (build mphf)": "3795763", "step 5 (replacing minimizer values with MPHF hashes)": "4111037", "step 6 (merging minimizers tuples)": "10188819", "step 7.1 (build sparse index)": "3106775", "step 7.2 (build skew index)": "41503620", "step 7 (build sparse and skew index)": "44966727", "total_build_time_in_microsec": "79803207", "index_size_in_bytes": "1609181156", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.log new file mode 100644 index 0000000..b1d36d3 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash --canonical +2026-03-12 17:05:45: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.81818 [sec] (2.46027 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.49919 [sec] (1.26265 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.8727 [sec] (4.64499 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 16 threads and 50 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.96307 [sec] (2.87339 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331545566892261.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.21919 [sec] (2.24413 [ns/kmer]) +=== step 6 (merging minimizers tuples): 14.229 [sec] (5.13438 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2392820 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4040348/149769567 (2.69771%) +num_buckets_in_skew_index 78182/149769567 (0.0522015%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 14600696/173272792 (8.42642%) +num_minimizer_positions_of_buckets_in_skew_index 13021059/173272792 (7.51477%) +=== step 7.1 (build sparse index): 2.24638 [sec] (0.810583 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 29536289 + partition = 1: num kmers in buckets of size > 64 and <= 128: 26742724 + partition = 2: num kmers in buckets of size > 128 and <= 256: 24475836 + partition = 3: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 15443443 + partition = 7: num kmers in buckets of size > 4096 and <= 284250: 41005919 +num kmers in skew index = 195566737 (7.05682%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 29536289 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[0] for 29536289 kmers; bits/key = 2.56433 + built positions[0] for 29536289 kmers; bits/key = 6.00001 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26742724 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26742724 kmers; bits/key = 2.56429 + built positions[1] for 26742724 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 24475836 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[2] for 24475836 kmers; bits/key = 2.62316 + built positions[2] for 24475836 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[3] for 21113117 kmers; bits/key = 2.6904 + built positions[3] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[4] for 19260150 kmers; bits/key = 2.59757 + built positions[4] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[5] for 17989259 kmers; bits/key = 2.53715 + built positions[5] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 15443443 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[6] for 15443443 kmers; bits/key = 2.69254 + built positions[6] for 15443443 kmers; bits/key = 12 + lower = 4096; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 41005919 + building MPHF with 16 threads and 14 partitions (avg. partition size = 3000000)... + built mphs[7] for 41005919 kmers; bits/key = 2.53849 + built positions[7] for 41005919 kmers; bits/key = 19 +=== step 7.2 (build skew index): 24.2092 [sec] (8.73564 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 26.8429 [sec] (9.68599 [ns/kmer]) +=== total time: 78.4443 [sec] (28.3058 [ns/kmer]) +total index size: 1880427400 [B] -- 1880.43 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.81203% + strings_offsets: 0.11255 [bits/kmer] -- 2.07341% + control_codewords: 1.78341 [bits/kmer] -- 32.8542% + mid_load_buckets: 0.168592 [bits/kmer] -- 3.10583% + begin_buckets_of_size: 4.0414e-07 [bits/kmer] -- 7.44512e-06% + strings: 2.11826 [bits/kmer] -- 39.0227% + skew_index: 1.0928 [bits/kmer] -- 20.1318% + weights: 5.31156e-07 [bits/kmer] -- 9.78501e-06% + -------------- + total: 5.42826 [bits/kmer] +2026-03-12 17:07:04: saving data structure to disk... +2026-03-12 17:07:04: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash --canonical +2026-03-12 17:07:05: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.25005 [sec] (4.75443 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.55276 [sec] (1.67405 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.93805 [sec] (4.54983 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 16 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.79576 [sec] (2.48918 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331625052817876.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.11104 [sec] (2.69593 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.1888 [sec] (6.68161 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6384394 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 11974334/69577229 (17.2101%) +num_buckets_in_skew_index 302974/69577229 (0.43545%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 47794811/126350163 (37.8273%) +num_minimizer_positions_of_buckets_in_skew_index 21255431/126350163 (16.8226%) +=== step 7.1 (build sparse index): 3.10677 [sec] (2.03736 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 118193461 + partition = 1: num kmers in buckets of size > 64 and <= 128: 77399356 + partition = 2: num kmers in buckets of size > 128 and <= 256: 26776297 + partition = 3: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 2527517 + partition = 7: num kmers in buckets of size > 4096 and <= 245177: 13176958 +num kmers in skew index = 261816245 (17.1694%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 118193461 + building MPHF with 16 threads and 40 partitions (avg. partition size = 3000000)... + built mphs[0] for 118193461 kmers; bits/key = 2.56344 + built positions[0] for 118193461 kmers; bits/key = 6 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 77399356 + building MPHF with 16 threads and 26 partitions (avg. partition size = 3000000)... + built mphs[1] for 77399356 kmers; bits/key = 2.54863 + built positions[1] for 77399356 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26776297 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[2] for 26776297 kmers; bits/key = 2.54559 + built positions[2] for 26776297 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 13974034 kmers; bits/key = 2.56375 + built positions[3] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 6504754 kmers; bits/key = 2.78902 + built positions[4] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3263868 kmers; bits/key = 3.30619 + built positions[5] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2527517 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2527517)... + built mphs[6] for 2527517 kmers; bits/key = 2.55992 + built positions[6] for 2527517 kmers; bits/key = 12.0001 + lower = 4096; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 13176958 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[7] for 13176958 kmers; bits/key = 2.69365 + built positions[7] for 13176958 kmers; bits/key = 18 +=== step 7.2 (build skew index): 41.5036 [sec] (27.2172 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 44.9667 [sec] (29.4882 [ns/kmer]) +=== total time: 79.8032 [sec] (52.3333 [ns/kmer]) +total index size: 1609181156 [B] -- 1609.18 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.56758% + strings_offsets: 0.274587 [bits/kmer] -- 3.25258% + control_codewords: 1.5057 [bits/kmer] -- 17.8355% + mid_load_buckets: 1.00297 [bits/kmer] -- 11.8805% + begin_buckets_of_size: 7.34472e-07 [bits/kmer] -- 8.70008e-06% + strings: 3.35283 [bits/kmer] -- 39.7154% + skew_index: 2.17371 [bits/kmer] -- 25.7484% + weights: 9.65307e-07 [bits/kmer] -- 1.14344e-05% + -------------- + total: 8.44214 [bits/kmer] +2026-03-12 17:08:24: saving data structure to disk... +2026-03-12 17:08:25: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.time.log new file mode 100644 index 0000000..2a260c6 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.canon.sshash --canonical" + User time (seconds): 212.22 + System time (seconds): 22.93 + Percent of CPU this job got: 295% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:19.48 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7403600 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 8544677 + Voluntary context switches: 2326 + Involuntary context switches: 2895 + Swaps: 0 + File system inputs: 56 + File system outputs: 28226744 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.canon.sshash --canonical" + User time (seconds): 223.42 + System time (seconds): 20.33 + Percent of CPU this job got: 301% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:20.74 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 10868756 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 8643973 + Voluntary context switches: 2560 + Involuntary context switches: 2896 + Swaps: 0 + File system inputs: 96 + File system outputs: 19530368 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.json new file mode 100644 index 0000000..c8e16a1 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "816.616223", "negative lookup (avg_nanosec_per_kmer)": "872.007323", "access (avg_nanosec_per_kmer)": "361.428303", "iterator (avg_nanosec_per_kmer)": "2.725529"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "818.119396", "negative lookup (avg_nanosec_per_kmer)": "868.124969", "access (avg_nanosec_per_kmer)": "356.860526", "iterator (avg_nanosec_per_kmer)": "2.725758"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "808.702766", "negative lookup (avg_nanosec_per_kmer)": "870.608233", "access (avg_nanosec_per_kmer)": "357.992329", "iterator (avg_nanosec_per_kmer)": "2.730240"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1039.778508", "negative lookup (avg_nanosec_per_kmer)": "848.513935", "access (avg_nanosec_per_kmer)": "410.450263", "iterator (avg_nanosec_per_kmer)": "2.753267"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1029.766781", "negative lookup (avg_nanosec_per_kmer)": "843.196274", "access (avg_nanosec_per_kmer)": "409.829896", "iterator (avg_nanosec_per_kmer)": "2.753821"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1039.863301", "negative lookup (avg_nanosec_per_kmer)": "848.093712", "access (avg_nanosec_per_kmer)": "405.482157", "iterator (avg_nanosec_per_kmer)": "2.767060"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.log new file mode 100644 index 0000000..eed1941 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 816.616 +negative lookup (avg_nanosec_per_kmer) 872.007 +access (avg_nanosec_per_kmer) = 361.428 +iterator (avg_nanosec_per_kmer) = 2.72553 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 818.119 +negative lookup (avg_nanosec_per_kmer) 868.125 +access (avg_nanosec_per_kmer) = 356.861 +iterator (avg_nanosec_per_kmer) = 2.72576 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 808.703 +negative lookup (avg_nanosec_per_kmer) 870.608 +access (avg_nanosec_per_kmer) = 357.992 +iterator (avg_nanosec_per_kmer) = 2.73024 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 1039.78 +negative lookup (avg_nanosec_per_kmer) 848.514 +access (avg_nanosec_per_kmer) = 410.45 +iterator (avg_nanosec_per_kmer) = 2.75327 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 1029.77 +negative lookup (avg_nanosec_per_kmer) 843.196 +access (avg_nanosec_per_kmer) = 409.83 +iterator (avg_nanosec_per_kmer) = 2.75382 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash +positive lookup (avg_nanosec_per_kmer) = 1039.86 +negative lookup (avg_nanosec_per_kmer) 848.094 +access (avg_nanosec_per_kmer) = 405.482 +iterator (avg_nanosec_per_kmer) = 2.76706 diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.json new file mode 100644 index 0000000..96ae517 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6828005", "step 2 (compute minimizer tuples)": "2036279", "step 3 (merging minimizer tuples)": "12545471", "step 4 (build mphf)": "6204838", "step 5 (replacing minimizer values with MPHF hashes)": "4732996", "step 6 (merging minimizers tuples)": "11448434", "step 7.1 (build sparse index)": "1650379", "step 7.2 (build skew index)": "20112384", "step 7 (build sparse and skew index)": "22069167", "total_build_time_in_microsec": "65865190", "index_size_in_bytes": "1683000746", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7214793", "step 2 (compute minimizer tuples)": "1494379", "step 3 (merging minimizer tuples)": "5485201", "step 4 (build mphf)": "3570623", "step 5 (replacing minimizer values with MPHF hashes)": "3449854", "step 6 (merging minimizers tuples)": "8223945", "step 7.1 (build sparse index)": "2452503", "step 7.2 (build skew index)": "26460907", "step 7 (build sparse and skew index)": "29195822", "total_build_time_in_microsec": "58634617", "index_size_in_bytes": "1397938738", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.log new file mode 100644 index 0000000..c8fb67f --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash +2026-03-12 17:02:23: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.82801 [sec] (2.46381 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.03628 [sec] (0.73477 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.5455 [sec] (4.5269 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 16 threads and 41 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.20484 [sec] (2.23895 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331343315676385.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.733 [sec] (1.70785 [ns/kmer]) +=== step 6 (merging minimizers tuples): 11.4484 [sec] (4.13105 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1804032 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3064113/122838669 (2.49442%) +num_buckets_in_skew_index 61280/122838669 (0.0498866%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 11246578/140756047 (7.99012%) +num_minimizer_positions_of_buckets_in_skew_index 9796193/140756047 (6.9597%) +=== step 7.1 (build sparse index): 1.65038 [sec] (0.595522 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 27528004 + partition = 1: num kmers in buckets of size > 64 and <= 128: 25196923 + partition = 2: num kmers in buckets of size > 128 and <= 256: 21919654 + partition = 3: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 14085569 + partition = 7: num kmers in buckets of size > 4096 and <= 147936: 29551525 +num kmers in skew index = 172986132 (6.24202%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 27528004 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[0] for 27528004 kmers; bits/key = 2.5965 + built positions[0] for 27528004 kmers; bits/key = 6.00001 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 25196923 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 25196923 kmers; bits/key = 2.56001 + built positions[1] for 25196923 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21919654 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[2] for 21919654 kmers; bits/key = 2.60671 + built positions[2] for 21919654 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 19634878 kmers; bits/key = 2.55594 + built positions[3] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[4] for 18051454 kmers; bits/key = 2.60116 + built positions[4] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[5] for 17018125 kmers; bits/key = 2.58264 + built positions[5] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14085569 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[6] for 14085569 kmers; bits/key = 2.54674 + built positions[6] for 14085569 kmers; bits/key = 12 + lower = 4096; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 29551525 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[7] for 29551525 kmers; bits/key = 2.54872 + built positions[7] for 29551525 kmers; bits/key = 18 +=== step 7.2 (build skew index): 20.1124 [sec] (7.25734 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 22.0692 [sec] (7.96342 [ns/kmer]) +=== total time: 65.8652 [sec] (23.7668 [ns/kmer]) +total index size: 1683000746 [B] -- 1683 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.57725% + strings_offsets: 0.11255 [bits/kmer] -- 2.31664% + control_codewords: 1.46273 [bits/kmer] -- 30.1075% + mid_load_buckets: 0.129863 [bits/kmer] -- 2.67298% + begin_buckets_of_size: 4.0414e-07 [bits/kmer] -- 8.31848e-06% + strings: 2.11826 [bits/kmer] -- 43.6004% + skew_index: 0.909737 [bits/kmer] -- 18.7253% + weights: 5.31156e-07 [bits/kmer] -- 1.09329e-05% + -------------- + total: 4.85834 [bits/kmer] +2026-03-12 17:03:29: saving data structure to disk... +2026-03-12 17:03:29: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash +2026-03-12 17:03:30: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.21479 [sec] (4.73131 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.49438 [sec] (0.979982 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.4852 [sec] (3.59708 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 16 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.57062 [sec] (2.34154 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331410140374541.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.44985 [sec] (2.26234 [ns/kmer]) +=== step 6 (merging minimizers tuples): 8.22395 [sec] (5.39309 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5263874 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9851637/61951224 (15.9022%) +num_buckets_in_skew_index 184616/61951224 (0.298002%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 41331085/105337248 (39.2369%) +num_minimizer_positions_of_buckets_in_skew_index 12091192/105337248 (11.4786%) +=== step 7.1 (build sparse index): 2.4525 [sec] (1.6083 [ns/kmer]) + partition = 0: num kmers in buckets of size > 32 and <= 64: 95113795 + partition = 1: num kmers in buckets of size > 64 and <= 128: 32078766 + partition = 2: num kmers in buckets of size > 128 and <= 256: 16092632 + partition = 3: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 4: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 5: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 6: num kmers in buckets of size > 2048 and <= 4096: 2981930 + partition = 7: num kmers in buckets of size > 4096 and <= 144478: 10233435 +num kmers in skew index = 170910482 (11.2079%) + lower = 32; upper = 64; num_bits_per_pos = 6; num_kmers_in_partition = 95113795 + building MPHF with 16 threads and 32 partitions (avg. partition size = 3000000)... + built mphs[0] for 95113795 kmers; bits/key = 2.53911 + built positions[0] for 95113795 kmers; bits/key = 6 + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 32078766 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[1] for 32078766 kmers; bits/key = 2.51437 + built positions[1] for 32078766 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16092632 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 16092632 kmers; bits/key = 2.65398 + built positions[2] for 16092632 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 8174536 kmers; bits/key = 2.61889 + built positions[3] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 3454318 kmers; bits/key = 3.14686 + built positions[4] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[5] for 2781070 kmers; bits/key = 2.55987 + built positions[5] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2981930 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2981930)... + built mphs[6] for 2981930 kmers; bits/key = 2.55978 + built positions[6] for 2981930 kmers; bits/key = 12.0001 + lower = 4096; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 10233435 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 10233435 kmers; bits/key = 2.5947 + built positions[7] for 10233435 kmers; bits/key = 18 +=== step 7.2 (build skew index): 26.4609 [sec] (17.3525 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 29.1958 [sec] (19.146 [ns/kmer]) +=== total time: 58.6346 [sec] (38.4513 [ns/kmer]) +total index size: 1397938738 [B] -- 1397.94 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.58289% + strings_offsets: 0.274587 [bits/kmer] -- 3.74407% + control_codewords: 1.34067 [bits/kmer] -- 18.2804% + mid_load_buckets: 0.86733 [bits/kmer] -- 11.8263% + begin_buckets_of_size: 7.34472e-07 [bits/kmer] -- 1.00147e-05% + strings: 3.35283 [bits/kmer] -- 45.7168% + skew_index: 1.38241 [bits/kmer] -- 18.8496% + weights: 9.65307e-07 [bits/kmer] -- 1.31622e-05% + -------------- + total: 7.33391 [bits/kmer] +2026-03-12 17:04:28: saving data structure to disk... +2026-03-12 17:04:29: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.time.log new file mode 100644 index 0000000..58500dd --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l5/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l5.sshash" + User time (seconds): 163.20 + System time (seconds): 18.36 + Percent of CPU this job got: 271% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:06.82 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6214132 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6770642 + Voluntary context switches: 2582 + Involuntary context switches: 2849 + Swaps: 0 + File system inputs: 48 + File system outputs: 22478584 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l5.sshash" + User time (seconds): 148.39 + System time (seconds): 15.33 + Percent of CPU this job got: 275% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:59.42 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 8833496 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6289314 + Voluntary context switches: 1968 + Involuntary context switches: 2155 + Swaps: 0 + File system inputs: 48 + File system outputs: 16035080 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.json new file mode 100644 index 0000000..53ba6ad --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "561.761895", "negative lookup (avg_nanosec_per_kmer)": "424.115472", "access (avg_nanosec_per_kmer)": "351.976281", "iterator (avg_nanosec_per_kmer)": "2.486907"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "570.082382", "negative lookup (avg_nanosec_per_kmer)": "424.243833", "access (avg_nanosec_per_kmer)": "355.690804", "iterator (avg_nanosec_per_kmer)": "2.465672"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "567.723526", "negative lookup (avg_nanosec_per_kmer)": "422.522443", "access (avg_nanosec_per_kmer)": "353.906452", "iterator (avg_nanosec_per_kmer)": "2.555958"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "619.628745", "negative lookup (avg_nanosec_per_kmer)": "399.776010", "access (avg_nanosec_per_kmer)": "360.051223", "iterator (avg_nanosec_per_kmer)": "2.413980"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "616.787765", "negative lookup (avg_nanosec_per_kmer)": "401.714490", "access (avg_nanosec_per_kmer)": "367.743021", "iterator (avg_nanosec_per_kmer)": "2.412043"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "620.968041", "negative lookup (avg_nanosec_per_kmer)": "403.240145", "access (avg_nanosec_per_kmer)": "360.098464", "iterator (avg_nanosec_per_kmer)": "2.435878"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.log new file mode 100644 index 0000000..b523f79 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 561.762 +negative lookup (avg_nanosec_per_kmer) 424.115 +access (avg_nanosec_per_kmer) = 351.976 +iterator (avg_nanosec_per_kmer) = 2.48691 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 570.082 +negative lookup (avg_nanosec_per_kmer) 424.244 +access (avg_nanosec_per_kmer) = 355.691 +iterator (avg_nanosec_per_kmer) = 2.46567 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 567.724 +negative lookup (avg_nanosec_per_kmer) 422.522 +access (avg_nanosec_per_kmer) = 353.906 +iterator (avg_nanosec_per_kmer) = 2.55596 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 619.629 +negative lookup (avg_nanosec_per_kmer) 399.776 +access (avg_nanosec_per_kmer) = 360.051 +iterator (avg_nanosec_per_kmer) = 2.41398 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 616.788 +negative lookup (avg_nanosec_per_kmer) 401.714 +access (avg_nanosec_per_kmer) = 367.743 +iterator (avg_nanosec_per_kmer) = 2.41204 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 620.968 +negative lookup (avg_nanosec_per_kmer) 403.24 +access (avg_nanosec_per_kmer) = 360.098 +iterator (avg_nanosec_per_kmer) = 2.43588 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.json new file mode 100644 index 0000000..cdb3844 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7250790", "step 2 (compute minimizer tuples)": "5021499", "step 3 (merging minimizer tuples)": "27678900", "step 4 (build mphf)": "22803076", "step 5 (replacing minimizer values with MPHF hashes)": "18827187", "step 6 (merging minimizers tuples)": "71171488", "step 7.1 (build sparse index)": "6555898", "step 7.2 (build skew index)": "8756677", "step 7 (build sparse and skew index)": "16499306", "total_build_time_in_microsec": "169252246", "index_size_in_bytes": "3135788878", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4554258", "step 2 (compute minimizer tuples)": "2060701", "step 3 (merging minimizer tuples)": "10212175", "step 4 (build mphf)": "7110734", "step 5 (replacing minimizer values with MPHF hashes)": "6730295", "step 6 (merging minimizers tuples)": "29236509", "step 7.1 (build sparse index)": "4069811", "step 7.2 (build skew index)": "3719890", "step 7 (build sparse and skew index)": "8298556", "total_build_time_in_microsec": "68203228", "index_size_in_bytes": "1287008500", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.log new file mode 100644 index 0000000..11248d4 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.log @@ -0,0 +1,301 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash --canonical +2026-03-12 17:13:47: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.25079 [sec] (2.89374 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.24.bin'... +=== step 2 (compute minimizer tuples): 5.0215 [sec] (2.00405 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 27.6789 [sec] (11.0465 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 16 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 22.8031 [sec] (9.10056 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.8272 [sec] (7.51381 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332028000180071.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 71.1715 [sec] (28.4041 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8750986 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 14266506/462224926 (3.08649%) +num_buckets_in_skew_index 60557/462224926 (0.0131012%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 53165974/511201278 (10.4002%) +num_minimizer_positions_of_buckets_in_skew_index 10137441/511201278 (1.98306%) +=== step 7.1 (build sparse index): 6.5559 [sec] (2.61642 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 14957205 + partition = 1: num kmers in buckets of size > 128 and <= 256: 10906495 + partition = 2: num kmers in buckets of size > 256 and <= 512: 7473094 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1593261 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 729770 + partition = 7: num kmers in buckets of size > 8192 and <= 22085: 506148 +num kmers in skew index = 43578595 (1.73919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 14957205 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[0] for 14957205 kmers; bits/key = 2.56583 + built positions[0] for 14957205 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10906495 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[1] for 10906495 kmers; bits/key = 2.61744 + built positions[1] for 10906495 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 7473094 kmers; bits/key = 2.65359 + built positions[2] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 4774535 kmers; bits/key = 2.75085 + built positions[3] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[4] for 2638087 kmers; bits/key = 2.55989 + built positions[4] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1593261 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1593261)... + built mphs[5] for 1593261 kmers; bits/key = 2.56041 + built positions[5] for 1593261 kmers; bits/key = 12.0002 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 729770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 729770)... + built mphs[6] for 729770 kmers; bits/key = 2.56195 + built positions[6] for 729770 kmers; bits/key = 13.0005 + lower = 8192; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 506148 + building MPHF with 16 threads and 1 partitions (avg. partition size = 506148)... + built mphs[7] for 506148 kmers; bits/key = 2.42048 + built positions[7] for 506148 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 8.75668 [sec] (3.49473 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 16.4993 [sec] (6.58477 [ns/kmer]) +=== total time: 169.252 [sec] (67.5475 [ns/kmer]) +total index size: 3135788878 [B] -- 3135.79 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.2262% + strings_offsets: 0.153147 [bits/kmer] -- 1.52966% + control_codewords: 6.08754 [bits/kmer] -- 60.8038% + mid_load_buckets: 0.678982 [bits/kmer] -- 6.78183% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 8.54649e-06% + strings: 2.24545 [bits/kmer] -- 22.4281% + skew_index: 0.323422 [bits/kmer] -- 3.23041% + weights: 5.87466e-07 [bits/kmer] -- 5.86774e-06% + -------------- + total: 10.0118 [bits/kmer] +2026-03-12 17:16:37: saving data structure to disk... +2026-03-12 17:16:38: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash --canonical +2026-03-12 17:16:38: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.55426 [sec] (5.09248 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.0607 [sec] (2.30424 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.2122 [sec] (11.4191 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 16 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.11073 [sec] (7.95108 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332198982214565.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.73029 [sec] (7.52568 [ns/kmer]) +=== step 6 (merging minimizers tuples): 29.2365 [sec] (32.6917 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10738776 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 18123968/143418843 (12.6371%) +num_buckets_in_skew_index 15172/143418843 (0.0105788%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 65664193/193511241 (33.933%) +num_minimizer_positions_of_buckets_in_skew_index 2567345/193511241 (1.32672%) +=== step 7.1 (build sparse index): 4.06981 [sec] (4.55078 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4240400 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1914946 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 475654 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 388509 + partition = 7: num kmers in buckets of size > 8192 and <= 30655: 459571 +num kmers in skew index = 9919638 (1.10919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4240400 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4240400 kmers; bits/key = 2.84282 + built positions[0] for 4240400 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1914946 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1914946)... + built mphs[1] for 1914946 kmers; bits/key = 2.56017 + built positions[1] for 1914946 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1106165)... + built mphs[2] for 1106165 kmers; bits/key = 2.56093 + built positions[2] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 16 threads and 1 partitions (avg. partition size = 771672)... + built mphs[3] for 771672 kmers; bits/key = 2.41893 + built positions[3] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 16 threads and 1 partitions (avg. partition size = 562721)... + built mphs[4] for 562721 kmers; bits/key = 2.56295 + built positions[4] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 475654 + building MPHF with 16 threads and 1 partitions (avg. partition size = 475654)... + built mphs[5] for 475654 kmers; bits/key = 2.56365 + built positions[5] for 475654 kmers; bits/key = 12.0008 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 388509 + building MPHF with 16 threads and 1 partitions (avg. partition size = 388509)... + built mphs[6] for 388509 kmers; bits/key = 2.42185 + built positions[6] for 388509 kmers; bits/key = 13.0008 + lower = 8192; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 459571 + building MPHF with 16 threads and 1 partitions (avg. partition size = 459571)... + built mphs[7] for 459571 kmers; bits/key = 2.4208 + built positions[7] for 459571 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 3.71989 [sec] (4.15951 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 8.29856 [sec] (9.27928 [ns/kmer]) +=== total time: 68.2032 [sec] (76.2635 [ns/kmer]) +total index size: 1287008500 [B] -- 1287.01 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.93723% + strings_offsets: 0.333373 [bits/kmer] -- 2.89565% + control_codewords: 5.13178 [bits/kmer] -- 44.5743% + mid_load_buckets: 2.27616 [bits/kmer] -- 19.7706% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.08235e-05% + strings: 3.10303 [bits/kmer] -- 26.9527% + skew_index: 0.215227 [bits/kmer] -- 1.86944% + weights: 1.64596e-06 [bits/kmer] -- 1.42967e-05% + -------------- + total: 11.5129 [bits/kmer] +2026-03-12 17:17:47: saving data structure to disk... +2026-03-12 17:17:47: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.time.log new file mode 100644 index 0000000..d94589d --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.canon.sshash --canonical" + User time (seconds): 376.75 + System time (seconds): 58.47 + Percent of CPU this job got: 254% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:50.97 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18414820 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 16183869 + Voluntary context switches: 34490 + Involuntary context switches: 20189 + Swaps: 0 + File system inputs: 232 + File system outputs: 97182688 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.canon.sshash --canonical" + User time (seconds): 97.41 + System time (seconds): 20.76 + Percent of CPU this job got: 171% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:08.91 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7633168 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6437162 + Voluntary context switches: 2467 + Involuntary context switches: 2147 + Swaps: 0 + File system inputs: 72 + File system outputs: 28721376 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.json new file mode 100644 index 0000000..925a8a3 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "755.213645", "negative lookup (avg_nanosec_per_kmer)": "791.863355", "access (avg_nanosec_per_kmer)": "349.324706", "iterator (avg_nanosec_per_kmer)": "2.499605"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "748.032804", "negative lookup (avg_nanosec_per_kmer)": "795.887306", "access (avg_nanosec_per_kmer)": "351.370614", "iterator (avg_nanosec_per_kmer)": "2.540400"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "757.396150", "negative lookup (avg_nanosec_per_kmer)": "796.232962", "access (avg_nanosec_per_kmer)": "353.669200", "iterator (avg_nanosec_per_kmer)": "2.501229"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "777.552634", "negative lookup (avg_nanosec_per_kmer)": "745.298506", "access (avg_nanosec_per_kmer)": "371.286293", "iterator (avg_nanosec_per_kmer)": "2.402319"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "771.004138", "negative lookup (avg_nanosec_per_kmer)": "741.472753", "access (avg_nanosec_per_kmer)": "358.822213", "iterator (avg_nanosec_per_kmer)": "2.415725"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "777.188423", "negative lookup (avg_nanosec_per_kmer)": "740.436991", "access (avg_nanosec_per_kmer)": "358.290822", "iterator (avg_nanosec_per_kmer)": "2.424651"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.log new file mode 100644 index 0000000..ace70d1 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 755.214 +negative lookup (avg_nanosec_per_kmer) 791.863 +access (avg_nanosec_per_kmer) = 349.325 +iterator (avg_nanosec_per_kmer) = 2.4996 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 748.033 +negative lookup (avg_nanosec_per_kmer) 795.887 +access (avg_nanosec_per_kmer) = 351.371 +iterator (avg_nanosec_per_kmer) = 2.5404 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 757.396 +negative lookup (avg_nanosec_per_kmer) 796.233 +access (avg_nanosec_per_kmer) = 353.669 +iterator (avg_nanosec_per_kmer) = 2.50123 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 777.553 +negative lookup (avg_nanosec_per_kmer) 745.299 +access (avg_nanosec_per_kmer) = 371.286 +iterator (avg_nanosec_per_kmer) = 2.40232 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 771.004 +negative lookup (avg_nanosec_per_kmer) 741.473 +access (avg_nanosec_per_kmer) = 358.822 +iterator (avg_nanosec_per_kmer) = 2.41573 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 777.188 +negative lookup (avg_nanosec_per_kmer) 740.437 +access (avg_nanosec_per_kmer) = 358.291 +iterator (avg_nanosec_per_kmer) = 2.42465 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.json new file mode 100644 index 0000000..71f5ee5 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7233489", "step 2 (compute minimizer tuples)": "3545775", "step 3 (merging minimizer tuples)": "20699256", "step 4 (build mphf)": "19003689", "step 5 (replacing minimizer values with MPHF hashes)": "15042817", "step 6 (merging minimizers tuples)": "42715818", "step 7.1 (build sparse index)": "4953084", "step 7.2 (build skew index)": "6909115", "step 7 (build sparse and skew index)": "12766208", "total_build_time_in_microsec": "121007052", "index_size_in_bytes": "2716053224", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4582946", "step 2 (compute minimizer tuples)": "1373532", "step 3 (merging minimizer tuples)": "8073197", "step 4 (build mphf)": "6293087", "step 5 (replacing minimizer values with MPHF hashes)": "5483252", "step 6 (merging minimizers tuples)": "25110500", "step 7.1 (build sparse index)": "3161821", "step 7.2 (build skew index)": "3523407", "step 7 (build sparse and skew index)": "7093976", "total_build_time_in_microsec": "58010490", "index_size_in_bytes": "1137030140", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.log new file mode 100644 index 0000000..51458d9 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.log @@ -0,0 +1,286 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash +2026-03-12 17:09:39: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23349 [sec] (2.88684 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.18.bin'... +=== step 2 (compute minimizer tuples): 3.54577 [sec] (1.4151 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 20.6993 [sec] (8.26094 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 16 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 19.0037 [sec] (7.58425 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331779475761794.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 15.0428 [sec] (6.00349 [ns/kmer]) +=== step 6 (merging minimizers tuples): 42.7158 [sec] (17.0476 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6542948 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10816752/386687326 (2.79729%) +num_buckets_in_skew_index 42372/386687326 (0.0109577%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 40422973/423023926 (9.55572%) +num_minimizer_positions_of_buckets_in_skew_index 6772751/423023926 (1.60103%) +=== step 7.1 (build sparse index): 4.95308 [sec] (1.97674 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 11807213 + partition = 1: num kmers in buckets of size > 128 and <= 256: 8389556 + partition = 2: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1008178 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 375770 + partition = 7: num kmers in buckets of size > 8192 and <= 22972: 207510 +num kmers in skew index = 32063746 (1.27964%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 11807213 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[0] for 11807213 kmers; bits/key = 2.55841 + built positions[0] for 11807213 kmers; bits/key = 7.00003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8389556 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 8389556 kmers; bits/key = 2.61352 + built positions[1] for 8389556 kmers; bits/key = 8.00004 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 5343660 kmers; bits/key = 2.66261 + built positions[2] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3076413 kmers; bits/key = 3.34298 + built positions[3] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[4] for 1855446 kmers; bits/key = 2.56022 + built positions[4] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1008178 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1008178)... + built mphs[5] for 1008178 kmers; bits/key = 2.41829 + built positions[5] for 1008178 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 375770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 375770)... + built mphs[6] for 375770 kmers; bits/key = 2.42182 + built positions[6] for 375770 kmers; bits/key = 13.001 + lower = 8192; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 207510 + building MPHF with 16 threads and 1 partitions (avg. partition size = 207510)... + built mphs[7] for 207510 kmers; bits/key = 2.42618 + built positions[7] for 207510 kmers; bits/key = 15.0018 +=== step 7.2 (build skew index): 6.90911 [sec] (2.75738 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 12.7662 [sec] (5.09491 [ns/kmer]) +=== total time: 121.007 [sec] (48.2931 [ns/kmer]) +total index size: 2716053224 [B] -- 2716.05 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 5.03015% + strings_offsets: 0.153147 [bits/kmer] -- 1.76606% + control_codewords: 5.0927 [bits/kmer] -- 58.7281% + mid_load_buckets: 0.516242 [bits/kmer] -- 5.95319% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 9.86726e-06% + strings: 2.24545 [bits/kmer] -- 25.8941% + skew_index: 0.227926 [bits/kmer] -- 2.6284% + weights: 5.87466e-07 [bits/kmer] -- 6.77454e-06% + -------------- + total: 8.67167 [bits/kmer] +2026-03-12 17:11:40: saving data structure to disk... +2026-03-12 17:11:41: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash +2026-03-12 17:11:41: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.58295 [sec] (5.12456 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.37353 [sec] (1.53586 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.0732 [sec] (9.02729 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 16 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.29309 [sec] (7.03681 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773331901926084001.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.48325 [sec] (6.13126 [ns/kmer]) +=== step 6 (merging minimizers tuples): 25.1105 [sec] (28.0781 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8383084 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 14059268/126246665 (11.1363%) +num_buckets_in_skew_index 8266/126246665 (0.0065475%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 48164669/162006751 (29.73%) +num_minimizer_positions_of_buckets_in_skew_index 1662951/162006751 (1.02647%) +=== step 7.1 (build sparse index): 3.16182 [sec] (3.53549 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 2254325 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1183762 + partition = 2: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 373731 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 338406 + partition = 7: num kmers in buckets of size > 8192 and <= 36894: 388502 +num kmers in skew index = 6466768 (0.723101%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2254325 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2254325)... + built mphs[0] for 2254325 kmers; bits/key = 2.56001 + built positions[0] for 2254325 kmers; bits/key = 7.00015 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1183762 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1183762)... + built mphs[1] for 1183762 kmers; bits/key = 2.56081 + built positions[1] for 1183762 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 16 threads and 1 partitions (avg. partition size = 885561)... + built mphs[2] for 885561 kmers; bits/key = 2.56147 + built positions[2] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 16 threads and 1 partitions (avg. partition size = 591648)... + built mphs[3] for 591648 kmers; bits/key = 2.56263 + built positions[3] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 16 threads and 1 partitions (avg. partition size = 450833)... + built mphs[4] for 450833 kmers; bits/key = 2.42098 + built positions[4] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 373731 + building MPHF with 16 threads and 1 partitions (avg. partition size = 373731)... + built mphs[5] for 373731 kmers; bits/key = 2.56484 + built positions[5] for 373731 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 338406 + building MPHF with 16 threads and 1 partitions (avg. partition size = 338406)... + built mphs[6] for 338406 kmers; bits/key = 2.4226 + built positions[6] for 338406 kmers; bits/key = 13.001 + lower = 8192; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 388502 + building MPHF with 16 threads and 1 partitions (avg. partition size = 388502)... + built mphs[7] for 388502 kmers; bits/key = 2.56472 + built positions[7] for 388502 kmers; bits/key = 16.0009 +=== step 7.2 (build skew index): 3.52341 [sec] (3.9398 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 7.09398 [sec] (7.93234 [ns/kmer]) +=== total time: 58.0105 [sec] (64.8662 [ns/kmer]) +total index size: 1137030140 [B] -- 1137.03 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.98871% + strings_offsets: 0.333373 [bits/kmer] -- 3.2776% + control_codewords: 4.51733 [bits/kmer] -- 44.4128% + mid_load_buckets: 1.66956 [bits/kmer] -- 16.4145% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.35702e-05% + strings: 3.10303 [bits/kmer] -- 30.5079% + skew_index: 0.142237 [bits/kmer] -- 1.39842% + weights: 1.64596e-06 [bits/kmer] -- 1.61825e-05% + -------------- + total: 10.1712 [bits/kmer] +2026-03-12 17:12:39: saving data structure to disk... +2026-03-12 17:12:40: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.time.log new file mode 100644 index 0000000..d725e97 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l6.sshash" + User time (seconds): 280.76 + System time (seconds): 44.19 + Percent of CPU this job got: 265% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:02.44 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 16072008 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 13585156 + Voluntary context switches: 27376 + Involuntary context switches: 9373 + Swaps: 0 + File system inputs: 160 + File system outputs: 63600712 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l6.sshash" + User time (seconds): 77.89 + System time (seconds): 17.46 + Percent of CPU this job got: 162% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:58.61 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6410196 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5330498 + Voluntary context switches: 2645 + Involuntary context switches: 1889 + Swaps: 0 + File system inputs: 72 + File system outputs: 23782128 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.json new file mode 100644 index 0000000..08e5f35 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "663.620155", "negative lookup (avg_nanosec_per_kmer)": "511.175862", "access (avg_nanosec_per_kmer)": "353.949270", "iterator (avg_nanosec_per_kmer)": "2.714958"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "653.999464", "negative lookup (avg_nanosec_per_kmer)": "514.211697", "access (avg_nanosec_per_kmer)": "355.109239", "iterator (avg_nanosec_per_kmer)": "2.734072"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "652.549549", "negative lookup (avg_nanosec_per_kmer)": "509.840616", "access (avg_nanosec_per_kmer)": "356.907395", "iterator (avg_nanosec_per_kmer)": "2.714895"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "975.508244", "negative lookup (avg_nanosec_per_kmer)": "494.869437", "access (avg_nanosec_per_kmer)": "401.942954", "iterator (avg_nanosec_per_kmer)": "2.782899"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "970.380133", "negative lookup (avg_nanosec_per_kmer)": "499.291625", "access (avg_nanosec_per_kmer)": "401.910198", "iterator (avg_nanosec_per_kmer)": "2.751313"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "965.885490", "negative lookup (avg_nanosec_per_kmer)": "493.409972", "access (avg_nanosec_per_kmer)": "406.213662", "iterator (avg_nanosec_per_kmer)": "2.833440"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.log new file mode 100644 index 0000000..da5bb91 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 663.62 +negative lookup (avg_nanosec_per_kmer) 511.176 +access (avg_nanosec_per_kmer) = 353.949 +iterator (avg_nanosec_per_kmer) = 2.71496 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 653.999 +negative lookup (avg_nanosec_per_kmer) 514.212 +access (avg_nanosec_per_kmer) = 355.109 +iterator (avg_nanosec_per_kmer) = 2.73407 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 652.55 +negative lookup (avg_nanosec_per_kmer) 509.841 +access (avg_nanosec_per_kmer) = 356.907 +iterator (avg_nanosec_per_kmer) = 2.7149 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 975.508 +negative lookup (avg_nanosec_per_kmer) 494.869 +access (avg_nanosec_per_kmer) = 401.943 +iterator (avg_nanosec_per_kmer) = 2.7829 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 970.38 +negative lookup (avg_nanosec_per_kmer) 499.292 +access (avg_nanosec_per_kmer) = 401.91 +iterator (avg_nanosec_per_kmer) = 2.75131 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 965.885 +negative lookup (avg_nanosec_per_kmer) 493.41 +access (avg_nanosec_per_kmer) = 406.214 +iterator (avg_nanosec_per_kmer) = 2.83344 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.json new file mode 100644 index 0000000..42fa182 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6839599", "step 2 (compute minimizer tuples)": "3491978", "step 3 (merging minimizer tuples)": "12783491", "step 4 (build mphf)": "7756104", "step 5 (replacing minimizer values with MPHF hashes)": "6184212", "step 6 (merging minimizers tuples)": "21981409", "step 7.1 (build sparse index)": "2224041", "step 7.2 (build skew index)": "20260695", "step 7 (build sparse and skew index)": "22870864", "total_build_time_in_microsec": "81907657", "index_size_in_bytes": "1839839296", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7203403", "step 2 (compute minimizer tuples)": "2538455", "step 3 (merging minimizer tuples)": "12638198", "step 4 (build mphf)": "3797248", "step 5 (replacing minimizer values with MPHF hashes)": "4113302", "step 6 (merging minimizers tuples)": "10491494", "step 7.1 (build sparse index)": "3086020", "step 7.2 (build skew index)": "23193958", "step 7 (build sparse and skew index)": "26628915", "total_build_time_in_microsec": "67411015", "index_size_in_bytes": "1481048960", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.log new file mode 100644 index 0000000..0c251ed --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash --canonical +2026-03-12 17:22:02: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.8396 [sec] (2.468 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.49198 [sec] (1.26004 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.7835 [sec] (4.61279 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 16 threads and 50 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.7561 [sec] (2.79871 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332522629501569.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.18421 [sec] (2.23151 [ns/kmer]) +=== step 6 (merging minimizers tuples): 21.9814 [sec] (7.93176 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2392820 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4082749/149769567 (2.72602%) +num_buckets_in_skew_index 35781/149769567 (0.0238907%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 16495406/173272792 (9.51991%) +num_minimizer_positions_of_buckets_in_skew_index 11126349/173272792 (6.42129%) +=== step 7.1 (build sparse index): 2.22404 [sec] (0.802522 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 26742724 + partition = 1: num kmers in buckets of size > 128 and <= 256: 24475836 + partition = 2: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 15443443 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 12315344 + partition = 7: num kmers in buckets of size > 8192 and <= 284250: 28690575 +num kmers in skew index = 166030448 (5.99103%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26742724 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 26742724 kmers; bits/key = 2.56429 + built positions[0] for 26742724 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 24475836 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 24475836 kmers; bits/key = 2.62316 + built positions[1] for 24475836 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[2] for 21113117 kmers; bits/key = 2.6904 + built positions[2] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 19260150 kmers; bits/key = 2.59757 + built positions[3] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17989259 kmers; bits/key = 2.53715 + built positions[4] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 15443443 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[5] for 15443443 kmers; bits/key = 2.69254 + built positions[5] for 15443443 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 12315344 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[6] for 12315344 kmers; bits/key = 2.67895 + built positions[6] for 12315344 kmers; bits/key = 13 + lower = 8192; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 28690575 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[7] for 28690575 kmers; bits/key = 2.55295 + built positions[7] for 28690575 kmers; bits/key = 19 +=== step 7.2 (build skew index): 20.2607 [sec] (7.31086 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 22.8709 [sec] (8.25271 [ns/kmer]) +=== total time: 81.9077 [sec] (29.5555 [ns/kmer]) +total index size: 1839839296 [B] -- 1839.84 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.87406% + strings_offsets: 0.11255 [bits/kmer] -- 2.11915% + control_codewords: 1.78341 [bits/kmer] -- 33.579% + mid_load_buckets: 0.19047 [bits/kmer] -- 3.58627% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.45665e-05% + strings: 2.11826 [bits/kmer] -- 39.8836% + skew_index: 0.95376 [bits/kmer] -- 17.9579% + weights: 5.31156e-07 [bits/kmer] -- 1.00009e-05% + -------------- + total: 5.31109 [bits/kmer] +2026-03-12 17:23:24: saving data structure to disk... +2026-03-12 17:23:25: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash --canonical +2026-03-12 17:23:25: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.2034 [sec] (4.72384 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.53845 [sec] (1.66467 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.6382 [sec] (8.28786 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 16 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.79725 [sec] (2.49016 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332605555170764.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.1133 [sec] (2.69742 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.4915 [sec] (6.8801 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6384394 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 12190335/69577229 (17.5206%) +num_buckets_in_skew_index 86973/69577229 (0.125002%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 57511599/126350163 (45.5176%) +num_minimizer_positions_of_buckets_in_skew_index 11538643/126350163 (9.13227%) +=== step 7.1 (build sparse index): 3.08602 [sec] (2.02375 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 77399356 + partition = 1: num kmers in buckets of size > 128 and <= 256: 26776297 + partition = 2: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2527517 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2420664 + partition = 7: num kmers in buckets of size > 8192 and <= 245177: 10756294 +num kmers in skew index = 143622784 (9.41848%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 77399356 + building MPHF with 16 threads and 26 partitions (avg. partition size = 3000000)... + built mphs[0] for 77399356 kmers; bits/key = 2.54863 + built positions[0] for 77399356 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26776297 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26776297 kmers; bits/key = 2.54559 + built positions[1] for 26776297 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 13974034 kmers; bits/key = 2.56375 + built positions[2] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 6504754 kmers; bits/key = 2.78902 + built positions[3] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 3263868 kmers; bits/key = 3.30619 + built positions[4] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2527517 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2527517)... + built mphs[5] for 2527517 kmers; bits/key = 2.55992 + built positions[5] for 2527517 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2420664 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2420664)... + built mphs[6] for 2420664 kmers; bits/key = 2.55997 + built positions[6] for 2420664 kmers; bits/key = 13.0001 + lower = 8192; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 10756294 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 10756294 kmers; bits/key = 2.64819 + built positions[7] for 10756294 kmers; bits/key = 18 +=== step 7.2 (build skew index): 23.194 [sec] (15.2101 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 26.6289 [sec] (17.4627 [ns/kmer]) +=== total time: 67.411 [sec] (44.2067 [ns/kmer]) +total index size: 1481048960 [B] -- 1481.05 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.7032% + strings_offsets: 0.274587 [bits/kmer] -- 3.53397% + control_codewords: 1.5057 [bits/kmer] -- 19.3786% + mid_load_buckets: 1.20688 [bits/kmer] -- 15.5327% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.80953e-05% + strings: 3.35283 [bits/kmer] -- 43.1513% + skew_index: 1.2976 [bits/kmer] -- 16.7002% + weights: 9.65307e-07 [bits/kmer] -- 1.24236e-05% + -------------- + total: 7.76993 [bits/kmer] +2026-03-12 17:24:32: saving data structure to disk... +2026-03-12 17:24:33: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.time.log new file mode 100644 index 0000000..3b099ad --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.canon.sshash --canonical" + User time (seconds): 189.77 + System time (seconds): 22.03 + Percent of CPU this job got: 255% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:22.92 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7396956 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7927096 + Voluntary context switches: 2228 + Involuntary context switches: 2910 + Swaps: 0 + File system inputs: 80 + File system outputs: 28147472 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.canon.sshash --canonical" + User time (seconds): 149.21 + System time (seconds): 17.24 + Percent of CPU this job got: 243% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:08.25 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 8151536 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6721856 + Voluntary context switches: 2589 + Involuntary context switches: 2367 + Swaps: 0 + File system inputs: 64 + File system outputs: 19280104 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.json new file mode 100644 index 0000000..7f281fd --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "829.975591", "negative lookup (avg_nanosec_per_kmer)": "862.257955", "access (avg_nanosec_per_kmer)": "356.048627", "iterator (avg_nanosec_per_kmer)": "2.728972"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "829.677400", "negative lookup (avg_nanosec_per_kmer)": "863.940807", "access (avg_nanosec_per_kmer)": "359.940523", "iterator (avg_nanosec_per_kmer)": "2.716083"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "831.392323", "negative lookup (avg_nanosec_per_kmer)": "866.251606", "access (avg_nanosec_per_kmer)": "357.273991", "iterator (avg_nanosec_per_kmer)": "2.712411"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1097.495626", "negative lookup (avg_nanosec_per_kmer)": "841.972457", "access (avg_nanosec_per_kmer)": "418.475003", "iterator (avg_nanosec_per_kmer)": "2.907694"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1102.100809", "negative lookup (avg_nanosec_per_kmer)": "842.347128", "access (avg_nanosec_per_kmer)": "408.866337", "iterator (avg_nanosec_per_kmer)": "2.767597"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1095.947192", "negative lookup (avg_nanosec_per_kmer)": "833.498418", "access (avg_nanosec_per_kmer)": "404.094169", "iterator (avg_nanosec_per_kmer)": "2.836015"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.log new file mode 100644 index 0000000..2eef06b --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 829.976 +negative lookup (avg_nanosec_per_kmer) 862.258 +access (avg_nanosec_per_kmer) = 356.049 +iterator (avg_nanosec_per_kmer) = 2.72897 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 829.677 +negative lookup (avg_nanosec_per_kmer) 863.941 +access (avg_nanosec_per_kmer) = 359.941 +iterator (avg_nanosec_per_kmer) = 2.71608 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 831.392 +negative lookup (avg_nanosec_per_kmer) 866.252 +access (avg_nanosec_per_kmer) = 357.274 +iterator (avg_nanosec_per_kmer) = 2.71241 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 1097.5 +negative lookup (avg_nanosec_per_kmer) 841.972 +access (avg_nanosec_per_kmer) = 418.475 +iterator (avg_nanosec_per_kmer) = 2.90769 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 1102.1 +negative lookup (avg_nanosec_per_kmer) 842.347 +access (avg_nanosec_per_kmer) = 408.866 +iterator (avg_nanosec_per_kmer) = 2.7676 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash +positive lookup (avg_nanosec_per_kmer) = 1095.95 +negative lookup (avg_nanosec_per_kmer) 833.498 +access (avg_nanosec_per_kmer) = 404.094 +iterator (avg_nanosec_per_kmer) = 2.83601 diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.json new file mode 100644 index 0000000..744a74d --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6816925", "step 2 (compute minimizer tuples)": "2039383", "step 3 (merging minimizer tuples)": "6333331", "step 4 (build mphf)": "6212807", "step 5 (replacing minimizer values with MPHF hashes)": "4791044", "step 6 (merging minimizers tuples)": "11668938", "step 7.1 (build sparse index)": "1688575", "step 7.2 (build skew index)": "17107834", "step 7 (build sparse and skew index)": "19097895", "total_build_time_in_microsec": "56960323", "index_size_in_bytes": "1647878160", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7230332", "step 2 (compute minimizer tuples)": "1497694", "step 3 (merging minimizer tuples)": "5522549", "step 4 (build mphf)": "3579811", "step 5 (replacing minimizer values with MPHF hashes)": "3431023", "step 6 (merging minimizers tuples)": "16528877", "step 7.1 (build sparse index)": "2436113", "step 7.2 (build skew index)": "13221766", "step 7 (build sparse and skew index)": "15953878", "total_build_time_in_microsec": "53744164", "index_size_in_bytes": "1294767218", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.log new file mode 100644 index 0000000..0f1cde5 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash +2026-03-12 17:18:53: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.81693 [sec] (2.45982 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.03938 [sec] (0.73589 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.33333 [sec] (2.28532 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 16 threads and 41 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.21281 [sec] (2.24183 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332333520383383.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.79104 [sec] (1.7288 [ns/kmer]) +=== step 6 (merging minimizers tuples): 11.6689 [sec] (4.21061 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1804032 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3097190/122838669 (2.52135%) +num_buckets_in_skew_index 28203/122838669 (0.0229594%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 12724460/140756047 (9.04008%) +num_minimizer_positions_of_buckets_in_skew_index 8318311/140756047 (5.90974%) +=== step 7.1 (build sparse index): 1.68857 [sec] (0.609304 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 25196923 + partition = 1: num kmers in buckets of size > 128 and <= 256: 21919654 + partition = 2: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 14085569 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 9296403 + partition = 7: num kmers in buckets of size > 8192 and <= 147936: 20255122 +num kmers in skew index = 145458128 (5.2487%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 25196923 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 25196923 kmers; bits/key = 2.56001 + built positions[0] for 25196923 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21919654 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21919654 kmers; bits/key = 2.60671 + built positions[1] for 21919654 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19634878 kmers; bits/key = 2.55594 + built positions[2] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 18051454 kmers; bits/key = 2.60116 + built positions[3] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17018125 kmers; bits/key = 2.58264 + built positions[4] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14085569 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[5] for 14085569 kmers; bits/key = 2.54674 + built positions[5] for 14085569 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 9296403 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[6] for 9296403 kmers; bits/key = 2.72206 + built positions[6] for 9296403 kmers; bits/key = 13 + lower = 8192; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 20255122 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[7] for 20255122 kmers; bits/key = 2.57505 + built positions[7] for 20255122 kmers; bits/key = 18 +=== step 7.2 (build skew index): 17.1078 [sec] (6.17318 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 19.0979 [sec] (6.89127 [ns/kmer]) +=== total time: 56.9603 [sec] (20.5535 [ns/kmer]) +total index size: 1647878160 [B] -- 1647.88 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.63218% + strings_offsets: 0.11255 [bits/kmer] -- 2.36601% + control_codewords: 1.46273 [bits/kmer] -- 30.7492% + mid_load_buckets: 0.146928 [bits/kmer] -- 3.08869% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.62633e-05% + strings: 2.11826 [bits/kmer] -- 44.5296% + skew_index: 0.791283 [bits/kmer] -- 16.6342% + weights: 5.31156e-07 [bits/kmer] -- 1.11659e-05% + -------------- + total: 4.75695 [bits/kmer] +2026-03-12 17:19:50: saving data structure to disk... +2026-03-12 17:19:51: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash +2026-03-12 17:19:51: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23033 [sec] (4.7415 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.49769 [sec] (0.982156 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.52255 [sec] (3.62157 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 16 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.57981 [sec] (2.34756 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332391407095420.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.43102 [sec] (2.24999 [ns/kmer]) +=== step 6 (merging minimizers tuples): 16.5289 [sec] (10.8393 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5263874 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9999764/61951224 (16.1414%) +num_buckets_in_skew_index 36489/61951224 (0.0588996%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 47622255/105337248 (45.2093%) +num_minimizer_positions_of_buckets_in_skew_index 5800022/105337248 (5.50615%) +=== step 7.1 (build sparse index): 2.43611 [sec] (1.59755 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 32078766 + partition = 1: num kmers in buckets of size > 128 and <= 256: 16092632 + partition = 2: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2981930 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2815305 + partition = 7: num kmers in buckets of size > 8192 and <= 144478: 7418130 +num kmers in skew index = 75796687 (4.97059%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 32078766 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 32078766 kmers; bits/key = 2.51437 + built positions[0] for 32078766 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16092632 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 16092632 kmers; bits/key = 2.65398 + built positions[1] for 16092632 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 8174536 kmers; bits/key = 2.61889 + built positions[2] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3454318 kmers; bits/key = 3.14686 + built positions[3] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[4] for 2781070 kmers; bits/key = 2.55987 + built positions[4] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2981930 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2981930)... + built mphs[5] for 2981930 kmers; bits/key = 2.55978 + built positions[5] for 2981930 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2815305 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2815305)... + built mphs[6] for 2815305 kmers; bits/key = 2.55984 + built positions[6] for 2815305 kmers; bits/key = 13.0001 + lower = 8192; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 7418130 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 7418130 kmers; bits/key = 2.72795 + built positions[7] for 7418130 kmers; bits/key = 18.0001 +=== step 7.2 (build skew index): 13.2218 [sec] (8.67056 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 15.9539 [sec] (10.4622 [ns/kmer]) +=== total time: 53.7442 [sec] (35.2443 [ns/kmer]) +total index size: 1294767218 [B] -- 1294.77 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.70902% + strings_offsets: 0.274587 [bits/kmer] -- 4.04242% + control_codewords: 1.34067 [bits/kmer] -- 19.737% + mid_load_buckets: 0.99935 [bits/kmer] -- 14.7122% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.06987e-05% + strings: 3.35283 [bits/kmer] -- 49.3596% + skew_index: 0.709128 [bits/kmer] -- 10.4396% + weights: 9.65307e-07 [bits/kmer] -- 1.4211e-05% + -------------- + total: 6.79265 [bits/kmer] +2026-03-12 17:20:45: saving data structure to disk... +2026-03-12 17:20:45: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.time.log new file mode 100644 index 0000000..25032c7 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l6/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l6.sshash" + User time (seconds): 146.61 + System time (seconds): 17.90 + Percent of CPU this job got: 284% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:57.88 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6205432 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6339599 + Voluntary context switches: 2212 + Involuntary context switches: 2688 + Swaps: 0 + File system inputs: 88 + File system outputs: 22409992 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l6.sshash" + User time (seconds): 87.80 + System time (seconds): 12.99 + Percent of CPU this job got: 185% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:54.48 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4943624 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4686473 + Voluntary context switches: 1946 + Involuntary context switches: 1905 + Swaps: 0 + File system inputs: 40 + File system outputs: 15833552 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.json new file mode 100644 index 0000000..05b35bf --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "571.221345", "negative lookup (avg_nanosec_per_kmer)": "420.102449", "access (avg_nanosec_per_kmer)": "353.218657", "iterator (avg_nanosec_per_kmer)": "2.526239"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "570.081601", "negative lookup (avg_nanosec_per_kmer)": "424.509480", "access (avg_nanosec_per_kmer)": "352.646825", "iterator (avg_nanosec_per_kmer)": "2.466853"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "572.676359", "negative lookup (avg_nanosec_per_kmer)": "422.447721", "access (avg_nanosec_per_kmer)": "350.743420", "iterator (avg_nanosec_per_kmer)": "2.492212"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "628.943813", "negative lookup (avg_nanosec_per_kmer)": "406.683978", "access (avg_nanosec_per_kmer)": "357.114238", "iterator (avg_nanosec_per_kmer)": "2.407449"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "623.637380", "negative lookup (avg_nanosec_per_kmer)": "400.798925", "access (avg_nanosec_per_kmer)": "362.159687", "iterator (avg_nanosec_per_kmer)": "2.397998"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "631.699977", "negative lookup (avg_nanosec_per_kmer)": "398.553714", "access (avg_nanosec_per_kmer)": "356.954086", "iterator (avg_nanosec_per_kmer)": "2.400431"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.log new file mode 100644 index 0000000..a803481 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 571.221 +negative lookup (avg_nanosec_per_kmer) 420.102 +access (avg_nanosec_per_kmer) = 353.219 +iterator (avg_nanosec_per_kmer) = 2.52624 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 570.082 +negative lookup (avg_nanosec_per_kmer) 424.509 +access (avg_nanosec_per_kmer) = 352.647 +iterator (avg_nanosec_per_kmer) = 2.46685 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 572.676 +negative lookup (avg_nanosec_per_kmer) 422.448 +access (avg_nanosec_per_kmer) = 350.743 +iterator (avg_nanosec_per_kmer) = 2.49221 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 628.944 +negative lookup (avg_nanosec_per_kmer) 406.684 +access (avg_nanosec_per_kmer) = 357.114 +iterator (avg_nanosec_per_kmer) = 2.40745 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 623.637 +negative lookup (avg_nanosec_per_kmer) 400.799 +access (avg_nanosec_per_kmer) = 362.16 +iterator (avg_nanosec_per_kmer) = 2.398 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 631.7 +negative lookup (avg_nanosec_per_kmer) 398.554 +access (avg_nanosec_per_kmer) = 356.954 +iterator (avg_nanosec_per_kmer) = 2.40043 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.json new file mode 100644 index 0000000..5ff39fd --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7243840", "step 2 (compute minimizer tuples)": "4997870", "step 3 (merging minimizer tuples)": "27705593", "step 4 (build mphf)": "23119700", "step 5 (replacing minimizer values with MPHF hashes)": "18874964", "step 6 (merging minimizers tuples)": "71657036", "step 7.1 (build sparse index)": "6520822", "step 7.2 (build skew index)": "6393544", "step 7 (build sparse and skew index)": "14075797", "total_build_time_in_microsec": "167674800", "index_size_in_bytes": "3117848620", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4585383", "step 2 (compute minimizer tuples)": "2055949", "step 3 (merging minimizer tuples)": "10195227", "step 4 (build mphf)": "7075808", "step 5 (replacing minimizer values with MPHF hashes)": "6739323", "step 6 (merging minimizers tuples)": "29009906", "step 7.1 (build sparse index)": "4218709", "step 7.2 (build skew index)": "3109976", "step 7 (build sparse and skew index)": "7837433", "total_build_time_in_microsec": "67499029", "index_size_in_bytes": "1299687230", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.log new file mode 100644 index 0000000..548caf2 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.log @@ -0,0 +1,301 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash --canonical +2026-03-12 17:29:54: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.24384 [sec] (2.89097 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.24.bin'... +=== step 2 (compute minimizer tuples): 4.99787 [sec] (1.99462 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 27.7056 [sec] (11.0571 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 16 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 23.1197 [sec] (9.22692 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.875 [sec] (7.53287 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332994727104533.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 71.657 [sec] (28.5979 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8750986 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 14305298/462224926 (3.09488%) +num_buckets_in_skew_index 21765/462224926 (0.00470875%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 56575138/511201278 (11.0671%) +num_minimizer_positions_of_buckets_in_skew_index 6728277/511201278 (1.31617%) +=== step 7.1 (build sparse index): 6.52082 [sec] (2.60242 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 10906495 + partition = 1: num kmers in buckets of size > 256 and <= 512: 7473094 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 1593261 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 729770 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 448103 + partition = 7: num kmers in buckets of size > 16384 and <= 22085: 58045 +num kmers in skew index = 28621390 (1.14226%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10906495 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[0] for 10906495 kmers; bits/key = 2.61744 + built positions[0] for 10906495 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 7473094 kmers; bits/key = 2.65359 + built positions[1] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 4774535 kmers; bits/key = 2.75085 + built positions[2] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[3] for 2638087 kmers; bits/key = 2.55989 + built positions[3] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1593261 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1593261)... + built mphs[4] for 1593261 kmers; bits/key = 2.56041 + built positions[4] for 1593261 kmers; bits/key = 12.0002 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 729770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 729770)... + built mphs[5] for 729770 kmers; bits/key = 2.56195 + built positions[5] for 729770 kmers; bits/key = 13.0005 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 448103 + building MPHF with 16 threads and 1 partitions (avg. partition size = 448103)... + built mphs[6] for 448103 kmers; bits/key = 2.42094 + built positions[6] for 448103 kmers; bits/key = 14.0008 + lower = 16384; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 58045 + building MPHF with 16 threads and 1 partitions (avg. partition size = 58045)... + built mphs[7] for 58045 kmers; bits/key = 2.45327 + built positions[7] for 58045 kmers; bits/key = 15.0063 +=== step 7.2 (build skew index): 6.39354 [sec] (2.55162 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 14.0758 [sec] (5.61756 [ns/kmer]) +=== total time: 167.675 [sec] (66.9179 [ns/kmer]) +total index size: 3117848620 [B] -- 3117.85 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.25627% + strings_offsets: 0.153147 [bits/kmer] -- 1.53846% + control_codewords: 6.08754 [bits/kmer] -- 61.1536% + mid_load_buckets: 0.722521 [bits/kmer] -- 7.25823% + begin_buckets_of_size: 1.673e-06 [bits/kmer] -- 1.68065e-05% + strings: 2.24545 [bits/kmer] -- 22.5572% + skew_index: 0.222604 [bits/kmer] -- 2.23621% + weights: 5.87466e-07 [bits/kmer] -- 5.90151e-06% + -------------- + total: 9.9545 [bits/kmer] +2026-03-12 17:32:42: saving data structure to disk... +2026-03-12 17:32:43: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash --canonical +2026-03-12 17:32:44: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.58538 [sec] (5.12729 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.05595 [sec] (2.29892 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.1952 [sec] (11.4001 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 16 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.07581 [sec] (7.91203 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333164122765135.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.73932 [sec] (7.53578 [ns/kmer]) +=== step 6 (merging minimizers tuples): 29.0099 [sec] (32.4383 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10738776 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 18135085/143418843 (12.6448%) +num_buckets_in_skew_index 4055/143418843 (0.00282738%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 66614705/193511241 (34.4242%) +num_minimizer_positions_of_buckets_in_skew_index 1616833/193511241 (0.835524%) +=== step 7.1 (build sparse index): 4.21871 [sec] (4.71728 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 1914946 + partition = 1: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 475654 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 388509 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 281237 + partition = 7: num kmers in buckets of size > 16384 and <= 30655: 178334 +num kmers in skew index = 5679238 (0.635041%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1914946 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1914946)... + built mphs[0] for 1914946 kmers; bits/key = 2.56017 + built positions[0] for 1914946 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1106165)... + built mphs[1] for 1106165 kmers; bits/key = 2.56093 + built positions[1] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 16 threads and 1 partitions (avg. partition size = 771672)... + built mphs[2] for 771672 kmers; bits/key = 2.41893 + built positions[2] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 16 threads and 1 partitions (avg. partition size = 562721)... + built mphs[3] for 562721 kmers; bits/key = 2.56295 + built positions[3] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 475654 + building MPHF with 16 threads and 1 partitions (avg. partition size = 475654)... + built mphs[4] for 475654 kmers; bits/key = 2.56365 + built positions[4] for 475654 kmers; bits/key = 12.0008 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 388509 + building MPHF with 16 threads and 1 partitions (avg. partition size = 388509)... + built mphs[5] for 388509 kmers; bits/key = 2.42185 + built positions[5] for 388509 kmers; bits/key = 13.0008 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 281237 + building MPHF with 16 threads and 1 partitions (avg. partition size = 281237)... + built mphs[6] for 281237 kmers; bits/key = 2.42364 + built positions[6] for 281237 kmers; bits/key = 14.0012 + lower = 16384; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 178334 + building MPHF with 16 threads and 1 partitions (avg. partition size = 178334)... + built mphs[7] for 178334 kmers; bits/key = 2.57135 + built positions[7] for 178334 kmers; bits/key = 15.0021 +=== step 7.2 (build skew index): 3.10998 [sec] (3.47751 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 7.83743 [sec] (8.76366 [ns/kmer]) +=== total time: 67.499 [sec] (75.4761 [ns/kmer]) +total index size: 1299687230 [B] -- 1299.69 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.89882% + strings_offsets: 0.333373 [bits/kmer] -- 2.86741% + control_codewords: 5.29215 [bits/kmer] -- 45.5189% + mid_load_buckets: 2.30911 [bits/kmer] -- 19.8611% + begin_buckets_of_size: 4.68741e-06 [bits/kmer] -- 4.03174e-05% + strings: 3.10303 [bits/kmer] -- 26.6898% + skew_index: 0.135325 [bits/kmer] -- 1.16396% + weights: 1.64596e-06 [bits/kmer] -- 1.41573e-05% + -------------- + total: 11.6263 [bits/kmer] +2026-03-12 17:33:51: saving data structure to disk... +2026-03-12 17:33:52: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.time.log new file mode 100644 index 0000000..7d7ab80 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.canon.sshash --canonical" + User time (seconds): 364.97 + System time (seconds): 58.52 + Percent of CPU this job got: 250% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:49.39 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18331376 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 16073429 + Voluntary context switches: 40082 + Involuntary context switches: 21962 + Swaps: 0 + File system inputs: 216 + File system outputs: 97147640 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.canon.sshash --canonical" + User time (seconds): 95.75 + System time (seconds): 20.92 + Percent of CPU this job got: 171% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:08.20 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7634604 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6404501 + Voluntary context switches: 2980 + Involuntary context switches: 2408 + Swaps: 0 + File system inputs: 96 + File system outputs: 28746136 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.json new file mode 100644 index 0000000..62c7102 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "763.478003", "negative lookup (avg_nanosec_per_kmer)": "791.970547", "access (avg_nanosec_per_kmer)": "353.335105", "iterator (avg_nanosec_per_kmer)": "2.500511"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "756.678479", "negative lookup (avg_nanosec_per_kmer)": "796.875944", "access (avg_nanosec_per_kmer)": "353.392470", "iterator (avg_nanosec_per_kmer)": "2.518150"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "756.498865", "negative lookup (avg_nanosec_per_kmer)": "787.928969", "access (avg_nanosec_per_kmer)": "350.313084", "iterator (avg_nanosec_per_kmer)": "2.526643"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "779.335025", "negative lookup (avg_nanosec_per_kmer)": "735.946245", "access (avg_nanosec_per_kmer)": "356.489776", "iterator (avg_nanosec_per_kmer)": "2.402227"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "780.278066", "negative lookup (avg_nanosec_per_kmer)": "741.001614", "access (avg_nanosec_per_kmer)": "365.232186", "iterator (avg_nanosec_per_kmer)": "2.424478"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "777.888780", "negative lookup (avg_nanosec_per_kmer)": "743.744486", "access (avg_nanosec_per_kmer)": "358.016299", "iterator (avg_nanosec_per_kmer)": "2.401443"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.log new file mode 100644 index 0000000..18413ef --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 763.478 +negative lookup (avg_nanosec_per_kmer) 791.971 +access (avg_nanosec_per_kmer) = 353.335 +iterator (avg_nanosec_per_kmer) = 2.50051 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 756.678 +negative lookup (avg_nanosec_per_kmer) 796.876 +access (avg_nanosec_per_kmer) = 353.392 +iterator (avg_nanosec_per_kmer) = 2.51815 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 756.499 +negative lookup (avg_nanosec_per_kmer) 787.929 +access (avg_nanosec_per_kmer) = 350.313 +iterator (avg_nanosec_per_kmer) = 2.52664 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 779.335 +negative lookup (avg_nanosec_per_kmer) 735.946 +access (avg_nanosec_per_kmer) = 356.49 +iterator (avg_nanosec_per_kmer) = 2.40223 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 780.278 +negative lookup (avg_nanosec_per_kmer) 741.002 +access (avg_nanosec_per_kmer) = 365.232 +iterator (avg_nanosec_per_kmer) = 2.42448 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 777.889 +negative lookup (avg_nanosec_per_kmer) 743.744 +access (avg_nanosec_per_kmer) = 358.016 +iterator (avg_nanosec_per_kmer) = 2.40144 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.json new file mode 100644 index 0000000..8a53c85 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7227907", "step 2 (compute minimizer tuples)": "3594401", "step 3 (merging minimizer tuples)": "20080483", "step 4 (build mphf)": "18902530", "step 5 (replacing minimizer values with MPHF hashes)": "15018043", "step 6 (merging minimizers tuples)": "43839240", "step 7.1 (build sparse index)": "5003957", "step 7.2 (build skew index)": "4884161", "step 7 (build sparse and skew index)": "10794311", "total_build_time_in_microsec": "119456915", "index_size_in_bytes": "2701933856", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4563726", "step 2 (compute minimizer tuples)": "1383845", "step 3 (merging minimizer tuples)": "8079324", "step 4 (build mphf)": "6290149", "step 5 (replacing minimizer values with MPHF hashes)": "5432405", "step 6 (merging minimizers tuples)": "25243067", "step 7.1 (build sparse index)": "3147844", "step 7.2 (build skew index)": "2292028", "step 7 (build sparse and skew index)": "5847681", "total_build_time_in_microsec": "56840197", "index_size_in_bytes": "1134285972", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.log new file mode 100644 index 0000000..676a7e7 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.log @@ -0,0 +1,286 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash +2026-03-12 17:25:48: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.22791 [sec] (2.88461 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.18.bin'... +=== step 2 (compute minimizer tuples): 3.5944 [sec] (1.4345 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 20.0805 [sec] (8.01399 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 16 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 18.9025 [sec] (7.54388 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332748953933961.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 15.018 [sec] (5.9936 [ns/kmer]) +=== step 6 (merging minimizers tuples): 43.8392 [sec] (17.496 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6542948 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10844427/386687326 (2.80444%) +num_buckets_in_skew_index 14697/386687326 (0.00380075%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 42855415/423023926 (10.1307%) +num_minimizer_positions_of_buckets_in_skew_index 4340309/423023926 (1.02602%) +=== step 7.1 (build sparse index): 5.00396 [sec] (1.99705 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 8389556 + partition = 1: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 1008178 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 375770 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 101047 + partition = 7: num kmers in buckets of size > 16384 and <= 22972: 106463 +num kmers in skew index = 20256533 (0.808425%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8389556 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 8389556 kmers; bits/key = 2.61352 + built positions[0] for 8389556 kmers; bits/key = 8.00004 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 5343660 kmers; bits/key = 2.66261 + built positions[1] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 3076413 kmers; bits/key = 3.34298 + built positions[2] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[3] for 1855446 kmers; bits/key = 2.56022 + built positions[3] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1008178 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1008178)... + built mphs[4] for 1008178 kmers; bits/key = 2.41829 + built positions[4] for 1008178 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 375770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 375770)... + built mphs[5] for 375770 kmers; bits/key = 2.42182 + built positions[5] for 375770 kmers; bits/key = 13.001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 101047 + building MPHF with 16 threads and 1 partitions (avg. partition size = 101047)... + built mphs[6] for 101047 kmers; bits/key = 2.43752 + built positions[6] for 101047 kmers; bits/key = 14.0038 + lower = 16384; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 106463 + building MPHF with 16 threads and 1 partitions (avg. partition size = 106463)... + built mphs[7] for 106463 kmers; bits/key = 2.436 + built positions[7] for 106463 kmers; bits/key = 15.0034 +=== step 7.2 (build skew index): 4.88416 [sec] (1.94924 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 10.7943 [sec] (4.30794 [ns/kmer]) +=== total time: 119.457 [sec] (47.6745 [ns/kmer]) +total index size: 2701933856 [B] -- 2701.93 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 5.05644% + strings_offsets: 0.153147 [bits/kmer] -- 1.77528% + control_codewords: 5.0927 [bits/kmer] -- 59.0349% + mid_load_buckets: 0.547306 [bits/kmer] -- 6.34441% + begin_buckets_of_size: 1.673e-06 [bits/kmer] -- 1.93935e-05% + strings: 2.24545 [bits/kmer] -- 26.0294% + skew_index: 0.151781 [bits/kmer] -- 1.75945% + weights: 5.87466e-07 [bits/kmer] -- 6.80994e-06% + -------------- + total: 8.62659 [bits/kmer] +2026-03-12 17:27:48: saving data structure to disk... +2026-03-12 17:27:49: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash +2026-03-12 17:27:49: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.56373 [sec] (5.10307 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.38384 [sec] (1.54739 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.07932 [sec] (9.03414 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 16 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.29015 [sec] (7.03352 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773332869837370009.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.43241 [sec] (6.07441 [ns/kmer]) +=== step 6 (merging minimizers tuples): 25.2431 [sec] (28.2263 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8383084 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 14064899/126246665 (11.1408%) +num_buckets_in_skew_index 2635/126246665 (0.00208718%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 48645454/162006751 (30.0268%) +num_minimizer_positions_of_buckets_in_skew_index 1182166/162006751 (0.729702%) +=== step 7.1 (build sparse index): 3.14784 [sec] (3.51986 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 1183762 + partition = 1: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 373731 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 338406 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 175304 + partition = 7: num kmers in buckets of size > 16384 and <= 36894: 213198 +num kmers in skew index = 4212443 (0.471027%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1183762 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1183762)... + built mphs[0] for 1183762 kmers; bits/key = 2.56081 + built positions[0] for 1183762 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 16 threads and 1 partitions (avg. partition size = 885561)... + built mphs[1] for 885561 kmers; bits/key = 2.56147 + built positions[1] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 16 threads and 1 partitions (avg. partition size = 591648)... + built mphs[2] for 591648 kmers; bits/key = 2.56263 + built positions[2] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 16 threads and 1 partitions (avg. partition size = 450833)... + built mphs[3] for 450833 kmers; bits/key = 2.42098 + built positions[3] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 373731 + building MPHF with 16 threads and 1 partitions (avg. partition size = 373731)... + built mphs[4] for 373731 kmers; bits/key = 2.56484 + built positions[4] for 373731 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 338406 + building MPHF with 16 threads and 1 partitions (avg. partition size = 338406)... + built mphs[5] for 338406 kmers; bits/key = 2.4226 + built positions[5] for 338406 kmers; bits/key = 13.001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 175304 + building MPHF with 16 threads and 1 partitions (avg. partition size = 175304)... + built mphs[6] for 175304 kmers; bits/key = 2.42833 + built positions[6] for 175304 kmers; bits/key = 14.0019 + lower = 16384; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 213198 + building MPHF with 16 threads and 1 partitions (avg. partition size = 213198)... + built mphs[7] for 213198 kmers; bits/key = 2.42629 + built positions[7] for 213198 kmers; bits/key = 16.0017 +=== step 7.2 (build skew index): 2.29203 [sec] (2.5629 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 5.84768 [sec] (6.53876 [ns/kmer]) +=== total time: 56.8402 [sec] (63.5576 [ns/kmer]) +total index size: 1134285972 [B] -- 1134.29 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.99836% + strings_offsets: 0.333373 [bits/kmer] -- 3.28553% + control_codewords: 4.51733 [bits/kmer] -- 44.5202% + mid_load_buckets: 1.68623 [bits/kmer] -- 16.6185% + begin_buckets_of_size: 4.68741e-06 [bits/kmer] -- 4.61965e-05% + strings: 3.10303 [bits/kmer] -- 30.5817% + skew_index: 0.101021 [bits/kmer] -- 0.995608% + weights: 1.64596e-06 [bits/kmer] -- 1.62217e-05% + -------------- + total: 10.1467 [bits/kmer] +2026-03-12 17:28:46: saving data structure to disk... +2026-03-12 17:28:47: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.time.log new file mode 100644 index 0000000..6c59cb7 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l7.sshash" + User time (seconds): 274.37 + System time (seconds): 43.46 + Percent of CPU this job got: 262% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:00.88 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 16066868 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 13426347 + Voluntary context switches: 35529 + Involuntary context switches: 11572 + Swaps: 0 + File system inputs: 136 + File system outputs: 63573136 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l7.sshash" + User time (seconds): 76.40 + System time (seconds): 17.35 + Percent of CPU this job got: 163% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:57.46 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6405084 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5298892 + Voluntary context switches: 2567 + Involuntary context switches: 1939 + Swaps: 0 + File system inputs: 64 + File system outputs: 23776768 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.json new file mode 100644 index 0000000..e8460a8 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "675.688753", "negative lookup (avg_nanosec_per_kmer)": "511.380770", "access (avg_nanosec_per_kmer)": "360.213333", "iterator (avg_nanosec_per_kmer)": "2.732724"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "687.422862", "negative lookup (avg_nanosec_per_kmer)": "510.102259", "access (avg_nanosec_per_kmer)": "358.694297", "iterator (avg_nanosec_per_kmer)": "2.776710"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "679.240561", "negative lookup (avg_nanosec_per_kmer)": "512.477941", "access (avg_nanosec_per_kmer)": "365.702577", "iterator (avg_nanosec_per_kmer)": "2.832330"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1118.780854", "negative lookup (avg_nanosec_per_kmer)": "498.236482", "access (avg_nanosec_per_kmer)": "411.784884", "iterator (avg_nanosec_per_kmer)": "2.755378"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1114.432906", "negative lookup (avg_nanosec_per_kmer)": "498.568979", "access (avg_nanosec_per_kmer)": "410.983312", "iterator (avg_nanosec_per_kmer)": "2.828148"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1122.148126", "negative lookup (avg_nanosec_per_kmer)": "498.534726", "access (avg_nanosec_per_kmer)": "406.729112", "iterator (avg_nanosec_per_kmer)": "2.761141"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.log new file mode 100644 index 0000000..926bb1f --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 675.689 +negative lookup (avg_nanosec_per_kmer) 511.381 +access (avg_nanosec_per_kmer) = 360.213 +iterator (avg_nanosec_per_kmer) = 2.73272 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 687.423 +negative lookup (avg_nanosec_per_kmer) 510.102 +access (avg_nanosec_per_kmer) = 358.694 +iterator (avg_nanosec_per_kmer) = 2.77671 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 679.241 +negative lookup (avg_nanosec_per_kmer) 512.478 +access (avg_nanosec_per_kmer) = 365.703 +iterator (avg_nanosec_per_kmer) = 2.83233 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1118.78 +negative lookup (avg_nanosec_per_kmer) 498.236 +access (avg_nanosec_per_kmer) = 411.785 +iterator (avg_nanosec_per_kmer) = 2.75538 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1114.43 +negative lookup (avg_nanosec_per_kmer) 498.569 +access (avg_nanosec_per_kmer) = 410.983 +iterator (avg_nanosec_per_kmer) = 2.82815 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1122.15 +negative lookup (avg_nanosec_per_kmer) 498.535 +access (avg_nanosec_per_kmer) = 406.729 +iterator (avg_nanosec_per_kmer) = 2.76114 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.json new file mode 100644 index 0000000..64e1375 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6811943", "step 2 (compute minimizer tuples)": "3522021", "step 3 (merging minimizer tuples)": "12207468", "step 4 (build mphf)": "7764790", "step 5 (replacing minimizer values with MPHF hashes)": "6114255", "step 6 (merging minimizers tuples)": "22007351", "step 7.1 (build sparse index)": "2235208", "step 7.2 (build skew index)": "16686472", "step 7 (build sparse and skew index)": "19306561", "total_build_time_in_microsec": "77734389", "index_size_in_bytes": "1802291682", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7287035", "step 2 (compute minimizer tuples)": "2534709", "step 3 (merging minimizer tuples)": "14122104", "step 4 (build mphf)": "3861328", "step 5 (replacing minimizer values with MPHF hashes)": "4213815", "step 6 (merging minimizers tuples)": "20455890", "step 7.1 (build sparse index)": "3075592", "step 7.2 (build skew index)": "12121821", "step 7 (build sparse and skew index)": "15554358", "total_build_time_in_microsec": "68029239", "index_size_in_bytes": "1387330274", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.log new file mode 100644 index 0000000..7a3c19f --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash --canonical +2026-03-12 17:38:01: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.81194 [sec] (2.45802 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.52202 [sec] (1.27088 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.2075 [sec] (4.40494 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 16 threads and 50 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.76479 [sec] (2.80184 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333481148092635.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.11425 [sec] (2.20626 [ns/kmer]) +=== step 6 (merging minimizers tuples): 22.0074 [sec] (7.94112 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2392820 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4102172/149769567 (2.73899%) +num_buckets_in_skew_index 16358/149769567 (0.0109221%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 18222540/173272792 (10.5167%) +num_minimizer_positions_of_buckets_in_skew_index 9399215/173272792 (5.42452%) +=== step 7.1 (build sparse index): 2.23521 [sec] (0.806551 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 24475836 + partition = 1: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 15443443 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 12315344 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 9521979 + partition = 7: num kmers in buckets of size > 16384 and <= 284250: 19168596 +num kmers in skew index = 139287724 (5.02605%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 24475836 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 24475836 kmers; bits/key = 2.62316 + built positions[0] for 24475836 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21113117 kmers; bits/key = 2.6904 + built positions[1] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19260150 kmers; bits/key = 2.59757 + built positions[2] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 17989259 kmers; bits/key = 2.53715 + built positions[3] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 15443443 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 15443443 kmers; bits/key = 2.69254 + built positions[4] for 15443443 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 12315344 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[5] for 12315344 kmers; bits/key = 2.67895 + built positions[5] for 12315344 kmers; bits/key = 13 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 9521979 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[6] for 9521979 kmers; bits/key = 2.75746 + built positions[6] for 9521979 kmers; bits/key = 14 + lower = 16384; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 19168596 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[7] for 19168596 kmers; bits/key = 2.60798 + built positions[7] for 19168596 kmers; bits/key = 19 +=== step 7.2 (build skew index): 16.6865 [sec] (6.02114 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 19.3066 [sec] (6.96657 [ns/kmer]) +=== total time: 77.7344 [sec] (28.0496 [ns/kmer]) +total index size: 1802291682 [B] -- 1802.29 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.93394% + strings_offsets: 0.11255 [bits/kmer] -- 2.1633% + control_codewords: 1.78341 [bits/kmer] -- 34.2786% + mid_load_buckets: 0.210413 [bits/kmer] -- 4.04431% + begin_buckets_of_size: 1.51264e-06 [bits/kmer] -- 2.90741e-05% + strings: 2.11826 [bits/kmer] -- 40.7145% + skew_index: 0.825427 [bits/kmer] -- 15.8653% + weights: 5.31156e-07 [bits/kmer] -- 1.02092e-05% + -------------- + total: 5.2027 [bits/kmer] +2026-03-12 17:39:18: saving data structure to disk... +2026-03-12 17:39:19: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash --canonical +2026-03-12 17:39:19: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.28704 [sec] (4.77868 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.53471 [sec] (1.66221 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 14.1221 [sec] (9.26098 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 16 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.86133 [sec] (2.53218 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333559882175018.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.21382 [sec] (2.76333 [ns/kmer]) +=== step 6 (merging minimizers tuples): 20.4559 [sec] (13.4145 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6384394 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 12259726/69577229 (17.6203%) +num_buckets_in_skew_index 17582/69577229 (0.0252698%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 63333441/126350163 (50.1253%) +num_minimizer_positions_of_buckets_in_skew_index 5716801/126350163 (4.52457%) +=== step 7.1 (build sparse index): 3.07559 [sec] (2.01691 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 26776297 + partition = 1: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 2527517 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 2420664 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 2527445 + partition = 7: num kmers in buckets of size > 16384 and <= 245177: 8228849 +num kmers in skew index = 66223428 (4.34279%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26776297 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 26776297 kmers; bits/key = 2.54559 + built positions[0] for 26776297 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[1] for 13974034 kmers; bits/key = 2.56375 + built positions[1] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 6504754 kmers; bits/key = 2.78902 + built positions[2] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3263868 kmers; bits/key = 3.30619 + built positions[3] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2527517 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2527517)... + built mphs[4] for 2527517 kmers; bits/key = 2.55992 + built positions[4] for 2527517 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2420664 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2420664)... + built mphs[5] for 2420664 kmers; bits/key = 2.55997 + built positions[5] for 2420664 kmers; bits/key = 13.0001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 2527445 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2527445)... + built mphs[6] for 2527445 kmers; bits/key = 2.55994 + built positions[6] for 2527445 kmers; bits/key = 14.0001 + lower = 16384; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 8228849 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 8228849 kmers; bits/key = 2.60437 + built positions[7] for 8228849 kmers; bits/key = 18 +=== step 7.2 (build skew index): 12.1218 [sec] (7.94923 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 15.5544 [sec] (10.2002 [ns/kmer]) +=== total time: 68.0292 [sec] (44.6121 [ns/kmer]) +total index size: 1387330274 [B] -- 1387.33 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.81826% + strings_offsets: 0.274587 [bits/kmer] -- 3.7727% + control_codewords: 1.5057 [bits/kmer] -- 20.6877% + mid_load_buckets: 1.32905 [bits/kmer] -- 18.2605% + begin_buckets_of_size: 2.74903e-06 [bits/kmer] -- 3.77704e-05% + strings: 3.35283 [bits/kmer] -- 46.0663% + skew_index: 0.683753 [bits/kmer] -- 9.39447% + weights: 9.65307e-07 [bits/kmer] -- 1.32629e-05% + -------------- + total: 7.27826 [bits/kmer] +2026-03-12 17:40:27: saving data structure to disk... +2026-03-12 17:40:28: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.time.log new file mode 100644 index 0000000..590d708 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.canon.sshash --canonical" + User time (seconds): 170.56 + System time (seconds): 20.91 + Percent of CPU this job got: 243% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:18.73 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7409000 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7263438 + Voluntary context switches: 2170 + Involuntary context switches: 2452 + Swaps: 0 + File system inputs: 64 + File system outputs: 28074128 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.canon.sshash --canonical" + User time (seconds): 100.71 + System time (seconds): 14.71 + Percent of CPU this job got: 167% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:08.79 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5461152 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5197457 + Voluntary context switches: 2040 + Involuntary context switches: 1899 + Swaps: 0 + File system inputs: 56 + File system outputs: 19097048 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.json new file mode 100644 index 0000000..8938fe0 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "853.650238", "negative lookup (avg_nanosec_per_kmer)": "875.717349", "access (avg_nanosec_per_kmer)": "357.148332", "iterator (avg_nanosec_per_kmer)": "2.754030"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "853.266120", "negative lookup (avg_nanosec_per_kmer)": "859.583139", "access (avg_nanosec_per_kmer)": "358.279253", "iterator (avg_nanosec_per_kmer)": "2.739023"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "860.193761", "negative lookup (avg_nanosec_per_kmer)": "856.380349", "access (avg_nanosec_per_kmer)": "360.446480", "iterator (avg_nanosec_per_kmer)": "2.718255"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1156.942116", "negative lookup (avg_nanosec_per_kmer)": "840.198612", "access (avg_nanosec_per_kmer)": "404.566023", "iterator (avg_nanosec_per_kmer)": "2.774395"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1156.656581", "negative lookup (avg_nanosec_per_kmer)": "835.752406", "access (avg_nanosec_per_kmer)": "407.137029", "iterator (avg_nanosec_per_kmer)": "2.753035"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1162.872687", "negative lookup (avg_nanosec_per_kmer)": "839.504214", "access (avg_nanosec_per_kmer)": "408.550252", "iterator (avg_nanosec_per_kmer)": "2.828713"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.log new file mode 100644 index 0000000..d95e600 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 853.65 +negative lookup (avg_nanosec_per_kmer) 875.717 +access (avg_nanosec_per_kmer) = 357.148 +iterator (avg_nanosec_per_kmer) = 2.75403 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 853.266 +negative lookup (avg_nanosec_per_kmer) 859.583 +access (avg_nanosec_per_kmer) = 358.279 +iterator (avg_nanosec_per_kmer) = 2.73902 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 860.194 +negative lookup (avg_nanosec_per_kmer) 856.38 +access (avg_nanosec_per_kmer) = 360.446 +iterator (avg_nanosec_per_kmer) = 2.71826 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 1156.94 +negative lookup (avg_nanosec_per_kmer) 840.199 +access (avg_nanosec_per_kmer) = 404.566 +iterator (avg_nanosec_per_kmer) = 2.77439 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 1156.66 +negative lookup (avg_nanosec_per_kmer) 835.752 +access (avg_nanosec_per_kmer) = 407.137 +iterator (avg_nanosec_per_kmer) = 2.75303 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash +positive lookup (avg_nanosec_per_kmer) = 1162.87 +negative lookup (avg_nanosec_per_kmer) 839.504 +access (avg_nanosec_per_kmer) = 408.55 +iterator (avg_nanosec_per_kmer) = 2.82871 diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.json new file mode 100644 index 0000000..2c569ec --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6810895", "step 2 (compute minimizer tuples)": "2045567", "step 3 (merging minimizer tuples)": "6289609", "step 4 (build mphf)": "6240282", "step 5 (replacing minimizer values with MPHF hashes)": "4765747", "step 6 (merging minimizers tuples)": "12080937", "step 7.1 (build sparse index)": "1657535", "step 7.2 (build skew index)": "14076156", "step 7 (build sparse and skew index)": "16039643", "total_build_time_in_microsec": "54272680", "index_size_in_bytes": "1615016816", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7237655", "step 2 (compute minimizer tuples)": "1491381", "step 3 (merging minimizer tuples)": "5471044", "step 4 (build mphf)": "3526173", "step 5 (replacing minimizer values with MPHF hashes)": "3424733", "step 6 (merging minimizers tuples)": "15918602", "step 7.1 (build sparse index)": "2456094", "step 7.2 (build skew index)": "9540503", "step 7 (build sparse and skew index)": "12278663", "total_build_time_in_microsec": "49348251", "index_size_in_bytes": "1256134332", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.log new file mode 100644 index 0000000..d38c718 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash +2026-03-12 17:34:58: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.8109 [sec] (2.45764 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.04557 [sec] (0.738121 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.28961 [sec] (2.26954 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 16 threads and 41 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.24028 [sec] (2.25174 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333298003519344.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.76575 [sec] (1.71967 [ns/kmer]) +=== step 6 (merging minimizers tuples): 12.0809 [sec] (4.35928 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1804032 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3112601/122838669 (2.53389%) +num_buckets_in_skew_index 12792/122838669 (0.0104137%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 14093861/140756047 (10.013%) +num_minimizer_positions_of_buckets_in_skew_index 6948910/140756047 (4.93685%) +=== step 7.1 (build sparse index): 1.65753 [sec] (0.598104 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 21919654 + partition = 1: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 14085569 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 9296403 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 6145851 + partition = 7: num kmers in buckets of size > 16384 and <= 147936: 14109271 +num kmers in skew index = 120261205 (4.3395%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21919654 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[0] for 21919654 kmers; bits/key = 2.60671 + built positions[0] for 21919654 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[1] for 19634878 kmers; bits/key = 2.55594 + built positions[1] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 18051454 kmers; bits/key = 2.60116 + built positions[2] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 17018125 kmers; bits/key = 2.58264 + built positions[3] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14085569 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[4] for 14085569 kmers; bits/key = 2.54674 + built positions[4] for 14085569 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 9296403 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[5] for 9296403 kmers; bits/key = 2.72206 + built positions[5] for 9296403 kmers; bits/key = 13 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 6145851 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[6] for 6145851 kmers; bits/key = 2.92757 + built positions[6] for 6145851 kmers; bits/key = 14.0001 + lower = 16384; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 14109271 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[7] for 14109271 kmers; bits/key = 2.60389 + built positions[7] for 14109271 kmers; bits/key = 18 +=== step 7.2 (build skew index): 14.0762 [sec] (5.07923 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 16.0396 [sec] (5.78773 [ns/kmer]) +=== total time: 54.2727 [sec] (19.5837 [ns/kmer]) +total index size: 1615016816 [B] -- 1615.02 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.68573% + strings_offsets: 0.11255 [bits/kmer] -- 2.41416% + control_codewords: 1.46273 [bits/kmer] -- 31.3749% + mid_load_buckets: 0.16274 [bits/kmer] -- 3.49071% + begin_buckets_of_size: 1.51264e-06 [bits/kmer] -- 3.24455e-05% + strings: 2.11826 [bits/kmer] -- 45.4357% + skew_index: 0.680609 [bits/kmer] -- 14.5988% + weights: 5.31156e-07 [bits/kmer] -- 1.13931e-05% + -------------- + total: 4.66209 [bits/kmer] +2026-03-12 17:35:52: saving data structure to disk... +2026-03-12 17:35:53: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash +2026-03-12 17:35:53: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23766 [sec] (4.7463 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.49138 [sec] (0.978016 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.47104 [sec] (3.5878 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 16 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.52617 [sec] (2.31239 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333353187284617.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.42473 [sec] (2.24587 [ns/kmer]) +=== step 6 (merging minimizers tuples): 15.9186 [sec] (10.4391 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5263874 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10026416/61951224 (16.1844%) +num_buckets_in_skew_index 9837/61951224 (0.0158786%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 49904590/105337248 (47.376%) +num_minimizer_positions_of_buckets_in_skew_index 3517687/105337248 (3.33945%) +=== step 7.1 (build sparse index): 2.45609 [sec] (1.61065 [ns/kmer]) + partition = 0: num kmers in buckets of size > 128 and <= 256: 16092632 + partition = 1: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 2: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 3: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 4: num kmers in buckets of size > 2048 and <= 4096: 2981930 + partition = 5: num kmers in buckets of size > 4096 and <= 8192: 2815305 + partition = 6: num kmers in buckets of size > 8192 and <= 16384: 1071910 + partition = 7: num kmers in buckets of size > 16384 and <= 144478: 6346220 +num kmers in skew index = 43717921 (2.86693%) + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16092632 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[0] for 16092632 kmers; bits/key = 2.65398 + built positions[0] for 16092632 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 8174536 kmers; bits/key = 2.61889 + built positions[1] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 3454318 kmers; bits/key = 3.14686 + built positions[2] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[3] for 2781070 kmers; bits/key = 2.55987 + built positions[3] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2981930 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2981930)... + built mphs[4] for 2981930 kmers; bits/key = 2.55978 + built positions[4] for 2981930 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2815305 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2815305)... + built mphs[5] for 2815305 kmers; bits/key = 2.55984 + built positions[5] for 2815305 kmers; bits/key = 13.0001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 1071910 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1071910)... + built mphs[6] for 1071910 kmers; bits/key = 2.41822 + built positions[6] for 1071910 kmers; bits/key = 14.0003 + lower = 16384; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 6346220 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 6346220 kmers; bits/key = 2.84827 + built positions[7] for 6346220 kmers; bits/key = 18.0001 +=== step 7.2 (build skew index): 9.5405 [sec] (6.25646 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 12.2787 [sec] (8.05209 [ns/kmer]) +=== total time: 49.3483 [sec] (32.3615 [ns/kmer]) +total index size: 1256134332 [B] -- 1256.13 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.76158% + strings_offsets: 0.274587 [bits/kmer] -- 4.16674% + control_codewords: 1.34067 [bits/kmer] -- 20.3441% + mid_load_buckets: 1.04724 [bits/kmer] -- 15.8915% + begin_buckets_of_size: 2.74903e-06 [bits/kmer] -- 4.17153e-05% + strings: 3.35283 [bits/kmer] -- 50.8777% + skew_index: 0.458555 [bits/kmer] -- 6.95837% + weights: 9.65307e-07 [bits/kmer] -- 1.46481e-05% + -------------- + total: 6.58997 [bits/kmer] +2026-03-12 17:36:42: saving data structure to disk... +2026-03-12 17:36:43: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.time.log new file mode 100644 index 0000000..1f0a3fa --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l7/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l7.sshash" + User time (seconds): 131.32 + System time (seconds): 17.30 + Percent of CPU this job got: 269% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:55.18 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6219252 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5950770 + Voluntary context switches: 2493 + Involuntary context switches: 2525 + Swaps: 0 + File system inputs: 64 + File system outputs: 22345800 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l7.sshash" + User time (seconds): 69.50 + System time (seconds): 12.05 + Percent of CPU this job got: 162% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:50.05 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4517020 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4066169 + Voluntary context switches: 1915 + Involuntary context switches: 1543 + Swaps: 0 + File system inputs: 64 + File system outputs: 15758096 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.json new file mode 100644 index 0000000..ec48797 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "584.091078", "negative lookup (avg_nanosec_per_kmer)": "421.262564", "access (avg_nanosec_per_kmer)": "348.373548", "iterator (avg_nanosec_per_kmer)": "2.491848"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "588.177869", "negative lookup (avg_nanosec_per_kmer)": "424.866713", "access (avg_nanosec_per_kmer)": "346.666917", "iterator (avg_nanosec_per_kmer)": "2.534373"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "587.648345", "negative lookup (avg_nanosec_per_kmer)": "422.484274", "access (avg_nanosec_per_kmer)": "349.915632", "iterator (avg_nanosec_per_kmer)": "2.493485"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "641.312019", "negative lookup (avg_nanosec_per_kmer)": "399.265367", "access (avg_nanosec_per_kmer)": "363.958299", "iterator (avg_nanosec_per_kmer)": "2.412303"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "627.135569", "negative lookup (avg_nanosec_per_kmer)": "398.825227", "access (avg_nanosec_per_kmer)": "354.602662", "iterator (avg_nanosec_per_kmer)": "2.433360"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "632.245169", "negative lookup (avg_nanosec_per_kmer)": "397.198909", "access (avg_nanosec_per_kmer)": "358.876440", "iterator (avg_nanosec_per_kmer)": "2.404691"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.log new file mode 100644 index 0000000..20ddd96 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 584.091 +negative lookup (avg_nanosec_per_kmer) 421.263 +access (avg_nanosec_per_kmer) = 348.374 +iterator (avg_nanosec_per_kmer) = 2.49185 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 588.178 +negative lookup (avg_nanosec_per_kmer) 424.867 +access (avg_nanosec_per_kmer) = 346.667 +iterator (avg_nanosec_per_kmer) = 2.53437 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 587.648 +negative lookup (avg_nanosec_per_kmer) 422.484 +access (avg_nanosec_per_kmer) = 349.916 +iterator (avg_nanosec_per_kmer) = 2.49348 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 641.312 +negative lookup (avg_nanosec_per_kmer) 399.265 +access (avg_nanosec_per_kmer) = 363.958 +iterator (avg_nanosec_per_kmer) = 2.4123 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 627.136 +negative lookup (avg_nanosec_per_kmer) 398.825 +access (avg_nanosec_per_kmer) = 354.603 +iterator (avg_nanosec_per_kmer) = 2.43336 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 632.245 +negative lookup (avg_nanosec_per_kmer) 397.199 +access (avg_nanosec_per_kmer) = 358.876 +iterator (avg_nanosec_per_kmer) = 2.40469 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.json new file mode 100644 index 0000000..40e4aff --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7232936", "step 2 (compute minimizer tuples)": "5136428", "step 3 (merging minimizer tuples)": "20320835", "step 4 (build mphf)": "22946584", "step 5 (replacing minimizer values with MPHF hashes)": "18696106", "step 6 (merging minimizers tuples)": "69342737", "step 7.1 (build sparse index)": "6652525", "step 7.2 (build skew index)": "4779386", "step 7 (build sparse and skew index)": "12591108", "total_build_time_in_microsec": "156266734", "index_size_in_bytes": "3161152324", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4564334", "step 2 (compute minimizer tuples)": "2033489", "step 3 (merging minimizer tuples)": "10393469", "step 4 (build mphf)": "7101981", "step 5 (replacing minimizer values with MPHF hashes)": "6728974", "step 6 (merging minimizers tuples)": "31198034", "step 7.1 (build sparse index)": "4187956", "step 7.2 (build skew index)": "2010504", "step 7 (build sparse and skew index)": "6694128", "total_build_time_in_microsec": "68714409", "index_size_in_bytes": "1315087286", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.log new file mode 100644 index 0000000..8778c39 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.log @@ -0,0 +1,291 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash --canonical +2026-03-12 17:45:49: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23294 [sec] (2.88662 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.24.bin'... +=== step 2 (compute minimizer tuples): 5.13643 [sec] (2.04991 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 20.3208 [sec] (8.10991 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 16 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 22.9466 [sec] (9.15783 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.6961 [sec] (7.46149 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333949330707823.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 69.3427 [sec] (27.6742 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8750986 +bits_for_list_id = 24 +num_bits_for_control = 34 +num_buckets_larger_than_1_not_in_skew_index 14319755/462224926 (3.09801%) +num_buckets_in_skew_index 7308/462224926 (0.00158105%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 7 +num_minimizer_positions_of_buckets_larger_than_1 59097759/511201278 (11.5606%) +num_minimizer_positions_of_buckets_in_skew_index 4205656/511201278 (0.822701%) +=== step 7.1 (build sparse index): 6.65252 [sec] (2.65498 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 7473094 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 1593261 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 729770 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 448103 + partition = 6: num kmers in buckets of size > 16384 and <= 22085: 58045 +num kmers in skew index = 17714895 (0.70699%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 7473094 kmers; bits/key = 2.65359 + built positions[0] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 4774535 kmers; bits/key = 2.75085 + built positions[1] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[2] for 2638087 kmers; bits/key = 2.55989 + built positions[2] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1593261 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1593261)... + built mphs[3] for 1593261 kmers; bits/key = 2.56041 + built positions[3] for 1593261 kmers; bits/key = 12.0002 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 729770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 729770)... + built mphs[4] for 729770 kmers; bits/key = 2.56195 + built positions[4] for 729770 kmers; bits/key = 13.0005 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 448103 + building MPHF with 16 threads and 1 partitions (avg. partition size = 448103)... + built mphs[5] for 448103 kmers; bits/key = 2.42094 + built positions[5] for 448103 kmers; bits/key = 14.0008 + lower = 16384; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 58045 + building MPHF with 16 threads and 1 partitions (avg. partition size = 58045)... + built mphs[6] for 58045 kmers; bits/key = 2.45327 + built positions[6] for 58045 kmers; bits/key = 15.0063 +=== step 7.2 (build skew index): 4.77939 [sec] (1.90742 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 12.5911 [sec] (5.02503 [ns/kmer]) +=== total time: 156.267 [sec] (62.365 [ns/kmer]) +total index size: 3161152324 [B] -- 3161.15 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.18427% + strings_offsets: 0.153147 [bits/kmer] -- 1.51739% + control_codewords: 6.27201 [bits/kmer] -- 62.1437% + mid_load_buckets: 0.754737 [bits/kmer] -- 7.478% + begin_buckets_of_size: 3.30769e-06 [bits/kmer] -- 3.27729e-05% + strings: 2.24545 [bits/kmer] -- 22.2482% + skew_index: 0.144173 [bits/kmer] -- 1.42848% + weights: 5.87466e-07 [bits/kmer] -- 5.82066e-06% + -------------- + total: 10.0928 [bits/kmer] +2026-03-12 17:48:25: saving data structure to disk... +2026-03-12 17:48:27: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash --canonical +2026-03-12 17:48:27: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.56433 [sec] (5.10375 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.03349 [sec] (2.27381 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.3935 [sec] (11.6218 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 16 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.10198 [sec] (7.9413 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334107305514422.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.72897 [sec] (7.52421 [ns/kmer]) +=== step 6 (merging minimizers tuples): 31.198 [sec] (34.885 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10738776 +bits_for_list_id = 24 +num_bits_for_control = 34 +num_buckets_larger_than_1_not_in_skew_index 18137781/143418843 (12.6467%) +num_buckets_in_skew_index 1359/143418843 (0.000947574%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 7 +num_minimizer_positions_of_buckets_larger_than_1 67077563/193511241 (34.6634%) +num_minimizer_positions_of_buckets_in_skew_index 1153975/193511241 (0.596335%) +=== step 7.1 (build sparse index): 4.18796 [sec] (4.68289 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 475654 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 388509 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 281237 + partition = 6: num kmers in buckets of size > 16384 and <= 30655: 178334 +num kmers in skew index = 3764292 (0.420916%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1106165)... + built mphs[0] for 1106165 kmers; bits/key = 2.56093 + built positions[0] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 16 threads and 1 partitions (avg. partition size = 771672)... + built mphs[1] for 771672 kmers; bits/key = 2.41893 + built positions[1] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 16 threads and 1 partitions (avg. partition size = 562721)... + built mphs[2] for 562721 kmers; bits/key = 2.56295 + built positions[2] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 475654 + building MPHF with 16 threads and 1 partitions (avg. partition size = 475654)... + built mphs[3] for 475654 kmers; bits/key = 2.56365 + built positions[3] for 475654 kmers; bits/key = 12.0008 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 388509 + building MPHF with 16 threads and 1 partitions (avg. partition size = 388509)... + built mphs[4] for 388509 kmers; bits/key = 2.42185 + built positions[4] for 388509 kmers; bits/key = 13.0008 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 281237 + building MPHF with 16 threads and 1 partitions (avg. partition size = 281237)... + built mphs[5] for 281237 kmers; bits/key = 2.42364 + built positions[5] for 281237 kmers; bits/key = 14.0012 + lower = 16384; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 178334 + building MPHF with 16 threads and 1 partitions (avg. partition size = 178334)... + built mphs[6] for 178334 kmers; bits/key = 2.57135 + built positions[6] for 178334 kmers; bits/key = 15.0021 +=== step 7.2 (build skew index): 2.0105 [sec] (2.24811 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 6.69413 [sec] (7.48524 [ns/kmer]) +=== total time: 68.7144 [sec] (76.8351 [ns/kmer]) +total index size: 1315087286 [B] -- 1315.09 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.85316% + strings_offsets: 0.333373 [bits/kmer] -- 2.83383% + control_codewords: 5.45252 [bits/kmer] -- 46.349% + mid_load_buckets: 2.32515 [bits/kmer] -- 19.7649% + begin_buckets_of_size: 9.26748e-06 [bits/kmer] -- 7.8778e-05% + strings: 3.10303 [bits/kmer] -- 26.3773% + skew_index: 0.0966681 [bits/kmer] -- 0.821725% + weights: 1.64596e-06 [bits/kmer] -- 1.39915e-05% + -------------- + total: 11.764 [bits/kmer] +2026-03-12 17:49:36: saving data structure to disk... +2026-03-12 17:49:36: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.time.log new file mode 100644 index 0000000..5a47e80 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.canon.sshash --canonical" + User time (seconds): 365.10 + System time (seconds): 58.07 + Percent of CPU this job got: 267% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:37.97 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18317020 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 15962389 + Voluntary context switches: 30036 + Involuntary context switches: 22734 + Swaps: 0 + File system inputs: 192 + File system outputs: 97232216 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.canon.sshash --canonical" + User time (seconds): 95.00 + System time (seconds): 20.66 + Percent of CPU this job got: 166% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:09.41 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7644268 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6387877 + Voluntary context switches: 2806 + Involuntary context switches: 2127 + Swaps: 0 + File system inputs: 144 + File system outputs: 28776216 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.json new file mode 100644 index 0000000..9c3c7ac --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "767.285286", "negative lookup (avg_nanosec_per_kmer)": "785.792290", "access (avg_nanosec_per_kmer)": "347.817957", "iterator (avg_nanosec_per_kmer)": "2.470697"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "769.103333", "negative lookup (avg_nanosec_per_kmer)": "802.832249", "access (avg_nanosec_per_kmer)": "348.034867", "iterator (avg_nanosec_per_kmer)": "2.466030"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "778.784786", "negative lookup (avg_nanosec_per_kmer)": "795.253729", "access (avg_nanosec_per_kmer)": "352.007333", "iterator (avg_nanosec_per_kmer)": "2.537160"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "783.269610", "negative lookup (avg_nanosec_per_kmer)": "739.393952", "access (avg_nanosec_per_kmer)": "358.006087", "iterator (avg_nanosec_per_kmer)": "2.413896"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "783.172163", "negative lookup (avg_nanosec_per_kmer)": "737.640174", "access (avg_nanosec_per_kmer)": "365.162834", "iterator (avg_nanosec_per_kmer)": "2.394331"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "780.743322", "negative lookup (avg_nanosec_per_kmer)": "739.788801", "access (avg_nanosec_per_kmer)": "354.513698", "iterator (avg_nanosec_per_kmer)": "2.422217"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.log new file mode 100644 index 0000000..e6f2352 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 767.285 +negative lookup (avg_nanosec_per_kmer) 785.792 +access (avg_nanosec_per_kmer) = 347.818 +iterator (avg_nanosec_per_kmer) = 2.4707 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 769.103 +negative lookup (avg_nanosec_per_kmer) 802.832 +access (avg_nanosec_per_kmer) = 348.035 +iterator (avg_nanosec_per_kmer) = 2.46603 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 778.785 +negative lookup (avg_nanosec_per_kmer) 795.254 +access (avg_nanosec_per_kmer) = 352.007 +iterator (avg_nanosec_per_kmer) = 2.53716 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 783.27 +negative lookup (avg_nanosec_per_kmer) 739.394 +access (avg_nanosec_per_kmer) = 358.006 +iterator (avg_nanosec_per_kmer) = 2.4139 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 783.172 +negative lookup (avg_nanosec_per_kmer) 737.64 +access (avg_nanosec_per_kmer) = 365.163 +iterator (avg_nanosec_per_kmer) = 2.39433 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 780.743 +negative lookup (avg_nanosec_per_kmer) 739.789 +access (avg_nanosec_per_kmer) = 354.514 +iterator (avg_nanosec_per_kmer) = 2.42222 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.json new file mode 100644 index 0000000..323d91c --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7239122", "step 2 (compute minimizer tuples)": "3593876", "step 3 (merging minimizer tuples)": "20178860", "step 4 (build mphf)": "18984835", "step 5 (replacing minimizer values with MPHF hashes)": "15053654", "step 6 (merging minimizers tuples)": "43934302", "step 7.1 (build sparse index)": "4991951", "step 7.2 (build skew index)": "3347064", "step 7 (build sparse and skew index)": "9230656", "total_build_time_in_microsec": "118215305", "index_size_in_bytes": "2690803984", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4551933", "step 2 (compute minimizer tuples)": "1364443", "step 3 (merging minimizer tuples)": "8261816", "step 4 (build mphf)": "6327657", "step 5 (replacing minimizer values with MPHF hashes)": "5421177", "step 6 (merging minimizers tuples)": "23819215", "step 7.1 (build sparse index)": "3231403", "step 7.2 (build skew index)": "1625294", "step 7 (build sparse and skew index)": "5254098", "total_build_time_in_microsec": "55000339", "index_size_in_bytes": "1148491786", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.log new file mode 100644 index 0000000..24bccbf --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.log @@ -0,0 +1,281 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash +2026-03-12 17:41:46: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23912 [sec] (2.88909 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.18.bin'... +=== step 2 (compute minimizer tuples): 3.59388 [sec] (1.43429 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 20.1789 [sec] (8.05325 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 16 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 18.9848 [sec] (7.57672 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333706386778429.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 15.0537 [sec] (6.00782 [ns/kmer]) +=== step 6 (merging minimizers tuples): 43.9343 [sec] (17.5339 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6542948 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10854445/386687326 (2.80703%) +num_buckets_in_skew_index 4679/386687326 (0.00121002%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 7 +num_minimizer_positions_of_buckets_larger_than_1 44599043/423023926 (10.5429%) +num_minimizer_positions_of_buckets_in_skew_index 2596681/423023926 (0.613838%) +=== step 7.1 (build sparse index): 4.99195 [sec] (1.99226 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 1008178 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 375770 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 101047 + partition = 6: num kmers in buckets of size > 16384 and <= 22972: 106463 +num kmers in skew index = 11866977 (0.473603%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 5343660 kmers; bits/key = 2.66261 + built positions[0] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3076413 kmers; bits/key = 3.34298 + built positions[1] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[2] for 1855446 kmers; bits/key = 2.56022 + built positions[2] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1008178 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1008178)... + built mphs[3] for 1008178 kmers; bits/key = 2.41829 + built positions[3] for 1008178 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 375770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 375770)... + built mphs[4] for 375770 kmers; bits/key = 2.42182 + built positions[4] for 375770 kmers; bits/key = 13.001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 101047 + building MPHF with 16 threads and 1 partitions (avg. partition size = 101047)... + built mphs[5] for 101047 kmers; bits/key = 2.43752 + built positions[5] for 101047 kmers; bits/key = 14.0038 + lower = 16384; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 106463 + building MPHF with 16 threads and 1 partitions (avg. partition size = 106463)... + built mphs[6] for 106463 kmers; bits/key = 2.436 + built positions[6] for 106463 kmers; bits/key = 15.0034 +=== step 7.2 (build skew index): 3.34706 [sec] (1.33579 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 9.23066 [sec] (3.68389 [ns/kmer]) +=== total time: 118.215 [sec] (47.179 [ns/kmer]) +total index size: 2690803984 [B] -- 2690.8 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 5.07736% + strings_offsets: 0.153147 [bits/kmer] -- 1.78263% + control_codewords: 5.0927 [bits/kmer] -- 59.2791% + mid_load_buckets: 0.569574 [bits/kmer] -- 6.62985% + begin_buckets_of_size: 3.30769e-06 [bits/kmer] -- 3.85015e-05% + strings: 2.24545 [bits/kmer] -- 26.1371% + skew_index: 0.0939766 [bits/kmer] -- 1.09389% + weights: 5.87466e-07 [bits/kmer] -- 6.8381e-06% + -------------- + total: 8.59106 [bits/kmer] +2026-03-12 17:43:44: saving data structure to disk... +2026-03-12 17:43:45: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash +2026-03-12 17:43:46: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.55193 [sec] (5.08988 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.36444 [sec] (1.52569 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.26182 [sec] (9.2382 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 16 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.32766 [sec] (7.07546 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773333826031954160.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.42118 [sec] (6.06185 [ns/kmer]) +=== step 6 (merging minimizers tuples): 23.8192 [sec] (26.6342 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8383084 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 14066528/126246665 (11.1421%) +num_buckets_in_skew_index 1006/126246665 (0.000796853%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 48928943/162006751 (30.2018%) +num_minimizer_positions_of_buckets_in_skew_index 898677/162006751 (0.554716%) +=== step 7.1 (build sparse index): 3.2314 [sec] (3.61329 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 373731 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 338406 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 175304 + partition = 6: num kmers in buckets of size > 16384 and <= 32768: 104836 + partition = 7: num kmers in buckets of size > 32768 and <= 36894: 108362 +num kmers in skew index = 3028681 (0.338661%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 16 threads and 1 partitions (avg. partition size = 885561)... + built mphs[0] for 885561 kmers; bits/key = 2.56147 + built positions[0] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 16 threads and 1 partitions (avg. partition size = 591648)... + built mphs[1] for 591648 kmers; bits/key = 2.56263 + built positions[1] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 16 threads and 1 partitions (avg. partition size = 450833)... + built mphs[2] for 450833 kmers; bits/key = 2.42098 + built positions[2] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 373731 + building MPHF with 16 threads and 1 partitions (avg. partition size = 373731)... + built mphs[3] for 373731 kmers; bits/key = 2.56484 + built positions[3] for 373731 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 338406 + building MPHF with 16 threads and 1 partitions (avg. partition size = 338406)... + built mphs[4] for 338406 kmers; bits/key = 2.4226 + built positions[4] for 338406 kmers; bits/key = 13.001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 175304 + building MPHF with 16 threads and 1 partitions (avg. partition size = 175304)... + built mphs[5] for 175304 kmers; bits/key = 2.42833 + built positions[5] for 175304 kmers; bits/key = 14.0019 + lower = 16384; upper = 32768; num_bits_per_pos = 15; num_kmers_in_partition = 104836 + building MPHF with 16 threads and 1 partitions (avg. partition size = 104836)... + built mphs[6] for 104836 kmers; bits/key = 2.43672 + built positions[6] for 104836 kmers; bits/key = 15.0031 + lower = 32768; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 108362 + building MPHF with 16 threads and 1 partitions (avg. partition size = 108362)... + built mphs[7] for 108362 kmers; bits/key = 2.43584 + built positions[7] for 108362 kmers; bits/key = 16.0032 +=== step 7.2 (build skew index): 1.62529 [sec] (1.81737 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 5.2541 [sec] (5.87503 [ns/kmer]) +=== total time: 55.0003 [sec] (61.5003 [ns/kmer]) +total index size: 1148491786 [B] -- 1148.49 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.94891% + strings_offsets: 0.333373 [bits/kmer] -- 3.24489% + control_codewords: 4.6585 [bits/kmer] -- 45.3436% + mid_load_buckets: 1.69605 [bits/kmer] -- 16.5086% + begin_buckets_of_size: 9.26748e-06 [bits/kmer] -- 9.02053e-05% + strings: 3.10303 [bits/kmer] -- 30.2034% + skew_index: 0.0771007 [bits/kmer] -- 0.750462% + weights: 1.64596e-06 [bits/kmer] -- 1.6021e-05% + -------------- + total: 10.2738 [bits/kmer] +2026-03-12 17:44:41: saving data structure to disk... +2026-03-12 17:44:41: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.time.log new file mode 100644 index 0000000..4aabe8a --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k31.l8.sshash" + User time (seconds): 259.48 + System time (seconds): 44.52 + Percent of CPU this job got: 254% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:59.64 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 16073716 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 13305536 + Voluntary context switches: 40835 + Involuntary context switches: 7264 + Swaps: 0 + File system inputs: 184 + File system outputs: 63551408 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k31.l8.sshash" + User time (seconds): 76.69 + System time (seconds): 17.19 + Percent of CPU this job got: 168% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:55.62 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6406876 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5287424 + Voluntary context switches: 2676 + Involuntary context switches: 1815 + Swaps: 0 + File system inputs: 72 + File system outputs: 23804512 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.json b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.json new file mode 100644 index 0000000..eca6e1b --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "732.042764", "negative lookup (avg_nanosec_per_kmer)": "507.464542", "access (avg_nanosec_per_kmer)": "356.353327", "iterator (avg_nanosec_per_kmer)": "2.724046"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "736.899675", "negative lookup (avg_nanosec_per_kmer)": "514.279309", "access (avg_nanosec_per_kmer)": "356.310398", "iterator (avg_nanosec_per_kmer)": "2.723453"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "744.172899", "negative lookup (avg_nanosec_per_kmer)": "518.371224", "access (avg_nanosec_per_kmer)": "359.404171", "iterator (avg_nanosec_per_kmer)": "2.728790"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1214.817451", "negative lookup (avg_nanosec_per_kmer)": "498.832210", "access (avg_nanosec_per_kmer)": "406.164529", "iterator (avg_nanosec_per_kmer)": "2.763313"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1218.151549", "negative lookup (avg_nanosec_per_kmer)": "499.100114", "access (avg_nanosec_per_kmer)": "409.177891", "iterator (avg_nanosec_per_kmer)": "2.856331"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1217.751327", "negative lookup (avg_nanosec_per_kmer)": "499.075542", "access (avg_nanosec_per_kmer)": "405.840375", "iterator (avg_nanosec_per_kmer)": "2.769486"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.log b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.log new file mode 100644 index 0000000..82d9b24 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 732.043 +negative lookup (avg_nanosec_per_kmer) 507.465 +access (avg_nanosec_per_kmer) = 356.353 +iterator (avg_nanosec_per_kmer) = 2.72405 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 736.9 +negative lookup (avg_nanosec_per_kmer) 514.279 +access (avg_nanosec_per_kmer) = 356.31 +iterator (avg_nanosec_per_kmer) = 2.72345 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 744.173 +negative lookup (avg_nanosec_per_kmer) 518.371 +access (avg_nanosec_per_kmer) = 359.404 +iterator (avg_nanosec_per_kmer) = 2.72879 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1214.82 +negative lookup (avg_nanosec_per_kmer) 498.832 +access (avg_nanosec_per_kmer) = 406.165 +iterator (avg_nanosec_per_kmer) = 2.76331 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1218.15 +negative lookup (avg_nanosec_per_kmer) 499.1 +access (avg_nanosec_per_kmer) = 409.178 +iterator (avg_nanosec_per_kmer) = 2.85633 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1217.75 +negative lookup (avg_nanosec_per_kmer) 499.076 +access (avg_nanosec_per_kmer) = 405.84 +iterator (avg_nanosec_per_kmer) = 2.76949 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.json b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.json new file mode 100644 index 0000000..0d53297 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6805402", "step 2 (compute minimizer tuples)": "3518886", "step 3 (merging minimizer tuples)": "12908965", "step 4 (build mphf)": "7870968", "step 5 (replacing minimizer values with MPHF hashes)": "6256549", "step 6 (merging minimizers tuples)": "14586522", "step 7.1 (build sparse index)": "2254076", "step 7.2 (build skew index)": "14413433", "step 7 (build sparse and skew index)": "17063249", "total_build_time_in_microsec": "69010541", "index_size_in_bytes": "1765892698", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7221631", "step 2 (compute minimizer tuples)": "2540364", "step 3 (merging minimizer tuples)": "6840849", "step 4 (build mphf)": "3807668", "step 5 (replacing minimizer values with MPHF hashes)": "4137565", "step 6 (merging minimizers tuples)": "20885320", "step 7.1 (build sparse index)": "3099620", "step 7.2 (build skew index)": "8781924", "step 7 (build sparse and skew index)": "12236749", "total_build_time_in_microsec": "57670146", "index_size_in_bytes": "1351738536", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.log b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.log new file mode 100644 index 0000000..e910da8 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash --canonical +2026-03-12 17:53:43: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.8054 [sec] (2.45566 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.51889 [sec] (1.26975 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.909 [sec] (4.65806 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 16 threads and 50 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.87097 [sec] (2.84016 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334423802651364.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.25655 [sec] (2.25761 [ns/kmer]) +=== step 6 (merging minimizers tuples): 14.5865 [sec] (5.26339 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2392820 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4111166/149769567 (2.74499%) +num_buckets_in_skew_index 7364/149769567 (0.00491689%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 19817728/173272792 (11.4373%) +num_minimizer_positions_of_buckets_in_skew_index 7804027/173272792 (4.5039%) +=== step 7.1 (build sparse index): 2.25408 [sec] (0.813359 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 15443443 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 12315344 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 9521979 + partition = 6: num kmers in buckets of size > 16384 and <= 32768: 7689242 + partition = 7: num kmers in buckets of size > 32768 and <= 284250: 11479354 +num kmers in skew index = 114811888 (4.14287%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[0] for 21113117 kmers; bits/key = 2.6904 + built positions[0] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[1] for 19260150 kmers; bits/key = 2.59757 + built positions[1] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 17989259 kmers; bits/key = 2.53715 + built positions[2] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 15443443 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 15443443 kmers; bits/key = 2.69254 + built positions[3] for 15443443 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 12315344 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[4] for 12315344 kmers; bits/key = 2.67895 + built positions[4] for 12315344 kmers; bits/key = 13 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 9521979 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[5] for 9521979 kmers; bits/key = 2.75746 + built positions[5] for 9521979 kmers; bits/key = 14 + lower = 16384; upper = 32768; num_bits_per_pos = 15; num_kmers_in_partition = 7689242 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[6] for 7689242 kmers; bits/key = 2.64642 + built positions[6] for 7689242 kmers; bits/key = 15 + lower = 32768; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 11479354 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 11479354 kmers; bits/key = 2.54492 + built positions[7] for 11479354 kmers; bits/key = 19 +=== step 7.2 (build skew index): 14.4134 [sec] (5.20093 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 17.0632 [sec] (6.15709 [ns/kmer]) +=== total time: 69.0105 [sec] (24.9017 [ns/kmer]) +total index size: 1765892698 [B] -- 1765.89 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.99442% + strings_offsets: 0.11255 [bits/kmer] -- 2.20789% + control_codewords: 1.78341 [bits/kmer] -- 34.9851% + mid_load_buckets: 0.228833 [bits/kmer] -- 4.489% + begin_buckets_of_size: 2.99064e-06 [bits/kmer] -- 5.86672e-05% + strings: 2.11826 [bits/kmer] -- 41.5537% + skew_index: 0.701932 [bits/kmer] -- 13.7698% + weights: 5.31156e-07 [bits/kmer] -- 1.04197e-05% + -------------- + total: 5.09763 [bits/kmer] +2026-03-12 17:54:52: saving data structure to disk... +2026-03-12 17:54:53: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash --canonical +2026-03-12 17:54:53: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.22163 [sec] (4.73579 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.54036 [sec] (1.66592 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.84085 [sec] (4.48608 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 16 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.80767 [sec] (2.49699 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334493796299847.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.13757 [sec] (2.71333 [ns/kmer]) +=== step 6 (merging minimizers tuples): 20.8853 [sec] (13.6962 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6384394 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 12272535/69577229 (17.6387%) +num_buckets_in_skew_index 4773/69577229 (0.00686%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 65524580/126350163 (51.8595%) +num_minimizer_positions_of_buckets_in_skew_index 3525662/126350163 (2.79039%) +=== step 7.1 (build sparse index): 3.09962 [sec] (2.03267 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 2527517 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 2420664 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 2527445 + partition = 6: num kmers in buckets of size > 16384 and <= 32768: 1262486 + partition = 7: num kmers in buckets of size > 32768 and <= 245177: 6966363 +num kmers in skew index = 39447131 (2.58686%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[0] for 13974034 kmers; bits/key = 2.56375 + built positions[0] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 6504754 kmers; bits/key = 2.78902 + built positions[1] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 3263868 kmers; bits/key = 3.30619 + built positions[2] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2527517 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2527517)... + built mphs[3] for 2527517 kmers; bits/key = 2.55992 + built positions[3] for 2527517 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2420664 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2420664)... + built mphs[4] for 2420664 kmers; bits/key = 2.55997 + built positions[4] for 2420664 kmers; bits/key = 13.0001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 2527445 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2527445)... + built mphs[5] for 2527445 kmers; bits/key = 2.55994 + built positions[5] for 2527445 kmers; bits/key = 14.0001 + lower = 16384; upper = 32768; num_bits_per_pos = 15; num_kmers_in_partition = 1262486 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1262486)... + built mphs[6] for 1262486 kmers; bits/key = 2.56073 + built positions[6] for 1262486 kmers; bits/key = 15.0003 + lower = 32768; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 6966363 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 6966363 kmers; bits/key = 2.81632 + built positions[7] for 6966363 kmers; bits/key = 18 +=== step 7.2 (build skew index): 8.78192 [sec] (5.759 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 12.2367 [sec] (8.0246 [ns/kmer]) +=== total time: 57.6701 [sec] (37.8189 [ns/kmer]) +total index size: 1351738536 [B] -- 1351.74 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.86613% + strings_offsets: 0.274587 [bits/kmer] -- 3.87204% + control_codewords: 1.5057 [bits/kmer] -- 21.2324% + mid_load_buckets: 1.37503 [bits/kmer] -- 19.3897% + begin_buckets_of_size: 5.4351e-06 [bits/kmer] -- 7.6642e-05% + strings: 3.35283 [bits/kmer] -- 47.2793% + skew_index: 0.451047 [bits/kmer] -- 6.36037% + weights: 9.65307e-07 [bits/kmer] -- 1.36121e-05% + -------------- + total: 7.09153 [bits/kmer] +2026-03-12 17:55:51: saving data structure to disk... +2026-03-12 17:55:52: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.time.log b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.time.log new file mode 100644 index 0000000..5dc2ef5 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.canon.sshash --canonical" + User time (seconds): 159.64 + System time (seconds): 20.40 + Percent of CPU this job got: 257% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:09.99 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7401688 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7034573 + Voluntary context switches: 1907 + Involuntary context switches: 2663 + Swaps: 0 + File system inputs: 88 + File system outputs: 28003040 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.canon.sshash --canonical" + User time (seconds): 83.73 + System time (seconds): 13.86 + Percent of CPU this job got: 167% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:58.42 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5463756 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4696116 + Voluntary context switches: 1859 + Involuntary context switches: 1810 + Swaps: 0 + File system inputs: 72 + File system outputs: 19027520 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.json b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.json new file mode 100644 index 0000000..fe67375 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "896.532942", "negative lookup (avg_nanosec_per_kmer)": "867.414538", "access (avg_nanosec_per_kmer)": "355.167072", "iterator (avg_nanosec_per_kmer)": "2.731437"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "911.857515", "negative lookup (avg_nanosec_per_kmer)": "865.270283", "access (avg_nanosec_per_kmer)": "358.926339", "iterator (avg_nanosec_per_kmer)": "2.713315"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "902.880285", "negative lookup (avg_nanosec_per_kmer)": "861.712309", "access (avg_nanosec_per_kmer)": "360.404421", "iterator (avg_nanosec_per_kmer)": "2.744831"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1220.093930", "negative lookup (avg_nanosec_per_kmer)": "851.979027", "access (avg_nanosec_per_kmer)": "411.829261", "iterator (avg_nanosec_per_kmer)": "2.754139"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1217.188762", "negative lookup (avg_nanosec_per_kmer)": "839.179825", "access (avg_nanosec_per_kmer)": "406.840707", "iterator (avg_nanosec_per_kmer)": "2.767536"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1217.561753", "negative lookup (avg_nanosec_per_kmer)": "843.070124", "access (avg_nanosec_per_kmer)": "407.428862", "iterator (avg_nanosec_per_kmer)": "2.797190"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.log b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.log new file mode 100644 index 0000000..7e4e4b2 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 896.533 +negative lookup (avg_nanosec_per_kmer) 867.415 +access (avg_nanosec_per_kmer) = 355.167 +iterator (avg_nanosec_per_kmer) = 2.73144 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 911.858 +negative lookup (avg_nanosec_per_kmer) 865.27 +access (avg_nanosec_per_kmer) = 358.926 +iterator (avg_nanosec_per_kmer) = 2.71331 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 902.88 +negative lookup (avg_nanosec_per_kmer) 861.712 +access (avg_nanosec_per_kmer) = 360.404 +iterator (avg_nanosec_per_kmer) = 2.74483 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 1220.09 +negative lookup (avg_nanosec_per_kmer) 851.979 +access (avg_nanosec_per_kmer) = 411.829 +iterator (avg_nanosec_per_kmer) = 2.75414 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 1217.19 +negative lookup (avg_nanosec_per_kmer) 839.18 +access (avg_nanosec_per_kmer) = 406.841 +iterator (avg_nanosec_per_kmer) = 2.76754 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash +positive lookup (avg_nanosec_per_kmer) = 1217.56 +negative lookup (avg_nanosec_per_kmer) 843.07 +access (avg_nanosec_per_kmer) = 407.429 +iterator (avg_nanosec_per_kmer) = 2.79719 diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.json b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.json new file mode 100644 index 0000000..0788779 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6818838", "step 2 (compute minimizer tuples)": "2045847", "step 3 (merging minimizer tuples)": "6292820", "step 4 (build mphf)": "6267551", "step 5 (replacing minimizer values with MPHF hashes)": "4796710", "step 6 (merging minimizers tuples)": "11971315", "step 7.1 (build sparse index)": "1642127", "step 7.2 (build skew index)": "11705032", "step 7 (build sparse and skew index)": "13646570", "total_build_time_in_microsec": "51839651", "index_size_in_bytes": "1583231240", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7239718", "step 2 (compute minimizer tuples)": "1504710", "step 3 (merging minimizer tuples)": "5461216", "step 4 (build mphf)": "3499512", "step 5 (replacing minimizer values with MPHF hashes)": "3432166", "step 6 (merging minimizers tuples)": "15425407", "step 7.1 (build sparse index)": "2468104", "step 7.2 (build skew index)": "8905150", "step 7 (build sparse and skew index)": "11667378", "total_build_time_in_microsec": "48230107", "index_size_in_bytes": "1234200254", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.log b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.log new file mode 100644 index 0000000..a64564f --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash +2026-03-12 17:50:42: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.81884 [sec] (2.46051 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.04585 [sec] (0.738222 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.29282 [sec] (2.2707 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 16 threads and 41 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.26755 [sec] (2.26158 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334242591321268.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.79671 [sec] (1.73084 [ns/kmer]) +=== step 6 (merging minimizers tuples): 11.9713 [sec] (4.31972 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1804032 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3119564/122838669 (2.53956%) +num_buckets_in_skew_index 5829/122838669 (0.00474525%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 15319693/140756047 (10.8839%) +num_minimizer_positions_of_buckets_in_skew_index 5723078/140756047 (4.06596%) +=== step 7.1 (build sparse index): 1.64213 [sec] (0.592544 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 14085569 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 9296403 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 6145851 + partition = 6: num kmers in buckets of size > 16384 and <= 32768: 7836580 + partition = 7: num kmers in buckets of size > 32768 and <= 147936: 6272691 +num kmers in skew index = 98341551 (3.54855%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[0] for 19634878 kmers; bits/key = 2.55594 + built positions[0] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[1] for 18051454 kmers; bits/key = 2.60116 + built positions[1] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 17018125 kmers; bits/key = 2.58264 + built positions[2] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14085569 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 14085569 kmers; bits/key = 2.54674 + built positions[3] for 14085569 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 9296403 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[4] for 9296403 kmers; bits/key = 2.72206 + built positions[4] for 9296403 kmers; bits/key = 13 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 6145851 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[5] for 6145851 kmers; bits/key = 2.92757 + built positions[5] for 6145851 kmers; bits/key = 14.0001 + lower = 16384; upper = 32768; num_bits_per_pos = 15; num_kmers_in_partition = 7836580 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[6] for 7836580 kmers; bits/key = 2.60449 + built positions[6] for 7836580 kmers; bits/key = 15 + lower = 32768; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 6272691 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 6272691 kmers; bits/key = 2.87679 + built positions[7] for 6272691 kmers; bits/key = 18.0001 +=== step 7.2 (build skew index): 11.705 [sec] (4.22364 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 13.6466 [sec] (4.92422 [ns/kmer]) +=== total time: 51.8397 [sec] (18.7058 [ns/kmer]) +total index size: 1583231240 [B] -- 1583.23 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.73965% + strings_offsets: 0.11255 [bits/kmer] -- 2.46262% + control_codewords: 1.46273 [bits/kmer] -- 32.0048% + mid_load_buckets: 0.176894 [bits/kmer] -- 3.87049% + begin_buckets_of_size: 2.99064e-06 [bits/kmer] -- 6.54358e-05% + strings: 2.11826 [bits/kmer] -- 46.3479% + skew_index: 0.574697 [bits/kmer] -- 12.5745% + weights: 5.31156e-07 [bits/kmer] -- 1.16218e-05% + -------------- + total: 4.57034 [bits/kmer] +2026-03-12 17:51:34: saving data structure to disk... +2026-03-12 17:51:35: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash +2026-03-12 17:51:35: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23972 [sec] (4.74765 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.50471 [sec] (0.986757 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.46122 [sec] (3.58135 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 16 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.49951 [sec] (2.29491 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773334295318727846.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.43217 [sec] (2.25074 [ns/kmer]) +=== step 6 (merging minimizers tuples): 15.4254 [sec] (10.1157 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5263874 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10033509/61951224 (16.1958%) +num_buckets_in_skew_index 2744/61951224 (0.00442929%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 51127318/105337248 (48.5368%) +num_minimizer_positions_of_buckets_in_skew_index 2294959/105337248 (2.17868%) +=== step 7.1 (build sparse index): 2.4681 [sec] (1.61853 [ns/kmer]) + partition = 0: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 1: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 2: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 3: num kmers in buckets of size > 2048 and <= 4096: 2981930 + partition = 4: num kmers in buckets of size > 4096 and <= 8192: 2815305 + partition = 5: num kmers in buckets of size > 8192 and <= 16384: 1071910 + partition = 6: num kmers in buckets of size > 16384 and <= 32768: 597919 + partition = 7: num kmers in buckets of size > 32768 and <= 144478: 5748301 +num kmers in skew index = 27625289 (1.81161%) + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 8174536 kmers; bits/key = 2.61889 + built positions[0] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3454318 kmers; bits/key = 3.14686 + built positions[1] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[2] for 2781070 kmers; bits/key = 2.55987 + built positions[2] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2981930 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2981930)... + built mphs[3] for 2981930 kmers; bits/key = 2.55978 + built positions[3] for 2981930 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2815305 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2815305)... + built mphs[4] for 2815305 kmers; bits/key = 2.55984 + built positions[4] for 2815305 kmers; bits/key = 13.0001 + lower = 8192; upper = 16384; num_bits_per_pos = 14; num_kmers_in_partition = 1071910 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1071910)... + built mphs[5] for 1071910 kmers; bits/key = 2.41822 + built positions[5] for 1071910 kmers; bits/key = 14.0003 + lower = 16384; upper = 32768; num_bits_per_pos = 15; num_kmers_in_partition = 597919 + building MPHF with 16 threads and 1 partitions (avg. partition size = 597919)... + built mphs[6] for 597919 kmers; bits/key = 2.41973 + built positions[6] for 597919 kmers; bits/key = 15.0006 + lower = 32768; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 5748301 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[7] for 5748301 kmers; bits/key = 2.5045 + built positions[7] for 5748301 kmers; bits/key = 18.0001 +=== step 7.2 (build skew index): 8.90515 [sec] (5.83981 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 11.6674 [sec] (7.65122 [ns/kmer]) +=== total time: 48.2301 [sec] (31.6283 [ns/kmer]) +total index size: 1234200254 [B] -- 1234.2 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.79289% + strings_offsets: 0.274587 [bits/kmer] -- 4.24079% + control_codewords: 1.34067 [bits/kmer] -- 20.7056% + mid_load_buckets: 1.0729 [bits/kmer] -- 16.5702% + begin_buckets_of_size: 5.4351e-06 [bits/kmer] -- 8.3941e-05% + strings: 3.35283 [bits/kmer] -- 51.7819% + skew_index: 0.317822 [bits/kmer] -- 4.90852% + weights: 9.65307e-07 [bits/kmer] -- 1.49084e-05% + -------------- + total: 6.4749 [bits/kmer] +2026-03-12 17:52:23: saving data structure to disk... +2026-03-12 17:52:24: DONE diff --git a/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.time.log b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.time.log new file mode 100644 index 0000000..8b9a1e5 --- /dev/null +++ b/benchmarks/results-sweep-l-12-03-26/l8/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/human.k63.l8.sshash" + User time (seconds): 119.85 + System time (seconds): 17.09 + Percent of CPU this job got: 259% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:52.72 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6218384 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5992195 + Voluntary context switches: 3026 + Involuntary context switches: 2362 + Swaps: 0 + File system inputs: 48 + File system outputs: 22283712 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-l-indexes/se.k63.l8.sshash" + User time (seconds): 65.36 + System time (seconds): 11.49 + Percent of CPU this job got: 157% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:48.91 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4525044 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 3819028 + Voluntary context switches: 1751 + Involuntary context switches: 1679 + Swaps: 0 + File system inputs: 64 + File system outputs: 15715264 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.json new file mode 100644 index 0000000..0641e01 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash", "k": "31", "m": "17", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "693.850576", "negative lookup (avg_nanosec_per_kmer)": "451.055484", "access (avg_nanosec_per_kmer)": "346.769586", "iterator (avg_nanosec_per_kmer)": "2.465789"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash", "k": "31", "m": "17", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "688.555253", "negative lookup (avg_nanosec_per_kmer)": "452.041315", "access (avg_nanosec_per_kmer)": "357.459507", "iterator (avg_nanosec_per_kmer)": "2.647531"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash", "k": "31", "m": "17", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "693.711235", "negative lookup (avg_nanosec_per_kmer)": "449.164775", "access (avg_nanosec_per_kmer)": "349.046761", "iterator (avg_nanosec_per_kmer)": "2.464475"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash", "k": "31", "m": "17", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "701.329252", "negative lookup (avg_nanosec_per_kmer)": "401.445138", "access (avg_nanosec_per_kmer)": "357.766107", "iterator (avg_nanosec_per_kmer)": "2.424100"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash", "k": "31", "m": "17", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "695.685945", "negative lookup (avg_nanosec_per_kmer)": "403.100032", "access (avg_nanosec_per_kmer)": "360.147575", "iterator (avg_nanosec_per_kmer)": "2.399567"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash", "k": "31", "m": "17", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "697.412868", "negative lookup (avg_nanosec_per_kmer)": "395.397547", "access (avg_nanosec_per_kmer)": "360.473420", "iterator (avg_nanosec_per_kmer)": "2.423397"} diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.log new file mode 100644 index 0000000..4be63db --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 693.851 +negative lookup (avg_nanosec_per_kmer) 451.055 +access (avg_nanosec_per_kmer) = 346.77 +iterator (avg_nanosec_per_kmer) = 2.46579 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 688.555 +negative lookup (avg_nanosec_per_kmer) 452.041 +access (avg_nanosec_per_kmer) = 357.46 +iterator (avg_nanosec_per_kmer) = 2.64753 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 693.711 +negative lookup (avg_nanosec_per_kmer) 449.165 +access (avg_nanosec_per_kmer) = 349.047 +iterator (avg_nanosec_per_kmer) = 2.46448 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 701.329 +negative lookup (avg_nanosec_per_kmer) 401.445 +access (avg_nanosec_per_kmer) = 357.766 +iterator (avg_nanosec_per_kmer) = 2.4241 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 695.686 +negative lookup (avg_nanosec_per_kmer) 403.1 +access (avg_nanosec_per_kmer) = 360.148 +iterator (avg_nanosec_per_kmer) = 2.39957 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 697.413 +negative lookup (avg_nanosec_per_kmer) 395.398 +access (avg_nanosec_per_kmer) = 360.473 +iterator (avg_nanosec_per_kmer) = 2.4234 diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.json new file mode 100644 index 0000000..227a161 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "17", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7202850", "step 2 (compute minimizer tuples)": "4032946", "step 3 (merging minimizer tuples)": "21450790", "step 4 (build mphf)": "13189309", "step 5 (replacing minimizer values with MPHF hashes)": "13718708", "step 6 (merging minimizers tuples)": "37352893", "step 7.1 (build sparse index)": "9628762", "step 7.2 (build skew index)": "16627925", "step 7 (build sparse and skew index)": "27326608", "total_build_time_in_microsec": "124274104", "index_size_in_bytes": "2867333320", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "17", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4559363", "step 2 (compute minimizer tuples)": "1732743", "step 3 (merging minimizer tuples)": "8248836", "step 4 (build mphf)": "4594888", "step 5 (replacing minimizer values with MPHF hashes)": "5071388", "step 6 (merging minimizers tuples)": "12545847", "step 7.1 (build sparse index)": "3857431", "step 7.2 (build skew index)": "6738707", "step 7 (build sparse and skew index)": "11022926", "total_build_time_in_microsec": "47775991", "index_size_in_bytes": "1126974146", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.log new file mode 100644 index 0000000..bb18eaf --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.log @@ -0,0 +1,285 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash --canonical +2026-03-12 18:22:22: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.20285 [sec] (2.87461 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.17.bin'... +=== step 2 (compute minimizer tuples): 4.03295 [sec] (1.60952 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 21.4508 [sec] (8.56087 [ns/kmer]) +num_minimizers = 257573300 +num_minimizer_positions = 390041614 +num_super_kmers = 407329090 +building minimizers MPHF with 16 threads and 86 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 13.1893 [sec] (5.26377 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336142489061091.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 13.7187 [sec] (5.47505 [ns/kmer]) +=== step 6 (merging minimizers tuples): 37.3529 [sec] (14.9073 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 34566934 +bits_for_list_id = 26 +num_bits_for_control = 34 +num_buckets_larger_than_1_not_in_skew_index 53599552/257573300 (20.8094%) +num_buckets_in_skew_index 102145/257573300 (0.0396567%) +max_bucket_size 43201 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 164601821/390041614 (42.2011%) +num_minimizer_positions_of_buckets_in_skew_index 21568190/390041614 (5.52972%) +=== step 7.1 (build sparse index): 9.62876 [sec] (3.84278 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 32935302 + partition = 1: num kmers in buckets of size > 128 and <= 256: 25734767 + partition = 2: num kmers in buckets of size > 256 and <= 512: 20792425 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 15917364 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 10989483 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 7593320 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 5155723 + partition = 7: num kmers in buckets of size > 8192 and <= 43201: 6209464 +num kmers in skew index = 125327848 (5.00175%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 32935302 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 32935302 kmers; bits/key = 2.52487 + built positions[0] for 32935302 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 25734767 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 25734767 kmers; bits/key = 2.54852 + built positions[1] for 25734767 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 20792425 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 20792425 kmers; bits/key = 2.49866 + built positions[2] for 20792425 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 15917364 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 15917364 kmers; bits/key = 2.67862 + built positions[3] for 15917364 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 10989483 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[4] for 10989483 kmers; bits/key = 2.60084 + built positions[4] for 10989483 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 7593320 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[5] for 7593320 kmers; bits/key = 2.61816 + built positions[5] for 7593320 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 5155723 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 5155723 kmers; bits/key = 2.66136 + built positions[6] for 5155723 kmers; bits/key = 13.0001 + lower = 8192; upper = 43201; num_bits_per_pos = 16; num_kmers_in_partition = 6209464 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 6209464 kmers; bits/key = 2.90183 + built positions[7] for 6209464 kmers; bits/key = 16.0001 +=== step 7.2 (build skew index): 16.6279 [sec] (6.6361 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 79.1509% +buckets with 2 minimizer positions = 13.4202% +buckets with 3 minimizer positions = 3.79529% +buckets with 4 minimizer positions = 1.45653% +buckets with 5 minimizer positions = 0.695178% +buckets with 6 minimizer positions = 0.387084% +buckets with 7 minimizer positions = 0.239602% +buckets with 8 minimizer positions = 0.160886% +buckets with 9 minimizer positions = 0.113528% +buckets with 10 minimizer positions = 0.0844567% +buckets with 11 minimizer positions = 0.0648215% +buckets with 12 minimizer positions = 0.0512355% +buckets with 13 minimizer positions = 0.040932% +buckets with 14 minimizer positions = 0.0336526% +buckets with 15 minimizer positions = 0.0284063% +buckets with 16 minimizer positions = 0.0237676% +max_bucket_size = 43201 +=== step 7 (build sparse and skew index): 27.3266 [sec] (10.9059 [ns/kmer]) +=== total time: 124.274 [sec] (49.597 [ns/kmer]) +total index size: 2867333320 [B] -- 2867.33 [MB] +SPACE BREAKDOWN: + mphf: 0.290923 [bits/kmer] (2.8301 [bits/key]) -- 3.17786% + strings_offsets: 0.153147 [bits/kmer] -- 1.67288% + control_codewords: 3.49506 [bits/kmer] -- 38.1779% + mid_load_buckets: 2.10213 [bits/kmer] -- 22.9624% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 9.34666e-06% + strings: 2.24545 [bits/kmer] -- 24.528% + skew_index: 0.867961 [bits/kmer] -- 9.48107% + weights: 5.87466e-07 [bits/kmer] -- 6.41711e-06% + -------------- + total: 9.15467 [bits/kmer] +2026-03-12 18:24:26: saving data structure to disk... +2026-03-12 18:24:28: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash --canonical +2026-03-12 18:52:00: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.55936 [sec] (5.09819 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.73274 [sec] (1.93752 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.24884 [sec] (9.22369 [ns/kmer]) +num_minimizers = 90760041 +num_minimizer_positions = 150933269 +num_super_kmers = 156804883 +building minimizers MPHF with 16 threads and 31 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 4.59489 [sec] (5.13791 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337920448583863.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.07139 [sec] (5.67073 [ns/kmer]) +=== step 6 (merging minimizers tuples): 12.5458 [sec] (14.0285 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10049318 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 18102465/90760041 (19.9454%) +num_buckets_in_skew_index 32827/90760041 (0.036169%) +max_bucket_size 71959 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 72933260/150933269 (48.3215%) +num_minimizer_positions_of_buckets_in_skew_index 5375260/150933269 (3.56135%) +=== step 7.1 (build sparse index): 3.85743 [sec] (4.3133 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 12484915 + partition = 1: num kmers in buckets of size > 128 and <= 256: 6211067 + partition = 2: num kmers in buckets of size > 256 and <= 512: 3339455 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 2305854 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1586424 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1393755 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 937328 + partition = 7: num kmers in buckets of size > 8192 and <= 71959: 1821068 +num kmers in skew index = 30079866 (3.36347%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 12484915 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[0] for 12484915 kmers; bits/key = 2.64823 + built positions[0] for 12484915 kmers; bits/key = 7.00003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 6211067 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 6211067 kmers; bits/key = 2.90122 + built positions[1] for 6211067 kmers; bits/key = 8.00006 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 3339455 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 3339455 kmers; bits/key = 3.2408 + built positions[2] for 3339455 kmers; bits/key = 9.0001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 2305854 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2305854)... + built mphs[3] for 2305854 kmers; bits/key = 2.56 + built positions[3] for 2305854 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1586424 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1586424)... + built mphs[4] for 1586424 kmers; bits/key = 2.41752 + built positions[4] for 1586424 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1393755 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1393755)... + built mphs[5] for 1393755 kmers; bits/key = 2.5606 + built positions[5] for 1393755 kmers; bits/key = 12.0003 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 937328 + building MPHF with 16 threads and 1 partitions (avg. partition size = 937328)... + built mphs[6] for 937328 kmers; bits/key = 2.41848 + built positions[6] for 937328 kmers; bits/key = 13.0004 + lower = 8192; upper = 71959; num_bits_per_pos = 17; num_kmers_in_partition = 1821068 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1821068)... + built mphs[7] for 1821068 kmers; bits/key = 2.56023 + built positions[7] for 1821068 kmers; bits/key = 17.0002 +=== step 7.2 (build skew index): 6.73871 [sec] (7.53509 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 80.0184% +buckets with 2 minimizer positions = 11.0724% +buckets with 3 minimizer positions = 3.52819% +buckets with 4 minimizer positions = 1.59728% +buckets with 5 minimizer positions = 0.891066% +buckets with 6 minimizer positions = 0.561201% +buckets with 7 minimizer positions = 0.385857% +buckets with 8 minimizer positions = 0.281767% +buckets with 9 minimizer positions = 0.216731% +buckets with 10 minimizer positions = 0.171421% +buckets with 11 minimizer positions = 0.140259% +buckets with 12 minimizer positions = 0.118164% +buckets with 13 minimizer positions = 0.101219% +buckets with 14 minimizer positions = 0.0882547% +buckets with 15 minimizer positions = 0.0771871% +buckets with 16 minimizer positions = 0.0687009% +max_bucket_size = 71959 +=== step 7 (build sparse and skew index): 11.0229 [sec] (12.3256 [ns/kmer]) +=== total time: 47.776 [sec] (53.4222 [ns/kmer]) +total index size: 1126974146 [B] -- 1126.97 [MB] +SPACE BREAKDOWN: + mphf: 0.29188 [bits/kmer] (2.87606 [bits/key]) -- 2.89527% + strings_offsets: 0.333373 [bits/kmer] -- 3.30685% + control_codewords: 3.24756 [bits/kmer] -- 32.2137% + mid_load_buckets: 2.52813 [bits/kmer] -- 25.0775% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.37805e-05% + strings: 3.10303 [bits/kmer] -- 30.7801% + skew_index: 0.577309 [bits/kmer] -- 5.72655% + weights: 1.64596e-06 [bits/kmer] -- 1.63269e-05% + -------------- + total: 10.0813 [bits/kmer] +2026-03-12 18:52:48: saving data structure to disk... +2026-03-12 18:52:48: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.time.log new file mode 100644 index 0000000..48d8a63 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.canon.sshash --canonical" + User time (seconds): 286.66 + System time (seconds): 42.99 + Percent of CPU this job got: 262% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:05.80 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 15237336 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 14358698 + Voluntary context switches: 29434 + Involuntary context switches: 6712 + Swaps: 0 + File system inputs: 240 + File system outputs: 57673992 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.canon.sshash --canonical" + User time (seconds): 77.23 + System time (seconds): 16.39 + Percent of CPU this job got: 193% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:48.41 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6078552 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5321485 + Voluntary context switches: 2513 + Involuntary context switches: 1690 + Swaps: 0 + File system inputs: 72 + File system outputs: 21956880 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.json new file mode 100644 index 0000000..08a6d14 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash", "k": "31", "m": "17", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "865.445638", "negative lookup (avg_nanosec_per_kmer)": "822.725279", "access (avg_nanosec_per_kmer)": "348.998577", "iterator (avg_nanosec_per_kmer)": "2.465760"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash", "k": "31", "m": "17", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "869.186438", "negative lookup (avg_nanosec_per_kmer)": "829.114695", "access (avg_nanosec_per_kmer)": "350.341328", "iterator (avg_nanosec_per_kmer)": "2.489794"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash", "k": "31", "m": "17", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "869.471396", "negative lookup (avg_nanosec_per_kmer)": "823.423849", "access (avg_nanosec_per_kmer)": "352.539411", "iterator (avg_nanosec_per_kmer)": "2.618378"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash", "k": "31", "m": "17", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "849.979971", "negative lookup (avg_nanosec_per_kmer)": "739.378012", "access (avg_nanosec_per_kmer)": "356.244441", "iterator (avg_nanosec_per_kmer)": "2.433187"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash", "k": "31", "m": "17", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "852.144884", "negative lookup (avg_nanosec_per_kmer)": "739.477228", "access (avg_nanosec_per_kmer)": "357.236152", "iterator (avg_nanosec_per_kmer)": "2.393705"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash", "k": "31", "m": "17", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "850.250251", "negative lookup (avg_nanosec_per_kmer)": "732.213271", "access (avg_nanosec_per_kmer)": "362.152994", "iterator (avg_nanosec_per_kmer)": "2.434135"} diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.log new file mode 100644 index 0000000..6b4e708 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash +positive lookup (avg_nanosec_per_kmer) = 865.446 +negative lookup (avg_nanosec_per_kmer) 822.725 +access (avg_nanosec_per_kmer) = 348.999 +iterator (avg_nanosec_per_kmer) = 2.46576 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash +positive lookup (avg_nanosec_per_kmer) = 869.186 +negative lookup (avg_nanosec_per_kmer) 829.115 +access (avg_nanosec_per_kmer) = 350.341 +iterator (avg_nanosec_per_kmer) = 2.48979 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash +positive lookup (avg_nanosec_per_kmer) = 869.471 +negative lookup (avg_nanosec_per_kmer) 823.424 +access (avg_nanosec_per_kmer) = 352.539 +iterator (avg_nanosec_per_kmer) = 2.61838 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash +positive lookup (avg_nanosec_per_kmer) = 849.98 +negative lookup (avg_nanosec_per_kmer) 739.378 +access (avg_nanosec_per_kmer) = 356.244 +iterator (avg_nanosec_per_kmer) = 2.43319 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash +positive lookup (avg_nanosec_per_kmer) = 852.145 +negative lookup (avg_nanosec_per_kmer) 739.477 +access (avg_nanosec_per_kmer) = 357.236 +iterator (avg_nanosec_per_kmer) = 2.39371 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash +positive lookup (avg_nanosec_per_kmer) = 850.25 +negative lookup (avg_nanosec_per_kmer) 732.213 +access (avg_nanosec_per_kmer) = 362.153 +iterator (avg_nanosec_per_kmer) = 2.43413 diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.json new file mode 100644 index 0000000..f728d0c --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "17", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7266184", "step 2 (compute minimizer tuples)": "2539844", "step 3 (merging minimizer tuples)": "13541220", "step 4 (build mphf)": "11328813", "step 5 (replacing minimizer values with MPHF hashes)": "10821453", "step 6 (merging minimizers tuples)": "28433264", "step 7.1 (build sparse index)": "6837311", "step 7.2 (build skew index)": "12518934", "step 7 (build sparse and skew index)": "20190170", "total_build_time_in_microsec": "94120948", "index_size_in_bytes": "2446406244", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "17", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4596410", "step 2 (compute minimizer tuples)": "1178903", "step 3 (merging minimizer tuples)": "6623481", "step 4 (build mphf)": "4241201", "step 5 (replacing minimizer values with MPHF hashes)": "4118733", "step 6 (merging minimizers tuples)": "10074152", "step 7.1 (build sparse index)": "3004213", "step 7.2 (build skew index)": "6098873", "step 7 (build sparse and skew index)": "9460104", "total_build_time_in_microsec": "40292984", "index_size_in_bytes": "997475468", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.log new file mode 100644 index 0000000..bd173e6 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.log @@ -0,0 +1,283 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash +2026-03-12 18:20:10: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.26618 [sec] (2.89989 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.16.bin'... +=== step 2 (compute minimizer tuples): 2.53984 [sec] (1.01364 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +=== step 3 (merging minimizer tuples): 13.5412 [sec] (5.40421 [ns/kmer]) +num_minimizers = 229995982 +num_minimizer_positions = 321049590 +num_super_kmers = 321049590 +building minimizers MPHF with 16 threads and 77 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 11.3288 [sec] (4.52126 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336010316242022.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 10.8215 [sec] (4.31877 [ns/kmer]) +=== step 6 (merging minimizers tuples): 28.4333 [sec] (11.3475 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 26196249 +bits_for_list_id = 25 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 38279650/229995982 (16.6436%) +num_buckets_in_skew_index 75540/229995982 (0.0328441%) +max_bucket_size 100390 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 113923345/321049590 (35.4847%) +num_minimizer_positions_of_buckets_in_skew_index 15485453/321049590 (4.82338%) +=== step 7.1 (build sparse index): 6.83731 [sec] (2.72873 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 27177565 + partition = 1: num kmers in buckets of size > 128 and <= 256: 21867722 + partition = 2: num kmers in buckets of size > 256 and <= 512: 17054883 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 12552761 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 8091354 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 5403995 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 4427397 + partition = 7: num kmers in buckets of size > 8192 and <= 100390: 3423247 +num kmers in skew index = 99998924 (3.99089%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 27177565 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[0] for 27177565 kmers; bits/key = 2.62461 + built positions[0] for 27177565 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21867722 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21867722 kmers; bits/key = 2.61192 + built positions[1] for 21867722 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 17054883 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 17054883 kmers; bits/key = 2.52773 + built positions[2] for 17054883 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 12552761 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 12552761 kmers; bits/key = 2.67031 + built positions[3] for 12552761 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 8091354 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 8091354 kmers; bits/key = 2.64155 + built positions[4] for 8091354 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 5403995 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 5403995 kmers; bits/key = 2.63755 + built positions[5] for 5403995 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 4427397 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 4427397 kmers; bits/key = 2.74028 + built positions[6] for 4427397 kmers; bits/key = 13.0001 + lower = 8192; upper = 100390; num_bits_per_pos = 17; num_kmers_in_partition = 3423247 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[7] for 3423247 kmers; bits/key = 3.17162 + built positions[7] for 3423247 kmers; bits/key = 17.0001 +=== step 7.2 (build skew index): 12.5189 [sec] (4.99622 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.3235% +buckets with 2 minimizer positions = 11.3899% +buckets with 3 minimizer positions = 2.76236% +buckets with 4 minimizer positions = 0.991411% +buckets with 5 minimizer positions = 0.467817% +buckets with 6 minimizer positions = 0.262065% +buckets with 7 minimizer positions = 0.164978% +buckets with 8 minimizer positions = 0.112558% +buckets with 9 minimizer positions = 0.0810058% +buckets with 10 minimizer positions = 0.0612402% +buckets with 11 minimizer positions = 0.047173% +buckets with 12 minimizer positions = 0.0379381% +buckets with 13 minimizer positions = 0.0308336% +buckets with 14 minimizer positions = 0.0252617% +buckets with 15 minimizer positions = 0.0213578% +buckets with 16 minimizer positions = 0.0183277% +max_bucket_size = 100390 +=== step 7 (build sparse and skew index): 20.1902 [sec] (8.05777 [ns/kmer]) +=== total time: 94.1209 [sec] (37.5631 [ns/kmer]) +total index size: 2446406244 [B] -- 2446.41 [MB] +SPACE BREAKDOWN: + mphf: 0.260475 [bits/kmer] (2.83773 [bits/key]) -- 3.33483% + strings_offsets: 0.153147 [bits/kmer] -- 1.96071% + control_codewords: 3.02907 [bits/kmer] -- 38.7807% + mid_load_buckets: 1.45491 [bits/kmer] -- 18.6271% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 1.09548e-05% + strings: 2.24545 [bits/kmer] -- 28.7482% + skew_index: 0.6677 [bits/kmer] -- 8.54847% + weights: 5.87466e-07 [bits/kmer] -- 7.52124e-06% + -------------- + total: 7.81076 [bits/kmer] +2026-03-12 18:21:44: saving data structure to disk... +2026-03-12 18:21:45: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash +2026-03-12 18:50:46: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.59641 [sec] (5.13962 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.1789 [sec] (1.31823 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.62348 [sec] (7.40625 [ns/kmer]) +num_minimizers = 81934994 +num_minimizer_positions = 126082693 +num_super_kmers = 126082693 +building minimizers MPHF with 16 threads and 28 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 4.2412 [sec] (4.74243 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337846214720804.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.11873 [sec] (4.60549 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.0742 [sec] (11.2647 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8141324 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 14436329/81934994 (17.6192%) +num_buckets_in_skew_index 18183/81934994 (0.022192%) +max_bucket_size 51129 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 55222887/126082693 (43.7989%) +num_minimizer_positions_of_buckets_in_skew_index 3379324/126082693 (2.68024%) +=== step 7.1 (build sparse index): 3.00421 [sec] (3.35925 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 7351356 + partition = 1: num kmers in buckets of size > 128 and <= 256: 3837170 + partition = 2: num kmers in buckets of size > 256 and <= 512: 2302094 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1645908 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1321087 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 872020 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 798930 + partition = 7: num kmers in buckets of size > 8192 and <= 51129: 1549328 +num kmers in skew index = 19677893 (2.20034%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 7351356 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 7351356 kmers; bits/key = 2.69062 + built positions[0] for 7351356 kmers; bits/key = 7.00005 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 3837170 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3837170 kmers; bits/key = 2.98611 + built positions[1] for 3837170 kmers; bits/key = 8.0001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 2302094 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2302094)... + built mphs[2] for 2302094 kmers; bits/key = 2.56001 + built positions[2] for 2302094 kmers; bits/key = 9.00014 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1645908 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1645908)... + built mphs[3] for 1645908 kmers; bits/key = 2.56038 + built positions[3] for 1645908 kmers; bits/key = 10.0002 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1321087 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1321087)... + built mphs[4] for 1321087 kmers; bits/key = 2.56065 + built positions[4] for 1321087 kmers; bits/key = 11.0003 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 872020 + building MPHF with 16 threads and 1 partitions (avg. partition size = 872020)... + built mphs[5] for 872020 kmers; bits/key = 2.41866 + built positions[5] for 872020 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 798930 + building MPHF with 16 threads and 1 partitions (avg. partition size = 798930)... + built mphs[6] for 798930 kmers; bits/key = 2.41888 + built positions[6] for 798930 kmers; bits/key = 13.0004 + lower = 8192; upper = 51129; num_bits_per_pos = 16; num_kmers_in_partition = 1549328 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1549328)... + built mphs[7] for 1549328 kmers; bits/key = 2.56044 + built positions[7] for 1549328 kmers; bits/key = 16.0002 +=== step 7.2 (build skew index): 6.09887 [sec] (6.81964 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3586% +buckets with 2 minimizer positions = 9.93632% +buckets with 3 minimizer positions = 2.99716% +buckets with 4 minimizer positions = 1.35523% +buckets with 5 minimizer positions = 0.770106% +buckets with 6 minimizer positions = 0.50212% +buckets with 7 minimizer positions = 0.362181% +buckets with 8 minimizer positions = 0.277691% +buckets with 9 minimizer positions = 0.222655% +buckets with 10 minimizer positions = 0.183248% +buckets with 11 minimizer positions = 0.153295% +buckets with 12 minimizer positions = 0.131565% +buckets with 13 minimizer positions = 0.109942% +buckets with 14 minimizer positions = 0.093418% +buckets with 15 minimizer positions = 0.0785086% +buckets with 16 minimizer positions = 0.0654116% +max_bucket_size = 51129 +=== step 7 (build sparse and skew index): 9.4601 [sec] (10.5781 [ns/kmer]) +=== total time: 40.293 [sec] (45.0548 [ns/kmer]) +total index size: 997475468 [B] -- 997.475 [MB] +SPACE BREAKDOWN: + mphf: 0.263614 [bits/kmer] (2.87732 [bits/key]) -- 2.95437% + strings_offsets: 0.333373 [bits/kmer] -- 3.73616% + control_codewords: 2.93178 [bits/kmer] -- 32.857% + mid_load_buckets: 1.91422 [bits/kmer] -- 21.453% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.68678e-05% + strings: 3.10303 [bits/kmer] -- 34.7762% + skew_index: 0.376833 [bits/kmer] -- 4.22323% + weights: 1.64596e-06 [bits/kmer] -- 1.84466e-05% + -------------- + total: 8.92286 [bits/kmer] +2026-03-12 18:51:26: saving data structure to disk... +2026-03-12 18:51:26: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.time.log new file mode 100644 index 0000000..7fbf1cb --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m17/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m17.sshash" + User time (seconds): 217.90 + System time (seconds): 34.07 + Percent of CPU this job got: 264% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:35.44 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 12245088 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 11209343 + Voluntary context switches: 2136 + Involuntary context switches: 5505 + Swaps: 0 + File system inputs: 152 + File system outputs: 46778304 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 17 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m17.sshash" + User time (seconds): 62.53 + System time (seconds): 13.82 + Percent of CPU this job got: 186% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:40.86 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4979048 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4337216 + Voluntary context switches: 2360 + Involuntary context switches: 1631 + Swaps: 0 + File system inputs: 48 + File system outputs: 18150968 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.json new file mode 100644 index 0000000..1b91aef --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "594.637768", "negative lookup (avg_nanosec_per_kmer)": "422.895480", "access (avg_nanosec_per_kmer)": "348.524349", "iterator (avg_nanosec_per_kmer)": "2.488289"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "588.016307", "negative lookup (avg_nanosec_per_kmer)": "421.612600", "access (avg_nanosec_per_kmer)": "358.599252", "iterator (avg_nanosec_per_kmer)": "2.624688"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "592.770498", "negative lookup (avg_nanosec_per_kmer)": "422.790660", "access (avg_nanosec_per_kmer)": "355.415434", "iterator (avg_nanosec_per_kmer)": "2.490581"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "650.045102", "negative lookup (avg_nanosec_per_kmer)": "394.408862", "access (avg_nanosec_per_kmer)": "360.314070", "iterator (avg_nanosec_per_kmer)": "2.422974"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "652.682688", "negative lookup (avg_nanosec_per_kmer)": "396.787863", "access (avg_nanosec_per_kmer)": "362.199094", "iterator (avg_nanosec_per_kmer)": "2.420815"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "649.465051", "negative lookup (avg_nanosec_per_kmer)": "398.383698", "access (avg_nanosec_per_kmer)": "359.902460", "iterator (avg_nanosec_per_kmer)": "2.389854"} diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.log new file mode 100644 index 0000000..6feda72 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 594.638 +negative lookup (avg_nanosec_per_kmer) 422.895 +access (avg_nanosec_per_kmer) = 348.524 +iterator (avg_nanosec_per_kmer) = 2.48829 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 588.016 +negative lookup (avg_nanosec_per_kmer) 421.613 +access (avg_nanosec_per_kmer) = 358.599 +iterator (avg_nanosec_per_kmer) = 2.62469 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 592.77 +negative lookup (avg_nanosec_per_kmer) 422.791 +access (avg_nanosec_per_kmer) = 355.415 +iterator (avg_nanosec_per_kmer) = 2.49058 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 650.045 +negative lookup (avg_nanosec_per_kmer) 394.409 +access (avg_nanosec_per_kmer) = 360.314 +iterator (avg_nanosec_per_kmer) = 2.42297 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 652.683 +negative lookup (avg_nanosec_per_kmer) 396.788 +access (avg_nanosec_per_kmer) = 362.199 +iterator (avg_nanosec_per_kmer) = 2.42082 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 649.465 +negative lookup (avg_nanosec_per_kmer) 398.384 +access (avg_nanosec_per_kmer) = 359.902 +iterator (avg_nanosec_per_kmer) = 2.38985 diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.json new file mode 100644 index 0000000..2b75346 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "19", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7231908", "step 2 (compute minimizer tuples)": "4659013", "step 3 (merging minimizer tuples)": "17092647", "step 4 (build mphf)": "18165449", "step 5 (replacing minimizer values with MPHF hashes)": "16153448", "step 6 (merging minimizers tuples)": "46322991", "step 7.1 (build sparse index)": "6872319", "step 7.2 (build skew index)": "10962136", "step 7 (build sparse and skew index)": "18863049", "total_build_time_in_microsec": "128488505", "index_size_in_bytes": "2904690080", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "19", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4569437", "step 2 (compute minimizer tuples)": "1861996", "step 3 (merging minimizer tuples)": "9005507", "step 4 (build mphf)": "6009340", "step 5 (replacing minimizer values with MPHF hashes)": "5696065", "step 6 (merging minimizers tuples)": "14113353", "step 7.1 (build sparse index)": "3802522", "step 7.2 (build skew index)": "5316546", "step 7 (build sparse and skew index)": "9562252", "total_build_time_in_microsec": "50817950", "index_size_in_bytes": "1189890622", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.log new file mode 100644 index 0000000..0467af1 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.log @@ -0,0 +1,287 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash --canonical +2026-03-12 18:27:24: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23191 [sec] (2.88621 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.19.bin'... +=== step 2 (compute minimizer tuples): 4.65901 [sec] (1.85938 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 17.0926 [sec] (6.82156 [ns/kmer]) +num_minimizers = 376279399 +num_minimizer_positions = 442807337 +num_super_kmers = 461253862 +building minimizers MPHF with 16 threads and 126 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 18.1654 [sec] (7.24971 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336444276995979.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 16.1534 [sec] (6.44674 [ns/kmer]) +=== step 6 (merging minimizers tuples): 46.323 [sec] (18.4872 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 15712475 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 22668916/376279399 (6.02449%) +num_buckets_in_skew_index 78974/376279399 (0.0209881%) +max_bucket_size 61612 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 74028533/442807337 (16.718%) +num_minimizer_positions_of_buckets_in_skew_index 15247295/442807337 (3.44332%) +=== step 7.1 (build sparse index): 6.87232 [sec] (2.7427 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 21855138 + partition = 1: num kmers in buckets of size > 128 and <= 256: 17243618 + partition = 2: num kmers in buckets of size > 256 and <= 512: 13207474 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 9277976 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 6294513 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3589985 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2385148 + partition = 7: num kmers in buckets of size > 8192 and <= 61612: 2335507 +num kmers in skew index = 76189359 (3.04067%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 21855138 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[0] for 21855138 kmers; bits/key = 2.61319 + built positions[0] for 21855138 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 17243618 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 17243618 kmers; bits/key = 2.52947 + built positions[1] for 17243618 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13207474 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 13207474 kmers; bits/key = 2.65592 + built positions[2] for 13207474 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 9277976 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[3] for 9277976 kmers; bits/key = 2.81902 + built positions[3] for 9277976 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 6294513 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 6294513 kmers; bits/key = 2.86825 + built positions[4] for 6294513 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3589985 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3589985 kmers; bits/key = 3.04364 + built positions[5] for 3589985 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2385148 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2385148)... + built mphs[6] for 2385148 kmers; bits/key = 2.55996 + built positions[6] for 2385148 kmers; bits/key = 13.0002 + lower = 8192; upper = 61612; num_bits_per_pos = 16; num_kmers_in_partition = 2335507 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2335507)... + built mphs[7] for 2335507 kmers; bits/key = 2.55996 + built positions[7] for 2335507 kmers; bits/key = 16.0001 +=== step 7.2 (build skew index): 10.9621 [sec] (4.37492 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 93.9545% +buckets with 2 minimizer positions = 4.17575% +buckets with 3 minimizer positions = 0.830039% +buckets with 4 minimizer positions = 0.328906% +buckets with 5 minimizer positions = 0.175666% +buckets with 6 minimizer positions = 0.108966% +buckets with 7 minimizer positions = 0.0740423% +buckets with 8 minimizer positions = 0.0533869% +buckets with 9 minimizer positions = 0.0403368% +buckets with 10 minimizer positions = 0.0315795% +buckets with 11 minimizer positions = 0.0251946% +buckets with 12 minimizer positions = 0.0205908% +buckets with 13 minimizer positions = 0.0171476% +buckets with 14 minimizer positions = 0.0144015% +buckets with 15 minimizer positions = 0.0123725% +buckets with 16 minimizer positions = 0.0105863% +max_bucket_size = 61612 +=== step 7 (build sparse and skew index): 18.863 [sec] (7.52812 [ns/kmer]) +=== total time: 128.489 [sec] (51.2789 [ns/kmer]) +total index size: 2904690080 [B] -- 2904.69 [MB] +SPACE BREAKDOWN: + mphf: 0.425849 [bits/kmer] (2.83577 [bits/key]) -- 4.59189% + strings_offsets: 0.153147 [bits/kmer] -- 1.65136% + control_codewords: 4.95563 [bits/kmer] -- 53.4361% + mid_load_buckets: 0.945418 [bits/kmer] -- 10.1943% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 9.22646e-06% + strings: 2.24545 [bits/kmer] -- 24.2125% + skew_index: 0.548442 [bits/kmer] -- 5.9138% + weights: 5.87466e-07 [bits/kmer] -- 6.33458e-06% + -------------- + total: 9.27394 [bits/kmer] +2026-03-12 18:29:32: saving data structure to disk... +2026-03-12 18:29:34: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash --canonical +2026-03-12 18:54:30: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.56944 [sec] (5.10945 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.862 [sec] (2.08205 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 9.00551 [sec] (10.0698 [ns/kmer]) +num_minimizers = 115806893 +num_minimizer_positions = 169436116 +num_super_kmers = 175718948 +building minimizers MPHF with 16 threads and 39 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.00934 [sec] (6.71953 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338070606780347.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.69606 [sec] (6.36923 [ns/kmer]) +=== step 6 (merging minimizers tuples): 14.1134 [sec] (15.7813 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 9886356 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 17334935/115806893 (14.9688%) +num_buckets_in_skew_index 22944/115806893 (0.0198123%) +max_bucket_size 42113 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 67159496/169436116 (39.6371%) +num_minimizer_positions_of_buckets_in_skew_index 3827606/169436116 (2.25903%) +=== step 7.1 (build sparse index): 3.80252 [sec] (4.25191 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 7625277 + partition = 1: num kmers in buckets of size > 128 and <= 256: 3650580 + partition = 2: num kmers in buckets of size > 256 and <= 512: 2014467 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1391242 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 836644 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 788715 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 534650 + partition = 7: num kmers in buckets of size > 8192 and <= 42113: 1286331 +num kmers in skew index = 18127906 (2.02703%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 7625277 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 7625277 kmers; bits/key = 2.66515 + built positions[0] for 7625277 kmers; bits/key = 7.00004 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 3650580 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3650580 kmers; bits/key = 3.11744 + built positions[1] for 3650580 kmers; bits/key = 8.0001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 2014467 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2014467)... + built mphs[2] for 2014467 kmers; bits/key = 2.56013 + built positions[2] for 2014467 kmers; bits/key = 9.00018 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1391242 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1391242)... + built mphs[3] for 1391242 kmers; bits/key = 2.56057 + built positions[3] for 1391242 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 836644 + building MPHF with 16 threads and 1 partitions (avg. partition size = 836644)... + built mphs[4] for 836644 kmers; bits/key = 2.56164 + built positions[4] for 836644 kmers; bits/key = 11.0004 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 788715 + building MPHF with 16 threads and 1 partitions (avg. partition size = 788715)... + built mphs[5] for 788715 kmers; bits/key = 2.56178 + built positions[5] for 788715 kmers; bits/key = 12.0005 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 534650 + building MPHF with 16 threads and 1 partitions (avg. partition size = 534650)... + built mphs[6] for 534650 kmers; bits/key = 2.42016 + built positions[6] for 534650 kmers; bits/key = 13.0006 + lower = 8192; upper = 42113; num_bits_per_pos = 16; num_kmers_in_partition = 1286331 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1286331)... + built mphs[7] for 1286331 kmers; bits/key = 2.41787 + built positions[7] for 1286331 kmers; bits/key = 16.0003 +=== step 7.2 (build skew index): 5.31655 [sec] (5.94486 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 85.0114% +buckets with 2 minimizer positions = 8.53693% +buckets with 3 minimizer positions = 2.58608% +buckets with 4 minimizer positions = 1.15077% +buckets with 5 minimizer positions = 0.639091% +buckets with 6 minimizer positions = 0.405366% +buckets with 7 minimizer positions = 0.280185% +buckets with 8 minimizer positions = 0.206562% +buckets with 9 minimizer positions = 0.16018% +buckets with 10 minimizer positions = 0.128228% +buckets with 11 minimizer positions = 0.106562% +buckets with 12 minimizer positions = 0.0899083% +buckets with 13 minimizer positions = 0.0774418% +buckets with 14 minimizer positions = 0.0678129% +buckets with 15 minimizer positions = 0.0595025% +buckets with 16 minimizer positions = 0.0528932% +max_bucket_size = 42113 +=== step 7 (build sparse and skew index): 9.56225 [sec] (10.6923 [ns/kmer]) +=== total time: 50.818 [sec] (56.8236 [ns/kmer]) +total index size: 1189890622 [B] -- 1189.89 [MB] +SPACE BREAKDOWN: + mphf: 0.368632 [bits/kmer] (2.84673 [bits/key]) -- 3.46325% + strings_offsets: 0.333373 [bits/kmer] -- 3.132% + control_codewords: 4.14378 [bits/kmer] -- 38.9303% + mid_load_buckets: 2.32799 [bits/kmer] -- 21.8712% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.25231e-05% + strings: 3.10303 [bits/kmer] -- 29.1526% + skew_index: 0.367292 [bits/kmer] -- 3.45066% + weights: 1.64596e-06 [bits/kmer] -- 1.54636e-05% + -------------- + total: 10.6441 [bits/kmer] +2026-03-12 18:55:21: saving data structure to disk... +2026-03-12 18:55:21: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.time.log new file mode 100644 index 0000000..7586f0d --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.canon.sshash --canonical" + User time (seconds): 310.97 + System time (seconds): 48.32 + Percent of CPU this job got: 276% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:10.10 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 17215068 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 14870740 + Voluntary context switches: 45550 + Involuntary context switches: 6553 + Swaps: 0 + File system inputs: 176 + File system outputs: 67637672 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.canon.sshash --canonical" + User time (seconds): 84.82 + System time (seconds): 17.91 + Percent of CPU this job got: 199% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:51.46 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6732784 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5743050 + Voluntary context switches: 2160 + Involuntary context switches: 1789 + Swaps: 0 + File system inputs: 72 + File system outputs: 24957392 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.json new file mode 100644 index 0000000..beea892 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "777.140051", "negative lookup (avg_nanosec_per_kmer)": "790.970038", "access (avg_nanosec_per_kmer)": "349.034472", "iterator (avg_nanosec_per_kmer)": "2.466624"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "777.265590", "negative lookup (avg_nanosec_per_kmer)": "795.483296", "access (avg_nanosec_per_kmer)": "349.471672", "iterator (avg_nanosec_per_kmer)": "2.466399"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "783.353549", "negative lookup (avg_nanosec_per_kmer)": "796.383034", "access (avg_nanosec_per_kmer)": "351.570930", "iterator (avg_nanosec_per_kmer)": "2.518450"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "797.477274", "negative lookup (avg_nanosec_per_kmer)": "735.214508", "access (avg_nanosec_per_kmer)": "365.810770", "iterator (avg_nanosec_per_kmer)": "2.424492"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "807.943499", "negative lookup (avg_nanosec_per_kmer)": "733.285880", "access (avg_nanosec_per_kmer)": "358.375192", "iterator (avg_nanosec_per_kmer)": "2.413100"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "804.108848", "negative lookup (avg_nanosec_per_kmer)": "734.853006", "access (avg_nanosec_per_kmer)": "359.812503", "iterator (avg_nanosec_per_kmer)": "2.474188"} diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.log new file mode 100644 index 0000000..2d620b7 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash +positive lookup (avg_nanosec_per_kmer) = 777.14 +negative lookup (avg_nanosec_per_kmer) 790.97 +access (avg_nanosec_per_kmer) = 349.034 +iterator (avg_nanosec_per_kmer) = 2.46662 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash +positive lookup (avg_nanosec_per_kmer) = 777.266 +negative lookup (avg_nanosec_per_kmer) 795.483 +access (avg_nanosec_per_kmer) = 349.472 +iterator (avg_nanosec_per_kmer) = 2.4664 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash +positive lookup (avg_nanosec_per_kmer) = 783.354 +negative lookup (avg_nanosec_per_kmer) 796.383 +access (avg_nanosec_per_kmer) = 351.571 +iterator (avg_nanosec_per_kmer) = 2.51845 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash +positive lookup (avg_nanosec_per_kmer) = 797.477 +negative lookup (avg_nanosec_per_kmer) 735.215 +access (avg_nanosec_per_kmer) = 365.811 +iterator (avg_nanosec_per_kmer) = 2.42449 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash +positive lookup (avg_nanosec_per_kmer) = 807.943 +negative lookup (avg_nanosec_per_kmer) 733.286 +access (avg_nanosec_per_kmer) = 358.375 +iterator (avg_nanosec_per_kmer) = 2.4131 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash +positive lookup (avg_nanosec_per_kmer) = 804.109 +negative lookup (avg_nanosec_per_kmer) 734.853 +access (avg_nanosec_per_kmer) = 359.813 +iterator (avg_nanosec_per_kmer) = 2.47419 diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.json new file mode 100644 index 0000000..25a539f --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "19", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7227120", "step 2 (compute minimizer tuples)": "3052971", "step 3 (merging minimizer tuples)": "12796355", "step 4 (build mphf)": "15374991", "step 5 (replacing minimizer values with MPHF hashes)": "12810037", "step 6 (merging minimizers tuples)": "43808380", "step 7.1 (build sparse index)": "5015201", "step 7.2 (build skew index)": "9475723", "step 7 (build sparse and skew index)": "15300846", "total_build_time_in_microsec": "110370700", "index_size_in_bytes": "2513337668", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "19", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4576354", "step 2 (compute minimizer tuples)": "1247265", "step 3 (merging minimizer tuples)": "7209512", "step 4 (build mphf)": "5434096", "step 5 (replacing minimizer values with MPHF hashes)": "4634423", "step 6 (merging minimizers tuples)": "11309869", "step 7.1 (build sparse index)": "2952688", "step 7.2 (build skew index)": "4638495", "step 7 (build sparse and skew index)": "7942504", "total_build_time_in_microsec": "42354023", "index_size_in_bytes": "1053150108", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.log new file mode 100644 index 0000000..f9221ac --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.log @@ -0,0 +1,284 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash +2026-03-12 18:24:57: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.22712 [sec] (2.8843 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.17.bin'... +=== step 2 (compute minimizer tuples): 3.05297 [sec] (1.21842 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +=== step 3 (merging minimizer tuples): 12.7964 [sec] (5.10694 [ns/kmer]) +num_minimizers = 317605945 +num_minimizer_positions = 365539499 +num_super_kmers = 365539499 +building minimizers MPHF with 16 threads and 106 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 15.375 [sec] (6.13606 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336297436262670.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 12.81 [sec] (5.1124 [ns/kmer]) +=== step 6 (merging minimizers tuples): 43.8084 [sec] (17.4836 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 10735908 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 15760473/317605945 (4.96227%) +num_buckets_in_skew_index 58330/317605945 (0.0183655%) +max_bucket_size 32155 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 53042003/365539499 (14.5106%) +num_minimizer_positions_of_buckets_in_skew_index 10710354/365539499 (2.93001%) +=== step 7.1 (build sparse index): 5.0152 [sec] (2.00153 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 18318072 + partition = 1: num kmers in buckets of size > 128 and <= 256: 14129011 + partition = 2: num kmers in buckets of size > 256 and <= 512: 10206828 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 6857559 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4051506 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2862728 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 1977306 + partition = 7: num kmers in buckets of size > 8192 and <= 32155: 936034 +num kmers in skew index = 59339044 (2.36818%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 18318072 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[0] for 18318072 kmers; bits/key = 2.63957 + built positions[0] for 18318072 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 14129011 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[1] for 14129011 kmers; bits/key = 2.54019 + built positions[1] for 14129011 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 10206828 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[2] for 10206828 kmers; bits/key = 2.64235 + built positions[2] for 10206828 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6857559 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 6857559 kmers; bits/key = 2.72943 + built positions[3] for 6857559 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4051506 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4051506 kmers; bits/key = 2.95595 + built positions[4] for 4051506 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2862728 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2862728)... + built mphs[5] for 2862728 kmers; bits/key = 2.55984 + built positions[5] for 2862728 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 1977306 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1977306)... + built mphs[6] for 1977306 kmers; bits/key = 2.56016 + built positions[6] for 1977306 kmers; bits/key = 13.0002 + lower = 8192; upper = 32155; num_bits_per_pos = 15; num_kmers_in_partition = 936034 + building MPHF with 16 threads and 1 partitions (avg. partition size = 936034)... + built mphs[7] for 936034 kmers; bits/key = 2.41851 + built positions[7] for 936034 kmers; bits/key = 15.0003 +=== step 7.2 (build skew index): 9.47572 [sec] (3.7817 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 95.0194% +buckets with 2 minimizer positions = 3.38026% +buckets with 3 minimizer positions = 0.679501% +buckets with 4 minimizer positions = 0.281692% +buckets with 5 minimizer positions = 0.154819% +buckets with 6 minimizer positions = 0.0975662% +buckets with 7 minimizer positions = 0.0671234% +buckets with 8 minimizer positions = 0.0487463% +buckets with 9 minimizer positions = 0.0367613% +buckets with 10 minimizer positions = 0.028792% +buckets with 11 minimizer positions = 0.0231025% +buckets with 12 minimizer positions = 0.0188753% +buckets with 13 minimizer positions = 0.0156219% +buckets with 14 minimizer positions = 0.0132274% +buckets with 15 minimizer positions = 0.0113949% +buckets with 16 minimizer positions = 0.00975391% +max_bucket_size = 32155 +=== step 7 (build sparse and skew index): 15.3008 [sec] (6.10647 [ns/kmer]) +=== total time: 110.371 [sec] (44.0482 [ns/kmer]) +total index size: 2513337668 [B] -- 2513.34 [MB] +SPACE BREAKDOWN: + mphf: 0.358618 [bits/kmer] (2.82923 [bits/key]) -- 4.46906% + strings_offsets: 0.153147 [bits/kmer] -- 1.9085% + control_codewords: 4.1829 [bits/kmer] -- 52.1269% + mid_load_buckets: 0.677399 [bits/kmer] -- 8.44169% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 1.06631e-05% + strings: 2.24545 [bits/kmer] -- 27.9826% + skew_index: 0.406938 [bits/kmer] -- 5.07122% + weights: 5.87466e-07 [bits/kmer] -- 7.32094e-06% + -------------- + total: 8.02445 [bits/kmer] +2026-03-12 18:26:47: saving data structure to disk... +2026-03-12 18:26:49: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash +2026-03-12 18:53:15: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.57635 [sec] (5.11719 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.24727 [sec] (1.39467 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 7.20951 [sec] (8.06153 [ns/kmer]) +num_minimizers = 102732110 +num_minimizer_positions = 141636222 +num_super_kmers = 141636222 +building minimizers MPHF with 16 threads and 35 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 5.4341 [sec] (6.0763 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337995005185294.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.63442 [sec] (5.18212 [ns/kmer]) +=== step 6 (merging minimizers tuples): 11.3099 [sec] (12.6465 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 7861886 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 13678608/102732110 (13.3148%) +num_buckets_in_skew_index 12419/102732110 (0.0120887%) +max_bucket_size 36153 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 50194340/141636222 (35.4389%) +num_minimizer_positions_of_buckets_in_skew_index 2400799/141636222 (1.69505%) +=== step 7.1 (build sparse index): 2.95269 [sec] (3.30164 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4287182 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2149393 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1411583 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1005341 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 686861 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 484582 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 658081 + partition = 7: num kmers in buckets of size > 8192 and <= 36153: 1039310 +num kmers in skew index = 11722333 (1.31077%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4287182 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4287182 kmers; bits/key = 2.81631 + built positions[0] for 4287182 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2149393 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2149393)... + built mphs[1] for 2149393 kmers; bits/key = 2.56007 + built positions[1] for 2149393 kmers; bits/key = 8.00017 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1411583 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1411583)... + built mphs[2] for 1411583 kmers; bits/key = 2.41771 + built positions[2] for 1411583 kmers; bits/key = 9.00023 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1005341 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1005341)... + built mphs[3] for 1005341 kmers; bits/key = 2.5612 + built positions[3] for 1005341 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 686861 + building MPHF with 16 threads and 1 partitions (avg. partition size = 686861)... + built mphs[4] for 686861 kmers; bits/key = 2.56217 + built positions[4] for 686861 kmers; bits/key = 11.0005 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 484582 + building MPHF with 16 threads and 1 partitions (avg. partition size = 484582)... + built mphs[5] for 484582 kmers; bits/key = 2.56336 + built positions[5] for 484582 kmers; bits/key = 12.0008 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 658081 + building MPHF with 16 threads and 1 partitions (avg. partition size = 658081)... + built mphs[6] for 658081 kmers; bits/key = 2.56234 + built positions[6] for 658081 kmers; bits/key = 13.0005 + lower = 8192; upper = 36153; num_bits_per_pos = 16; num_kmers_in_partition = 1039310 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1039310)... + built mphs[7] for 1039310 kmers; bits/key = 2.41825 + built positions[7] for 1039310 kmers; bits/key = 16.0003 +=== step 7.2 (build skew index): 4.63849 [sec] (5.18667 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 86.6731% +buckets with 2 minimizer positions = 7.6528% +buckets with 3 minimizer positions = 2.21988% +buckets with 4 minimizer positions = 1.00354% +buckets with 5 minimizer positions = 0.580156% +buckets with 6 minimizer positions = 0.386814% +buckets with 7 minimizer positions = 0.282687% +buckets with 8 minimizer positions = 0.219164% +buckets with 9 minimizer positions = 0.176159% +buckets with 10 minimizer positions = 0.142793% +buckets with 11 minimizer positions = 0.11659% +buckets with 12 minimizer positions = 0.0952847% +buckets with 13 minimizer positions = 0.0772757% +buckets with 14 minimizer positions = 0.0620877% +buckets with 15 minimizer positions = 0.049522% +buckets with 16 minimizer positions = 0.0397033% +max_bucket_size = 36153 +=== step 7 (build sparse and skew index): 7.9425 [sec] (8.88115 [ns/kmer]) +=== total time: 42.354 [sec] (47.3594 [ns/kmer]) +total index size: 1053150108 [B] -- 1053.15 [MB] +SPACE BREAKDOWN: + mphf: 0.329663 [bits/kmer] (2.86981 [bits/key]) -- 3.49928% + strings_offsets: 0.333373 [bits/kmer] -- 3.53865% + control_codewords: 3.67594 [bits/kmer] -- 39.019% + mid_load_buckets: 1.73992 [bits/kmer] -- 18.4687% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.54475e-05% + strings: 3.10303 [bits/kmer] -- 32.9378% + skew_index: 0.238968 [bits/kmer] -- 2.53657% + weights: 1.64596e-06 [bits/kmer] -- 1.74714e-05% + -------------- + total: 9.42089 [bits/kmer] +2026-03-12 18:53:57: saving data structure to disk... +2026-03-12 18:53:57: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.time.log new file mode 100644 index 0000000..7d657a2 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m19/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m19.sshash" + User time (seconds): 236.93 + System time (seconds): 39.09 + Percent of CPU this job got: 246% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:51.75 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 13897396 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 12168626 + Voluntary context switches: 27271 + Involuntary context switches: 6731 + Swaps: 0 + File system inputs: 144 + File system outputs: 54698600 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m19.sshash" + User time (seconds): 68.30 + System time (seconds): 14.93 + Percent of CPU this job got: 193% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:42.92 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5526812 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4679840 + Voluntary context switches: 2133 + Involuntary context switches: 1684 + Swaps: 0 + File system inputs: 56 + File system outputs: 20636448 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.json new file mode 100644 index 0000000..976a6ec --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "551.128859", "negative lookup (avg_nanosec_per_kmer)": "418.789076", "access (avg_nanosec_per_kmer)": "354.496322", "iterator (avg_nanosec_per_kmer)": "2.474745"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "556.774630", "negative lookup (avg_nanosec_per_kmer)": "418.498309", "access (avg_nanosec_per_kmer)": "352.833716", "iterator (avg_nanosec_per_kmer)": "2.504248"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "559.606403", "negative lookup (avg_nanosec_per_kmer)": "423.032092", "access (avg_nanosec_per_kmer)": "351.742066", "iterator (avg_nanosec_per_kmer)": "2.499182"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "611.813443", "negative lookup (avg_nanosec_per_kmer)": "394.497997", "access (avg_nanosec_per_kmer)": "355.583848", "iterator (avg_nanosec_per_kmer)": "2.437596"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "611.361809", "negative lookup (avg_nanosec_per_kmer)": "396.069068", "access (avg_nanosec_per_kmer)": "369.328967", "iterator (avg_nanosec_per_kmer)": "2.401815"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "613.041295", "negative lookup (avg_nanosec_per_kmer)": "397.495810", "access (avg_nanosec_per_kmer)": "357.644905", "iterator (avg_nanosec_per_kmer)": "2.401856"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.log new file mode 100644 index 0000000..76f3105 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 551.129 +negative lookup (avg_nanosec_per_kmer) 418.789 +access (avg_nanosec_per_kmer) = 354.496 +iterator (avg_nanosec_per_kmer) = 2.47474 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 556.775 +negative lookup (avg_nanosec_per_kmer) 418.498 +access (avg_nanosec_per_kmer) = 352.834 +iterator (avg_nanosec_per_kmer) = 2.50425 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 559.606 +negative lookup (avg_nanosec_per_kmer) 423.032 +access (avg_nanosec_per_kmer) = 351.742 +iterator (avg_nanosec_per_kmer) = 2.49918 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 611.813 +negative lookup (avg_nanosec_per_kmer) 394.498 +access (avg_nanosec_per_kmer) = 355.584 +iterator (avg_nanosec_per_kmer) = 2.4376 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 611.362 +negative lookup (avg_nanosec_per_kmer) 396.069 +access (avg_nanosec_per_kmer) = 369.329 +iterator (avg_nanosec_per_kmer) = 2.40182 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 613.041 +negative lookup (avg_nanosec_per_kmer) 397.496 +access (avg_nanosec_per_kmer) = 357.645 +iterator (avg_nanosec_per_kmer) = 2.40186 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.json new file mode 100644 index 0000000..3eeeaa0 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7252785", "step 2 (compute minimizer tuples)": "5006399", "step 3 (merging minimizer tuples)": "20215350", "step 4 (build mphf)": "22850764", "step 5 (replacing minimizer values with MPHF hashes)": "18923473", "step 6 (merging minimizers tuples)": "72217212", "step 7.1 (build sparse index)": "6563845", "step 7.2 (build skew index)": "8799292", "step 7 (build sparse and skew index)": "16561668", "total_build_time_in_microsec": "163027651", "index_size_in_bytes": "3135788878", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4570196", "step 2 (compute minimizer tuples)": "2017657", "step 3 (merging minimizer tuples)": "10119591", "step 4 (build mphf)": "7029738", "step 5 (replacing minimizer values with MPHF hashes)": "6626365", "step 6 (merging minimizers tuples)": "23310083", "step 7.1 (build sparse index)": "4053517", "step 7.2 (build skew index)": "3747703", "step 7 (build sparse and skew index)": "8299154", "total_build_time_in_microsec": "61972784", "index_size_in_bytes": "1287008500", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.log new file mode 100644 index 0000000..53dbb82 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.log @@ -0,0 +1,301 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash --canonical +2026-03-12 18:32:45: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.25279 [sec] (2.89454 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.24.bin'... +=== step 2 (compute minimizer tuples): 5.0064 [sec] (1.99802 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 20.2154 [sec] (8.06781 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 16 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 22.8508 [sec] (9.11959 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.9235 [sec] (7.55223 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336765177462457.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 72.2172 [sec] (28.8214 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8750986 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 14266506/462224926 (3.08649%) +num_buckets_in_skew_index 60557/462224926 (0.0131012%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 53165974/511201278 (10.4002%) +num_minimizer_positions_of_buckets_in_skew_index 10137441/511201278 (1.98306%) +=== step 7.1 (build sparse index): 6.56384 [sec] (2.61959 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 14957205 + partition = 1: num kmers in buckets of size > 128 and <= 256: 10906495 + partition = 2: num kmers in buckets of size > 256 and <= 512: 7473094 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1593261 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 729770 + partition = 7: num kmers in buckets of size > 8192 and <= 22085: 506148 +num kmers in skew index = 43578595 (1.73919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 14957205 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[0] for 14957205 kmers; bits/key = 2.56583 + built positions[0] for 14957205 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10906495 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[1] for 10906495 kmers; bits/key = 2.61744 + built positions[1] for 10906495 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 7473094 kmers; bits/key = 2.65359 + built positions[2] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 4774535 kmers; bits/key = 2.75085 + built positions[3] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[4] for 2638087 kmers; bits/key = 2.55989 + built positions[4] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1593261 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1593261)... + built mphs[5] for 1593261 kmers; bits/key = 2.56041 + built positions[5] for 1593261 kmers; bits/key = 12.0002 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 729770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 729770)... + built mphs[6] for 729770 kmers; bits/key = 2.56195 + built positions[6] for 729770 kmers; bits/key = 13.0005 + lower = 8192; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 506148 + building MPHF with 16 threads and 1 partitions (avg. partition size = 506148)... + built mphs[7] for 506148 kmers; bits/key = 2.42048 + built positions[7] for 506148 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 8.79929 [sec] (3.51174 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 16.5617 [sec] (6.60965 [ns/kmer]) +=== total time: 163.028 [sec] (65.0633 [ns/kmer]) +total index size: 3135788878 [B] -- 3135.79 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.2262% + strings_offsets: 0.153147 [bits/kmer] -- 1.52966% + control_codewords: 6.08754 [bits/kmer] -- 60.8038% + mid_load_buckets: 0.678982 [bits/kmer] -- 6.78183% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 8.54649e-06% + strings: 2.24545 [bits/kmer] -- 22.4281% + skew_index: 0.323422 [bits/kmer] -- 3.23041% + weights: 5.87466e-07 [bits/kmer] -- 5.86774e-06% + -------------- + total: 10.0118 [bits/kmer] +2026-03-12 18:35:28: saving data structure to disk... +2026-03-12 18:35:29: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash --canonical +2026-03-12 18:57:06: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.5702 [sec] (5.1103 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.01766 [sec] (2.2561 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.1196 [sec] (11.3155 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 16 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.02974 [sec] (7.86052 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338226158264892.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.62636 [sec] (7.40947 [ns/kmer]) +=== step 6 (merging minimizers tuples): 23.3101 [sec] (26.0649 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10738776 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 18123968/143418843 (12.6371%) +num_buckets_in_skew_index 15172/143418843 (0.0105788%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 65664193/193511241 (33.933%) +num_minimizer_positions_of_buckets_in_skew_index 2567345/193511241 (1.32672%) +=== step 7.1 (build sparse index): 4.05352 [sec] (4.53256 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4240400 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1914946 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 475654 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 388509 + partition = 7: num kmers in buckets of size > 8192 and <= 30655: 459571 +num kmers in skew index = 9919638 (1.10919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4240400 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4240400 kmers; bits/key = 2.84282 + built positions[0] for 4240400 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1914946 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1914946)... + built mphs[1] for 1914946 kmers; bits/key = 2.56017 + built positions[1] for 1914946 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1106165)... + built mphs[2] for 1106165 kmers; bits/key = 2.56093 + built positions[2] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 16 threads and 1 partitions (avg. partition size = 771672)... + built mphs[3] for 771672 kmers; bits/key = 2.41893 + built positions[3] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 16 threads and 1 partitions (avg. partition size = 562721)... + built mphs[4] for 562721 kmers; bits/key = 2.56295 + built positions[4] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 475654 + building MPHF with 16 threads and 1 partitions (avg. partition size = 475654)... + built mphs[5] for 475654 kmers; bits/key = 2.56365 + built positions[5] for 475654 kmers; bits/key = 12.0008 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 388509 + building MPHF with 16 threads and 1 partitions (avg. partition size = 388509)... + built mphs[6] for 388509 kmers; bits/key = 2.42185 + built positions[6] for 388509 kmers; bits/key = 13.0008 + lower = 8192; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 459571 + building MPHF with 16 threads and 1 partitions (avg. partition size = 459571)... + built mphs[7] for 459571 kmers; bits/key = 2.4208 + built positions[7] for 459571 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 3.7477 [sec] (4.19061 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 8.29915 [sec] (9.27995 [ns/kmer]) +=== total time: 61.9728 [sec] (69.2968 [ns/kmer]) +total index size: 1287008500 [B] -- 1287.01 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.93723% + strings_offsets: 0.333373 [bits/kmer] -- 2.89565% + control_codewords: 5.13178 [bits/kmer] -- 44.5743% + mid_load_buckets: 2.27616 [bits/kmer] -- 19.7706% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.08235e-05% + strings: 3.10303 [bits/kmer] -- 26.9527% + skew_index: 0.215227 [bits/kmer] -- 1.86944% + weights: 1.64596e-06 [bits/kmer] -- 1.42967e-05% + -------------- + total: 11.5129 [bits/kmer] +2026-03-12 18:58:08: saving data structure to disk... +2026-03-12 18:58:08: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.time.log new file mode 100644 index 0000000..031e98e --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.canon.sshash --canonical" + User time (seconds): 368.43 + System time (seconds): 59.41 + Percent of CPU this job got: 259% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:44.74 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18326948 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 16196904 + Voluntary context switches: 42649 + Involuntary context switches: 24771 + Swaps: 0 + File system inputs: 192 + File system outputs: 97182608 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.canon.sshash --canonical" + User time (seconds): 96.80 + System time (seconds): 20.18 + Percent of CPU this job got: 186% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:02.65 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7641728 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6436612 + Voluntary context switches: 2861 + Involuntary context switches: 2121 + Swaps: 0 + File system inputs: 88 + File system outputs: 28721344 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.json new file mode 100644 index 0000000..c8e48b3 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "753.365088", "negative lookup (avg_nanosec_per_kmer)": "795.381957", "access (avg_nanosec_per_kmer)": "350.217917", "iterator (avg_nanosec_per_kmer)": "2.553912"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "749.784395", "negative lookup (avg_nanosec_per_kmer)": "784.864902", "access (avg_nanosec_per_kmer)": "348.683511", "iterator (avg_nanosec_per_kmer)": "2.483274"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "754.121198", "negative lookup (avg_nanosec_per_kmer)": "792.784862", "access (avg_nanosec_per_kmer)": "351.856995", "iterator (avg_nanosec_per_kmer)": "2.466763"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "780.019256", "negative lookup (avg_nanosec_per_kmer)": "739.267550", "access (avg_nanosec_per_kmer)": "356.777157", "iterator (avg_nanosec_per_kmer)": "2.477555"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "779.711311", "negative lookup (avg_nanosec_per_kmer)": "745.234314", "access (avg_nanosec_per_kmer)": "361.893511", "iterator (avg_nanosec_per_kmer)": "2.409225"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "780.966669", "negative lookup (avg_nanosec_per_kmer)": "743.829468", "access (avg_nanosec_per_kmer)": "366.770538", "iterator (avg_nanosec_per_kmer)": "2.400553"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.log new file mode 100644 index 0000000..e2be067 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 753.365 +negative lookup (avg_nanosec_per_kmer) 795.382 +access (avg_nanosec_per_kmer) = 350.218 +iterator (avg_nanosec_per_kmer) = 2.55391 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 749.784 +negative lookup (avg_nanosec_per_kmer) 784.865 +access (avg_nanosec_per_kmer) = 348.684 +iterator (avg_nanosec_per_kmer) = 2.48327 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 754.121 +negative lookup (avg_nanosec_per_kmer) 792.785 +access (avg_nanosec_per_kmer) = 351.857 +iterator (avg_nanosec_per_kmer) = 2.46676 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 780.019 +negative lookup (avg_nanosec_per_kmer) 739.268 +access (avg_nanosec_per_kmer) = 356.777 +iterator (avg_nanosec_per_kmer) = 2.47755 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 779.711 +negative lookup (avg_nanosec_per_kmer) 745.234 +access (avg_nanosec_per_kmer) = 361.894 +iterator (avg_nanosec_per_kmer) = 2.40923 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 780.967 +negative lookup (avg_nanosec_per_kmer) 743.829 +access (avg_nanosec_per_kmer) = 366.771 +iterator (avg_nanosec_per_kmer) = 2.40055 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.json new file mode 100644 index 0000000..dadc3b6 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7266521", "step 2 (compute minimizer tuples)": "3486098", "step 3 (merging minimizer tuples)": "14959750", "step 4 (build mphf)": "18866776", "step 5 (replacing minimizer values with MPHF hashes)": "15105642", "step 6 (merging minimizers tuples)": "54692826", "step 7.1 (build sparse index)": "4947797", "step 7.2 (build skew index)": "6915067", "step 7 (build sparse and skew index)": "12760128", "total_build_time_in_microsec": "127137741", "index_size_in_bytes": "2716053224", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4565477", "step 2 (compute minimizer tuples)": "1378903", "step 3 (merging minimizer tuples)": "8068742", "step 4 (build mphf)": "6235786", "step 5 (replacing minimizer values with MPHF hashes)": "5423750", "step 6 (merging minimizers tuples)": "12841911", "step 7.1 (build sparse index)": "3122647", "step 7.2 (build skew index)": "3522428", "step 7 (build sparse and skew index)": "7035345", "total_build_time_in_microsec": "45549914", "index_size_in_bytes": "1137030140", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.log new file mode 100644 index 0000000..4abd5fe --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.log @@ -0,0 +1,286 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash +2026-03-12 18:30:01: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.26652 [sec] (2.90002 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.18.bin'... +=== step 2 (compute minimizer tuples): 3.4861 [sec] (1.39128 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 14.9597 [sec] (5.97034 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 16 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 18.8668 [sec] (7.52961 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336601702852795.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 15.1056 [sec] (6.02856 [ns/kmer]) +=== step 6 (merging minimizers tuples): 54.6928 [sec] (21.8275 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6542948 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10816752/386687326 (2.79729%) +num_buckets_in_skew_index 42372/386687326 (0.0109577%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 40422973/423023926 (9.55572%) +num_minimizer_positions_of_buckets_in_skew_index 6772751/423023926 (1.60103%) +=== step 7.1 (build sparse index): 4.9478 [sec] (1.97463 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 11807213 + partition = 1: num kmers in buckets of size > 128 and <= 256: 8389556 + partition = 2: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1008178 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 375770 + partition = 7: num kmers in buckets of size > 8192 and <= 22972: 207510 +num kmers in skew index = 32063746 (1.27964%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 11807213 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[0] for 11807213 kmers; bits/key = 2.55841 + built positions[0] for 11807213 kmers; bits/key = 7.00003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8389556 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 8389556 kmers; bits/key = 2.61352 + built positions[1] for 8389556 kmers; bits/key = 8.00004 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 5343660 kmers; bits/key = 2.66261 + built positions[2] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3076413 kmers; bits/key = 3.34298 + built positions[3] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[4] for 1855446 kmers; bits/key = 2.56022 + built positions[4] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1008178 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1008178)... + built mphs[5] for 1008178 kmers; bits/key = 2.41829 + built positions[5] for 1008178 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 375770 + building MPHF with 16 threads and 1 partitions (avg. partition size = 375770)... + built mphs[6] for 375770 kmers; bits/key = 2.42182 + built positions[6] for 375770 kmers; bits/key = 13.001 + lower = 8192; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 207510 + building MPHF with 16 threads and 1 partitions (avg. partition size = 207510)... + built mphs[7] for 207510 kmers; bits/key = 2.42618 + built positions[7] for 207510 kmers; bits/key = 15.0018 +=== step 7.2 (build skew index): 6.91507 [sec] (2.75976 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 12.7601 [sec] (5.09248 [ns/kmer]) +=== total time: 127.138 [sec] (50.7398 [ns/kmer]) +total index size: 2716053224 [B] -- 2716.05 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 5.03015% + strings_offsets: 0.153147 [bits/kmer] -- 1.76606% + control_codewords: 5.0927 [bits/kmer] -- 58.7281% + mid_load_buckets: 0.516242 [bits/kmer] -- 5.95319% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 9.86726e-06% + strings: 2.24545 [bits/kmer] -- 25.8941% + skew_index: 0.227926 [bits/kmer] -- 2.6284% + weights: 5.87466e-07 [bits/kmer] -- 6.77454e-06% + -------------- + total: 8.67167 [bits/kmer] +2026-03-12 18:32:08: saving data structure to disk... +2026-03-12 18:32:10: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash +2026-03-12 18:55:47: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.56548 [sec] (5.10503 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.3789 [sec] (1.54186 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.06874 [sec] (9.02231 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 16 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.23579 [sec] (6.97273 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338147486625816.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.42375 [sec] (6.06473 [ns/kmer]) +=== step 6 (merging minimizers tuples): 12.8419 [sec] (14.3596 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8383084 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 14059268/126246665 (11.1363%) +num_buckets_in_skew_index 8266/126246665 (0.0065475%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 48164669/162006751 (29.73%) +num_minimizer_positions_of_buckets_in_skew_index 1662951/162006751 (1.02647%) +=== step 7.1 (build sparse index): 3.12265 [sec] (3.49168 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 2254325 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1183762 + partition = 2: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 373731 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 338406 + partition = 7: num kmers in buckets of size > 8192 and <= 36894: 388502 +num kmers in skew index = 6466768 (0.723101%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2254325 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2254325)... + built mphs[0] for 2254325 kmers; bits/key = 2.56001 + built positions[0] for 2254325 kmers; bits/key = 7.00015 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1183762 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1183762)... + built mphs[1] for 1183762 kmers; bits/key = 2.56081 + built positions[1] for 1183762 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 16 threads and 1 partitions (avg. partition size = 885561)... + built mphs[2] for 885561 kmers; bits/key = 2.56147 + built positions[2] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 16 threads and 1 partitions (avg. partition size = 591648)... + built mphs[3] for 591648 kmers; bits/key = 2.56263 + built positions[3] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 16 threads and 1 partitions (avg. partition size = 450833)... + built mphs[4] for 450833 kmers; bits/key = 2.42098 + built positions[4] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 373731 + building MPHF with 16 threads and 1 partitions (avg. partition size = 373731)... + built mphs[5] for 373731 kmers; bits/key = 2.56484 + built positions[5] for 373731 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 338406 + building MPHF with 16 threads and 1 partitions (avg. partition size = 338406)... + built mphs[6] for 338406 kmers; bits/key = 2.4226 + built positions[6] for 338406 kmers; bits/key = 13.001 + lower = 8192; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 388502 + building MPHF with 16 threads and 1 partitions (avg. partition size = 388502)... + built mphs[7] for 388502 kmers; bits/key = 2.56472 + built positions[7] for 388502 kmers; bits/key = 16.0009 +=== step 7.2 (build skew index): 3.52243 [sec] (3.93871 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 7.03535 [sec] (7.86678 [ns/kmer]) +=== total time: 45.5499 [sec] (50.933 [ns/kmer]) +total index size: 1137030140 [B] -- 1137.03 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.98871% + strings_offsets: 0.333373 [bits/kmer] -- 3.2776% + control_codewords: 4.51733 [bits/kmer] -- 44.4128% + mid_load_buckets: 1.66956 [bits/kmer] -- 16.4145% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.35702e-05% + strings: 3.10303 [bits/kmer] -- 30.5079% + skew_index: 0.142237 [bits/kmer] -- 1.39842% + weights: 1.64596e-06 [bits/kmer] -- 1.61825e-05% + -------------- + total: 10.1712 [bits/kmer] +2026-03-12 18:56:33: saving data structure to disk... +2026-03-12 18:56:33: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.time.log new file mode 100644 index 0000000..0452654 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m21.sshash" + User time (seconds): 273.32 + System time (seconds): 44.92 + Percent of CPU this job got: 247% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:08.57 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 16076696 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 13585214 + Voluntary context switches: 36543 + Involuntary context switches: 8458 + Swaps: 0 + File system inputs: 128 + File system outputs: 63600688 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m21.sshash" + User time (seconds): 77.72 + System time (seconds): 17.10 + Percent of CPU this job got: 205% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:46.14 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6417584 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5329294 + Voluntary context switches: 2339 + Involuntary context switches: 1716 + Swaps: 0 + File system inputs: 48 + File system outputs: 23782096 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.json new file mode 100644 index 0000000..ece1751 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.json @@ -0,0 +1,3 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash", "k": "63", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "674.457950", "negative lookup (avg_nanosec_per_kmer)": "518.797740", "access (avg_nanosec_per_kmer)": "357.381628", "iterator (avg_nanosec_per_kmer)": "2.722747"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash", "k": "63", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "690.009828", "negative lookup (avg_nanosec_per_kmer)": "515.939769", "access (avg_nanosec_per_kmer)": "354.894255", "iterator (avg_nanosec_per_kmer)": "2.722457"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash", "k": "63", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "674.425657", "negative lookup (avg_nanosec_per_kmer)": "516.152959", "access (avg_nanosec_per_kmer)": "360.085246", "iterator (avg_nanosec_per_kmer)": "2.721920"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.log new file mode 100644 index 0000000..0112f0e --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-bench.log @@ -0,0 +1,15 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 674.458 +negative lookup (avg_nanosec_per_kmer) 518.798 +access (avg_nanosec_per_kmer) = 357.382 +iterator (avg_nanosec_per_kmer) = 2.72275 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 690.01 +negative lookup (avg_nanosec_per_kmer) 515.94 +access (avg_nanosec_per_kmer) = 354.894 +iterator (avg_nanosec_per_kmer) = 2.72246 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 674.426 +negative lookup (avg_nanosec_per_kmer) 516.153 +access (avg_nanosec_per_kmer) = 360.085 +iterator (avg_nanosec_per_kmer) = 2.72192 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.json new file mode 100644 index 0000000..108ee74 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.json @@ -0,0 +1 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "21", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6815431", "step 2 (compute minimizer tuples)": "3405167", "step 3 (merging minimizer tuples)": "7572185", "step 4 (build mphf)": "6330933", "step 5 (replacing minimizer values with MPHF hashes)": "5525068", "step 6 (merging minimizers tuples)": "16635932", "step 7.1 (build sparse index)": "2191097", "step 7.2 (build skew index)": "29500327", "step 7 (build sparse and skew index)": "32072304", "total_build_time_in_microsec": "78357020", "index_size_in_bytes": "1872801034", "num_kmers": "2771316093"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.log new file mode 100644 index 0000000..3aa47a8 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.log @@ -0,0 +1,129 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash --canonical +2026-03-12 19:07:07: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.81543 [sec] (2.45928 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.40517 [sec] (1.22872 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 7.57219 [sec] (2.73234 [ns/kmer]) +num_minimizers = 129507446 +num_minimizer_positions = 157059644 +num_super_kmers = 165382382 +building minimizers MPHF with 16 threads and 44 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.33093 [sec] (2.28445 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338827510174728.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.52507 [sec] (1.99366 [ns/kmer]) +=== step 6 (merging minimizers tuples): 16.6359 [sec] (6.0029 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2886574 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4754115/129507446 (3.67092%) +num_buckets_in_skew_index 41370/129507446 (0.0319441%) +max_bucket_size 110689 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 18643203/157059644 (11.8701%) +num_minimizer_positions_of_buckets_in_skew_index 13704480/157059644 (8.72565%) +=== step 7.1 (build sparse index): 2.1911 [sec] (0.790634 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 33994966 + partition = 1: num kmers in buckets of size > 128 and <= 256: 31223219 + partition = 2: num kmers in buckets of size > 256 and <= 512: 29108949 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 26414088 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 24581219 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 22774080 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 20192217 + partition = 7: num kmers in buckets of size > 8192 and <= 110689: 41776846 +num kmers in skew index = 230065584 (8.30167%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 33994966 + building MPHF with 16 threads and 12 partitions (avg. partition size = 3000000)... + built mphs[0] for 33994966 kmers; bits/key = 2.57265 + built positions[0] for 33994966 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 31223219 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[1] for 31223219 kmers; bits/key = 2.53069 + built positions[1] for 31223219 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 29108949 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[2] for 29108949 kmers; bits/key = 2.56642 + built positions[2] for 29108949 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 26414088 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[3] for 26414088 kmers; bits/key = 2.54233 + built positions[3] for 26414088 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 24581219 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[4] for 24581219 kmers; bits/key = 2.61371 + built positions[4] for 24581219 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 22774080 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[5] for 22774080 kmers; bits/key = 2.54336 + built positions[5] for 22774080 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 20192217 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[6] for 20192217 kmers; bits/key = 2.56054 + built positions[6] for 20192217 kmers; bits/key = 13 + lower = 8192; upper = 110689; num_bits_per_pos = 17; num_kmers_in_partition = 41776846 + building MPHF with 16 threads and 14 partitions (avg. partition size = 3000000)... + built mphs[7] for 41776846 kmers; bits/key = 2.57114 + built positions[7] for 41776846 kmers; bits/key = 17 +=== step 7.2 (build skew index): 29.5003 [sec] (10.6449 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.2971% +buckets with 2 minimizer positions = 2.22889% +buckets with 3 minimizer positions = 0.571317% +buckets with 4 minimizer positions = 0.254619% +buckets with 5 minimizer positions = 0.14466% +buckets with 6 minimizer positions = 0.0925453% +buckets with 7 minimizer positions = 0.063984% +buckets with 8 minimizer positions = 0.0473517% +buckets with 9 minimizer positions = 0.0361894% +buckets with 10 minimizer positions = 0.0281482% +buckets with 11 minimizer positions = 0.0230581% +buckets with 12 minimizer positions = 0.0188568% +buckets with 13 minimizer positions = 0.0158169% +buckets with 14 minimizer positions = 0.0135251% +buckets with 15 minimizer positions = 0.0117931% +buckets with 16 minimizer positions = 0.010106% +max_bucket_size = 110689 +=== step 7 (build sparse and skew index): 32.0723 [sec] (11.573 [ns/kmer]) +=== total time: 78.357 [sec] (28.2743 [ns/kmer]) +total index size: 1872801034 [B] -- 1872.8 [MB] +SPACE BREAKDOWN: + mphf: 0.134009 [bits/kmer] (2.86765 [bits/key]) -- 2.47879% + strings_offsets: 0.11255 [bits/kmer] -- 2.08186% + control_codewords: 1.54214 [bits/kmer] -- 28.5251% + mid_load_buckets: 0.215271 [bits/kmer] -- 3.98189% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.43101e-05% + strings: 2.11826 [bits/kmer] -- 39.1816% + skew_index: 1.28402 [bits/kmer] -- 23.7507% + weights: 5.31156e-07 [bits/kmer] -- 9.82486e-06% + -------------- + total: 5.40624 [bits/kmer] +2026-03-12 19:08:25: saving data structure to disk... +2026-03-12 19:08:26: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.time.log new file mode 100644 index 0000000..775b5f7 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/canon-build.time.log @@ -0,0 +1,23 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.canon.sshash --canonical" + User time (seconds): 224.79 + System time (seconds): 22.76 + Percent of CPU this job got: 311% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:19.39 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6816188 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 8896898 + Voluntary context switches: 2531 + Involuntary context switches: 3059 + Swaps: 0 + File system inputs: 120 + File system outputs: 25689728 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.json new file mode 100644 index 0000000..0ce81e1 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.json @@ -0,0 +1,3 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash", "k": "63", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "860.602257", "negative lookup (avg_nanosec_per_kmer)": "868.903949", "access (avg_nanosec_per_kmer)": "357.791801", "iterator (avg_nanosec_per_kmer)": "2.719885"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash", "k": "63", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "862.913410", "negative lookup (avg_nanosec_per_kmer)": "870.923787", "access (avg_nanosec_per_kmer)": "359.358708", "iterator (avg_nanosec_per_kmer)": "2.715509"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash", "k": "63", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "851.101473", "negative lookup (avg_nanosec_per_kmer)": "862.228716", "access (avg_nanosec_per_kmer)": "355.765814", "iterator (avg_nanosec_per_kmer)": "2.863297"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.log new file mode 100644 index 0000000..94ccd0f --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-bench.log @@ -0,0 +1,15 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 860.602 +negative lookup (avg_nanosec_per_kmer) 868.904 +access (avg_nanosec_per_kmer) = 357.792 +iterator (avg_nanosec_per_kmer) = 2.71989 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 862.913 +negative lookup (avg_nanosec_per_kmer) 870.924 +access (avg_nanosec_per_kmer) = 359.359 +iterator (avg_nanosec_per_kmer) = 2.71551 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash +positive lookup (avg_nanosec_per_kmer) = 851.101 +negative lookup (avg_nanosec_per_kmer) 862.229 +access (avg_nanosec_per_kmer) = 355.766 +iterator (avg_nanosec_per_kmer) = 2.8633 diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.json new file mode 100644 index 0000000..3498d9c --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.json @@ -0,0 +1 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "21", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6850314", "step 2 (compute minimizer tuples)": "1987350", "step 3 (merging minimizer tuples)": "5723893", "step 4 (build mphf)": "5477694", "step 5 (replacing minimizer values with MPHF hashes)": "4215445", "step 6 (merging minimizers tuples)": "10240536", "step 7.1 (build sparse index)": "1600471", "step 7.2 (build skew index)": "24549211", "step 7 (build sparse and skew index)": "26429200", "total_build_time_in_microsec": "60924432", "index_size_in_bytes": "1688062682", "num_kmers": "2771316093"} diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.log new file mode 100644 index 0000000..9500fc3 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.log @@ -0,0 +1,129 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash +2026-03-12 19:05:28: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.85031 [sec] (2.47186 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.98735 [sec] (0.717114 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.72389 [sec] (2.06541 [ns/kmer]) +num_minimizers = 106331239 +num_minimizer_positions = 127331421 +num_super_kmers = 127331421 +building minimizers MPHF with 16 threads and 36 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 5.47769 [sec] (1.97657 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338728756718347.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.21544 [sec] (1.5211 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.2405 [sec] (3.69519 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2104735 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3521982/106331239 (3.31227%) +num_buckets_in_skew_index 32333/106331239 (0.0304078%) +max_bucket_size 77996 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 14180976/127331421 (11.1371%) +num_minimizer_positions_of_buckets_in_skew_index 10373521/127331421 (8.14687%) +=== step 7.1 (build sparse index): 1.60047 [sec] (0.577513 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 31571106 + partition = 1: num kmers in buckets of size > 128 and <= 256: 29375504 + partition = 2: num kmers in buckets of size > 256 and <= 512: 27071907 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 25512138 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 24326601 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 21469394 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 13563325 + partition = 7: num kmers in buckets of size > 8192 and <= 77996: 31736500 +num kmers in skew index = 204626475 (7.38373%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 31571106 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 31571106 kmers; bits/key = 2.52096 + built positions[0] for 31571106 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 29375504 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[1] for 29375504 kmers; bits/key = 2.54691 + built positions[1] for 29375504 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 27071907 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[2] for 27071907 kmers; bits/key = 2.63322 + built positions[2] for 27071907 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 25512138 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[3] for 25512138 kmers; bits/key = 2.53352 + built positions[3] for 25512138 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 24326601 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[4] for 24326601 kmers; bits/key = 2.63671 + built positions[4] for 24326601 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 21469394 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[5] for 21469394 kmers; bits/key = 2.65266 + built positions[5] for 21469394 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 13563325 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[6] for 13563325 kmers; bits/key = 2.62878 + built positions[6] for 13563325 kmers; bits/key = 13 + lower = 8192; upper = 77996; num_bits_per_pos = 17; num_kmers_in_partition = 31736500 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[7] for 31736500 kmers; bits/key = 2.53699 + built positions[7] for 31736500 kmers; bits/key = 17 +=== step 7.2 (build skew index): 24.5492 [sec] (8.85832 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.6573% +buckets with 2 minimizer positions = 1.97941% +buckets with 3 minimizer positions = 0.515326% +buckets with 4 minimizer positions = 0.23427% +buckets with 5 minimizer positions = 0.133983% +buckets with 6 minimizer positions = 0.0865522% +buckets with 7 minimizer positions = 0.0604065% +buckets with 8 minimizer positions = 0.0443896% +buckets with 9 minimizer positions = 0.0339768% +buckets with 10 minimizer positions = 0.0270212% +buckets with 11 minimizer positions = 0.022397% +buckets with 12 minimizer positions = 0.0181461% +buckets with 13 minimizer positions = 0.0153069% +buckets with 14 minimizer positions = 0.0132369% +buckets with 15 minimizer positions = 0.0111839% +buckets with 16 minimizer positions = 0.00982872% +max_bucket_size = 77996 +=== step 7 (build sparse and skew index): 26.4292 [sec] (9.5367 [ns/kmer]) +=== total time: 60.9244 [sec] (21.9839 [ns/kmer]) +total index size: 1688062682 [B] -- 1688.06 [MB] +SPACE BREAKDOWN: + mphf: 0.109739 [bits/kmer] (2.86014 [bits/key]) -- 2.252% + strings_offsets: 0.11255 [bits/kmer] -- 2.30969% + control_codewords: 1.26616 [bits/kmer] -- 25.9834% + mid_load_buckets: 0.163746 [bits/kmer] -- 3.3603% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.58762e-05% + strings: 2.11826 [bits/kmer] -- 43.4696% + skew_index: 1.1025 [bits/kmer] -- 22.6249% + weights: 5.31156e-07 [bits/kmer] -- 1.09001e-05% + -------------- + total: 4.87296 [bits/kmer] +2026-03-12 19:06:29: saving data structure to disk... +2026-03-12 19:06:30: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.time.log new file mode 100644 index 0000000..e5e79cb --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m21/k63/regular-build.time.log @@ -0,0 +1,23 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 21 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m21.sshash" + User time (seconds): 174.08 + System time (seconds): 18.30 + Percent of CPU this job got: 310% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:01.87 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5585036 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7133732 + Voluntary context switches: 1974 + Involuntary context switches: 2698 + Swaps: 0 + File system inputs: 56 + File system outputs: 20493328 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.json new file mode 100644 index 0000000..5186d55 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash", "k": "31", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "550.529554", "negative lookup (avg_nanosec_per_kmer)": "424.144137", "access (avg_nanosec_per_kmer)": "350.301719", "iterator (avg_nanosec_per_kmer)": "2.466296"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash", "k": "31", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "544.184844", "negative lookup (avg_nanosec_per_kmer)": "423.209266", "access (avg_nanosec_per_kmer)": "352.340390", "iterator (avg_nanosec_per_kmer)": "2.465803"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash", "k": "31", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "556.737126", "negative lookup (avg_nanosec_per_kmer)": "428.886509", "access (avg_nanosec_per_kmer)": "352.247635", "iterator (avg_nanosec_per_kmer)": "2.484417"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash", "k": "31", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "583.310497", "negative lookup (avg_nanosec_per_kmer)": "401.439151", "access (avg_nanosec_per_kmer)": "358.163119", "iterator (avg_nanosec_per_kmer)": "2.416260"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash", "k": "31", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "591.679675", "negative lookup (avg_nanosec_per_kmer)": "403.837786", "access (avg_nanosec_per_kmer)": "363.801686", "iterator (avg_nanosec_per_kmer)": "2.431972"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash", "k": "31", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "586.426394", "negative lookup (avg_nanosec_per_kmer)": "403.693156", "access (avg_nanosec_per_kmer)": "360.365395", "iterator (avg_nanosec_per_kmer)": "2.389400"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.log new file mode 100644 index 0000000..e73c10d --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 550.53 +negative lookup (avg_nanosec_per_kmer) 424.144 +access (avg_nanosec_per_kmer) = 350.302 +iterator (avg_nanosec_per_kmer) = 2.4663 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 544.185 +negative lookup (avg_nanosec_per_kmer) 423.209 +access (avg_nanosec_per_kmer) = 352.34 +iterator (avg_nanosec_per_kmer) = 2.4658 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 556.737 +negative lookup (avg_nanosec_per_kmer) 428.887 +access (avg_nanosec_per_kmer) = 352.248 +iterator (avg_nanosec_per_kmer) = 2.48442 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 583.31 +negative lookup (avg_nanosec_per_kmer) 401.439 +access (avg_nanosec_per_kmer) = 358.163 +iterator (avg_nanosec_per_kmer) = 2.41626 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 591.68 +negative lookup (avg_nanosec_per_kmer) 403.838 +access (avg_nanosec_per_kmer) = 363.802 +iterator (avg_nanosec_per_kmer) = 2.43197 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 586.426 +negative lookup (avg_nanosec_per_kmer) 403.693 +access (avg_nanosec_per_kmer) = 360.365 +iterator (avg_nanosec_per_kmer) = 2.3894 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.json new file mode 100644 index 0000000..709b530 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "23", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7285985", "step 2 (compute minimizer tuples)": "5274838", "step 3 (merging minimizer tuples)": "25014851", "step 4 (build mphf)": "26711196", "step 5 (replacing minimizer values with MPHF hashes)": "21868237", "step 6 (merging minimizers tuples)": "90016065", "step 7.1 (build sparse index)": "7135514", "step 7.2 (build skew index)": "5727805", "step 7 (build sparse and skew index)": "14231669", "total_build_time_in_microsec": "190402841", "index_size_in_bytes": "3537818250", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "23", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4584197", "step 2 (compute minimizer tuples)": "2304430", "step 3 (merging minimizer tuples)": "8552991", "step 4 (build mphf)": "8852528", "step 5 (replacing minimizer values with MPHF hashes)": "7975767", "step 6 (merging minimizers tuples)": "31436171", "step 7.1 (build sparse index)": "4365431", "step 7.2 (build skew index)": "2525119", "step 7 (build sparse and skew index)": "7446389", "total_build_time_in_microsec": "71152473", "index_size_in_bytes": "1435128052", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.log new file mode 100644 index 0000000..31de2d8 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.log @@ -0,0 +1,312 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash --canonical +2026-03-12 18:39:01: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.28599 [sec] (2.90779 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.32.bin'... +=== step 2 (compute minimizer tuples): 5.27484 [sec] (2.10515 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 3 (merging minimizer tuples): 25.0149 [sec] (9.98326 [ns/kmer]) +num_minimizers = 566754902 +num_minimizer_positions = 608435333 +num_super_kmers = 630211143 +building minimizers MPHF with 16 threads and 189 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 26.7112 [sec] (10.6603 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 21.8682 [sec] (8.72747 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337141002244330.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 6 (merging minimizers tuples): 90.0161 [sec] (35.9248 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 7844210 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 13251640/566754902 (2.33816%) +num_buckets_in_skew_index 40446/566754902 (0.00713642%) +max_bucket_size 27103 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 49117556/608435333 (8.07277%) +num_minimizer_positions_of_buckets_in_skew_index 5854961/608435333 (0.962298%) +=== step 7.1 (build sparse index): 7.13551 [sec] (2.84774 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 8938157 + partition = 1: num kmers in buckets of size > 128 and <= 256: 5789871 + partition = 2: num kmers in buckets of size > 256 and <= 512: 3312157 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1575711 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 828497 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 384186 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 176549 + partition = 7: num kmers in buckets of size > 8192 and <= 27103: 137499 +num kmers in skew index = 21142627 (0.843788%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 8938157 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 8938157 kmers; bits/key = 2.57454 + built positions[0] for 8938157 kmers; bits/key = 7.00004 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 5789871 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 5789871 kmers; bits/key = 2.56351 + built positions[1] for 5789871 kmers; bits/key = 8.00006 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 3312157 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 3312157 kmers; bits/key = 3.13467 + built positions[2] for 3312157 kmers; bits/key = 9.00011 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1575711 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1575711)... + built mphs[3] for 1575711 kmers; bits/key = 2.56043 + built positions[3] for 1575711 kmers; bits/key = 10.0002 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 828497 + building MPHF with 16 threads and 1 partitions (avg. partition size = 828497)... + built mphs[4] for 828497 kmers; bits/key = 2.41874 + built positions[4] for 828497 kmers; bits/key = 11.0004 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 384186 + building MPHF with 16 threads and 1 partitions (avg. partition size = 384186)... + built mphs[5] for 384186 kmers; bits/key = 2.56455 + built positions[5] for 384186 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 176549 + building MPHF with 16 threads and 1 partitions (avg. partition size = 176549)... + built mphs[6] for 176549 kmers; bits/key = 2.42852 + built positions[6] for 176549 kmers; bits/key = 13.002 + lower = 8192; upper = 27103; num_bits_per_pos = 15; num_kmers_in_partition = 137499 + building MPHF with 16 threads and 1 partitions (avg. partition size = 137499)... + built mphs[7] for 137499 kmers; bits/key = 2.43144 + built positions[7] for 137499 kmers; bits/key = 15.0026 +=== step 7.2 (build skew index): 5.72781 [sec] (2.28593 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.6547% +buckets with 2 minimizer positions = 1.38406% +buckets with 3 minimizer positions = 0.390922% +buckets with 4 minimizer positions = 0.176082% +buckets with 5 minimizer positions = 0.0994901% +buckets with 6 minimizer positions = 0.0631894% +buckets with 7 minimizer positions = 0.0431804% +buckets with 8 minimizer positions = 0.0312684% +buckets with 9 minimizer positions = 0.0233692% +buckets with 10 minimizer positions = 0.0181061% +buckets with 11 minimizer positions = 0.0144685% +buckets with 12 minimizer positions = 0.0115958% +buckets with 13 minimizer positions = 0.00963662% +buckets with 14 minimizer positions = 0.00805375% +buckets with 15 minimizer positions = 0.00677347% +buckets with 16 minimizer positions = 0.0057951% +max_bucket_size = 27103 +=== step 7 (build sparse and skew index): 14.2317 [sec] (5.67977 [ns/kmer]) +=== total time: 190.403 [sec] (75.9885 [ns/kmer]) +total index size: 3537818250 [B] -- 3537.82 [MB] +SPACE BREAKDOWN: + mphf: 0.639279 [bits/kmer] (2.82631 [bits/key]) -- 5.65966% + strings_offsets: 0.153147 [bits/kmer] -- 1.35584% + control_codewords: 7.46421 [bits/kmer] -- 66.0821% + mid_load_buckets: 0.62728 [bits/kmer] -- 5.55343% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 7.57529e-06% + strings: 2.24545 [bits/kmer] -- 19.8794% + skew_index: 0.165991 [bits/kmer] -- 1.46955% + weights: 5.87466e-07 [bits/kmer] -- 5.20095e-06% + -------------- + total: 11.2954 [bits/kmer] +2026-03-12 18:42:11: saving data structure to disk... +2026-03-12 18:42:13: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash --canonical +2026-03-12 19:00:05: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.5842 [sec] (5.12596 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.16.bin'... +=== step 2 (compute minimizer tuples): 2.30443 [sec] (2.57677 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.55299 [sec] (9.56379 [ns/kmer]) +num_minimizers = 181201121 +num_minimizer_positions = 227847194 +num_super_kmers = 235345425 +building minimizers MPHF with 16 threads and 61 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 8.85253 [sec] (9.89872 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338405111820286.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 7.97577 [sec] (8.91835 [ns/kmer]) +=== step 6 (merging minimizers tuples): 31.4362 [sec] (35.1513 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 11936988 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 19219522/181201121 (10.6067%) +num_buckets_in_skew_index 9325/181201121 (0.00514622%) +max_bucket_size 10077 +log2_max_bucket_size 14 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 64349938/227847194 (28.2426%) +num_minimizer_positions_of_buckets_in_skew_index 1524982/227847194 (0.6693%) +=== step 7.1 (build sparse index): 4.36543 [sec] (4.88134 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 2006649 + partition = 1: num kmers in buckets of size > 128 and <= 256: 962223 + partition = 2: num kmers in buckets of size > 256 and <= 512: 637428 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 371459 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 366552 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 175532 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 140185 + partition = 7: num kmers in buckets of size > 8192 and <= 10077: 17772 +num kmers in skew index = 4677800 (0.523062%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2006649 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2006649)... + built mphs[0] for 2006649 kmers; bits/key = 2.56015 + built positions[0] for 2006649 kmers; bits/key = 7.00018 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 962223 + building MPHF with 16 threads and 1 partitions (avg. partition size = 962223)... + built mphs[1] for 962223 kmers; bits/key = 2.5612 + built positions[1] for 962223 kmers; bits/key = 8.00034 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 637428 + building MPHF with 16 threads and 1 partitions (avg. partition size = 637428)... + built mphs[2] for 637428 kmers; bits/key = 2.5624 + built positions[2] for 637428 kmers; bits/key = 9.00057 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 371459 + building MPHF with 16 threads and 1 partitions (avg. partition size = 371459)... + built mphs[3] for 371459 kmers; bits/key = 2.42197 + built positions[3] for 371459 kmers; bits/key = 10.001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 366552 + building MPHF with 16 threads and 1 partitions (avg. partition size = 366552)... + built mphs[4] for 366552 kmers; bits/key = 2.56487 + built positions[4] for 366552 kmers; bits/key = 11.001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 175532 + building MPHF with 16 threads and 1 partitions (avg. partition size = 175532)... + built mphs[5] for 175532 kmers; bits/key = 2.57111 + built positions[5] for 175532 kmers; bits/key = 12.0021 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 140185 + building MPHF with 16 threads and 1 partitions (avg. partition size = 140185)... + built mphs[6] for 140185 kmers; bits/key = 2.43107 + built positions[6] for 140185 kmers; bits/key = 13.0027 + lower = 8192; upper = 10077; num_bits_per_pos = 14; num_kmers_in_partition = 17772 + building MPHF with 16 threads and 1 partitions (avg. partition size = 17772)... + built mphs[7] for 17772 kmers; bits/key = 2.53883 + built positions[7] for 17772 kmers; bits/key = 14.0194 +=== step 7.2 (build skew index): 2.52512 [sec] (2.82354 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 89.3881% +buckets with 2 minimizer positions = 6.5877% +buckets with 3 minimizer positions = 1.74975% +buckets with 4 minimizer positions = 0.731186% +buckets with 5 minimizer positions = 0.394461% +buckets with 6 minimizer positions = 0.249639% +buckets with 7 minimizer positions = 0.174072% +buckets with 8 minimizer positions = 0.130963% +buckets with 9 minimizer positions = 0.103125% +buckets with 10 minimizer positions = 0.0833692% +buckets with 11 minimizer positions = 0.0686651% +buckets with 12 minimizer positions = 0.0573341% +buckets with 13 minimizer positions = 0.0477331% +buckets with 14 minimizer positions = 0.0393099% +buckets with 15 minimizer positions = 0.0322112% +buckets with 16 minimizer positions = 0.0260335% +max_bucket_size = 10077 +=== step 7 (build sparse and skew index): 7.44639 [sec] (8.32641 [ns/kmer]) +=== total time: 71.1525 [sec] (79.5613 [ns/kmer]) +total index size: 1435128052 [B] -- 1435.13 [MB] +SPACE BREAKDOWN: + mphf: 0.576902 [bits/kmer] (2.84727 [bits/key]) -- 4.49375% + strings_offsets: 0.333373 [bits/kmer] -- 2.59679% + control_codewords: 6.4837 [bits/kmer] -- 50.5045% + mid_load_buckets: 2.2306 [bits/kmer] -- 17.3752% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 1.86743e-05% + strings: 3.10303 [bits/kmer] -- 24.171% + skew_index: 0.110247 [bits/kmer] -- 0.858769% + weights: 1.64596e-06 [bits/kmer] -- 1.28212e-05% + -------------- + total: 12.8379 [bits/kmer] +2026-03-12 19:01:16: saving data structure to disk... +2026-03-12 19:01:16: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.time.log new file mode 100644 index 0000000..b06ba61 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.canon.sshash --canonical" + User time (seconds): 419.40 + System time (seconds): 67.82 + Percent of CPU this job got: 253% + Elapsed (wall clock) time (h:mm:ss or m:ss): 3:12.28 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 20218544 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 18281198 + Voluntary context switches: 50067 + Involuntary context switches: 22927 + Swaps: 0 + File system inputs: 184 + File system outputs: 115582936 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.canon.sshash --canonical" + User time (seconds): 111.55 + System time (seconds): 23.88 + Percent of CPU this job got: 188% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:11.92 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 8873208 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7478473 + Voluntary context switches: 4618 + Involuntary context switches: 2440 + Swaps: 0 + File system inputs: 88 + File system outputs: 34040352 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.json new file mode 100644 index 0000000..d7c61d7 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash", "k": "31", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "744.965377", "negative lookup (avg_nanosec_per_kmer)": "801.283831", "access (avg_nanosec_per_kmer)": "350.630928", "iterator (avg_nanosec_per_kmer)": "2.473226"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash", "k": "31", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "745.743526", "negative lookup (avg_nanosec_per_kmer)": "805.266435", "access (avg_nanosec_per_kmer)": "352.424480", "iterator (avg_nanosec_per_kmer)": "2.539748"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash", "k": "31", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "742.960087", "negative lookup (avg_nanosec_per_kmer)": "810.794927", "access (avg_nanosec_per_kmer)": "357.143960", "iterator (avg_nanosec_per_kmer)": "2.490872"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash", "k": "31", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "748.608657", "negative lookup (avg_nanosec_per_kmer)": "748.400305", "access (avg_nanosec_per_kmer)": "360.088998", "iterator (avg_nanosec_per_kmer)": "2.423180"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash", "k": "31", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "744.984374", "negative lookup (avg_nanosec_per_kmer)": "745.903256", "access (avg_nanosec_per_kmer)": "355.940049", "iterator (avg_nanosec_per_kmer)": "2.401252"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash", "k": "31", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "748.182094", "negative lookup (avg_nanosec_per_kmer)": "749.834049", "access (avg_nanosec_per_kmer)": "360.008162", "iterator (avg_nanosec_per_kmer)": "2.425332"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.log new file mode 100644 index 0000000..2954917 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 744.965 +negative lookup (avg_nanosec_per_kmer) 801.284 +access (avg_nanosec_per_kmer) = 350.631 +iterator (avg_nanosec_per_kmer) = 2.47323 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 745.744 +negative lookup (avg_nanosec_per_kmer) 805.266 +access (avg_nanosec_per_kmer) = 352.424 +iterator (avg_nanosec_per_kmer) = 2.53975 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 742.96 +negative lookup (avg_nanosec_per_kmer) 810.795 +access (avg_nanosec_per_kmer) = 357.144 +iterator (avg_nanosec_per_kmer) = 2.49087 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 748.609 +negative lookup (avg_nanosec_per_kmer) 748.4 +access (avg_nanosec_per_kmer) = 360.089 +iterator (avg_nanosec_per_kmer) = 2.42318 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 744.984 +negative lookup (avg_nanosec_per_kmer) 745.903 +access (avg_nanosec_per_kmer) = 355.94 +iterator (avg_nanosec_per_kmer) = 2.40125 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 748.182 +negative lookup (avg_nanosec_per_kmer) 749.834 +access (avg_nanosec_per_kmer) = 360.008 +iterator (avg_nanosec_per_kmer) = 2.42533 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.json new file mode 100644 index 0000000..a6b6a9f --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "23", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7276170", "step 2 (compute minimizer tuples)": "4390948", "step 3 (merging minimizer tuples)": "18464771", "step 4 (build mphf)": "23143925", "step 5 (replacing minimizer values with MPHF hashes)": "18124752", "step 6 (merging minimizers tuples)": "65243269", "step 7.1 (build sparse index)": "5516728", "step 7.2 (build skew index)": "3252369", "step 7 (build sparse and skew index)": "9894867", "total_build_time_in_microsec": "146538702", "index_size_in_bytes": "3064616298", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "23", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4581915", "step 2 (compute minimizer tuples)": "1558491", "step 3 (merging minimizer tuples)": "9337051", "step 4 (build mphf)": "8006547", "step 5 (replacing minimizer values with MPHF hashes)": "6701511", "step 6 (merging minimizers tuples)": "22688209", "step 7.1 (build sparse index)": "3368619", "step 7.2 (build skew index)": "1701978", "step 7 (build sparse and skew index)": "5536760", "total_build_time_in_microsec": "58410484", "index_size_in_bytes": "1262753438", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.log new file mode 100644 index 0000000..a5388e2 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.log @@ -0,0 +1,292 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash +2026-03-12 18:35:57: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.27617 [sec] (2.90387 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.21.bin'... +=== step 2 (compute minimizer tuples): 4.39095 [sec] (1.7524 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 18.4648 [sec] (7.36917 [ns/kmer]) +num_minimizers = 475581904 +num_minimizer_positions = 506249274 +num_super_kmers = 506249274 +building minimizers MPHF with 16 threads and 159 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 23.1439 [sec] (9.23659 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.1248 [sec] (7.23347 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773336957098264120.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 65.2433 [sec] (26.0382 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6152101 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10321744/475581904 (2.17034%) +num_buckets_in_skew_index 26252/475581904 (0.00551997%) +max_bucket_size 18871 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 37402432/506249274 (7.38815%) +num_minimizer_positions_of_buckets_in_skew_index 3612934/506249274 (0.713667%) +=== step 7.1 (build sparse index): 5.51673 [sec] (2.20169 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 6633538 + partition = 1: num kmers in buckets of size > 128 and <= 256: 3801973 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1789922 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1063216 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 458398 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 179505 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 58862 + partition = 7: num kmers in buckets of size > 8192 and <= 18871: 118689 +num kmers in skew index = 14104103 (0.562886%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 6633538 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 6633538 kmers; bits/key = 2.74291 + built positions[0] for 6633538 kmers; bits/key = 7.00006 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 3801973 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3801973 kmers; bits/key = 2.89717 + built positions[1] for 3801973 kmers; bits/key = 8.00009 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1789922 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1789922)... + built mphs[2] for 1789922 kmers; bits/key = 2.56028 + built positions[2] for 1789922 kmers; bits/key = 9.00019 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1063216 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1063216)... + built mphs[3] for 1063216 kmers; bits/key = 2.41822 + built positions[3] for 1063216 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 458398 + building MPHF with 16 threads and 1 partitions (avg. partition size = 458398)... + built mphs[4] for 458398 kmers; bits/key = 2.42092 + built positions[4] for 458398 kmers; bits/key = 11.0008 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 179505 + building MPHF with 16 threads and 1 partitions (avg. partition size = 179505)... + built mphs[5] for 179505 kmers; bits/key = 2.42828 + built positions[5] for 179505 kmers; bits/key = 12.0021 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 58862 + building MPHF with 16 threads and 1 partitions (avg. partition size = 58862)... + built mphs[6] for 58862 kmers; bits/key = 2.45238 + built positions[6] for 58862 kmers; bits/key = 13.0061 + lower = 8192; upper = 18871; num_bits_per_pos = 15; num_kmers_in_partition = 118689 + building MPHF with 16 threads and 1 partitions (avg. partition size = 118689)... + built mphs[7] for 118689 kmers; bits/key = 2.43473 + built positions[7] for 118689 kmers; bits/key = 15.0028 +=== step 7.2 (build skew index): 3.25237 [sec] (1.298 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.8241% +buckets with 2 minimizer positions = 1.29359% +buckets with 3 minimizer positions = 0.36348% +buckets with 4 minimizer positions = 0.164718% +buckets with 5 minimizer positions = 0.0925248% +buckets with 6 minimizer positions = 0.0581803% +buckets with 7 minimizer positions = 0.0395053% +buckets with 8 minimizer positions = 0.0282174% +buckets with 9 minimizer positions = 0.0210927% +buckets with 10 minimizer positions = 0.0161608% +buckets with 11 minimizer positions = 0.0127076% +buckets with 12 minimizer positions = 0.0102739% +buckets with 13 minimizer positions = 0.00836806% +buckets with 14 minimizer positions = 0.00698492% +buckets with 15 minimizer positions = 0.00590224% +buckets with 16 minimizer positions = 0.00493669% +max_bucket_size = 18871 +=== step 7 (build sparse and skew index): 9.89487 [sec] (3.94898 [ns/kmer]) +=== total time: 146.539 [sec] (58.4826 [ns/kmer]) +total index size: 3064616298 [B] -- 3064.62 [MB] +SPACE BREAKDOWN: + mphf: 0.538149 [bits/kmer] (2.83533 [bits/key]) -- 5.49999% + strings_offsets: 0.153147 [bits/kmer] -- 1.56519% + control_codewords: 6.26345 [bits/kmer] -- 64.0137% + mid_load_buckets: 0.477666 [bits/kmer] -- 4.88184% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 8.74498e-06% + strings: 2.24545 [bits/kmer] -- 22.949% + skew_index: 0.106675 [bits/kmer] -- 1.09024% + weights: 5.87466e-07 [bits/kmer] -- 6.00401e-06% + -------------- + total: 9.78455 [bits/kmer] +2026-03-12 18:38:23: saving data structure to disk... +2026-03-12 18:38:25: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash +2026-03-12 18:58:33: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.58192 [sec] (5.12341 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.55849 [sec] (1.74267 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 9.33705 [sec] (10.4405 [ns/kmer]) +num_minimizers = 158748620 +num_minimizer_positions = 191353440 +num_super_kmers = 191353440 +building minimizers MPHF with 16 threads and 53 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 8.00655 [sec] (8.95276 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338313771501400.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.70151 [sec] (7.4935 [ns/kmer]) +=== step 6 (merging minimizers tuples): 22.6882 [sec] (25.3695 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 9116811 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 14601368/158748620 (9.19779%) +num_buckets_in_skew_index 5516/158748620 (0.00347468%) +max_bucket_size 7859 +log2_max_bucket_size 13 +num_partitions in skew index 7 +num_minimizer_positions_of_buckets_larger_than_1 46185028/191353440 (24.136%) +num_minimizer_positions_of_buckets_in_skew_index 1026676/191353440 (0.536534%) +=== step 7.1 (build sparse index): 3.36862 [sec] (3.76672 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 1093146 + partition = 1: num kmers in buckets of size > 128 and <= 256: 730547 + partition = 2: num kmers in buckets of size > 256 and <= 512: 466286 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 374508 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 246983 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 176969 + partition = 6: num kmers in buckets of size > 4096 and <= 7859: 65371 +num kmers in skew index = 3153810 (0.352653%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 1093146 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1093146)... + built mphs[0] for 1093146 kmers; bits/key = 2.56102 + built positions[0] for 1093146 kmers; bits/key = 7.0003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 730547 + building MPHF with 16 threads and 1 partitions (avg. partition size = 730547)... + built mphs[1] for 730547 kmers; bits/key = 2.56198 + built positions[1] for 730547 kmers; bits/key = 8.00049 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 466286 + building MPHF with 16 threads and 1 partitions (avg. partition size = 466286)... + built mphs[2] for 466286 kmers; bits/key = 2.42087 + built positions[2] for 466286 kmers; bits/key = 9.00076 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 374508 + building MPHF with 16 threads and 1 partitions (avg. partition size = 374508)... + built mphs[3] for 374508 kmers; bits/key = 2.56469 + built positions[3] for 374508 kmers; bits/key = 10.0009 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 246983 + building MPHF with 16 threads and 1 partitions (avg. partition size = 246983)... + built mphs[4] for 246983 kmers; bits/key = 2.5675 + built positions[4] for 246983 kmers; bits/key = 11.0015 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 176969 + building MPHF with 16 threads and 1 partitions (avg. partition size = 176969)... + built mphs[5] for 176969 kmers; bits/key = 2.42854 + built positions[5] for 176969 kmers; bits/key = 12.0019 + lower = 4096; upper = 7859; num_bits_per_pos = 13; num_kmers_in_partition = 65371 + building MPHF with 16 threads and 1 partitions (avg. partition size = 65371)... + built mphs[6] for 65371 kmers; bits/key = 2.59124 + built positions[6] for 65371 kmers; bits/key = 13.0054 +=== step 7.2 (build skew index): 1.70198 [sec] (1.90312 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 90.7987% +buckets with 2 minimizer positions = 5.74292% +buckets with 3 minimizer positions = 1.48431% +buckets with 4 minimizer positions = 0.656331% +buckets with 5 minimizer positions = 0.383178% +buckets with 6 minimizer positions = 0.256369% +buckets with 7 minimizer positions = 0.181256% +buckets with 8 minimizer positions = 0.130823% +buckets with 9 minimizer positions = 0.0936229% +buckets with 10 minimizer positions = 0.066531% +buckets with 11 minimizer positions = 0.047162% +buckets with 12 minimizer positions = 0.0335197% +buckets with 13 minimizer positions = 0.024193% +buckets with 14 minimizer positions = 0.0177551% +buckets with 15 minimizer positions = 0.0133941% +buckets with 16 minimizer positions = 0.0103069% +max_bucket_size = 7859 +=== step 7 (build sparse and skew index): 5.53676 [sec] (6.1911 [ns/kmer]) +=== total time: 58.4105 [sec] (65.3135 [ns/kmer]) +total index size: 1262753438 [B] -- 1262.75 [MB] +SPACE BREAKDOWN: + mphf: 0.503369 [bits/kmer] (2.83573 [bits/key]) -- 4.45621% + strings_offsets: 0.333373 [bits/kmer] -- 2.95127% + control_codewords: 5.68031 [bits/kmer] -- 50.2865% + mid_load_buckets: 1.60094 [bits/kmer] -- 14.1728% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.12235e-05% + strings: 3.10303 [bits/kmer] -- 27.4705% + skew_index: 0.0748644 [bits/kmer] -- 0.662758% + weights: 1.64596e-06 [bits/kmer] -- 1.45713e-05% + -------------- + total: 11.2959 [bits/kmer] +2026-03-12 18:59:32: saving data structure to disk... +2026-03-12 18:59:32: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.time.log new file mode 100644 index 0000000..ee0eb3a --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m23.sshash" + User time (seconds): 339.59 + System time (seconds): 57.53 + Percent of CPU this job got: 267% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:28.22 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18362708 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 15606110 + Voluntary context switches: 41028 + Involuntary context switches: 18497 + Swaps: 0 + File system inputs: 184 + File system outputs: 94004976 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m23.sshash" + User time (seconds): 94.29 + System time (seconds): 20.33 + Percent of CPU this job got: 193% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:59.09 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7495252 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6219443 + Voluntary context switches: 2597 + Involuntary context switches: 2147 + Swaps: 0 + File system inputs: 80 + File system outputs: 28264856 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.json new file mode 100644 index 0000000..14f2f23 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "662.056868", "negative lookup (avg_nanosec_per_kmer)": "515.139536", "access (avg_nanosec_per_kmer)": "357.108168", "iterator (avg_nanosec_per_kmer)": "2.716595"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "664.989067", "negative lookup (avg_nanosec_per_kmer)": "513.092154", "access (avg_nanosec_per_kmer)": "359.399214", "iterator (avg_nanosec_per_kmer)": "2.735661"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "668.606795", "negative lookup (avg_nanosec_per_kmer)": "507.921720", "access (avg_nanosec_per_kmer)": "355.174846", "iterator (avg_nanosec_per_kmer)": "2.729945"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1041.561730", "negative lookup (avg_nanosec_per_kmer)": "509.870464", "access (avg_nanosec_per_kmer)": "407.896556", "iterator (avg_nanosec_per_kmer)": "2.823413"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1040.462314", "negative lookup (avg_nanosec_per_kmer)": "501.033373", "access (avg_nanosec_per_kmer)": "405.897587", "iterator (avg_nanosec_per_kmer)": "2.765083"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1031.887352", "negative lookup (avg_nanosec_per_kmer)": "495.961104", "access (avg_nanosec_per_kmer)": "404.229566", "iterator (avg_nanosec_per_kmer)": "2.763875"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.log new file mode 100644 index 0000000..6372547 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 662.057 +negative lookup (avg_nanosec_per_kmer) 515.14 +access (avg_nanosec_per_kmer) = 357.108 +iterator (avg_nanosec_per_kmer) = 2.71659 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 664.989 +negative lookup (avg_nanosec_per_kmer) 513.092 +access (avg_nanosec_per_kmer) = 359.399 +iterator (avg_nanosec_per_kmer) = 2.73566 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 668.607 +negative lookup (avg_nanosec_per_kmer) 507.922 +access (avg_nanosec_per_kmer) = 355.175 +iterator (avg_nanosec_per_kmer) = 2.72994 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1041.56 +negative lookup (avg_nanosec_per_kmer) 509.87 +access (avg_nanosec_per_kmer) = 407.897 +iterator (avg_nanosec_per_kmer) = 2.82341 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1040.46 +negative lookup (avg_nanosec_per_kmer) 501.033 +access (avg_nanosec_per_kmer) = 405.898 +iterator (avg_nanosec_per_kmer) = 2.76508 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1031.89 +negative lookup (avg_nanosec_per_kmer) 495.961 +access (avg_nanosec_per_kmer) = 404.23 +iterator (avg_nanosec_per_kmer) = 2.76388 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.json new file mode 100644 index 0000000..9eaddbf --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "23", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6843133", "step 2 (compute minimizer tuples)": "3440541", "step 3 (merging minimizer tuples)": "7906151", "step 4 (build mphf)": "6776815", "step 5 (replacing minimizer values with MPHF hashes)": "5827265", "step 6 (merging minimizers tuples)": "27621413", "step 7.1 (build sparse index)": "2188132", "step 7.2 (build skew index)": "23597500", "step 7 (build sparse and skew index)": "26172377", "total_build_time_in_microsec": "84587695", "index_size_in_bytes": "1840730772", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "23", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7195356", "step 2 (compute minimizer tuples)": "2408555", "step 3 (merging minimizer tuples)": "5658373", "step 4 (build mphf)": "2414871", "step 5 (replacing minimizer values with MPHF hashes)": "3343139", "step 6 (merging minimizers tuples)": "12511928", "step 7.1 (build sparse index)": "2667928", "step 7.2 (build skew index)": "43335697", "step 7 (build sparse and skew index)": "46313261", "total_build_time_in_microsec": "79845483", "index_size_in_bytes": "1542803956", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.log new file mode 100644 index 0000000..d80a9b3 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash --canonical +2026-03-12 19:10:39: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.84313 [sec] (2.46927 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.44054 [sec] (1.24148 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 7.90615 [sec] (2.85285 [ns/kmer]) +num_minimizers = 139797214 +num_minimizer_positions = 165033079 +num_super_kmers = 173839970 +building minimizers MPHF with 16 threads and 47 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.77682 [sec] (2.44534 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339039414260676.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.82726 [sec] (2.10271 [ns/kmer]) +=== step 6 (merging minimizers tuples): 27.6214 [sec] (9.96689 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2481409 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4236780/139797214 (3.03066%) +num_buckets_in_skew_index 38839/139797214 (0.0277824%) +max_bucket_size 114159 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 17160680/165033079 (10.3983%) +num_minimizer_positions_of_buckets_in_skew_index 12350804/165033079 (7.48384%) +=== step 7.1 (build sparse index): 2.18813 [sec] (0.789564 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 30095089 + partition = 1: num kmers in buckets of size > 128 and <= 256: 28268953 + partition = 2: num kmers in buckets of size > 256 and <= 512: 25570431 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 22401743 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 21728886 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 20291027 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 15634440 + partition = 7: num kmers in buckets of size > 8192 and <= 114159: 29999968 +num kmers in skew index = 193990537 (6.99994%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 30095089 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 30095089 kmers; bits/key = 2.60995 + built positions[0] for 30095089 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 28268953 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[1] for 28268953 kmers; bits/key = 2.55451 + built positions[1] for 28268953 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 25570431 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[2] for 25570431 kmers; bits/key = 2.56222 + built positions[2] for 25570431 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 22401743 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[3] for 22401743 kmers; bits/key = 2.57871 + built positions[3] for 22401743 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 21728886 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[4] for 21728886 kmers; bits/key = 2.62595 + built positions[4] for 21728886 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 20291027 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[5] for 20291027 kmers; bits/key = 2.5501 + built positions[5] for 20291027 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 15634440 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[6] for 15634440 kmers; bits/key = 2.66473 + built positions[6] for 15634440 kmers; bits/key = 13 + lower = 8192; upper = 114159; num_bits_per_pos = 17; num_kmers_in_partition = 29999968 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[7] for 29999968 kmers; bits/key = 2.5597 + built positions[7] for 29999968 kmers; bits/key = 17 +=== step 7.2 (build skew index): 23.5975 [sec] (8.51491 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9416% +buckets with 2 minimizer positions = 1.77501% +buckets with 3 minimizer positions = 0.493381% +buckets with 4 minimizer positions = 0.220854% +buckets with 5 minimizer positions = 0.126099% +buckets with 6 minimizer positions = 0.0804294% +buckets with 7 minimizer positions = 0.0563159% +buckets with 8 minimizer positions = 0.0415831% +buckets with 9 minimizer positions = 0.0319327% +buckets with 10 minimizer positions = 0.0249848% +buckets with 11 minimizer positions = 0.0202121% +buckets with 12 minimizer positions = 0.0167915% +buckets with 13 minimizer positions = 0.0140496% +buckets with 14 minimizer positions = 0.0118801% +buckets with 15 minimizer positions = 0.0102899% +buckets with 16 minimizer positions = 0.00901806% +max_bucket_size = 114159 +=== step 7 (build sparse and skew index): 26.1724 [sec] (9.44402 [ns/kmer]) +=== total time: 84.5877 [sec] (30.5226 [ns/kmer]) +total index size: 1840730772 [B] -- 1840.73 [MB] +SPACE BREAKDOWN: + mphf: 0.143567 [bits/kmer] (2.84605 [bits/key]) -- 2.70185% + strings_offsets: 0.11255 [bits/kmer] -- 2.11813% + control_codewords: 1.66466 [bits/kmer] -- 31.328% + mid_load_buckets: 0.198152 [bits/kmer] -- 3.7291% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.45594e-05% + strings: 2.11826 [bits/kmer] -- 39.8643% + skew_index: 1.07648 [bits/kmer] -- 20.2586% + weights: 5.31156e-07 [bits/kmer] -- 9.99603e-06% + -------------- + total: 5.31367 [bits/kmer] +2026-03-12 19:12:04: saving data structure to disk... +2026-03-12 19:12:04: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash --canonical +2026-03-12 19:24:29: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.19536 [sec] (4.71856 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.40855 [sec] (1.57948 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.65837 [sec] (3.71064 [ns/kmer]) +num_minimizers = 46779738 +num_minimizer_positions = 105716944 +num_super_kmers = 110181580 +building minimizers MPHF with 16 threads and 16 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.41487 [sec] (1.58362 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339869878466745.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.34314 [sec] (2.19236 [ns/kmer]) +=== step 6 (merging minimizers tuples): 12.5119 [sec] (8.20506 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 4820617 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9940997/46779738 (21.2506%) +num_buckets_in_skew_index 133897/46779738 (0.286229%) +max_bucket_size 138753 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 51203982/105716944 (48.435%) +num_minimizer_positions_of_buckets_in_skew_index 17808118/105716944 (16.8451%) +=== step 7.1 (build sparse index): 2.66793 [sec] (1.74957 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 144668915 + partition = 1: num kmers in buckets of size > 128 and <= 256: 55623411 + partition = 2: num kmers in buckets of size > 256 and <= 512: 26231592 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 15646830 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 6805157 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 4336113 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 3752174 + partition = 7: num kmers in buckets of size > 8192 and <= 138753: 11539715 +num kmers in skew index = 268603907 (17.6145%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 144668915 + building MPHF with 16 threads and 49 partitions (avg. partition size = 3000000)... + built mphs[0] for 144668915 kmers; bits/key = 2.5646 + built positions[0] for 144668915 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 55623411 + building MPHF with 16 threads and 19 partitions (avg. partition size = 3000000)... + built mphs[1] for 55623411 kmers; bits/key = 2.53569 + built positions[1] for 55623411 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 26231592 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[2] for 26231592 kmers; bits/key = 2.54079 + built positions[2] for 26231592 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 15646830 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 15646830 kmers; bits/key = 2.60816 + built positions[3] for 15646830 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 6805157 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 6805157 kmers; bits/key = 2.68426 + built positions[4] for 6805157 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 4336113 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 4336113 kmers; bits/key = 2.78923 + built positions[5] for 4336113 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 3752174 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 3752174 kmers; bits/key = 2.93007 + built positions[6] for 3752174 kmers; bits/key = 13.0001 + lower = 8192; upper = 138753; num_bits_per_pos = 18; num_kmers_in_partition = 11539715 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 11539715 kmers; bits/key = 2.57092 + built positions[7] for 11539715 kmers; bits/key = 18 +=== step 7.2 (build skew index): 43.3357 [sec] (28.4186 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 78.4631% +buckets with 2 minimizer positions = 10.3049% +buckets with 3 minimizer positions = 3.78004% +buckets with 4 minimizer positions = 1.85919% +buckets with 5 minimizer positions = 1.10306% +buckets with 6 minimizer positions = 0.730786% +buckets with 7 minimizer positions = 0.519347% +buckets with 8 minimizer positions = 0.39044% +buckets with 9 minimizer positions = 0.303572% +buckets with 10 minimizer positions = 0.245117% +buckets with 11 minimizer positions = 0.199482% +buckets with 12 minimizer positions = 0.166839% +buckets with 13 minimizer positions = 0.141617% +buckets with 14 minimizer positions = 0.121414% +buckets with 15 minimizer positions = 0.105691% +buckets with 16 minimizer positions = 0.093579% +max_bucket_size = 138753 +=== step 7 (build sparse and skew index): 46.3133 [sec] (30.3713 [ns/kmer]) +=== total time: 79.8455 [sec] (52.361 [ns/kmer]) +total index size: 1542803956 [B] -- 1542.8 [MB] +SPACE BREAKDOWN: + mphf: 0.0883332 [bits/kmer] (2.87945 [bits/key]) -- 1.09135% + strings_offsets: 0.274587 [bits/kmer] -- 3.39252% + control_codewords: 1.01235 [bits/kmer] -- 12.5075% + mid_load_buckets: 1.07451 [bits/kmer] -- 13.2756% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.7371e-05% + strings: 3.35283 [bits/kmer] -- 41.4241% + skew_index: 2.2913 [bits/kmer] -- 28.3089% + weights: 9.65307e-07 [bits/kmer] -- 1.19263e-05% + -------------- + total: 8.09391 [bits/kmer] +2026-03-12 19:25:49: saving data structure to disk... +2026-03-12 19:25:50: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.time.log new file mode 100644 index 0000000..6af6865 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.canon.sshash --canonical" + User time (seconds): 202.85 + System time (seconds): 21.16 + Percent of CPU this job got: 261% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:25.61 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7215992 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7633357 + Voluntary context switches: 2451 + Involuntary context switches: 3201 + Swaps: 0 + File system inputs: 48 + File system outputs: 26879144 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.canon.sshash --canonical" + User time (seconds): 212.96 + System time (seconds): 21.91 + Percent of CPU this job got: 291% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:20.71 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 12523780 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 8126137 + Voluntary context switches: 3242 + Involuntary context switches: 2954 + Swaps: 0 + File system inputs: 8 + File system outputs: 16292848 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.json new file mode 100644 index 0000000..2777d1e --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "839.144506", "negative lookup (avg_nanosec_per_kmer)": "871.111222", "access (avg_nanosec_per_kmer)": "357.495040", "iterator (avg_nanosec_per_kmer)": "2.798919"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "834.234616", "negative lookup (avg_nanosec_per_kmer)": "869.922730", "access (avg_nanosec_per_kmer)": "357.888933", "iterator (avg_nanosec_per_kmer)": "2.719108"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "828.868428", "negative lookup (avg_nanosec_per_kmer)": "861.382823", "access (avg_nanosec_per_kmer)": "361.640703", "iterator (avg_nanosec_per_kmer)": "2.830342"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1233.832317", "negative lookup (avg_nanosec_per_kmer)": "844.628596", "access (avg_nanosec_per_kmer)": "411.128282", "iterator (avg_nanosec_per_kmer)": "2.768412"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1233.867523", "negative lookup (avg_nanosec_per_kmer)": "837.935098", "access (avg_nanosec_per_kmer)": "404.127150", "iterator (avg_nanosec_per_kmer)": "2.754362"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1231.741520", "negative lookup (avg_nanosec_per_kmer)": "838.708885", "access (avg_nanosec_per_kmer)": "410.199954", "iterator (avg_nanosec_per_kmer)": "2.752154"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.log new file mode 100644 index 0000000..8f087d3 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 839.145 +negative lookup (avg_nanosec_per_kmer) 871.111 +access (avg_nanosec_per_kmer) = 357.495 +iterator (avg_nanosec_per_kmer) = 2.79892 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 834.235 +negative lookup (avg_nanosec_per_kmer) 869.923 +access (avg_nanosec_per_kmer) = 357.889 +iterator (avg_nanosec_per_kmer) = 2.71911 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 828.868 +negative lookup (avg_nanosec_per_kmer) 861.383 +access (avg_nanosec_per_kmer) = 361.641 +iterator (avg_nanosec_per_kmer) = 2.83034 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 1233.83 +negative lookup (avg_nanosec_per_kmer) 844.629 +access (avg_nanosec_per_kmer) = 411.128 +iterator (avg_nanosec_per_kmer) = 2.76841 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 1233.87 +negative lookup (avg_nanosec_per_kmer) 837.935 +access (avg_nanosec_per_kmer) = 404.127 +iterator (avg_nanosec_per_kmer) = 2.75436 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash +positive lookup (avg_nanosec_per_kmer) = 1231.74 +negative lookup (avg_nanosec_per_kmer) 838.709 +access (avg_nanosec_per_kmer) = 410.2 +iterator (avg_nanosec_per_kmer) = 2.75215 diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.json new file mode 100644 index 0000000..fd945cc --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "23", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6803857", "step 2 (compute minimizer tuples)": "2003733", "step 3 (merging minimizer tuples)": "6015064", "step 4 (build mphf)": "5769698", "step 5 (replacing minimizer values with MPHF hashes)": "4453823", "step 6 (merging minimizers tuples)": "17908509", "step 7.1 (build sparse index)": "1620190", "step 7.2 (build skew index)": "20859215", "step 7 (build sparse and skew index)": "22767480", "total_build_time_in_microsec": "65722164", "index_size_in_bytes": "1662968662", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "23", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7217331", "step 2 (compute minimizer tuples)": "1401177", "step 3 (merging minimizer tuples)": "4589724", "step 4 (build mphf)": "2169930", "step 5 (replacing minimizer values with MPHF hashes)": "2727639", "step 6 (merging minimizers tuples)": "7033786", "step 7.1 (build sparse index)": "2173166", "step 7.2 (build skew index)": "21210538", "step 7 (build sparse and skew index)": "23634478", "total_build_time_in_microsec": "48774065", "index_size_in_bytes": "1308600762", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.log new file mode 100644 index 0000000..3939778 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.log @@ -0,0 +1,271 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash +2026-03-12 19:08:56: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.80386 [sec] (2.4551 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.00373 [sec] (0.723026 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.01506 [sec] (2.17047 [ns/kmer]) +num_minimizers = 114630823 +num_minimizer_positions = 133941871 +num_super_kmers = 133941871 +building minimizers MPHF with 16 threads and 39 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 5.7697 [sec] (2.08193 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338936148936739.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.45382 [sec] (1.60711 [ns/kmer]) +=== step 6 (merging minimizers tuples): 17.9085 [sec] (6.4621 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1865555 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3205420/114630823 (2.7963%) +num_buckets_in_skew_index 30539/114630823 (0.0266412%) +max_bucket_size 144417 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 13206032/133941871 (9.85952%) +num_minimizer_positions_of_buckets_in_skew_index 9340975/133941871 (6.9739%) +=== step 7.1 (build sparse index): 1.62019 [sec] (0.584628 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 28419064 + partition = 1: num kmers in buckets of size > 128 and <= 256: 26193017 + partition = 2: num kmers in buckets of size > 256 and <= 512: 23235875 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 21307296 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 21639070 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 14802709 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 11015934 + partition = 7: num kmers in buckets of size > 8192 and <= 144417: 25297902 +num kmers in skew index = 171910867 (6.20322%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 28419064 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[0] for 28419064 kmers; bits/key = 2.54322 + built positions[0] for 28419064 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26193017 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26193017 kmers; bits/key = 2.56028 + built positions[1] for 26193017 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 23235875 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[2] for 23235875 kmers; bits/key = 2.51953 + built positions[2] for 23235875 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 21307296 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[3] for 21307296 kmers; bits/key = 2.64955 + built positions[3] for 21307296 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 21639070 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[4] for 21639070 kmers; bits/key = 2.63512 + built positions[4] for 21639070 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14802709 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[5] for 14802709 kmers; bits/key = 2.58827 + built positions[5] for 14802709 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 11015934 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[6] for 11015934 kmers; bits/key = 2.59557 + built positions[6] for 11015934 kmers; bits/key = 13 + lower = 8192; upper = 144417; num_bits_per_pos = 18; num_kmers_in_partition = 25297902 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[7] for 25297902 kmers; bits/key = 2.58534 + built positions[7] for 25297902 kmers; bits/key = 18 +=== step 7.2 (build skew index): 20.8592 [sec] (7.52683 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1771% +buckets with 2 minimizer positions = 1.62745% +buckets with 3 minimizer positions = 0.448276% +buckets with 4 minimizer positions = 0.206145% +buckets with 5 minimizer positions = 0.117318% +buckets with 6 minimizer positions = 0.0757466% +buckets with 7 minimizer positions = 0.0534289% +buckets with 8 minimizer positions = 0.0390384% +buckets with 9 minimizer positions = 0.0306235% +buckets with 10 minimizer positions = 0.0239229% +buckets with 11 minimizer positions = 0.0193962% +buckets with 12 minimizer positions = 0.0161143% +buckets with 13 minimizer positions = 0.013731% +buckets with 14 minimizer positions = 0.0116155% +buckets with 15 minimizer positions = 0.0101212% +buckets with 16 minimizer positions = 0.00879083% +max_bucket_size = 144417 +=== step 7 (build sparse and skew index): 22.7675 [sec] (8.2154 [ns/kmer]) +=== total time: 65.7222 [sec] (23.7151 [ns/kmer]) +total index size: 1662968662 [B] -- 1662.97 [MB] +SPACE BREAKDOWN: + mphf: 0.118565 [bits/kmer] (2.86643 [bits/key]) -- 2.46984% + strings_offsets: 0.11255 [bits/kmer] -- 2.34454% + control_codewords: 1.36499 [bits/kmer] -- 28.4342% + mid_load_buckets: 0.152488 [bits/kmer] -- 3.1765% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.61158e-05% + strings: 2.11826 [bits/kmer] -- 44.1256% + skew_index: 0.933667 [bits/kmer] -- 19.4493% + weights: 5.31156e-07 [bits/kmer] -- 1.10646e-05% + -------------- + total: 4.80052 [bits/kmer] +2026-03-12 19:10:01: saving data structure to disk... +2026-03-12 19:10:02: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash +2026-03-12 19:22:57: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.21733 [sec] (4.73297 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.40118 [sec] (0.918862 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.bin' +=== step 3 (merging minimizer tuples): 4.58972 [sec] (3.00984 [ns/kmer]) +num_minimizers = 42223953 +num_minimizer_positions = 88153026 +num_super_kmers = 88153026 +building minimizers MPHF with 16 threads and 15 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.16993 [sec] (1.42299 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339777813992848.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.72764 [sec] (1.78873 [ns/kmer]) +=== step 6 (merging minimizers tuples): 7.03379 [sec] (4.61261 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 4115025 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 8433173/42223953 (19.9725%) +num_buckets_in_skew_index 60832/42223953 (0.14407%) +max_bucket_size 168647 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 45431249/88153026 (51.5368%) +num_minimizer_positions_of_buckets_in_skew_index 8991829/88153026 (10.2002%) +=== step 7.1 (build sparse index): 2.17317 [sec] (1.42512 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 68687849 + partition = 1: num kmers in buckets of size > 128 and <= 256: 30010754 + partition = 2: num kmers in buckets of size > 256 and <= 512: 17457833 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 8659045 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4997430 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3708524 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 4271574 + partition = 7: num kmers in buckets of size > 8192 and <= 168647: 9612673 +num kmers in skew index = 147405682 (9.66655%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 68687849 + building MPHF with 16 threads and 23 partitions (avg. partition size = 3000000)... + built mphs[0] for 68687849 kmers; bits/key = 2.56319 + built positions[0] for 68687849 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 30010754 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[1] for 30010754 kmers; bits/key = 2.61613 + built positions[1] for 30010754 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 17457833 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 17457833 kmers; bits/key = 2.57718 + built positions[2] for 17457833 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 8659045 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 8659045 kmers; bits/key = 2.49564 + built positions[3] for 8659045 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4997430 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4997430 kmers; bits/key = 2.64673 + built positions[4] for 4997430 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3708524 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3708524 kmers; bits/key = 3.07525 + built positions[5] for 3708524 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 4271574 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 4271574 kmers; bits/key = 2.82511 + built positions[6] for 4271574 kmers; bits/key = 13.0001 + lower = 8192; upper = 168647; num_bits_per_pos = 18; num_kmers_in_partition = 9612673 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 9612673 kmers; bits/key = 2.73536 + built positions[7] for 9612673 kmers; bits/key = 18 +=== step 7.2 (build skew index): 21.2105 [sec] (13.9094 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 79.8834% +buckets with 2 minimizer positions = 9.74571% +buckets with 3 minimizer positions = 3.40689% +buckets with 4 minimizer positions = 1.67718% +buckets with 5 minimizer positions = 1.00592% +buckets with 6 minimizer positions = 0.671446% +buckets with 7 minimizer positions = 0.481746% +buckets with 8 minimizer positions = 0.362363% +buckets with 9 minimizer positions = 0.283046% +buckets with 10 minimizer positions = 0.227854% +buckets with 11 minimizer positions = 0.187259% +buckets with 12 minimizer positions = 0.159864% +buckets with 13 minimizer positions = 0.135354% +buckets with 14 minimizer positions = 0.119532% +buckets with 15 minimizer positions = 0.103905% +buckets with 16 minimizer positions = 0.0927151% +max_bucket_size = 168647 +=== step 7 (build sparse and skew index): 23.6345 [sec] (15.499 [ns/kmer]) +=== total time: 48.7741 [sec] (31.985 [ns/kmer]) +total index size: 1308600762 [B] -- 1308.6 [MB] +SPACE BREAKDOWN: + mphf: 0.0823667 [bits/kmer] (2.97465 [bits/key]) -- 1.19977% + strings_offsets: 0.274587 [bits/kmer] -- 3.99968% + control_codewords: 0.913756 [bits/kmer] -- 13.3099% + mid_load_buckets: 0.953372 [bits/kmer] -- 13.887% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.04799e-05% + strings: 3.35283 [bits/kmer] -- 48.8378% + skew_index: 1.28831 [bits/kmer] -- 18.7658% + weights: 9.65307e-07 [bits/kmer] -- 1.40608e-05% + -------------- + total: 6.86522 [bits/kmer] +2026-03-12 19:23:46: saving data structure to disk... +2026-03-12 19:23:47: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.time.log new file mode 100644 index 0000000..93ae23d --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m23/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m23.sshash" + User time (seconds): 160.47 + System time (seconds): 17.92 + Percent of CPU this job got: 267% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:06.66 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5901780 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6692773 + Voluntary context switches: 2233 + Involuntary context switches: 2590 + Swaps: 0 + File system inputs: 80 + File system outputs: 21437360 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m23.sshash" + User time (seconds): 120.35 + System time (seconds): 13.03 + Percent of CPU this job got: 269% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:49.53 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7755792 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5279832 + Voluntary context switches: 4078 + Involuntary context switches: 2004 + Swaps: 0 + File system inputs: 0 + File system outputs: 13355784 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.json new file mode 100644 index 0000000..71d3393 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash", "k": "31", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "552.403220", "negative lookup (avg_nanosec_per_kmer)": "428.787217", "access (avg_nanosec_per_kmer)": "348.239617", "iterator (avg_nanosec_per_kmer)": "2.461038"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash", "k": "31", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "562.063732", "negative lookup (avg_nanosec_per_kmer)": "427.827110", "access (avg_nanosec_per_kmer)": "347.602901", "iterator (avg_nanosec_per_kmer)": "2.490798"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash", "k": "31", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "549.039334", "negative lookup (avg_nanosec_per_kmer)": "428.203292", "access (avg_nanosec_per_kmer)": "353.487529", "iterator (avg_nanosec_per_kmer)": "2.491884"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash", "k": "31", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "557.972904", "negative lookup (avg_nanosec_per_kmer)": "403.435230", "access (avg_nanosec_per_kmer)": "356.393800", "iterator (avg_nanosec_per_kmer)": "2.424196"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash", "k": "31", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "557.657692", "negative lookup (avg_nanosec_per_kmer)": "402.887574", "access (avg_nanosec_per_kmer)": "358.001679", "iterator (avg_nanosec_per_kmer)": "2.412647"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash", "k": "31", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "557.339880", "negative lookup (avg_nanosec_per_kmer)": "401.058483", "access (avg_nanosec_per_kmer)": "360.466095", "iterator (avg_nanosec_per_kmer)": "2.428846"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.log new file mode 100644 index 0000000..b34cd48 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 552.403 +negative lookup (avg_nanosec_per_kmer) 428.787 +access (avg_nanosec_per_kmer) = 348.24 +iterator (avg_nanosec_per_kmer) = 2.46104 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 562.064 +negative lookup (avg_nanosec_per_kmer) 427.827 +access (avg_nanosec_per_kmer) = 347.603 +iterator (avg_nanosec_per_kmer) = 2.4908 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 549.039 +negative lookup (avg_nanosec_per_kmer) 428.203 +access (avg_nanosec_per_kmer) = 353.488 +iterator (avg_nanosec_per_kmer) = 2.49188 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 557.973 +negative lookup (avg_nanosec_per_kmer) 403.435 +access (avg_nanosec_per_kmer) = 356.394 +iterator (avg_nanosec_per_kmer) = 2.4242 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 557.658 +negative lookup (avg_nanosec_per_kmer) 402.888 +access (avg_nanosec_per_kmer) = 358.002 +iterator (avg_nanosec_per_kmer) = 2.41265 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 557.34 +negative lookup (avg_nanosec_per_kmer) 401.058 +access (avg_nanosec_per_kmer) = 360.466 +iterator (avg_nanosec_per_kmer) = 2.42885 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.json new file mode 100644 index 0000000..d9568b9 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7260859", "step 2 (compute minimizer tuples)": "6099066", "step 3 (merging minimizer tuples)": "31057071", "step 4 (build mphf)": "32670157", "step 5 (replacing minimizer values with MPHF hashes)": "26592994", "step 6 (merging minimizers tuples)": "113702817", "step 7.1 (build sparse index)": "8217711", "step 7.2 (build skew index)": "2337796", "step 7 (build sparse and skew index)": "12192514", "total_build_time_in_microsec": "229575478", "index_size_in_bytes": "4161298350", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4560775", "step 2 (compute minimizer tuples)": "2779155", "step 3 (merging minimizer tuples)": "10321918", "step 4 (build mphf)": "11264875", "step 5 (replacing minimizer values with MPHF hashes)": "10022634", "step 6 (merging minimizers tuples)": "40733131", "step 7.1 (build sparse index)": "4769733", "step 7.2 (build skew index)": "968710", "step 7 (build sparse and skew index)": "6392108", "total_build_time_in_microsec": "86074596", "index_size_in_bytes": "1654446626", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.log new file mode 100644 index 0000000..f0d2b45 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.log @@ -0,0 +1,300 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash --canonical +2026-03-12 18:46:25: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.26086 [sec] (2.89776 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.33.bin'... +=== step 2 (compute minimizer tuples): 6.09907 [sec] (2.4341 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +=== step 3 (merging minimizer tuples): 31.0571 [sec] (12.3947 [ns/kmer]) +num_minimizers = 715091775 +num_minimizer_positions = 750921879 +num_super_kmers = 774111236 +building minimizers MPHF with 16 threads and 239 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 32.6702 [sec] (13.0384 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 26.593 [sec] (10.6131 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337585705210498.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +=== step 6 (merging minimizers tuples): 113.703 [sec] (45.3781 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8261206 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 13642458/715091775 (1.90779%) +num_buckets_in_skew_index 20253/715091775 (0.00283222%) +max_bucket_size 5091 +log2_max_bucket_size 13 +num_partitions in skew index 7 +num_minimizer_positions_of_buckets_larger_than_1 47038651/750921879 (6.26412%) +num_minimizer_positions_of_buckets_in_skew_index 2454164/750921879 (0.32682%) +=== step 7.1 (build sparse index): 8.21771 [sec] (3.27963 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 3984661 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1892953 + partition = 2: num kmers in buckets of size > 256 and <= 512: 840206 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 343358 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 92264 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 59646 + partition = 6: num kmers in buckets of size > 4096 and <= 5091: 13312 +num kmers in skew index = 7226400 (0.288401%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 3984661 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 3984661 kmers; bits/key = 2.99856 + built positions[0] for 3984661 kmers; bits/key = 7.00009 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1892953 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1892953)... + built mphs[1] for 1892953 kmers; bits/key = 2.5602 + built positions[1] for 1892953 kmers; bits/key = 8.0002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 840206 + building MPHF with 16 threads and 1 partitions (avg. partition size = 840206)... + built mphs[2] for 840206 kmers; bits/key = 2.41868 + built positions[2] for 840206 kmers; bits/key = 9.00038 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 343358 + building MPHF with 16 threads and 1 partitions (avg. partition size = 343358)... + built mphs[3] for 343358 kmers; bits/key = 2.42261 + built positions[3] for 343358 kmers; bits/key = 10.001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 92264 + building MPHF with 16 threads and 1 partitions (avg. partition size = 92264)... + built mphs[4] for 92264 kmers; bits/key = 2.43978 + built positions[4] for 92264 kmers; bits/key = 11.0036 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 59646 + building MPHF with 16 threads and 1 partitions (avg. partition size = 59646)... + built mphs[5] for 59646 kmers; bits/key = 2.45046 + built positions[5] for 59646 kmers; bits/key = 12.0058 + lower = 4096; upper = 5091; num_bits_per_pos = 13; num_kmers_in_partition = 13312 + building MPHF with 16 threads and 1 partitions (avg. partition size = 13312)... + built mphs[6] for 13312 kmers; bits/key = 2.57572 + built positions[6] for 13312 kmers; bits/key = 13.024 +=== step 7.2 (build skew index): 2.3378 [sec] (0.932999 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 98.0894% +buckets with 2 minimizer positions = 1.15527% +buckets with 3 minimizer positions = 0.326791% +buckets with 4 minimizer positions = 0.143567% +buckets with 5 minimizer positions = 0.0788867% +buckets with 6 minimizer positions = 0.0489618% +buckets with 7 minimizer positions = 0.0325175% +buckets with 8 minimizer positions = 0.0231419% +buckets with 9 minimizer positions = 0.0169571% +buckets with 10 minimizer positions = 0.0129273% +buckets with 11 minimizer positions = 0.0100885% +buckets with 12 minimizer positions = 0.00808036% +buckets with 13 minimizer positions = 0.00651007% +buckets with 14 minimizer positions = 0.00530589% +buckets with 15 minimizer positions = 0.00443593% +buckets with 16 minimizer positions = 0.0037612% +max_bucket_size = 5091 +=== step 7 (build sparse and skew index): 12.1925 [sec] (4.86595 [ns/kmer]) +=== total time: 229.575 [sec] (91.6221 [ns/kmer]) +total index size: 4161298350 [B] -- 4161.3 [MB] +SPACE BREAKDOWN: + mphf: 0.80715 [bits/kmer] (2.82825 [bits/key]) -- 6.07521% + strings_offsets: 0.153147 [bits/kmer] -- 1.15269% + control_codewords: 9.41782 [bits/kmer] -- 70.8854% + mid_load_buckets: 0.60073 [bits/kmer] -- 4.52154% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 6.4403e-06% + strings: 2.24545 [bits/kmer] -- 16.9009% + skew_index: 0.0616742 [bits/kmer] -- 0.464205% + weights: 5.87466e-07 [bits/kmer] -- 4.4217e-06% + -------------- + total: 13.286 [bits/kmer] +2026-03-12 18:50:15: saving data structure to disk... +2026-03-12 18:50:17: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash --canonical +2026-03-12 19:03:24: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.56077 [sec] (5.09977 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.16.bin'... +=== step 2 (compute minimizer tuples): 2.77915 [sec] (3.1076 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.3219 [sec] (11.5418 [ns/kmer]) +num_minimizers = 235252948 +num_minimizer_positions = 277816935 +num_super_kmers = 285815152 +building minimizers MPHF with 16 threads and 79 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 11.2649 [sec] (12.5962 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338604128839032.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 10.0226 [sec] (11.2071 [ns/kmer]) +=== step 6 (merging minimizers tuples): 40.7331 [sec] (45.547 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 13262351 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 20231767/235252948 (8.60001%) +num_buckets_in_skew_index 5208/235252948 (0.00221379%) +max_bucket_size 3426 +log2_max_bucket_size 12 +num_partitions in skew index 6 +num_minimizer_positions_of_buckets_larger_than_1 62010874/277816935 (22.3208%) +num_minimizer_positions_of_buckets_in_skew_index 790088/277816935 (0.284392%) +=== step 7.1 (build sparse index): 4.76973 [sec] (5.33342 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 767657 + partition = 1: num kmers in buckets of size > 128 and <= 256: 469396 + partition = 2: num kmers in buckets of size > 256 and <= 512: 293742 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 135327 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 78476 + partition = 5: num kmers in buckets of size > 2048 and <= 3426: 31837 +num kmers in skew index = 1776435 (0.198637%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 767657 + building MPHF with 16 threads and 1 partitions (avg. partition size = 767657)... + built mphs[0] for 767657 kmers; bits/key = 2.41891 + built positions[0] for 767657 kmers; bits/key = 7.00046 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 469396 + building MPHF with 16 threads and 1 partitions (avg. partition size = 469396)... + built mphs[1] for 469396 kmers; bits/key = 2.5636 + built positions[1] for 469396 kmers; bits/key = 8.00075 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 293742 + building MPHF with 16 threads and 1 partitions (avg. partition size = 293742)... + built mphs[2] for 293742 kmers; bits/key = 2.42335 + built positions[2] for 293742 kmers; bits/key = 9.00121 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 135327 + building MPHF with 16 threads and 1 partitions (avg. partition size = 135327)... + built mphs[3] for 135327 kmers; bits/key = 2.57486 + built positions[3] for 135327 kmers; bits/key = 10.0024 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 78476 + building MPHF with 16 threads and 1 partitions (avg. partition size = 78476)... + built mphs[4] for 78476 kmers; bits/key = 2.58545 + built positions[4] for 78476 kmers; bits/key = 11.0048 + lower = 2048; upper = 3426; num_bits_per_pos = 12; num_kmers_in_partition = 31837 + building MPHF with 16 threads and 1 partitions (avg. partition size = 31837)... + built mphs[5] for 31837 kmers; bits/key = 2.48265 + built positions[5] for 31837 kmers; bits/key = 12.0112 +=== step 7.2 (build skew index): 0.96871 [sec] (1.08319 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 91.3978% +buckets with 2 minimizer positions = 5.63749% +buckets with 3 minimizer positions = 1.35665% +buckets with 4 minimizer positions = 0.545938% +buckets with 5 minimizer positions = 0.293861% +buckets with 6 minimizer positions = 0.188852% +buckets with 7 minimizer positions = 0.134177% +buckets with 8 minimizer positions = 0.100458% +buckets with 9 minimizer positions = 0.0775642% +buckets with 10 minimizer positions = 0.0603589% +buckets with 11 minimizer positions = 0.0465473% +buckets with 12 minimizer positions = 0.0351094% +buckets with 13 minimizer positions = 0.0262951% +buckets with 14 minimizer positions = 0.0196312% +buckets with 15 minimizer positions = 0.0145154% +buckets with 16 minimizer positions = 0.0110294% +max_bucket_size = 3426 +=== step 7 (build sparse and skew index): 6.39211 [sec] (7.14753 [ns/kmer]) +=== total time: 86.0746 [sec] (96.2469 [ns/kmer]) +total index size: 1654446626 [B] -- 1654.45 [MB] +SPACE BREAKDOWN: + mphf: 0.747679 [bits/kmer] (2.84229 [bits/key]) -- 5.05197% + strings_offsets: 0.333373 [bits/kmer] -- 2.25255% + control_codewords: 8.41777 [bits/kmer] -- 56.8777% + mid_load_buckets: 2.14952 [bits/kmer] -- 14.524% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 1.61988e-05% + strings: 3.10303 [bits/kmer] -- 20.9668% + skew_index: 0.0483823 [bits/kmer] -- 0.326913% + weights: 1.64596e-06 [bits/kmer] -- 1.11215e-05% + -------------- + total: 14.7998 [bits/kmer] +2026-03-12 19:04:50: saving data structure to disk... +2026-03-12 19:04:50: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.time.log new file mode 100644 index 0000000..f72bd11 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.canon.sshash --canonical" + User time (seconds): 493.66 + System time (seconds): 81.93 + Percent of CPU this job got: 248% + Elapsed (wall clock) time (h:mm:ss or m:ss): 3:51.86 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 22736908 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 20944489 + Voluntary context switches: 78104 + Involuntary context switches: 23461 + Swaps: 0 + File system inputs: 232 + File system outputs: 142289200 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.canon.sshash --canonical" + User time (seconds): 138.68 + System time (seconds): 29.10 + Percent of CPU this job got: 192% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:26.95 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 10693976 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 9028795 + Voluntary context switches: 16344 + Involuntary context switches: 2610 + Swaps: 0 + File system inputs: 144 + File system outputs: 41703656 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.json new file mode 100644 index 0000000..767e703 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash", "k": "31", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "744.569297", "negative lookup (avg_nanosec_per_kmer)": "814.099292", "access (avg_nanosec_per_kmer)": "353.102084", "iterator (avg_nanosec_per_kmer)": "2.497547"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash", "k": "31", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "742.171787", "negative lookup (avg_nanosec_per_kmer)": "819.863044", "access (avg_nanosec_per_kmer)": "356.472587", "iterator (avg_nanosec_per_kmer)": "2.490016"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash", "k": "31", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "744.650053", "negative lookup (avg_nanosec_per_kmer)": "820.134432", "access (avg_nanosec_per_kmer)": "349.251980", "iterator (avg_nanosec_per_kmer)": "2.467816"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash", "k": "31", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "732.726672", "negative lookup (avg_nanosec_per_kmer)": "755.373578", "access (avg_nanosec_per_kmer)": "358.182946", "iterator (avg_nanosec_per_kmer)": "2.419912"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash", "k": "31", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "731.737152", "negative lookup (avg_nanosec_per_kmer)": "764.888265", "access (avg_nanosec_per_kmer)": "358.969528", "iterator (avg_nanosec_per_kmer)": "2.429287"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash", "k": "31", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "729.155521", "negative lookup (avg_nanosec_per_kmer)": "754.596075", "access (avg_nanosec_per_kmer)": "364.394536", "iterator (avg_nanosec_per_kmer)": "2.423598"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.log new file mode 100644 index 0000000..aae799a --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 744.569 +negative lookup (avg_nanosec_per_kmer) 814.099 +access (avg_nanosec_per_kmer) = 353.102 +iterator (avg_nanosec_per_kmer) = 2.49755 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 742.172 +negative lookup (avg_nanosec_per_kmer) 819.863 +access (avg_nanosec_per_kmer) = 356.473 +iterator (avg_nanosec_per_kmer) = 2.49002 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 744.65 +negative lookup (avg_nanosec_per_kmer) 820.134 +access (avg_nanosec_per_kmer) = 349.252 +iterator (avg_nanosec_per_kmer) = 2.46782 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 732.727 +negative lookup (avg_nanosec_per_kmer) 755.374 +access (avg_nanosec_per_kmer) = 358.183 +iterator (avg_nanosec_per_kmer) = 2.41991 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 731.737 +negative lookup (avg_nanosec_per_kmer) 764.888 +access (avg_nanosec_per_kmer) = 358.97 +iterator (avg_nanosec_per_kmer) = 2.42929 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 729.156 +negative lookup (avg_nanosec_per_kmer) 754.596 +access (avg_nanosec_per_kmer) = 364.395 +iterator (avg_nanosec_per_kmer) = 2.4236 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.json new file mode 100644 index 0000000..69ec37b --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7227743", "step 2 (compute minimizer tuples)": "4416874", "step 3 (merging minimizer tuples)": "24447617", "step 4 (build mphf)": "28057747", "step 5 (replacing minimizer values with MPHF hashes)": "22039198", "step 6 (merging minimizers tuples)": "90549011", "step 7.1 (build sparse index)": "6369396", "step 7.2 (build skew index)": "2174933", "step 7 (build sparse and skew index)": "9869514", "total_build_time_in_microsec": "186607704", "index_size_in_bytes": "3614980256", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "4553915", "step 2 (compute minimizer tuples)": "1927604", "step 3 (merging minimizer tuples)": "8218762", "step 4 (build mphf)": "10187950", "step 5 (replacing minimizer values with MPHF hashes)": "8150260", "step 6 (merging minimizers tuples)": "31074689", "step 7.1 (build sparse index)": "3617424", "step 7.2 (build skew index)": "658024", "step 7 (build sparse and skew index)": "4810490", "total_build_time_in_microsec": "68923670", "index_size_in_bytes": "1453812340", "num_kmers": "894310084"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.log new file mode 100644 index 0000000..133851c --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.log @@ -0,0 +1,292 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash +2026-03-12 18:42:40: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.22774 [sec] (2.88455 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.32.bin'... +=== step 2 (compute minimizer tuples): 4.41687 [sec] (1.76275 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 3 (merging minimizer tuples): 24.4476 [sec] (9.75688 [ns/kmer]) +num_minimizers = 605829868 +num_minimizer_positions = 631548480 +num_super_kmers = 631548480 +building minimizers MPHF with 16 threads and 202 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 28.0577 [sec] (11.1977 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 22.0392 [sec] (8.7957 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773337360846194749.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 6 (merging minimizers tuples): 90.549 [sec] (36.1375 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6551714 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10577384/605829868 (1.74593%) +num_buckets_in_skew_index 10822/605829868 (0.00178631%) +max_bucket_size 2904 +log2_max_bucket_size 12 +num_partitions in skew index 6 +num_minimizer_positions_of_buckets_larger_than_1 35023021/631548480 (5.54558%) +num_minimizer_positions_of_buckets_in_skew_index 1283797/631548480 (0.203278%) +=== step 7.1 (build sparse index): 6.3694 [sec] (2.54198 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 2305503 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1016930 + partition = 2: num kmers in buckets of size > 256 and <= 512: 444134 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 162315 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 66380 + partition = 5: num kmers in buckets of size > 2048 and <= 2904: 7695 +num kmers in skew index = 4002957 (0.159755%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2305503 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2305503)... + built mphs[0] for 2305503 kmers; bits/key = 2.56002 + built positions[0] for 2305503 kmers; bits/key = 7.00016 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1016930 + building MPHF with 16 threads and 1 partitions (avg. partition size = 1016930)... + built mphs[1] for 1016930 kmers; bits/key = 2.41834 + built positions[1] for 1016930 kmers; bits/key = 8.00036 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 444134 + building MPHF with 16 threads and 1 partitions (avg. partition size = 444134)... + built mphs[2] for 444134 kmers; bits/key = 2.56377 + built positions[2] for 444134 kmers; bits/key = 9.00082 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 162315 + building MPHF with 16 threads and 1 partitions (avg. partition size = 162315)... + built mphs[3] for 162315 kmers; bits/key = 2.42925 + built positions[3] for 162315 kmers; bits/key = 10.0021 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 66380 + building MPHF with 16 threads and 1 partitions (avg. partition size = 66380)... + built mphs[4] for 66380 kmers; bits/key = 2.44821 + built positions[4] for 66380 kmers; bits/key = 11.0057 + lower = 2048; upper = 2904; num_bits_per_pos = 12; num_kmers_in_partition = 7695 + building MPHF with 16 threads and 1 partitions (avg. partition size = 7695)... + built mphs[5] for 7695 kmers; bits/key = 2.69474 + built positions[5] for 7695 kmers; bits/key = 12.0431 +=== step 7.2 (build skew index): 2.17493 [sec] (0.868002 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 98.2523% +buckets with 2 minimizer positions = 1.08144% +buckets with 3 minimizer positions = 0.297403% +buckets with 4 minimizer positions = 0.129162% +buckets with 5 minimizer positions = 0.0697042% +buckets with 6 minimizer positions = 0.0424238% +buckets with 7 minimizer positions = 0.0278986% +buckets with 8 minimizer positions = 0.0194946% +buckets with 9 minimizer positions = 0.0141116% +buckets with 10 minimizer positions = 0.010535% +buckets with 11 minimizer positions = 0.00816615% +buckets with 12 minimizer positions = 0.00643316% +buckets with 13 minimizer positions = 0.00519453% +buckets with 14 minimizer positions = 0.00424476% +buckets with 15 minimizer positions = 0.00354935% +buckets with 16 minimizer positions = 0.00295694% +max_bucket_size = 2904 +=== step 7 (build sparse and skew index): 9.86951 [sec] (3.93886 [ns/kmer]) +=== total time: 186.608 [sec] (74.4739 [ns/kmer]) +total index size: 3614980256 [B] -- 3614.98 [MB] +SPACE BREAKDOWN: + mphf: 0.684332 [bits/kmer] (2.83036 [bits/key]) -- 5.9292% + strings_offsets: 0.153147 [bits/kmer] -- 1.3269% + control_codewords: 7.97883 [bits/kmer] -- 69.1303% + mid_load_buckets: 0.447279 [bits/kmer] -- 3.87532% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 7.4136e-06% + strings: 2.24545 [bits/kmer] -- 19.4551% + skew_index: 0.0326772 [bits/kmer] -- 0.283122% + weights: 5.87466e-07 [bits/kmer] -- 5.08993e-06% + -------------- + total: 11.5417 [bits/kmer] +2026-03-12 18:45:47: saving data structure to disk... +2026-03-12 18:45:49: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash +2026-03-12 19:01:41: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.55391 [sec] (5.0921 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.16.bin'... +=== step 2 (compute minimizer tuples): 1.9276 [sec] (2.15541 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.21876 [sec] (9.19006 [ns/kmer]) +num_minimizers = 206122326 +num_minimizer_positions = 235139020 +num_super_kmers = 235139020 +building minimizers MPHF with 16 threads and 69 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 10.188 [sec] (11.392 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773338501978880096.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 8.15026 [sec] (9.11346 [ns/kmer]) +=== step 6 (merging minimizers tuples): 31.0747 [sec] (34.7471 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 9917500 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 15023852/206122326 (7.2888%) +num_buckets_in_skew_index 3692/206122326 (0.00179117%) +max_bucket_size 2664 +log2_max_bucket_size 12 +num_partitions in skew index 6 +num_minimizer_positions_of_buckets_larger_than_1 43502314/235139020 (18.5007%) +num_minimizer_positions_of_buckets_in_skew_index 541924/235139020 (0.23047%) +=== step 7.1 (build sparse index): 3.61742 [sec] (4.04493 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 544605 + partition = 1: num kmers in buckets of size > 128 and <= 256: 344629 + partition = 2: num kmers in buckets of size > 256 and <= 512: 168203 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 115642 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 33877 + partition = 5: num kmers in buckets of size > 2048 and <= 2664: 7847 +num kmers in skew index = 1214803 (0.135837%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 544605 + building MPHF with 16 threads and 1 partitions (avg. partition size = 544605)... + built mphs[0] for 544605 kmers; bits/key = 2.41999 + built positions[0] for 544605 kmers; bits/key = 7.00068 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 344629 + building MPHF with 16 threads and 1 partitions (avg. partition size = 344629)... + built mphs[1] for 344629 kmers; bits/key = 2.42236 + built positions[1] for 344629 kmers; bits/key = 8.001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 168203 + building MPHF with 16 threads and 1 partitions (avg. partition size = 168203)... + built mphs[2] for 168203 kmers; bits/key = 2.57175 + built positions[2] for 168203 kmers; bits/key = 9.00207 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 115642 + building MPHF with 16 threads and 1 partitions (avg. partition size = 115642)... + built mphs[3] for 115642 kmers; bits/key = 2.43441 + built positions[3] for 115642 kmers; bits/key = 10.0033 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 33877 + building MPHF with 16 threads and 1 partitions (avg. partition size = 33877)... + built mphs[4] for 33877 kmers; bits/key = 2.47861 + built positions[4] for 33877 kmers; bits/key = 11.0102 + lower = 2048; upper = 2664; num_bits_per_pos = 12; num_kmers_in_partition = 7847 + building MPHF with 16 threads and 1 partitions (avg. partition size = 7847)... + built mphs[5] for 7847 kmers; bits/key = 2.69147 + built positions[5] for 7847 kmers; bits/key = 12.0464 +=== step 7.2 (build skew index): 0.658024 [sec] (0.73579 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 92.7094% +buckets with 2 minimizer positions = 4.81146% +buckets with 3 minimizer positions = 1.15081% +buckets with 4 minimizer positions = 0.504037% +buckets with 5 minimizer positions = 0.289372% +buckets with 6 minimizer positions = 0.181218% +buckets with 7 minimizer positions = 0.116387% +buckets with 8 minimizer positions = 0.0743437% +buckets with 9 minimizer positions = 0.0470308% +buckets with 10 minimizer positions = 0.0304334% +buckets with 11 minimizer positions = 0.0200114% +buckets with 12 minimizer positions = 0.01377% +buckets with 13 minimizer positions = 0.00975489% +buckets with 14 minimizer positions = 0.00723502% +buckets with 15 minimizer positions = 0.00553458% +buckets with 16 minimizer positions = 0.00427901% +max_bucket_size = 2664 +=== step 7 (build sparse and skew index): 4.81049 [sec] (5.379 [ns/kmer]) +=== total time: 68.9237 [sec] (77.0691 [ns/kmer]) +total index size: 1453812340 [B] -- 1453.81 [MB] +SPACE BREAKDOWN: + mphf: 0.652253 [bits/kmer] (2.82995 [bits/key]) -- 5.0154% + strings_offsets: 0.333373 [bits/kmer] -- 2.56342% + control_codewords: 7.37542 [bits/kmer] -- 56.7122% + mid_load_buckets: 1.50795 [bits/kmer] -- 11.5951% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 1.84343e-05% + strings: 3.10303 [bits/kmer] -- 23.8603% + skew_index: 0.0329646 [bits/kmer] -- 0.253476% + weights: 1.64596e-06 [bits/kmer] -- 1.26564e-05% + -------------- + total: 13.005 [bits/kmer] +2026-03-12 19:02:50: saving data structure to disk... +2026-03-12 19:02:51: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.time.log new file mode 100644 index 0000000..530778c --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k31/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k31.m25.sshash" + User time (seconds): 409.62 + System time (seconds): 68.74 + Percent of CPU this job got: 253% + Elapsed (wall clock) time (h:mm:ss or m:ss): 3:08.51 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 20240008 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 18327643 + Voluntary context switches: 45665 + Involuntary context switches: 23205 + Swaps: 0 + File system inputs: 232 + File system outputs: 117303744 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k31.m25.sshash" + User time (seconds): 114.91 + System time (seconds): 24.88 + Percent of CPU this job got: 200% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:09.68 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9126704 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7576953 + Voluntary context switches: 5526 + Involuntary context switches: 2099 + Swaps: 0 + File system inputs: 96 + File system outputs: 34933792 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.json new file mode 100644 index 0000000..05c2196 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "647.128782", "negative lookup (avg_nanosec_per_kmer)": "518.674509", "access (avg_nanosec_per_kmer)": "360.110516", "iterator (avg_nanosec_per_kmer)": "2.717493"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "664.394489", "negative lookup (avg_nanosec_per_kmer)": "512.108830", "access (avg_nanosec_per_kmer)": "360.317319", "iterator (avg_nanosec_per_kmer)": "2.717427"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "652.909889", "negative lookup (avg_nanosec_per_kmer)": "510.894586", "access (avg_nanosec_per_kmer)": "360.390819", "iterator (avg_nanosec_per_kmer)": "2.713021"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1020.597707", "negative lookup (avg_nanosec_per_kmer)": "501.600858", "access (avg_nanosec_per_kmer)": "404.845483", "iterator (avg_nanosec_per_kmer)": "2.825161"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1021.570680", "negative lookup (avg_nanosec_per_kmer)": "499.627955", "access (avg_nanosec_per_kmer)": "410.433088", "iterator (avg_nanosec_per_kmer)": "2.846580"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1015.373916", "negative lookup (avg_nanosec_per_kmer)": "506.619869", "access (avg_nanosec_per_kmer)": "404.518169", "iterator (avg_nanosec_per_kmer)": "2.767007"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.log new file mode 100644 index 0000000..6428dc9 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 647.129 +negative lookup (avg_nanosec_per_kmer) 518.675 +access (avg_nanosec_per_kmer) = 360.111 +iterator (avg_nanosec_per_kmer) = 2.71749 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 664.394 +negative lookup (avg_nanosec_per_kmer) 512.109 +access (avg_nanosec_per_kmer) = 360.317 +iterator (avg_nanosec_per_kmer) = 2.71743 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 652.91 +negative lookup (avg_nanosec_per_kmer) 510.895 +access (avg_nanosec_per_kmer) = 360.391 +iterator (avg_nanosec_per_kmer) = 2.71302 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1020.6 +negative lookup (avg_nanosec_per_kmer) 501.601 +access (avg_nanosec_per_kmer) = 404.845 +iterator (avg_nanosec_per_kmer) = 2.82516 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1021.57 +negative lookup (avg_nanosec_per_kmer) 499.628 +access (avg_nanosec_per_kmer) = 410.433 +iterator (avg_nanosec_per_kmer) = 2.84658 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1015.37 +negative lookup (avg_nanosec_per_kmer) 506.62 +access (avg_nanosec_per_kmer) = 404.518 +iterator (avg_nanosec_per_kmer) = 2.76701 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.json new file mode 100644 index 0000000..685c51b --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6786065", "step 2 (compute minimizer tuples)": "3509410", "step 3 (merging minimizer tuples)": "8280554", "step 4 (build mphf)": "7796026", "step 5 (replacing minimizer values with MPHF hashes)": "6173036", "step 6 (merging minimizers tuples)": "18471053", "step 7.1 (build sparse index)": "2221843", "step 7.2 (build skew index)": "20386614", "step 7 (build sparse and skew index)": "22996496", "total_build_time_in_microsec": "74012640", "index_size_in_bytes": "1839839296", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7208980", "step 2 (compute minimizer tuples)": "2414257", "step 3 (merging minimizer tuples)": "11942662", "step 4 (build mphf)": "3107161", "step 5 (replacing minimizer values with MPHF hashes)": "3568331", "step 6 (merging minimizers tuples)": "9234918", "step 7.1 (build sparse index)": "2780038", "step 7.2 (build skew index)": "36744793", "step 7 (build sparse and skew index)": "39835588", "total_build_time_in_microsec": "77311897", "index_size_in_bytes": "1527316782", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.log new file mode 100644 index 0000000..0df382e --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash --canonical +2026-03-12 19:14:17: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.78606 [sec] (2.44868 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.50941 [sec] (1.26633 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.28055 [sec] (2.98795 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 16 threads and 50 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.79603 [sec] (2.81311 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339257176022343.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.17304 [sec] (2.22747 [ns/kmer]) +=== step 6 (merging minimizers tuples): 18.4711 [sec] (6.66508 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2392820 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4082749/149769567 (2.72602%) +num_buckets_in_skew_index 35781/149769567 (0.0238907%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 16495406/173272792 (9.51991%) +num_minimizer_positions_of_buckets_in_skew_index 11126349/173272792 (6.42129%) +=== step 7.1 (build sparse index): 2.22184 [sec] (0.801728 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 26742724 + partition = 1: num kmers in buckets of size > 128 and <= 256: 24475836 + partition = 2: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 15443443 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 12315344 + partition = 7: num kmers in buckets of size > 8192 and <= 284250: 28690575 +num kmers in skew index = 166030448 (5.99103%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26742724 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 26742724 kmers; bits/key = 2.56429 + built positions[0] for 26742724 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 24475836 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 24475836 kmers; bits/key = 2.62316 + built positions[1] for 24475836 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[2] for 21113117 kmers; bits/key = 2.6904 + built positions[2] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 19260150 kmers; bits/key = 2.59757 + built positions[3] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17989259 kmers; bits/key = 2.53715 + built positions[4] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 15443443 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[5] for 15443443 kmers; bits/key = 2.69254 + built positions[5] for 15443443 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 12315344 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[6] for 12315344 kmers; bits/key = 2.67895 + built positions[6] for 12315344 kmers; bits/key = 13 + lower = 8192; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 28690575 + building MPHF with 16 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[7] for 28690575 kmers; bits/key = 2.55295 + built positions[7] for 28690575 kmers; bits/key = 19 +=== step 7.2 (build skew index): 20.3866 [sec] (7.35629 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 22.9965 [sec] (8.29804 [ns/kmer]) +=== total time: 74.0126 [sec] (26.7067 [ns/kmer]) +total index size: 1839839296 [B] -- 1839.84 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.87406% + strings_offsets: 0.11255 [bits/kmer] -- 2.11915% + control_codewords: 1.78341 [bits/kmer] -- 33.579% + mid_load_buckets: 0.19047 [bits/kmer] -- 3.58627% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.45665e-05% + strings: 2.11826 [bits/kmer] -- 39.8836% + skew_index: 0.95376 [bits/kmer] -- 17.9579% + weights: 5.31156e-07 [bits/kmer] -- 1.00009e-05% + -------------- + total: 5.31109 [bits/kmer] +2026-03-12 19:15:31: saving data structure to disk... +2026-03-12 19:15:32: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash --canonical +2026-03-12 19:28:00: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.20898 [sec] (4.7275 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.41426 [sec] (1.58322 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 11.9427 [sec] (7.83175 [ns/kmer]) +num_minimizers = 51792030 +num_minimizer_positions = 110322252 +num_super_kmers = 115060992 +building minimizers MPHF with 16 threads and 18 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.10716 [sec] (2.03761 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340080596284484.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.56833 [sec] (2.34004 [ns/kmer]) +=== step 6 (merging minimizers tuples): 9.23492 [sec] (6.05606 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5184470 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10477309/51792030 (20.2296%) +num_buckets_in_skew_index 122483/51792030 (0.23649%) +max_bucket_size 213518 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 52798737/110322252 (47.8586%) +num_minimizer_positions_of_buckets_in_skew_index 16331277/110322252 (14.8032%) +=== step 7.1 (build sparse index): 2.78004 [sec] (1.82309 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 127767608 + partition = 1: num kmers in buckets of size > 128 and <= 256: 46156729 + partition = 2: num kmers in buckets of size > 256 and <= 512: 22160903 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 12468160 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 5298406 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3581377 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2879207 + partition = 7: num kmers in buckets of size > 8192 and <= 213518: 15629139 +num kmers in skew index = 235941529 (15.4725%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 127767608 + building MPHF with 16 threads and 43 partitions (avg. partition size = 3000000)... + built mphs[0] for 127767608 kmers; bits/key = 2.55689 + built positions[0] for 127767608 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 46156729 + building MPHF with 16 threads and 16 partitions (avg. partition size = 3000000)... + built mphs[1] for 46156729 kmers; bits/key = 2.5153 + built positions[1] for 46156729 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 22160903 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[2] for 22160903 kmers; bits/key = 2.58287 + built positions[2] for 22160903 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 12468160 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 12468160 kmers; bits/key = 2.65123 + built positions[3] for 12468160 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 5298406 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 5298406 kmers; bits/key = 2.60093 + built positions[4] for 5298406 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3581377 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3581377 kmers; bits/key = 3.04997 + built positions[5] for 3581377 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2879207 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2879207)... + built mphs[6] for 2879207 kmers; bits/key = 2.55982 + built positions[6] for 2879207 kmers; bits/key = 13.0001 + lower = 8192; upper = 213518; num_bits_per_pos = 18; num_kmers_in_partition = 15629139 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[7] for 15629139 kmers; bits/key = 2.66548 + built positions[7] for 15629139 kmers; bits/key = 18 +=== step 7.2 (build skew index): 36.7448 [sec] (24.0965 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 79.5339% +buckets with 2 minimizer positions = 10.0102% +buckets with 3 minimizer positions = 3.59324% +buckets with 4 minimizer positions = 1.7473% +buckets with 5 minimizer positions = 1.02488% +buckets with 6 minimizer positions = 0.677402% +buckets with 7 minimizer positions = 0.478906% +buckets with 8 minimizer positions = 0.359042% +buckets with 9 minimizer positions = 0.278761% +buckets with 10 minimizer positions = 0.222915% +buckets with 11 minimizer positions = 0.182316% +buckets with 12 minimizer positions = 0.152323% +buckets with 13 minimizer positions = 0.129448% +buckets with 14 minimizer positions = 0.110938% +buckets with 15 minimizer positions = 0.0946439% +buckets with 16 minimizer positions = 0.0839608% +max_bucket_size = 213518 +=== step 7 (build sparse and skew index): 39.8356 [sec] (26.1233 [ns/kmer]) +=== total time: 77.3119 [sec] (50.6995 [ns/kmer]) +total index size: 1527316782 [B] -- 1527.32 [MB] +SPACE BREAKDOWN: + mphf: 0.0991468 [bits/kmer] (2.91916 [bits/key]) -- 1.23738% + strings_offsets: 0.274587 [bits/kmer] -- 3.42692% + control_codewords: 1.12082 [bits/kmer] -- 13.9881% + mid_load_buckets: 1.10798 [bits/kmer] -- 13.8278% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.75471e-05% + strings: 3.35283 [bits/kmer] -- 41.8441% + skew_index: 2.0573 [bits/kmer] -- 25.6756% + weights: 9.65307e-07 [bits/kmer] -- 1.20473e-05% + -------------- + total: 8.01266 [bits/kmer] +2026-03-12 19:29:17: saving data structure to disk... +2026-03-12 19:29:18: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.time.log new file mode 100644 index 0000000..bba5132 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.canon.sshash --canonical" + User time (seconds): 190.34 + System time (seconds): 21.59 + Percent of CPU this job got: 282% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:15.03 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7406216 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7934469 + Voluntary context switches: 2151 + Involuntary context switches: 2829 + Swaps: 0 + File system inputs: 40 + File system outputs: 28147464 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.canon.sshash --canonical" + User time (seconds): 193.46 + System time (seconds): 21.16 + Percent of CPU this job got: 274% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:18.17 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 11393212 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7738012 + Voluntary context switches: 2376 + Involuntary context switches: 2461 + Swaps: 0 + File system inputs: 24 + File system outputs: 16957624 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.json new file mode 100644 index 0000000..609c0fa --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "840.933700", "negative lookup (avg_nanosec_per_kmer)": "866.032231", "access (avg_nanosec_per_kmer)": "356.944857", "iterator (avg_nanosec_per_kmer)": "2.709185"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "834.580692", "negative lookup (avg_nanosec_per_kmer)": "875.979241", "access (avg_nanosec_per_kmer)": "358.203774", "iterator (avg_nanosec_per_kmer)": "2.778140"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "832.685094", "negative lookup (avg_nanosec_per_kmer)": "871.682709", "access (avg_nanosec_per_kmer)": "358.025231", "iterator (avg_nanosec_per_kmer)": "2.715822"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1193.409795", "negative lookup (avg_nanosec_per_kmer)": "841.914244", "access (avg_nanosec_per_kmer)": "409.531185", "iterator (avg_nanosec_per_kmer)": "2.765669"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1206.225056", "negative lookup (avg_nanosec_per_kmer)": "837.416580", "access (avg_nanosec_per_kmer)": "404.155607", "iterator (avg_nanosec_per_kmer)": "2.767935"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1207.607085", "negative lookup (avg_nanosec_per_kmer)": "834.733798", "access (avg_nanosec_per_kmer)": "419.875206", "iterator (avg_nanosec_per_kmer)": "2.807658"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.log new file mode 100644 index 0000000..2bda184 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 840.934 +negative lookup (avg_nanosec_per_kmer) 866.032 +access (avg_nanosec_per_kmer) = 356.945 +iterator (avg_nanosec_per_kmer) = 2.70919 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 834.581 +negative lookup (avg_nanosec_per_kmer) 875.979 +access (avg_nanosec_per_kmer) = 358.204 +iterator (avg_nanosec_per_kmer) = 2.77814 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 832.685 +negative lookup (avg_nanosec_per_kmer) 871.683 +access (avg_nanosec_per_kmer) = 358.025 +iterator (avg_nanosec_per_kmer) = 2.71582 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 1193.41 +negative lookup (avg_nanosec_per_kmer) 841.914 +access (avg_nanosec_per_kmer) = 409.531 +iterator (avg_nanosec_per_kmer) = 2.76567 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 1206.23 +negative lookup (avg_nanosec_per_kmer) 837.417 +access (avg_nanosec_per_kmer) = 404.156 +iterator (avg_nanosec_per_kmer) = 2.76793 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash +positive lookup (avg_nanosec_per_kmer) = 1207.61 +negative lookup (avg_nanosec_per_kmer) 834.734 +access (avg_nanosec_per_kmer) = 419.875 +iterator (avg_nanosec_per_kmer) = 2.80766 diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.json new file mode 100644 index 0000000..7190581 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6833688", "step 2 (compute minimizer tuples)": "2034370", "step 3 (merging minimizer tuples)": "6288820", "step 4 (build mphf)": "6142456", "step 5 (replacing minimizer values with MPHF hashes)": "4711353", "step 6 (merging minimizers tuples)": "20925309", "step 7.1 (build sparse index)": "1645559", "step 7.2 (build skew index)": "16942875", "step 7 (build sparse and skew index)": "18886380", "total_build_time_in_microsec": "65822376", "index_size_in_bytes": "1647878160", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7239404", "step 2 (compute minimizer tuples)": "1402236", "step 3 (merging minimizer tuples)": "4762175", "step 4 (build mphf)": "2361995", "step 5 (replacing minimizer values with MPHF hashes)": "2974228", "step 6 (merging minimizers tuples)": "13436592", "step 7.1 (build sparse index)": "2264286", "step 7.2 (build skew index)": "17720390", "step 7 (build sparse and skew index)": "20239082", "total_build_time_in_microsec": "52415712", "index_size_in_bytes": "1296690416", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.log new file mode 100644 index 0000000..596b87d --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.log @@ -0,0 +1,271 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash +2026-03-12 19:12:33: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.83369 [sec] (2.46586 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.03437 [sec] (0.734081 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.28882 [sec] (2.26925 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 16 threads and 41 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.14246 [sec] (2.21644 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339153889946824.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.71135 [sec] (1.70004 [ns/kmer]) +=== step 6 (merging minimizers tuples): 20.9253 [sec] (7.55068 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1804032 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3097190/122838669 (2.52135%) +num_buckets_in_skew_index 28203/122838669 (0.0229594%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 12724460/140756047 (9.04008%) +num_minimizer_positions_of_buckets_in_skew_index 8318311/140756047 (5.90974%) +=== step 7.1 (build sparse index): 1.64556 [sec] (0.593783 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 25196923 + partition = 1: num kmers in buckets of size > 128 and <= 256: 21919654 + partition = 2: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 14085569 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 9296403 + partition = 7: num kmers in buckets of size > 8192 and <= 147936: 20255122 +num kmers in skew index = 145458128 (5.2487%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 25196923 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 25196923 kmers; bits/key = 2.56001 + built positions[0] for 25196923 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21919654 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21919654 kmers; bits/key = 2.60671 + built positions[1] for 21919654 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19634878 kmers; bits/key = 2.55594 + built positions[2] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 18051454 kmers; bits/key = 2.60116 + built positions[3] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17018125 kmers; bits/key = 2.58264 + built positions[4] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14085569 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[5] for 14085569 kmers; bits/key = 2.54674 + built positions[5] for 14085569 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 9296403 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[6] for 9296403 kmers; bits/key = 2.72206 + built positions[6] for 9296403 kmers; bits/key = 13 + lower = 8192; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 20255122 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[7] for 20255122 kmers; bits/key = 2.57505 + built positions[7] for 20255122 kmers; bits/key = 18 +=== step 7.2 (build skew index): 16.9429 [sec] (6.11366 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 18.8864 [sec] (6.81495 [ns/kmer]) +=== total time: 65.8224 [sec] (23.7513 [ns/kmer]) +total index size: 1647878160 [B] -- 1647.88 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.63218% + strings_offsets: 0.11255 [bits/kmer] -- 2.36601% + control_codewords: 1.46273 [bits/kmer] -- 30.7492% + mid_load_buckets: 0.146928 [bits/kmer] -- 3.08869% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.62633e-05% + strings: 2.11826 [bits/kmer] -- 44.5296% + skew_index: 0.791283 [bits/kmer] -- 16.6342% + weights: 5.31156e-07 [bits/kmer] -- 1.11659e-05% + -------------- + total: 4.75695 [bits/kmer] +2026-03-12 19:13:39: saving data structure to disk... +2026-03-12 19:13:40: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash +2026-03-12 19:26:25: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.2394 [sec] (4.74745 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.40224 [sec] (0.919557 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.bin' +=== step 3 (merging minimizer tuples): 4.76218 [sec] (3.12293 [ns/kmer]) +num_minimizers = 46563469 +num_minimizer_positions = 92022512 +num_super_kmers = 92022512 +building minimizers MPHF with 16 threads and 16 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.36199 [sec] (1.54895 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339985391022241.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.97423 [sec] (1.95044 [ns/kmer]) +=== step 6 (merging minimizers tuples): 13.4366 [sec] (8.81143 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 4388359 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 8820557/46563469 (18.9431%) +num_buckets_in_skew_index 53596/46563469 (0.115103%) +max_bucket_size 107228 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 46186993/92022512 (50.191%) +num_minimizer_positions_of_buckets_in_skew_index 8146203/92022512 (8.8524%) +=== step 7.1 (build sparse index): 2.26429 [sec] (1.48487 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 56688503 + partition = 1: num kmers in buckets of size > 128 and <= 256: 25980381 + partition = 2: num kmers in buckets of size > 256 and <= 512: 14889997 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 6676604 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4044006 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3131389 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 4331001 + partition = 7: num kmers in buckets of size > 8192 and <= 107228: 9448602 +num kmers in skew index = 125190483 (8.20973%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 56688503 + building MPHF with 16 threads and 19 partitions (avg. partition size = 3000000)... + built mphs[0] for 56688503 kmers; bits/key = 2.55634 + built positions[0] for 56688503 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 25980381 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 25980381 kmers; bits/key = 2.52835 + built positions[1] for 25980381 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 14889997 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 14889997 kmers; bits/key = 2.54675 + built positions[2] for 14889997 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6676604 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 6676604 kmers; bits/key = 2.7921 + built positions[3] for 6676604 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4044006 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4044006 kmers; bits/key = 2.96066 + built positions[4] for 4044006 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3131389 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3131389 kmers; bits/key = 3.15474 + built positions[5] for 3131389 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 4331001 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 4331001 kmers; bits/key = 2.89101 + built positions[6] for 4331001 kmers; bits/key = 13.0001 + lower = 8192; upper = 107228; num_bits_per_pos = 17; num_kmers_in_partition = 9448602 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 9448602 kmers; bits/key = 2.77564 + built positions[7] for 9448602 kmers; bits/key = 17 +=== step 7.2 (build skew index): 17.7204 [sec] (11.6207 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 80.9418% +buckets with 2 minimizer positions = 9.42447% +buckets with 3 minimizer positions = 3.22348% +buckets with 4 minimizer positions = 1.57084% +buckets with 5 minimizer positions = 0.930612% +buckets with 6 minimizer positions = 0.620257% +buckets with 7 minimizer positions = 0.44432% +buckets with 8 minimizer positions = 0.331925% +buckets with 9 minimizer positions = 0.259727% +buckets with 10 minimizer positions = 0.208507% +buckets with 11 minimizer positions = 0.173359% +buckets with 12 minimizer positions = 0.146426% +buckets with 13 minimizer positions = 0.124913% +buckets with 14 minimizer positions = 0.109693% +buckets with 15 minimizer positions = 0.0975658% +buckets with 16 minimizer positions = 0.0868986% +max_bucket_size = 107228 +=== step 7 (build sparse and skew index): 20.2391 [sec] (13.2724 [ns/kmer]) +=== total time: 52.4157 [sec] (34.3731 [ns/kmer]) +total index size: 1296690416 [B] -- 1296.69 [MB] +SPACE BREAKDOWN: + mphf: 0.0882742 [bits/kmer] (2.89089 [bits/key]) -- 1.29763% + strings_offsets: 0.274587 [bits/kmer] -- 4.03642% + control_codewords: 1.00767 [bits/kmer] -- 14.8127% + mid_load_buckets: 0.969231 [bits/kmer] -- 14.2477% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.0668e-05% + strings: 3.35283 [bits/kmer] -- 49.2864% + skew_index: 1.11015 [bits/kmer] -- 16.3192% + weights: 9.65307e-07 [bits/kmer] -- 1.419e-05% + -------------- + total: 6.80274 [bits/kmer] +2026-03-12 19:27:17: saving data structure to disk... +2026-03-12 19:27:18: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.time.log new file mode 100644 index 0000000..9d38be6 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m25/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m25.sshash" + User time (seconds): 146.07 + System time (seconds): 17.62 + Percent of CPU this job got: 245% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:06.74 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6210280 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6333945 + Voluntary context switches: 2576 + Involuntary context switches: 2438 + Swaps: 0 + File system inputs: 16 + File system outputs: 22410008 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m25.sshash" + User time (seconds): 110.43 + System time (seconds): 12.67 + Percent of CPU this job got: 231% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:53.15 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6732956 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4972830 + Voluntary context switches: 3874 + Involuntary context switches: 1834 + Swaps: 0 + File system inputs: 32 + File system outputs: 13889944 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.json new file mode 100644 index 0000000..e7c159a --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash", "k": "63", "m": "27", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "636.590594", "negative lookup (avg_nanosec_per_kmer)": "504.943458", "access (avg_nanosec_per_kmer)": "359.692541", "iterator (avg_nanosec_per_kmer)": "2.760957"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash", "k": "63", "m": "27", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "636.595011", "negative lookup (avg_nanosec_per_kmer)": "513.571637", "access (avg_nanosec_per_kmer)": "361.497723", "iterator (avg_nanosec_per_kmer)": "2.714145"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash", "k": "63", "m": "27", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "636.252908", "negative lookup (avg_nanosec_per_kmer)": "509.475444", "access (avg_nanosec_per_kmer)": "358.384543", "iterator (avg_nanosec_per_kmer)": "2.921708"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash", "k": "63", "m": "27", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1004.577381", "negative lookup (avg_nanosec_per_kmer)": "496.553727", "access (avg_nanosec_per_kmer)": "407.476253", "iterator (avg_nanosec_per_kmer)": "2.776453"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash", "k": "63", "m": "27", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1013.442972", "negative lookup (avg_nanosec_per_kmer)": "498.025569", "access (avg_nanosec_per_kmer)": "408.269385", "iterator (avg_nanosec_per_kmer)": "2.760501"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash", "k": "63", "m": "27", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1005.876462", "negative lookup (avg_nanosec_per_kmer)": "498.778767", "access (avg_nanosec_per_kmer)": "407.571854", "iterator (avg_nanosec_per_kmer)": "2.763190"} diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.log new file mode 100644 index 0000000..3376991 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 636.591 +negative lookup (avg_nanosec_per_kmer) 504.943 +access (avg_nanosec_per_kmer) = 359.693 +iterator (avg_nanosec_per_kmer) = 2.76096 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 636.595 +negative lookup (avg_nanosec_per_kmer) 513.572 +access (avg_nanosec_per_kmer) = 361.498 +iterator (avg_nanosec_per_kmer) = 2.71414 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 636.253 +negative lookup (avg_nanosec_per_kmer) 509.475 +access (avg_nanosec_per_kmer) = 358.385 +iterator (avg_nanosec_per_kmer) = 2.92171 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1004.58 +negative lookup (avg_nanosec_per_kmer) 496.554 +access (avg_nanosec_per_kmer) = 407.476 +iterator (avg_nanosec_per_kmer) = 2.77645 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1013.44 +negative lookup (avg_nanosec_per_kmer) 498.026 +access (avg_nanosec_per_kmer) = 408.269 +iterator (avg_nanosec_per_kmer) = 2.7605 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1005.88 +negative lookup (avg_nanosec_per_kmer) 498.779 +access (avg_nanosec_per_kmer) = 407.572 +iterator (avg_nanosec_per_kmer) = 2.76319 diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.json new file mode 100644 index 0000000..1d387d1 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "27", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6842267", "step 2 (compute minimizer tuples)": "3581362", "step 3 (merging minimizer tuples)": "8680874", "step 4 (build mphf)": "7974211", "step 5 (replacing minimizer values with MPHF hashes)": "6484495", "step 6 (merging minimizers tuples)": "24096255", "step 7.1 (build sparse index)": "2258315", "step 7.2 (build skew index)": "16605917", "step 7 (build sparse and skew index)": "19268538", "total_build_time_in_microsec": "76928002", "index_size_in_bytes": "1814306466", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "27", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7237859", "step 2 (compute minimizer tuples)": "2448907", "step 3 (merging minimizer tuples)": "6087425", "step 4 (build mphf)": "3575069", "step 5 (replacing minimizer values with MPHF hashes)": "3647058", "step 6 (merging minimizers tuples)": "15794664", "step 7.1 (build sparse index)": "2863627", "step 7.2 (build skew index)": "30528762", "step 7 (build sparse and skew index)": "33712395", "total_build_time_in_microsec": "72503377", "index_size_in_bytes": "1494935154", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.log new file mode 100644 index 0000000..e2b56d0 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.log @@ -0,0 +1,272 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash --canonical +2026-03-12 19:17:40: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.84227 [sec] (2.46896 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.58136 [sec] (1.2923 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.68087 [sec] (3.1324 [ns/kmer]) +num_minimizers = 159034753 +num_minimizer_positions = 180722313 +num_super_kmers = 190227125 +building minimizers MPHF with 16 threads and 54 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.97421 [sec] (2.87741 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339460766065799.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.48449 [sec] (2.33986 [ns/kmer]) +=== step 6 (merging minimizers tuples): 24.0963 [sec] (8.69488 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2350234 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3990167/159034753 (2.50899%) +num_buckets_in_skew_index 33587/159034753 (0.0211193%) +max_bucket_size 64902 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 15996532/180722313 (8.85144%) +num_minimizer_positions_of_buckets_in_skew_index 9714782/180722313 (5.37553%) +=== step 7.1 (build sparse index): 2.25832 [sec] (0.814889 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 24234974 + partition = 1: num kmers in buckets of size > 128 and <= 256: 21879045 + partition = 2: num kmers in buckets of size > 256 and <= 512: 18710168 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 16278323 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 14639382 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 13821171 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 9246514 + partition = 7: num kmers in buckets of size > 8192 and <= 64902: 20013479 +num kmers in skew index = 138823056 (5.00928%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 24234974 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 24234974 kmers; bits/key = 2.6451 + built positions[0] for 24234974 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21879045 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21879045 kmers; bits/key = 2.61078 + built positions[1] for 21879045 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 18710168 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 18710168 kmers; bits/key = 2.6617 + built positions[2] for 18710168 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 16278323 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 16278323 kmers; bits/key = 2.62845 + built positions[3] for 16278323 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 14639382 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[4] for 14639382 kmers; bits/key = 2.52468 + built positions[4] for 14639382 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 13821171 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[5] for 13821171 kmers; bits/key = 2.58749 + built positions[5] for 13821171 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 9246514 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[6] for 9246514 kmers; bits/key = 2.78088 + built positions[6] for 9246514 kmers; bits/key = 13 + lower = 8192; upper = 64902; num_bits_per_pos = 16; num_kmers_in_partition = 20013479 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[7] for 20013479 kmers; bits/key = 2.53689 + built positions[7] for 20013479 kmers; bits/key = 16 +=== step 7.2 (build skew index): 16.6059 [sec] (5.99207 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4699% +buckets with 2 minimizer positions = 1.47781% +buckets with 3 minimizer positions = 0.408901% +buckets with 4 minimizer positions = 0.182018% +buckets with 5 minimizer positions = 0.102671% +buckets with 6 minimizer positions = 0.0659944% +buckets with 7 minimizer positions = 0.0456208% +buckets with 8 minimizer positions = 0.0337461% +buckets with 9 minimizer positions = 0.0257994% +buckets with 10 minimizer positions = 0.0205263% +buckets with 11 minimizer positions = 0.0165322% +buckets with 12 minimizer positions = 0.0137982% +buckets with 13 minimizer positions = 0.0114013% +buckets with 14 minimizer positions = 0.00971737% +buckets with 15 minimizer positions = 0.00833277% +buckets with 16 minimizer positions = 0.00726885% +max_bucket_size = 64902 +=== step 7 (build sparse and skew index): 19.2685 [sec] (6.95285 [ns/kmer]) +=== total time: 76.928 [sec] (27.7587 [ns/kmer]) +total index size: 1814306466 [B] -- 1814.31 [MB] +SPACE BREAKDOWN: + mphf: 0.164214 [bits/kmer] (2.86157 [bits/key]) -- 3.13542% + strings_offsets: 0.11255 [bits/kmer] -- 2.14898% + control_codewords: 1.89374 [bits/kmer] -- 36.1581% + mid_load_buckets: 0.18471 [bits/kmer] -- 3.52676% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.47715e-05% + strings: 2.11826 [bits/kmer] -- 40.4449% + skew_index: 0.763917 [bits/kmer] -- 14.5858% + weights: 5.31156e-07 [bits/kmer] -- 1.01416e-05% + -------------- + total: 5.23739 [bits/kmer] +2026-03-12 19:18:57: saving data structure to disk... +2026-03-12 19:18:58: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash --canonical +2026-03-12 19:31:24: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23786 [sec] (4.74644 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.44891 [sec] (1.60594 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.08742 [sec] (3.992 [ns/kmer]) +num_minimizers = 56717844 +num_minimizer_positions = 114374599 +num_super_kmers = 119219483 +building minimizers MPHF with 16 threads and 19 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.57507 [sec] (2.34445 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340284403173787.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.64706 [sec] (2.39166 [ns/kmer]) +=== step 6 (merging minimizers tuples): 15.7947 [sec] (10.3578 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5527727 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10966624/56717844 (19.3354%) +num_buckets_in_skew_index 111039/56717844 (0.195774%) +max_bucket_size 111739 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 54133525/114374599 (47.33%) +num_minimizer_positions_of_buckets_in_skew_index 14600893/114374599 (12.7659%) +=== step 7.1 (build sparse index): 2.86363 [sec] (1.87791 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 111049177 + partition = 1: num kmers in buckets of size > 128 and <= 256: 38507569 + partition = 2: num kmers in buckets of size > 256 and <= 512: 19600969 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 10295863 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4834323 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3669538 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 3141576 + partition = 7: num kmers in buckets of size > 8192 and <= 111739: 10246664 +num kmers in skew index = 201345679 (13.2038%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 111049177 + building MPHF with 16 threads and 38 partitions (avg. partition size = 3000000)... + built mphs[0] for 111049177 kmers; bits/key = 2.53561 + built positions[0] for 111049177 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 38507569 + building MPHF with 16 threads and 13 partitions (avg. partition size = 3000000)... + built mphs[1] for 38507569 kmers; bits/key = 2.56484 + built positions[1] for 38507569 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19600969 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19600969 kmers; bits/key = 2.55965 + built positions[2] for 19600969 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 10295863 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[3] for 10295863 kmers; bits/key = 2.66475 + built positions[3] for 10295863 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4834323 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4834323 kmers; bits/key = 2.72198 + built positions[4] for 4834323 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3669538 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3669538 kmers; bits/key = 2.9867 + built positions[5] for 3669538 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 3141576 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 3141576 kmers; bits/key = 3.14586 + built positions[6] for 3141576 kmers; bits/key = 13.0001 + lower = 8192; upper = 111739; num_bits_per_pos = 17; num_kmers_in_partition = 10246664 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 10246664 kmers; bits/key = 2.67552 + built positions[7] for 10246664 kmers; bits/key = 17 +=== step 7.2 (build skew index): 30.5288 [sec] (20.0201 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 80.4688% +buckets with 2 minimizer positions = 9.74601% +buckets with 3 minimizer positions = 3.42667% +buckets with 4 minimizer positions = 1.64523% +buckets with 5 minimizer positions = 0.961667% +buckets with 6 minimizer positions = 0.629389% +buckets with 7 minimizer positions = 0.44635% +buckets with 8 minimizer positions = 0.331915% +buckets with 9 minimizer positions = 0.257037% +buckets with 10 minimizer positions = 0.20565% +buckets with 11 minimizer positions = 0.167291% +buckets with 12 minimizer positions = 0.139408% +buckets with 13 minimizer positions = 0.117751% +buckets with 14 minimizer positions = 0.1006% +buckets with 15 minimizer positions = 0.0879% +buckets with 16 minimizer positions = 0.077212% +max_bucket_size = 111739 +=== step 7 (build sparse and skew index): 33.7124 [sec] (22.1079 [ns/kmer]) +=== total time: 72.5034 [sec] (47.5462 [ns/kmer]) +total index size: 1494935154 [B] -- 1494.94 [MB] +SPACE BREAKDOWN: + mphf: 0.105214 [bits/kmer] (2.82877 [bits/key]) -- 1.34154% + strings_offsets: 0.274587 [bits/kmer] -- 3.50115% + control_codewords: 1.22741 [bits/kmer] -- 15.6503% + mid_load_buckets: 1.13599 [bits/kmer] -- 14.4845% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.79272e-05% + strings: 3.35283 [bits/kmer] -- 42.7505% + skew_index: 1.74674 [bits/kmer] -- 22.272% + weights: 9.65307e-07 [bits/kmer] -- 1.23082e-05% + -------------- + total: 7.84278 [bits/kmer] +2026-03-12 19:32:36: saving data structure to disk... +2026-03-12 19:32:37: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.time.log new file mode 100644 index 0000000..cd4af97 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.canon.sshash --canonical" + User time (seconds): 175.70 + System time (seconds): 21.78 + Percent of CPU this job got: 253% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:17.93 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7764784 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7543564 + Voluntary context switches: 2275 + Involuntary context switches: 2851 + Swaps: 0 + File system inputs: 48 + File system outputs: 29241936 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.canon.sshash --canonical" + User time (seconds): 173.16 + System time (seconds): 19.08 + Percent of CPU this job got: 262% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:13.36 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 10289236 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7370891 + Voluntary context switches: 2277 + Involuntary context switches: 2616 + Swaps: 0 + File system inputs: 32 + File system outputs: 17501168 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.json new file mode 100644 index 0000000..97ff9cd --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash", "k": "63", "m": "27", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "819.585861", "negative lookup (avg_nanosec_per_kmer)": "863.936260", "access (avg_nanosec_per_kmer)": "357.540876", "iterator (avg_nanosec_per_kmer)": "2.749546"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash", "k": "63", "m": "27", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "810.536836", "negative lookup (avg_nanosec_per_kmer)": "867.915970", "access (avg_nanosec_per_kmer)": "358.960531", "iterator (avg_nanosec_per_kmer)": "2.716229"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash", "k": "63", "m": "27", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "815.201959", "negative lookup (avg_nanosec_per_kmer)": "865.209726", "access (avg_nanosec_per_kmer)": "356.640541", "iterator (avg_nanosec_per_kmer)": "2.713262"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash", "k": "63", "m": "27", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1166.155498", "negative lookup (avg_nanosec_per_kmer)": "842.427529", "access (avg_nanosec_per_kmer)": "402.201651", "iterator (avg_nanosec_per_kmer)": "2.752703"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash", "k": "63", "m": "27", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1180.073869", "negative lookup (avg_nanosec_per_kmer)": "842.002626", "access (avg_nanosec_per_kmer)": "404.982152", "iterator (avg_nanosec_per_kmer)": "2.865233"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash", "k": "63", "m": "27", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1167.555926", "negative lookup (avg_nanosec_per_kmer)": "836.456681", "access (avg_nanosec_per_kmer)": "406.153139", "iterator (avg_nanosec_per_kmer)": "2.759708"} diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.log new file mode 100644 index 0000000..320036c --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash +positive lookup (avg_nanosec_per_kmer) = 819.586 +negative lookup (avg_nanosec_per_kmer) 863.936 +access (avg_nanosec_per_kmer) = 357.541 +iterator (avg_nanosec_per_kmer) = 2.74955 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash +positive lookup (avg_nanosec_per_kmer) = 810.537 +negative lookup (avg_nanosec_per_kmer) 867.916 +access (avg_nanosec_per_kmer) = 358.961 +iterator (avg_nanosec_per_kmer) = 2.71623 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash +positive lookup (avg_nanosec_per_kmer) = 815.202 +negative lookup (avg_nanosec_per_kmer) 865.21 +access (avg_nanosec_per_kmer) = 356.641 +iterator (avg_nanosec_per_kmer) = 2.71326 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash +positive lookup (avg_nanosec_per_kmer) = 1166.16 +negative lookup (avg_nanosec_per_kmer) 842.428 +access (avg_nanosec_per_kmer) = 402.202 +iterator (avg_nanosec_per_kmer) = 2.7527 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash +positive lookup (avg_nanosec_per_kmer) = 1180.07 +negative lookup (avg_nanosec_per_kmer) 842.003 +access (avg_nanosec_per_kmer) = 404.982 +iterator (avg_nanosec_per_kmer) = 2.86523 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash +positive lookup (avg_nanosec_per_kmer) = 1167.56 +negative lookup (avg_nanosec_per_kmer) 836.457 +access (avg_nanosec_per_kmer) = 406.153 +iterator (avg_nanosec_per_kmer) = 2.75971 diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.json new file mode 100644 index 0000000..c386608 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "27", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6811102", "step 2 (compute minimizer tuples)": "2089202", "step 3 (merging minimizer tuples)": "6579954", "step 4 (build mphf)": "6303376", "step 5 (replacing minimizer values with MPHF hashes)": "4937364", "step 6 (merging minimizers tuples)": "20091493", "step 7.1 (build sparse index)": "1666417", "step 7.2 (build skew index)": "14037110", "step 7 (build sparse and skew index)": "16009861", "total_build_time_in_microsec": "62822352", "index_size_in_bytes": "1624766418", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "27", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7252488", "step 2 (compute minimizer tuples)": "1442412", "step 3 (merging minimizer tuples)": "4980157", "step 4 (build mphf)": "3137442", "step 5 (replacing minimizer values with MPHF hashes)": "2988418", "step 6 (merging minimizers tuples)": "13444235", "step 7.1 (build sparse index)": "2298173", "step 7.2 (build skew index)": "13149434", "step 7 (build sparse and skew index)": "15706085", "total_build_time_in_microsec": "48951237", "index_size_in_bytes": "1281740678", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.log new file mode 100644 index 0000000..a18fff4 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.log @@ -0,0 +1,271 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash +2026-03-12 19:16:00: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.8111 [sec] (2.45771 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.0892 [sec] (0.753866 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.57995 [sec] (2.37431 [ns/kmer]) +num_minimizers = 130071300 +num_minimizer_positions = 146543146 +num_super_kmers = 146543146 +building minimizers MPHF with 16 threads and 44 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.30338 [sec] (2.27451 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339360962878124.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.93736 [sec] (1.7816 [ns/kmer]) +=== step 6 (merging minimizers tuples): 20.0915 [sec] (7.2498 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1762565 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3011883/130071300 (2.31556%) +num_buckets_in_skew_index 26030/130071300 (0.0200121%) +max_bucket_size 42360 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 12299259/146543146 (8.39293%) +num_minimizer_positions_of_buckets_in_skew_index 7210500/146543146 (4.92039%) +=== step 7.1 (build sparse index): 1.66642 [sec] (0.601309 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 22313814 + partition = 1: num kmers in buckets of size > 128 and <= 256: 19460787 + partition = 2: num kmers in buckets of size > 256 and <= 512: 16944265 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 15047921 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 14260069 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 10568543 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 6525938 + partition = 7: num kmers in buckets of size > 8192 and <= 42360: 14580520 +num kmers in skew index = 119701857 (4.31931%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 22313814 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[0] for 22313814 kmers; bits/key = 2.56802 + built positions[0] for 22313814 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 19460787 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[1] for 19460787 kmers; bits/key = 2.57509 + built positions[1] for 19460787 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 16944265 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 16944265 kmers; bits/key = 2.5415 + built positions[2] for 16944265 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 15047921 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[3] for 15047921 kmers; bits/key = 2.63844 + built positions[3] for 15047921 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 14260069 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[4] for 14260069 kmers; bits/key = 2.55071 + built positions[4] for 14260069 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 10568543 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[5] for 10568543 kmers; bits/key = 2.68784 + built positions[5] for 10568543 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 6525938 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[6] for 6525938 kmers; bits/key = 2.84696 + built positions[6] for 6525938 kmers; bits/key = 13.0001 + lower = 8192; upper = 42360; num_bits_per_pos = 16; num_kmers_in_partition = 14580520 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[7] for 14580520 kmers; bits/key = 2.56259 + built positions[7] for 14580520 kmers; bits/key = 16 +=== step 7.2 (build skew index): 14.0371 [sec] (5.06514 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.6644% +buckets with 2 minimizer positions = 1.35508% +buckets with 3 minimizer positions = 0.370926% +buckets with 4 minimizer positions = 0.168651% +buckets with 5 minimizer positions = 0.0961819% +buckets with 6 minimizer positions = 0.0623197% +buckets with 7 minimizer positions = 0.0436453% +buckets with 8 minimizer positions = 0.0320417% +buckets with 9 minimizer positions = 0.0248602% +buckets with 10 minimizer positions = 0.0197899% +buckets with 11 minimizer positions = 0.0160981% +buckets with 12 minimizer positions = 0.0132581% +buckets with 13 minimizer positions = 0.0111969% +buckets with 14 minimizer positions = 0.0096155% +buckets with 15 minimizer positions = 0.008277% +buckets with 16 minimizer positions = 0.00717914% +max_bucket_size = 42360 +=== step 7 (build sparse and skew index): 16.0099 [sec] (5.77699 [ns/kmer]) +=== total time: 62.8224 [sec] (22.6688 [ns/kmer]) +total index size: 1624766418 [B] -- 1624.77 [MB] +SPACE BREAKDOWN: + mphf: 0.134094 [bits/kmer] (2.85703 [bits/key]) -- 2.85901% + strings_offsets: 0.11255 [bits/kmer] -- 2.39967% + control_codewords: 1.54885 [bits/kmer] -- 33.0228% + mid_load_buckets: 0.142018 [bits/kmer] -- 3.02795% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.64947e-05% + strings: 2.11826 [bits/kmer] -- 45.1631% + skew_index: 0.634469 [bits/kmer] -- 13.5274% + weights: 5.31156e-07 [bits/kmer] -- 1.13247e-05% + -------------- + total: 4.69024 [bits/kmer] +2026-03-12 19:17:03: saving data structure to disk... +2026-03-12 19:17:04: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash +2026-03-12 19:29:53: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.25249 [sec] (4.75603 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.44241 [sec] (0.945903 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.bin' +=== step 3 (merging minimizer tuples): 4.98016 [sec] (3.26588 [ns/kmer]) +num_minimizers = 50673845 +num_minimizer_positions = 95153889 +num_super_kmers = 95153889 +building minimizers MPHF with 16 threads and 17 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.13744 [sec] (2.05747 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340193283358553.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.98842 [sec] (1.95974 [ns/kmer]) +=== step 6 (merging minimizers tuples): 13.4442 [sec] (8.81645 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 4628284 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9138560/50673845 (18.0341%) +num_buckets_in_skew_index 46863/50673845 (0.0924797%) +max_bucket_size 77852 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 46457486/95153889 (48.8235%) +num_minimizer_positions_of_buckets_in_skew_index 7207981/95153889 (7.57508%) +=== step 7.1 (build sparse index): 2.29817 [sec] (1.50709 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 46607828 + partition = 1: num kmers in buckets of size > 128 and <= 256: 22825153 + partition = 2: num kmers in buckets of size > 256 and <= 512: 12442877 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 5369213 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4198881 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3548421 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 3185865 + partition = 7: num kmers in buckets of size > 8192 and <= 77852: 6375414 +num kmers in skew index = 104553652 (6.85641%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 46607828 + building MPHF with 16 threads and 16 partitions (avg. partition size = 3000000)... + built mphs[0] for 46607828 kmers; bits/key = 2.55934 + built positions[0] for 46607828 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 22825153 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 22825153 kmers; bits/key = 2.51981 + built positions[1] for 22825153 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 12442877 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 12442877 kmers; bits/key = 2.69023 + built positions[2] for 12442877 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 5369213 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 5369213 kmers; bits/key = 2.65194 + built positions[3] for 5369213 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4198881 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4198881 kmers; bits/key = 2.86678 + built positions[4] for 4198881 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3548421 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3548421 kmers; bits/key = 3.07443 + built positions[5] for 3548421 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 3185865 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 3185865 kmers; bits/key = 3.37696 + built positions[6] for 3185865 kmers; bits/key = 13.0001 + lower = 8192; upper = 77852; num_bits_per_pos = 17; num_kmers_in_partition = 6375414 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 6375414 kmers; bits/key = 2.83713 + built positions[7] for 6375414 kmers; bits/key = 17.0001 +=== step 7.2 (build skew index): 13.1494 [sec] (8.62312 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 81.8734% +buckets with 2 minimizer positions = 9.13348% +buckets with 3 minimizer positions = 3.06003% +buckets with 4 minimizer positions = 1.48128% +buckets with 5 minimizer positions = 0.872813% +buckets with 6 minimizer positions = 0.575293% +buckets with 7 minimizer positions = 0.409154% +buckets with 8 minimizer positions = 0.309063% +buckets with 9 minimizer positions = 0.239915% +buckets with 10 minimizer positions = 0.192214% +buckets with 11 minimizer positions = 0.158833% +buckets with 12 minimizer positions = 0.135916% +buckets with 13 minimizer positions = 0.116608% +buckets with 14 minimizer positions = 0.102509% +buckets with 15 minimizer positions = 0.0901057% +buckets with 16 minimizer positions = 0.0810477% +max_bucket_size = 77852 +=== step 7 (build sparse and skew index): 15.7061 [sec] (10.2997 [ns/kmer]) +=== total time: 48.9512 [sec] (32.1012 [ns/kmer]) +total index size: 1281740678 [B] -- 1281.74 [MB] +SPACE BREAKDOWN: + mphf: 0.0941185 [bits/kmer] (2.83226 [bits/key]) -- 1.39968% + strings_offsets: 0.274587 [bits/kmer] -- 4.0835% + control_codewords: 1.09662 [bits/kmer] -- 16.3083% + mid_load_buckets: 0.974907 [bits/kmer] -- 14.4983% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.09091e-05% + strings: 3.35283 [bits/kmer] -- 49.8613% + skew_index: 0.93125 [bits/kmer] -- 13.849% + weights: 9.65307e-07 [bits/kmer] -- 1.43555e-05% + -------------- + total: 6.72431 [bits/kmer] +2026-03-12 19:30:42: saving data structure to disk... +2026-03-12 19:30:42: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.time.log new file mode 100644 index 0000000..2b4ab60 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m27/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m27.sshash" + User time (seconds): 135.00 + System time (seconds): 17.50 + Percent of CPU this job got: 239% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:03.72 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6513044 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6113557 + Voluntary context switches: 2554 + Involuntary context switches: 2414 + Swaps: 0 + File system inputs: 40 + File system outputs: 23236624 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 27 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m27.sshash" + User time (seconds): 95.82 + System time (seconds): 12.41 + Percent of CPU this job got: 217% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:49.66 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5659716 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4700765 + Voluntary context switches: 1668 + Involuntary context switches: 2269 + Swaps: 0 + File system inputs: 24 + File system outputs: 14332920 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.json new file mode 100644 index 0000000..b06ee52 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash", "k": "63", "m": "29", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "628.231507", "negative lookup (avg_nanosec_per_kmer)": "506.184447", "access (avg_nanosec_per_kmer)": "361.937360", "iterator (avg_nanosec_per_kmer)": "2.723076"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash", "k": "63", "m": "29", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "630.768260", "negative lookup (avg_nanosec_per_kmer)": "509.070160", "access (avg_nanosec_per_kmer)": "358.719726", "iterator (avg_nanosec_per_kmer)": "2.800912"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash", "k": "63", "m": "29", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "634.154590", "negative lookup (avg_nanosec_per_kmer)": "520.136916", "access (avg_nanosec_per_kmer)": "361.949167", "iterator (avg_nanosec_per_kmer)": "2.723394"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash", "k": "63", "m": "29", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "989.333571", "negative lookup (avg_nanosec_per_kmer)": "502.073278", "access (avg_nanosec_per_kmer)": "417.188820", "iterator (avg_nanosec_per_kmer)": "2.768935"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash", "k": "63", "m": "29", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "988.978203", "negative lookup (avg_nanosec_per_kmer)": "498.173164", "access (avg_nanosec_per_kmer)": "405.278343", "iterator (avg_nanosec_per_kmer)": "2.768424"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash", "k": "63", "m": "29", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "992.555967", "negative lookup (avg_nanosec_per_kmer)": "500.488115", "access (avg_nanosec_per_kmer)": "405.771969", "iterator (avg_nanosec_per_kmer)": "2.819722"} diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.log new file mode 100644 index 0000000..0fdf0d4 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 628.232 +negative lookup (avg_nanosec_per_kmer) 506.184 +access (avg_nanosec_per_kmer) = 361.937 +iterator (avg_nanosec_per_kmer) = 2.72308 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 630.768 +negative lookup (avg_nanosec_per_kmer) 509.07 +access (avg_nanosec_per_kmer) = 358.72 +iterator (avg_nanosec_per_kmer) = 2.80091 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 634.155 +negative lookup (avg_nanosec_per_kmer) 520.137 +access (avg_nanosec_per_kmer) = 361.949 +iterator (avg_nanosec_per_kmer) = 2.72339 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 989.334 +negative lookup (avg_nanosec_per_kmer) 502.073 +access (avg_nanosec_per_kmer) = 417.189 +iterator (avg_nanosec_per_kmer) = 2.76893 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 988.978 +negative lookup (avg_nanosec_per_kmer) 498.173 +access (avg_nanosec_per_kmer) = 405.278 +iterator (avg_nanosec_per_kmer) = 2.76842 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 992.556 +negative lookup (avg_nanosec_per_kmer) 500.488 +access (avg_nanosec_per_kmer) = 405.772 +iterator (avg_nanosec_per_kmer) = 2.81972 diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.json new file mode 100644 index 0000000..4aa2c54 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "29", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6805446", "step 2 (compute minimizer tuples)": "3638966", "step 3 (merging minimizer tuples)": "9503434", "step 4 (build mphf)": "8439935", "step 5 (replacing minimizer values with MPHF hashes)": "6886594", "step 6 (merging minimizers tuples)": "23910860", "step 7.1 (build sparse index)": "2344280", "step 7.2 (build skew index)": "13579338", "step 7 (build sparse and skew index)": "16371976", "total_build_time_in_microsec": "75557211", "index_size_in_bytes": "1817287930", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "29", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7237222", "step 2 (compute minimizer tuples)": "2503519", "step 3 (merging minimizer tuples)": "6469449", "step 4 (build mphf)": "3600416", "step 5 (replacing minimizer values with MPHF hashes)": "3875257", "step 6 (merging minimizers tuples)": "19304107", "step 7.1 (build sparse index)": "2953066", "step 7.2 (build skew index)": "26318643", "step 7 (build sparse and skew index)": "29605774", "total_build_time_in_microsec": "72595744", "index_size_in_bytes": "1476857010", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.log new file mode 100644 index 0000000..a508a46 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.log @@ -0,0 +1,273 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash --canonical +2026-03-12 19:21:12: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.80545 [sec] (2.45567 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 3.63897 [sec] (1.31308 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 9.50343 [sec] (3.42921 [ns/kmer]) +num_minimizers = 170479160 +num_minimizer_positions = 190764803 +num_super_kmers = 200762252 +building minimizers MPHF with 16 threads and 57 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 8.43994 [sec] (3.04546 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339672850428238.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.88659 [sec] (2.48495 [ns/kmer]) +=== step 6 (merging minimizers tuples): 23.9109 [sec] (8.62798 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2344986 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3959466/170479160 (2.32255%) +num_buckets_in_skew_index 31659/170479160 (0.0185706%) +max_bucket_size 65778 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 15756956/190764803 (8.25989%) +num_minimizer_positions_of_buckets_in_skew_index 8519812/190764803 (4.46613%) +=== step 7.1 (build sparse index): 2.34428 [sec] (0.845909 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 21569370 + partition = 1: num kmers in buckets of size > 128 and <= 256: 19283532 + partition = 2: num kmers in buckets of size > 256 and <= 512: 16271450 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 14300071 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 12852843 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 10991851 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 6352871 + partition = 7: num kmers in buckets of size > 8192 and <= 65778: 12834373 +num kmers in skew index = 114456361 (4.13004%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 21569370 + building MPHF with 16 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[0] for 21569370 kmers; bits/key = 2.6423 + built positions[0] for 21569370 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 19283532 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[1] for 19283532 kmers; bits/key = 2.59492 + built positions[1] for 19283532 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 16271450 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 16271450 kmers; bits/key = 2.62938 + built positions[2] for 16271450 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 14300071 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 14300071 kmers; bits/key = 2.54475 + built positions[3] for 14300071 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 12852843 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[4] for 12852843 kmers; bits/key = 2.61769 + built positions[4] for 12852843 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 10991851 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[5] for 10991851 kmers; bits/key = 2.60036 + built positions[5] for 10991851 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 6352871 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[6] for 6352871 kmers; bits/key = 2.84574 + built positions[6] for 6352871 kmers; bits/key = 13.0001 + lower = 8192; upper = 65778; num_bits_per_pos = 17; num_kmers_in_partition = 12834373 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[7] for 12834373 kmers; bits/key = 2.62085 + built positions[7] for 12834373 kmers; bits/key = 17 +=== step 7.2 (build skew index): 13.5793 [sec] (4.89996 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.6589% +buckets with 2 minimizer positions = 1.37553% +buckets with 3 minimizer positions = 0.377476% +buckets with 4 minimizer positions = 0.166942% +buckets with 5 minimizer positions = 0.0942543% +buckets with 6 minimizer positions = 0.0601235% +buckets with 7 minimizer positions = 0.0422374% +buckets with 8 minimizer positions = 0.03087% +buckets with 9 minimizer positions = 0.0236962% +buckets with 10 minimizer positions = 0.018651% +buckets with 11 minimizer positions = 0.0151561% +buckets with 12 minimizer positions = 0.0124696% +buckets with 13 minimizer positions = 0.0104271% +buckets with 14 minimizer positions = 0.00895828% +buckets with 15 minimizer positions = 0.00765548% +buckets with 16 minimizer positions = 0.00669701% +max_bucket_size = 65778 +=== step 7 (build sparse and skew index): 16.372 [sec] (5.90765 [ns/kmer]) +=== total time: 75.5572 [sec] (27.264 [ns/kmer]) +total index size: 1817287930 [B] -- 1817.29 [MB] +SPACE BREAKDOWN: + mphf: 0.174812 [bits/kmer] (2.84175 [bits/key]) -- 3.33229% + strings_offsets: 0.11255 [bits/kmer] -- 2.14545% + control_codewords: 2.03001 [bits/kmer] -- 38.6965% + mid_load_buckets: 0.181943 [bits/kmer] -- 3.46824% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.47473e-05% + strings: 2.11826 [bits/kmer] -- 40.3785% + skew_index: 0.628416 [bits/kmer] -- 11.979% + weights: 5.31156e-07 [bits/kmer] -- 1.0125e-05% + -------------- + total: 5.24599 [bits/kmer] +2026-03-12 19:22:28: saving data structure to disk... +2026-03-12 19:22:29: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash --canonical +2026-03-12 19:34:43: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23722 [sec] (4.74602 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.50352 [sec] (1.64175 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.46945 [sec] (4.24253 [ns/kmer]) +num_minimizers = 62681379 +num_minimizer_positions = 119779058 +num_super_kmers = 124884655 +building minimizers MPHF with 16 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.60042 [sec] (2.36108 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340483966220817.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.87526 [sec] (2.54131 [ns/kmer]) +=== step 6 (merging minimizers tuples): 19.3041 [sec] (12.6592 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5936004 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 11559644/62681379 (18.4419%) +num_buckets_in_skew_index 98917/62681379 (0.157809%) +max_bucket_size 104378 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 55820644/119779058 (46.603%) +num_minimizer_positions_of_buckets_in_skew_index 12935596/119779058 (10.7995%) +=== step 7.1 (build sparse index): 2.95307 [sec] (1.93656 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 93482236 + partition = 1: num kmers in buckets of size > 128 and <= 256: 32219549 + partition = 2: num kmers in buckets of size > 256 and <= 512: 16799373 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 8679646 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4344549 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3106813 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2772978 + partition = 7: num kmers in buckets of size > 8192 and <= 104378: 7803155 +num kmers in skew index = 169208299 (11.0963%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 93482236 + building MPHF with 16 threads and 32 partitions (avg. partition size = 3000000)... + built mphs[0] for 93482236 kmers; bits/key = 2.52573 + built positions[0] for 93482236 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 32219549 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[1] for 32219549 kmers; bits/key = 2.54511 + built positions[1] for 32219549 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 16799373 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[2] for 16799373 kmers; bits/key = 2.55983 + built positions[2] for 16799373 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 8679646 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 8679646 kmers; bits/key = 2.58946 + built positions[3] for 8679646 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4344549 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4344549 kmers; bits/key = 2.78463 + built positions[4] for 4344549 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3106813 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3106813 kmers; bits/key = 3.17642 + built positions[5] for 3106813 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2772978 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2772978)... + built mphs[6] for 2772978 kmers; bits/key = 2.55984 + built positions[6] for 2772978 kmers; bits/key = 13.0001 + lower = 8192; upper = 104378; num_bits_per_pos = 17; num_kmers_in_partition = 7803155 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 7803155 kmers; bits/key = 2.66882 + built positions[7] for 7803155 kmers; bits/key = 17 +=== step 7.2 (build skew index): 26.3186 [sec] (17.2592 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 81.4003% +buckets with 2 minimizer positions = 9.47012% +buckets with 3 minimizer positions = 3.26104% +buckets with 4 minimizer positions = 1.54756% +buckets with 5 minimizer positions = 0.894521% +buckets with 6 minimizer positions = 0.584387% +buckets with 7 minimizer positions = 0.410934% +buckets with 8 minimizer positions = 0.305931% +buckets with 9 minimizer positions = 0.236914% +buckets with 10 minimizer positions = 0.187659% +buckets with 11 minimizer positions = 0.153783% +buckets with 12 minimizer positions = 0.127216% +buckets with 13 minimizer positions = 0.108761% +buckets with 14 minimizer positions = 0.0919523% +buckets with 15 minimizer positions = 0.0801163% +buckets with 16 minimizer positions = 0.0705999% +max_bucket_size = 104378 +=== step 7 (build sparse and skew index): 29.6058 [sec] (19.4148 [ns/kmer]) +=== total time: 72.5957 [sec] (47.6068 [ns/kmer]) +total index size: 1476857010 [B] -- 1476.86 [MB] +SPACE BREAKDOWN: + mphf: 0.116287 [bits/kmer] (2.82902 [bits/key]) -- 1.50088% + strings_offsets: 0.274587 [bits/kmer] -- 3.544% + control_codewords: 1.35647 [bits/kmer] -- 17.5075% + mid_load_buckets: 1.17139 [bits/kmer] -- 15.1188% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.81466e-05% + strings: 3.35283 [bits/kmer] -- 43.2738% + skew_index: 1.47637 [bits/kmer] -- 19.055% + weights: 9.65307e-07 [bits/kmer] -- 1.24589e-05% + -------------- + total: 7.74793 [bits/kmer] +2026-03-12 19:35:56: saving data structure to disk... +2026-03-12 19:35:57: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.time.log new file mode 100644 index 0000000..32085e9 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/canon-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.canon.sshash --canonical" + User time (seconds): 167.45 + System time (seconds): 22.71 + Percent of CPU this job got: 248% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:16.56 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 8211520 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7617127 + Voluntary context switches: 2622 + Involuntary context switches: 2608 + Swaps: 0 + File system inputs: 56 + File system outputs: 30756008 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.canon.sshash --canonical" + User time (seconds): 158.63 + System time (seconds): 19.43 + Percent of CPU this job got: 242% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:13.42 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9164168 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6909716 + Voluntary context switches: 1936 + Involuntary context switches: 2570 + Swaps: 0 + File system inputs: 32 + File system outputs: 18274456 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.json new file mode 100644 index 0000000..0c19a08 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash", "k": "63", "m": "29", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "806.838857", "negative lookup (avg_nanosec_per_kmer)": "863.651948", "access (avg_nanosec_per_kmer)": "360.448245", "iterator (avg_nanosec_per_kmer)": "2.727123"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash", "k": "63", "m": "29", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "817.208459", "negative lookup (avg_nanosec_per_kmer)": "871.676759", "access (avg_nanosec_per_kmer)": "359.005950", "iterator (avg_nanosec_per_kmer)": "2.718154"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash", "k": "63", "m": "29", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "804.554434", "negative lookup (avg_nanosec_per_kmer)": "861.740223", "access (avg_nanosec_per_kmer)": "357.580548", "iterator (avg_nanosec_per_kmer)": "2.727518"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash", "k": "63", "m": "29", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1141.851774", "negative lookup (avg_nanosec_per_kmer)": "843.022727", "access (avg_nanosec_per_kmer)": "406.431439", "iterator (avg_nanosec_per_kmer)": "2.756901"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash", "k": "63", "m": "29", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1128.143842", "negative lookup (avg_nanosec_per_kmer)": "836.246510", "access (avg_nanosec_per_kmer)": "406.933071", "iterator (avg_nanosec_per_kmer)": "2.763445"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash", "k": "63", "m": "29", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1131.916917", "negative lookup (avg_nanosec_per_kmer)": "839.925496", "access (avg_nanosec_per_kmer)": "412.851045", "iterator (avg_nanosec_per_kmer)": "2.754495"} diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.log new file mode 100644 index 0000000..88a6d74 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-bench.log @@ -0,0 +1,30 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash +positive lookup (avg_nanosec_per_kmer) = 806.839 +negative lookup (avg_nanosec_per_kmer) 863.652 +access (avg_nanosec_per_kmer) = 360.448 +iterator (avg_nanosec_per_kmer) = 2.72712 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash +positive lookup (avg_nanosec_per_kmer) = 817.208 +negative lookup (avg_nanosec_per_kmer) 871.677 +access (avg_nanosec_per_kmer) = 359.006 +iterator (avg_nanosec_per_kmer) = 2.71815 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash +positive lookup (avg_nanosec_per_kmer) = 804.554 +negative lookup (avg_nanosec_per_kmer) 861.74 +access (avg_nanosec_per_kmer) = 357.581 +iterator (avg_nanosec_per_kmer) = 2.72752 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash +positive lookup (avg_nanosec_per_kmer) = 1141.85 +negative lookup (avg_nanosec_per_kmer) 843.023 +access (avg_nanosec_per_kmer) = 406.431 +iterator (avg_nanosec_per_kmer) = 2.7569 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash +positive lookup (avg_nanosec_per_kmer) = 1128.14 +negative lookup (avg_nanosec_per_kmer) 836.247 +access (avg_nanosec_per_kmer) = 406.933 +iterator (avg_nanosec_per_kmer) = 2.76344 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash +positive lookup (avg_nanosec_per_kmer) = 1131.92 +negative lookup (avg_nanosec_per_kmer) 839.925 +access (avg_nanosec_per_kmer) = 412.851 +iterator (avg_nanosec_per_kmer) = 2.7545 diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.json new file mode 100644 index 0000000..fe073ac --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.json @@ -0,0 +1,2 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "29", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "6810790", "step 2 (compute minimizer tuples)": "2129831", "step 3 (merging minimizer tuples)": "6922940", "step 4 (build mphf)": "6679091", "step 5 (replacing minimizer values with MPHF hashes)": "5227773", "step 6 (merging minimizers tuples)": "25048678", "step 7.1 (build sparse index)": "1733422", "step 7.2 (build skew index)": "13917183", "step 7 (build sparse and skew index)": "15973466", "total_build_time_in_microsec": "68792569", "index_size_in_bytes": "1624015518", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "29", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7269970", "step 2 (compute minimizer tuples)": "1460349", "step 3 (merging minimizer tuples)": "5270029", "step 4 (build mphf)": "3257661", "step 5 (replacing minimizer values with MPHF hashes)": "3112852", "step 6 (merging minimizers tuples)": "14474739", "step 7.1 (build sparse index)": "2320989", "step 7.2 (build skew index)": "12887880", "step 7 (build sparse and skew index)": "15473940", "total_build_time_in_microsec": "50319540", "index_size_in_bytes": "1279822350", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.log new file mode 100644 index 0000000..7607dba --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.log @@ -0,0 +1,271 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash +2026-03-12 19:19:27: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.81079 [sec] (2.4576 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.12983 [sec] (0.768527 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.92294 [sec] (2.49807 [ns/kmer]) +num_minimizers = 139436384 +num_minimizer_positions = 154804094 +num_super_kmers = 154804094 +building minimizers MPHF with 16 threads and 47 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.67909 [sec] (2.41008 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773339567082556925.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.22777 [sec] (1.88639 [ns/kmer]) +=== step 6 (merging minimizers tuples): 25.0487 [sec] (9.03855 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1753708 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 2979820/139436384 (2.13705%) +num_buckets_in_skew_index 24417/139436384 (0.0175112%) +max_bucket_size 40884 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 12054119/154804094 (7.78669%) +num_minimizer_positions_of_buckets_in_skew_index 6317828/154804094 (4.08118%) +=== step 7.1 (build sparse index): 1.73342 [sec] (0.625487 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 19739209 + partition = 1: num kmers in buckets of size > 128 and <= 256: 16833808 + partition = 2: num kmers in buckets of size > 256 and <= 512: 14958516 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 13227454 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 11942095 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 7689642 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 5564840 + partition = 7: num kmers in buckets of size > 8192 and <= 40884: 8383238 +num kmers in skew index = 98338802 (3.54845%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 19739209 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[0] for 19739209 kmers; bits/key = 2.56634 + built positions[0] for 19739209 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16833808 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 16833808 kmers; bits/key = 2.55545 + built positions[1] for 16833808 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 14958516 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 14958516 kmers; bits/key = 2.56564 + built positions[2] for 14958516 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 13227454 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 13227454 kmers; bits/key = 2.62014 + built positions[3] for 13227454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 11942095 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[4] for 11942095 kmers; bits/key = 2.606 + built positions[4] for 11942095 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 7689642 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[5] for 7689642 kmers; bits/key = 2.59056 + built positions[5] for 7689642 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 5564840 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 5564840 kmers; bits/key = 2.57334 + built positions[6] for 5564840 kmers; bits/key = 13.0001 + lower = 8192; upper = 40884; num_bits_per_pos = 16; num_kmers_in_partition = 8383238 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 8383238 kmers; bits/key = 2.56406 + built positions[7] for 8383238 kmers; bits/key = 16 +=== step 7.2 (build skew index): 13.9172 [sec] (5.02187 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.8454% +buckets with 2 minimizer positions = 1.25771% +buckets with 3 minimizer positions = 0.340993% +buckets with 4 minimizer positions = 0.154501% +buckets with 5 minimizer positions = 0.0881843% +buckets with 6 minimizer positions = 0.0573272% +buckets with 7 minimizer positions = 0.0397421% +buckets with 8 minimizer positions = 0.0296451% +buckets with 9 minimizer positions = 0.0227315% +buckets with 10 minimizer positions = 0.0182047% +buckets with 11 minimizer positions = 0.0145177% +buckets with 12 minimizer positions = 0.0119173% +buckets with 13 minimizer positions = 0.0101602% +buckets with 14 minimizer positions = 0.00879397% +buckets with 15 minimizer positions = 0.00746362% +buckets with 16 minimizer positions = 0.00635702% +max_bucket_size = 40884 +=== step 7 (build sparse and skew index): 15.9735 [sec] (5.76386 [ns/kmer]) +=== total time: 68.7926 [sec] (24.8231 [ns/kmer]) +total index size: 1624015518 [B] -- 1624.02 [MB] +SPACE BREAKDOWN: + mphf: 0.143296 [bits/kmer] (2.84803 [bits/key]) -- 3.05661% + strings_offsets: 0.11255 [bits/kmer] -- 2.40078% + control_codewords: 1.66037 [bits/kmer] -- 35.4168% + mid_load_buckets: 0.139187 [bits/kmer] -- 2.96897% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.65023e-05% + strings: 2.11826 [bits/kmer] -- 45.1839% + skew_index: 0.514413 [bits/kmer] -- 10.9728% + weights: 5.31156e-07 [bits/kmer] -- 1.13299e-05% + -------------- + total: 4.68807 [bits/kmer] +2026-03-12 19:20:35: saving data structure to disk... +2026-03-12 19:20:36: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash +2026-03-12 19:33:12: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.26997 [sec] (4.76749 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.46035 [sec] (0.957666 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.bin' +=== step 3 (merging minimizer tuples): 5.27003 [sec] (3.45597 [ns/kmer]) +num_minimizers = 55793988 +num_minimizer_positions = 99572100 +num_super_kmers = 99572100 +building minimizers MPHF with 16 threads and 19 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.25766 [sec] (2.13631 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340392073953180.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.11285 [sec] (2.04134 [ns/kmer]) +=== step 6 (merging minimizers tuples): 14.4747 [sec] (9.49223 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 4918992 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9535880/55793988 (17.0912%) +num_buckets_in_skew_index 41466/55793988 (0.0743198%) +max_bucket_size 51542 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 46963800/99572100 (47.1656%) +num_minimizer_positions_of_buckets_in_skew_index 6391658/99572100 (6.41913%) +=== step 7.1 (build sparse index): 2.32099 [sec] (1.52206 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 38546018 + partition = 1: num kmers in buckets of size > 128 and <= 256: 19334648 + partition = 2: num kmers in buckets of size > 256 and <= 512: 10326951 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 5121442 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 3217650 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2895061 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 3125815 + partition = 7: num kmers in buckets of size > 8192 and <= 51542: 5421078 +num kmers in skew index = 87988663 (5.77011%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 38546018 + building MPHF with 16 threads and 13 partitions (avg. partition size = 3000000)... + built mphs[0] for 38546018 kmers; bits/key = 2.55158 + built positions[0] for 38546018 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 19334648 + building MPHF with 16 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[1] for 19334648 kmers; bits/key = 2.58917 + built positions[1] for 19334648 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 10326951 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[2] for 10326951 kmers; bits/key = 2.69947 + built positions[2] for 10326951 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 5121442 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 5121442 kmers; bits/key = 2.67639 + built positions[3] for 5121442 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3217650 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 3217650 kmers; bits/key = 3.21452 + built positions[4] for 3217650 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2895061 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2895061)... + built mphs[5] for 2895061 kmers; bits/key = 2.55983 + built positions[5] for 2895061 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 3125815 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 3125815 kmers; bits/key = 3.29675 + built positions[6] for 3125815 kmers; bits/key = 13.0001 + lower = 8192; upper = 51542; num_bits_per_pos = 16; num_kmers_in_partition = 5421078 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[7] for 5421078 kmers; bits/key = 2.63053 + built positions[7] for 5421078 kmers; bits/key = 16.0001 +=== step 7.2 (build skew index): 12.8879 [sec] (8.4516 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.8344% +buckets with 2 minimizer positions = 8.81635% +buckets with 3 minimizer positions = 2.89281% +buckets with 4 minimizer positions = 1.38001% +buckets with 5 minimizer positions = 0.809225% +buckets with 6 minimizer positions = 0.533776% +buckets with 7 minimizer positions = 0.378103% +buckets with 8 minimizer positions = 0.283937% +buckets with 9 minimizer positions = 0.220511% +buckets with 10 minimizer positions = 0.177992% +buckets with 11 minimizer positions = 0.147695% +buckets with 12 minimizer positions = 0.124728% +buckets with 13 minimizer positions = 0.108526% +buckets with 14 minimizer positions = 0.0952934% +buckets with 15 minimizer positions = 0.0846346% +buckets with 16 minimizer positions = 0.0760243% +max_bucket_size = 51542 +=== step 7 (build sparse and skew index): 15.4739 [sec] (10.1475 [ns/kmer]) +=== total time: 50.3195 [sec] (32.9985 [ns/kmer]) +total index size: 1279822350 [B] -- 1279.82 [MB] +SPACE BREAKDOWN: + mphf: 0.104962 [bits/kmer] (2.86871 [bits/key]) -- 1.56327% + strings_offsets: 0.274587 [bits/kmer] -- 4.08962% + control_codewords: 1.20742 [bits/kmer] -- 17.983% + mid_load_buckets: 0.985532 [bits/kmer] -- 14.6782% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.09404e-05% + strings: 3.35283 [bits/kmer] -- 49.936% + skew_index: 0.788914 [bits/kmer] -- 11.7499% + weights: 9.65307e-07 [bits/kmer] -- 1.4377e-05% + -------------- + total: 6.71424 [bits/kmer] +2026-03-12 19:34:02: saving data structure to disk... +2026-03-12 19:34:02: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.time.log new file mode 100644 index 0000000..98d818b --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m29/k63/regular-build.time.log @@ -0,0 +1,46 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/human.k63.m29.sshash" + User time (seconds): 133.33 + System time (seconds): 18.91 + Percent of CPU this job got: 218% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:09.68 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6875392 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6590982 + Voluntary context switches: 3842 + Involuntary context switches: 2384 + Swaps: 0 + File system inputs: 24 + File system outputs: 24436496 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 29 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m29.sshash" + User time (seconds): 91.49 + System time (seconds): 12.54 + Percent of CPU this job got: 203% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:51.03 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5472272 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4724156 + Voluntary context switches: 3842 + Involuntary context switches: 1808 + Swaps: 0 + File system inputs: 8 + File system outputs: 14979024 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.json b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.json new file mode 100644 index 0000000..b7bc7fb --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.json @@ -0,0 +1,3 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "975.187344", "negative lookup (avg_nanosec_per_kmer)": "502.192634", "access (avg_nanosec_per_kmer)": "407.389034", "iterator (avg_nanosec_per_kmer)": "2.873984"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1003.755755", "negative lookup (avg_nanosec_per_kmer)": "502.466808", "access (avg_nanosec_per_kmer)": "408.450261", "iterator (avg_nanosec_per_kmer)": "2.751370"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "969.901285", "negative lookup (avg_nanosec_per_kmer)": "498.931621", "access (avg_nanosec_per_kmer)": "408.347183", "iterator (avg_nanosec_per_kmer)": "2.758011"} diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.log b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.log new file mode 100644 index 0000000..a8956d1 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-bench.log @@ -0,0 +1,15 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 975.187 +negative lookup (avg_nanosec_per_kmer) 502.193 +access (avg_nanosec_per_kmer) = 407.389 +iterator (avg_nanosec_per_kmer) = 2.87398 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1003.76 +negative lookup (avg_nanosec_per_kmer) 502.467 +access (avg_nanosec_per_kmer) = 408.45 +iterator (avg_nanosec_per_kmer) = 2.75137 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 969.901 +negative lookup (avg_nanosec_per_kmer) 498.932 +access (avg_nanosec_per_kmer) = 408.347 +iterator (avg_nanosec_per_kmer) = 2.75801 diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.json b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.json new file mode 100644 index 0000000..91e8342 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.json @@ -0,0 +1 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7263414", "step 2 (compute minimizer tuples)": "2537017", "step 3 (merging minimizer tuples)": "6815128", "step 4 (build mphf)": "3766078", "step 5 (replacing minimizer values with MPHF hashes)": "4131869", "step 6 (merging minimizers tuples)": "18016558", "step 7.1 (build sparse index)": "3090550", "step 7.2 (build skew index)": "23625261", "step 7 (build sparse and skew index)": "27091928", "total_build_time_in_microsec": "69621992", "index_size_in_bytes": "1481048960", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.log b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.log new file mode 100644 index 0000000..2259159 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.log @@ -0,0 +1,143 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash --canonical +2026-03-12 19:38:03: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.26341 [sec] (4.76319 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 2.53702 [sec] (1.66372 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.81513 [sec] (4.46922 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 16 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.76608 [sec] (2.46971 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340683421120469.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.13187 [sec] (2.70959 [ns/kmer]) +=== step 6 (merging minimizers tuples): 18.0166 [sec] (11.8149 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6384394 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 12190335/69577229 (17.5206%) +num_buckets_in_skew_index 86973/69577229 (0.125002%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 57511599/126350163 (45.5176%) +num_minimizer_positions_of_buckets_in_skew_index 11538643/126350163 (9.13227%) +=== step 7.1 (build sparse index): 3.09055 [sec] (2.02672 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 77399356 + partition = 1: num kmers in buckets of size > 128 and <= 256: 26776297 + partition = 2: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2527517 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2420664 + partition = 7: num kmers in buckets of size > 8192 and <= 245177: 10756294 +num kmers in skew index = 143622784 (9.41848%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 77399356 + building MPHF with 16 threads and 26 partitions (avg. partition size = 3000000)... + built mphs[0] for 77399356 kmers; bits/key = 2.54863 + built positions[0] for 77399356 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26776297 + building MPHF with 16 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26776297 kmers; bits/key = 2.54559 + built positions[1] for 26776297 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 16 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 13974034 kmers; bits/key = 2.56375 + built positions[2] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 6504754 kmers; bits/key = 2.78902 + built positions[3] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 3263868 kmers; bits/key = 3.30619 + built positions[4] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2527517 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2527517)... + built mphs[5] for 2527517 kmers; bits/key = 2.55992 + built positions[5] for 2527517 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2420664 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2420664)... + built mphs[6] for 2420664 kmers; bits/key = 2.55997 + built positions[6] for 2420664 kmers; bits/key = 13.0001 + lower = 8192; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 10756294 + building MPHF with 16 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 10756294 kmers; bits/key = 2.64819 + built positions[7] for 10756294 kmers; bits/key = 18 +=== step 7.2 (build skew index): 23.6253 [sec] (15.4929 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 27.0919 [sec] (17.7663 [ns/kmer]) +=== total time: 69.622 [sec] (45.6566 [ns/kmer]) +total index size: 1481048960 [B] -- 1481.05 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.7032% + strings_offsets: 0.274587 [bits/kmer] -- 3.53397% + control_codewords: 1.5057 [bits/kmer] -- 19.3786% + mid_load_buckets: 1.20688 [bits/kmer] -- 15.5327% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.80953e-05% + strings: 3.35283 [bits/kmer] -- 43.1513% + skew_index: 1.2976 [bits/kmer] -- 16.7002% + weights: 9.65307e-07 [bits/kmer] -- 1.24236e-05% + -------------- + total: 7.76993 [bits/kmer] +2026-03-12 19:39:13: saving data structure to disk... +2026-03-12 19:39:13: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.time.log b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.time.log new file mode 100644 index 0000000..d8220e1 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/canon-build.time.log @@ -0,0 +1,23 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.canon.sshash --canonical" + User time (seconds): 149.67 + System time (seconds): 18.21 + Percent of CPU this job got: 238% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:10.47 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 8143148 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6761908 + Voluntary context switches: 2531 + Involuntary context switches: 2627 + Swaps: 0 + File system inputs: 24 + File system outputs: 19280104 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.json b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.json new file mode 100644 index 0000000..9f8af34 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.json @@ -0,0 +1,3 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1103.985334", "negative lookup (avg_nanosec_per_kmer)": "842.216625", "access (avg_nanosec_per_kmer)": "408.706531", "iterator (avg_nanosec_per_kmer)": "2.843532"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1103.416688", "negative lookup (avg_nanosec_per_kmer)": "846.499387", "access (avg_nanosec_per_kmer)": "402.575654", "iterator (avg_nanosec_per_kmer)": "2.834018"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1099.428521", "negative lookup (avg_nanosec_per_kmer)": "837.974419", "access (avg_nanosec_per_kmer)": "406.739127", "iterator (avg_nanosec_per_kmer)": "2.765785"} diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.log b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.log new file mode 100644 index 0000000..b27f664 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-bench.log @@ -0,0 +1,15 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash +positive lookup (avg_nanosec_per_kmer) = 1103.99 +negative lookup (avg_nanosec_per_kmer) 842.217 +access (avg_nanosec_per_kmer) = 408.707 +iterator (avg_nanosec_per_kmer) = 2.84353 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash +positive lookup (avg_nanosec_per_kmer) = 1103.42 +negative lookup (avg_nanosec_per_kmer) 846.499 +access (avg_nanosec_per_kmer) = 402.576 +iterator (avg_nanosec_per_kmer) = 2.83402 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash +positive lookup (avg_nanosec_per_kmer) = 1099.43 +negative lookup (avg_nanosec_per_kmer) 837.974 +access (avg_nanosec_per_kmer) = 406.739 +iterator (avg_nanosec_per_kmer) = 2.76579 diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.json b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.json new file mode 100644 index 0000000..8b9a3c3 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.json @@ -0,0 +1 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "16", "step 1 (encode strings)": "7287930", "step 2 (compute minimizer tuples)": "1465089", "step 3 (merging minimizer tuples)": "5488128", "step 4 (build mphf)": "3506419", "step 5 (replacing minimizer values with MPHF hashes)": "3419159", "step 6 (merging minimizers tuples)": "13718436", "step 7.1 (build sparse index)": "2426825", "step 7.2 (build skew index)": "13159548", "step 7 (build sparse and skew index)": "15871088", "total_build_time_in_microsec": "50756249", "index_size_in_bytes": "1294767218", "num_kmers": "1524904156"} diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.log b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.log new file mode 100644 index 0000000..beaf31d --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.log @@ -0,0 +1,143 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash +2026-03-12 19:36:31: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.28793 [sec] (4.77927 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.15.bin'... +=== step 2 (compute minimizer tuples): 1.46509 [sec] (0.960774 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.48813 [sec] (3.599 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 16 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.50642 [sec] (2.29944 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1773340591434372031.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.41916 [sec] (2.24221 [ns/kmer]) +=== step 6 (merging minimizers tuples): 13.7184 [sec] (8.99626 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5263874 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9999764/61951224 (16.1414%) +num_buckets_in_skew_index 36489/61951224 (0.0588996%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 47622255/105337248 (45.2093%) +num_minimizer_positions_of_buckets_in_skew_index 5800022/105337248 (5.50615%) +=== step 7.1 (build sparse index): 2.42683 [sec] (1.59146 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 32078766 + partition = 1: num kmers in buckets of size > 128 and <= 256: 16092632 + partition = 2: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2981930 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2815305 + partition = 7: num kmers in buckets of size > 8192 and <= 144478: 7418130 +num kmers in skew index = 75796687 (4.97059%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 32078766 + building MPHF with 16 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 32078766 kmers; bits/key = 2.51437 + built positions[0] for 32078766 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16092632 + building MPHF with 16 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 16092632 kmers; bits/key = 2.65398 + built positions[1] for 16092632 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 8174536 kmers; bits/key = 2.61889 + built positions[2] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 16 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3454318 kmers; bits/key = 3.14686 + built positions[3] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[4] for 2781070 kmers; bits/key = 2.55987 + built positions[4] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2981930 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2981930)... + built mphs[5] for 2981930 kmers; bits/key = 2.55978 + built positions[5] for 2981930 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2815305 + building MPHF with 16 threads and 1 partitions (avg. partition size = 2815305)... + built mphs[6] for 2815305 kmers; bits/key = 2.55984 + built positions[6] for 2815305 kmers; bits/key = 13.0001 + lower = 8192; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 7418130 + building MPHF with 16 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 7418130 kmers; bits/key = 2.72795 + built positions[7] for 7418130 kmers; bits/key = 18.0001 +=== step 7.2 (build skew index): 13.1595 [sec] (8.62975 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 15.8711 [sec] (10.4079 [ns/kmer]) +=== total time: 50.7562 [sec] (33.2849 [ns/kmer]) +total index size: 1294767218 [B] -- 1294.77 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.70902% + strings_offsets: 0.274587 [bits/kmer] -- 4.04242% + control_codewords: 1.34067 [bits/kmer] -- 19.737% + mid_load_buckets: 0.99935 [bits/kmer] -- 14.7122% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.06987e-05% + strings: 3.35283 [bits/kmer] -- 49.3596% + skew_index: 0.709128 [bits/kmer] -- 10.4396% + weights: 9.65307e-07 [bits/kmer] -- 1.4211e-05% + -------------- + total: 6.79265 [bits/kmer] +2026-03-12 19:37:22: saving data structure to disk... +2026-03-12 19:37:22: DONE diff --git a/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.time.log b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.time.log new file mode 100644 index 0000000..0a34a96 --- /dev/null +++ b/benchmarks/results-sweep-m-12-03-26/m31/k63/regular-build.time.log @@ -0,0 +1,23 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 16 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-sweep-m-indexes/se.k63.m31.sshash" + User time (seconds): 87.87 + System time (seconds): 12.92 + Percent of CPU this job got: 195% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:51.48 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4933124 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4676923 + Voluntary context switches: 1777 + Involuntary context switches: 1805 + Swaps: 0 + File system inputs: 8 + File system outputs: 15833576 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/rss_anon.k31.log b/benchmarks/rss_anon.k31.log new file mode 100644 index 0000000..0fdaaff --- /dev/null +++ b/benchmarks/rss_anon.k31.log @@ -0,0 +1,2527 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 22:55:55: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz'... +read 1000000 sequences, 260758668 bases, 230758668 kmers +read 2000000 sequences, 549832064 bases, 489832064 kmers +read 2057242 sequences, 564182460 bases, 502465200 kmers +num_kmers 502465200 +cost: 2.0 + 0.245658 [bits/kmer] +max string length = 31415 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 15 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.74657 [sec] (3.476 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.554609 [sec] (1.10378 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.bin' +=== step 3 (merging minimizer tuples): 4.00004 [sec] (7.96083 [ns/kmer]) +num_minimizers = 72381146 +num_minimizer_positions = 78455681 +num_super_kmers = 78455681 +building minimizers MPHF with 64 threads and 25 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.20337 [sec] (6.3753 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352555369377920.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.8624 [sec] (5.69672 [ns/kmer]) +=== step 6 (merging minimizers tuples): 11.1578 [sec] (22.2061 [ns/kmer]) +num_bits_per_offset = 30 +max_list_id = 1450641 +bits_for_list_id = 21 +num_bits_for_control = 31 +num_buckets_larger_than_1_not_in_skew_index 2259768/72381146 (3.12204%) +num_buckets_in_skew_index 4215/72381146 (0.00582334%) +max_bucket_size 70346 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 7357522/78455681 (9.37793%) +num_minimizer_positions_of_buckets_in_skew_index 980996/78455681 (1.25038%) +=== step 7.1 (build sparse index): 0.946342 [sec] (1.8834 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 1260546 + partition = 1: num kmers in buckets of size > 128 and <= 256: 994175 + partition = 2: num kmers in buckets of size > 256 and <= 512: 665719 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 408608 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 423234 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 271419 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 288814 + partition = 7: num kmers in buckets of size > 8192 and <= 70346: 428378 +num kmers in skew index = 4740893 (0.943527%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 1260546 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1260546)... + built mphs[0] for 1260546 kmers; bits/key = 2.56076 + built positions[0] for 1260546 kmers; bits/key = 7.00029 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 994175 + building MPHF with 64 threads and 1 partitions (avg. partition size = 994175)... + built mphs[1] for 994175 kmers; bits/key = 2.41831 + built positions[1] for 994175 kmers; bits/key = 8.00033 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 665719 + building MPHF with 64 threads and 1 partitions (avg. partition size = 665719)... + built mphs[2] for 665719 kmers; bits/key = 2.41938 + built positions[2] for 665719 kmers; bits/key = 9.00051 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 408608 + building MPHF with 64 threads and 1 partitions (avg. partition size = 408608)... + built mphs[3] for 408608 kmers; bits/key = 2.42149 + built positions[3] for 408608 kmers; bits/key = 10.0008 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 423234 + building MPHF with 64 threads and 1 partitions (avg. partition size = 423234)... + built mphs[4] for 423234 kmers; bits/key = 2.56407 + built positions[4] for 423234 kmers; bits/key = 11.0009 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 271419 + building MPHF with 64 threads and 1 partitions (avg. partition size = 271419)... + built mphs[5] for 271419 kmers; bits/key = 2.42412 + built positions[5] for 271419 kmers; bits/key = 12.0014 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 288814 + building MPHF with 64 threads and 1 partitions (avg. partition size = 288814)... + built mphs[6] for 288814 kmers; bits/key = 2.42387 + built positions[6] for 288814 kmers; bits/key = 13.0013 + lower = 8192; upper = 70346; num_bits_per_pos = 17; num_kmers_in_partition = 428378 + building MPHF with 64 threads and 1 partitions (avg. partition size = 428378)... + built mphs[7] for 428378 kmers; bits/key = 2.56398 + built positions[7] for 428378 kmers; bits/key = 17.0008 +=== step 7.2 (build skew index): 2.55793 [sec] (5.09077 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.8721% +buckets with 2 minimizer positions = 2.00417% +buckets with 3 minimizer positions = 0.51765% +buckets with 4 minimizer positions = 0.211311% +buckets with 5 minimizer positions = 0.109826% +buckets with 6 minimizer positions = 0.0661802% +buckets with 7 minimizer positions = 0.0437061% +buckets with 8 minimizer positions = 0.0308464% +buckets with 9 minimizer positions = 0.0235614% +buckets with 10 minimizer positions = 0.0177228% +buckets with 11 minimizer positions = 0.0139967% +buckets with 12 minimizer positions = 0.0112902% +buckets with 13 minimizer positions = 0.00931044% +buckets with 14 minimizer positions = 0.00771748% +buckets with 15 minimizer positions = 0.00632347% +buckets with 16 minimizer positions = 0.00528591% +max_bucket_size = 70346 +=== step 7 (build sparse and skew index): 3.69064 [sec] (7.34506 [ns/kmer]) +=== total time: 27.2154 [sec] (54.1638 [ns/kmer]) +total index size: 495332377 [B] -- 495.332 [MB] +SPACE BREAKDOWN: + mphf: 0.41828 [bits/kmer] (2.90367 [bits/key]) -- 5.30379% + strings_offsets: 0.144419 [bits/kmer] -- 1.83123% + control_codewords: 4.46561 [bits/kmer] -- 56.624% + mid_load_buckets: 0.439286 [bits/kmer] -- 5.57015% + begin_buckets_of_size: 4.26696e-06 [bits/kmer] -- 5.41051e-05% + strings: 2.24566 [bits/kmer] -- 28.4749% + skew_index: 0.173169 [bits/kmer] -- 2.19578% + weights: 2.92956e-06 [bits/kmer] -- 3.71468e-05% + -------------- + total: 7.88643 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 27.35 seconds +Peak RssAnon: 3858124 kB +Peak RssAnon: 3.68 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 22:56:22: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz'... +read 582860 sequences, 1167885005 bases, 1150399205 kmers +num_kmers 1150399205 +cost: 2.0 + 0.0303995 [bits/kmer] +max string length = 111973 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 20 +=== step 1 (encode strings): 2.90085 [sec] (2.5216 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.05053 [sec] (0.913186 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.6983 [sec] (7.56112 [ns/kmer]) +num_minimizers = 173258591 +num_minimizer_positions = 175959772 +num_super_kmers = 175959772 +building minimizers MPHF with 64 threads and 58 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.05998 [sec] (6.13699 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352582739810426.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.33067 [sec] (5.50302 [ns/kmer]) +=== step 6 (merging minimizers tuples): 17.8885 [sec] (15.5499 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 989641 +bits_for_list_id = 20 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 1311512/173258591 (0.756968%) +num_buckets_in_skew_index 1541/173258591 (0.000889422%) +max_bucket_size 3747 +log2_max_bucket_size 12 +num_partitions in skew index 6 +num_minimizer_positions_of_buckets_larger_than_1 3806575/175959772 (2.16332%) +num_minimizer_positions_of_buckets_in_skew_index 207659/175959772 (0.118015%) +=== step 7.1 (build sparse index): 1.52399 [sec] (1.32475 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 559507 + partition = 1: num kmers in buckets of size > 128 and <= 256: 317132 + partition = 2: num kmers in buckets of size > 256 and <= 512: 144139 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 106543 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 21308 + partition = 5: num kmers in buckets of size > 2048 and <= 3747: 36738 +num kmers in skew index = 1185367 (0.10304%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 559507 + building MPHF with 64 threads and 1 partitions (avg. partition size = 559507)... + built mphs[0] for 559507 kmers; bits/key = 2.41999 + built positions[0] for 559507 kmers; bits/key = 7.00068 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 317132 + building MPHF with 64 threads and 1 partitions (avg. partition size = 317132)... + built mphs[1] for 317132 kmers; bits/key = 2.42276 + built positions[1] for 317132 kmers; bits/key = 8.00111 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 144139 + building MPHF with 64 threads and 1 partitions (avg. partition size = 144139)... + built mphs[2] for 144139 kmers; bits/key = 2.43099 + built positions[2] for 144139 kmers; bits/key = 9.00242 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 106543 + building MPHF with 64 threads and 1 partitions (avg. partition size = 106543)... + built mphs[3] for 106543 kmers; bits/key = 2.43597 + built positions[3] for 106543 kmers; bits/key = 10.0034 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 21308 + building MPHF with 64 threads and 1 partitions (avg. partition size = 21308)... + built mphs[4] for 21308 kmers; bits/key = 2.51323 + built positions[4] for 21308 kmers; bits/key = 11.0171 + lower = 2048; upper = 3747; num_bits_per_pos = 12; num_kmers_in_partition = 36738 + building MPHF with 64 threads and 1 partitions (avg. partition size = 36738)... + built mphs[5] for 36738 kmers; bits/key = 2.4746 + built positions[5] for 36738 kmers; bits/key = 12.0098 +=== step 7.2 (build skew index): 0.658241 [sec] (0.572185 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.2421% +buckets with 2 minimizer positions = 0.571193% +buckets with 3 minimizer positions = 0.0840564% +buckets with 4 minimizer positions = 0.0345576% +buckets with 5 minimizer positions = 0.0184735% +buckets with 6 minimizer positions = 0.011514% +buckets with 7 minimizer positions = 0.00772718% +buckets with 8 minimizer positions = 0.00537809% +buckets with 9 minimizer positions = 0.00392246% +buckets with 10 minimizer positions = 0.00304862% +buckets with 11 minimizer positions = 0.00236583% +buckets with 12 minimizer positions = 0.0019139% +buckets with 13 minimizer positions = 0.00156356% +buckets with 14 minimizer positions = 0.00127613% +buckets with 15 minimizer positions = 0.00105276% +buckets with 16 minimizer positions = 0.000939636% +max_bucket_size = 3747 +=== step 7 (build sparse and skew index): 2.56082 [sec] (2.22603 [ns/kmer]) +=== total time: 46.4897 [sec] (40.4118 [ns/kmer]) +total index size: 1077792939 [B] -- 1077.79 [MB] +SPACE BREAKDOWN: + mphf: 0.426302 [bits/kmer] (2.83055 [bits/key]) -- 5.68775% + strings_offsets: 0.100021 [bits/kmer] -- 1.33449% + control_codewords: 4.81944 [bits/kmer] -- 64.3013% + mid_load_buckets: 0.102577 [bits/kmer] -- 1.36859% + begin_buckets_of_size: 1.8637e-06 [bits/kmer] -- 2.48656e-05% + strings: 2.0304 [bits/kmer] -- 27.0897% + skew_index: 0.0163494 [bits/kmer] -- 0.218135% + weights: 1.27956e-06 [bits/kmer] -- 1.70719e-05% + -------------- + total: 7.49509 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 46.60 seconds +Peak RssAnon: 6897472 kB +Peak RssAnon: 6.58 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 22:57:09: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.23708 [sec] (2.88827 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.72.bin'... +=== step 2 (compute minimizer tuples): 2.46119 [sec] (0.982244 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 21.8099 [sec] (8.7042 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 64 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 15.9192 [sec] (6.35324 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352629358501975.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 15.0668 [sec] (6.01305 [ns/kmer]) +=== step 6 (merging minimizers tuples): 45.5999 [sec] (18.1986 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6542948 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 10816752/386687326 (2.79729%) +num_buckets_in_skew_index 42372/386687326 (0.0109577%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 40422973/423023926 (9.55572%) +num_minimizer_positions_of_buckets_in_skew_index 6772751/423023926 (1.60103%) +=== step 7.1 (build sparse index): 5.06687 [sec] (2.02215 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 11807213 + partition = 1: num kmers in buckets of size > 128 and <= 256: 8389556 + partition = 2: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1008178 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 375770 + partition = 7: num kmers in buckets of size > 8192 and <= 22972: 207510 +num kmers in skew index = 32063746 (1.27964%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 11807213 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[0] for 11807213 kmers; bits/key = 2.55841 + built positions[0] for 11807213 kmers; bits/key = 7.00003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8389556 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 8389556 kmers; bits/key = 2.61352 + built positions[1] for 8389556 kmers; bits/key = 8.00004 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 5343660 kmers; bits/key = 2.66261 + built positions[2] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3076413 kmers; bits/key = 3.34298 + built positions[3] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[4] for 1855446 kmers; bits/key = 2.56022 + built positions[4] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1008178 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1008178)... + built mphs[5] for 1008178 kmers; bits/key = 2.41829 + built positions[5] for 1008178 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 375770 + building MPHF with 64 threads and 1 partitions (avg. partition size = 375770)... + built mphs[6] for 375770 kmers; bits/key = 2.42182 + built positions[6] for 375770 kmers; bits/key = 13.001 + lower = 8192; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 207510 + building MPHF with 64 threads and 1 partitions (avg. partition size = 207510)... + built mphs[7] for 207510 kmers; bits/key = 2.42618 + built positions[7] for 207510 kmers; bits/key = 15.0018 +=== step 7.2 (build skew index): 6.95843 [sec] (2.77707 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 12.9647 [sec] (5.17412 [ns/kmer]) +=== total time: 121.059 [sec] (48.3137 [ns/kmer]) +total index size: 2716053224 [B] -- 2716.05 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 5.03015% + strings_offsets: 0.153147 [bits/kmer] -- 1.76606% + control_codewords: 5.0927 [bits/kmer] -- 58.7281% + mid_load_buckets: 0.516242 [bits/kmer] -- 5.95319% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 9.86726e-06% + strings: 2.24545 [bits/kmer] -- 25.8941% + skew_index: 0.227926 [bits/kmer] -- 2.6284% + weights: 5.87466e-07 [bits/kmer] -- 6.77454e-06% + -------------- + total: 8.67167 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 121.29 seconds +Peak RssAnon: 16399704 kB +Peak RssAnon: 15.64 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 22:59:10: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz'... +read 1000000 sequences, 289026795 bases, 259026795 kmers +read 1645464 sequences, 425569105 bases, 376205185 kmers +num_kmers 376205185 +cost: 2.0 + 0.262431 [bits/kmer] +max string length = 234900 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 18 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.17094 [sec] (3.11251 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.533209 [sec] (1.41734 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.bin' +=== step 3 (merging minimizer tuples): 2.72906 [sec] (7.25417 [ns/kmer]) +num_minimizers = 52162715 +num_minimizer_positions = 55045821 +num_super_kmers = 55045821 +building minimizers MPHF with 64 threads and 18 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.46705 [sec] (6.55773 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352750662927986.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.01653 [sec] (5.36019 [ns/kmer]) +=== step 6 (merging minimizers tuples): 4.39231 [sec] (11.6753 [ns/kmer]) +num_bits_per_offset = 29 +max_list_id = 1848743 +bits_for_list_id = 21 +num_bits_for_control = 30 +num_buckets_larger_than_1_not_in_skew_index 2257124/52162715 (4.32708%) +num_buckets_in_skew_index 22/52162715 (4.21757e-05%) +max_bucket_size 322 +log2_max_bucket_size 9 +num_partitions in skew index 3 +num_minimizer_positions_of_buckets_larger_than_1 5137481/55045821 (9.3331%) +num_minimizer_positions_of_buckets_in_skew_index 2771/55045821 (0.00503399%) +=== step 7.1 (build sparse index): 0.649419 [sec] (1.72624 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 7461 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2349 + partition = 2: num kmers in buckets of size > 256 and <= 322: 2299 +num kmers in skew index = 12109 (0.00321872%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 7461 + building MPHF with 64 threads and 1 partitions (avg. partition size = 7461)... + built mphs[0] for 7461 kmers; bits/key = 2.69991 + built positions[0] for 7461 kmers; bits/key = 7.05107 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2349 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2349)... + built mphs[1] for 2349 kmers; bits/key = 3.16731 + built positions[1] for 2349 kmers; bits/key = 8.14645 + lower = 256; upper = 322; num_bits_per_pos = 9; num_kmers_in_partition = 2299 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2299)... + built mphs[2] for 2299 kmers; bits/key = 3.18051 + built positions[2] for 2299 kmers; bits/key = 9.15876 +=== step 7.2 (build skew index): 0.027418 [sec] (0.0728804 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 95.6729% +buckets with 2 minimizer positions = 3.54418% +buckets with 3 minimizer positions = 0.560318% +buckets with 4 minimizer positions = 0.1372% +buckets with 5 minimizer positions = 0.0464911% +buckets with 6 minimizer positions = 0.0185669% +buckets with 7 minimizer positions = 0.00842556% +buckets with 8 minimizer positions = 0.00444379% +buckets with 9 minimizer positions = 0.00242894% +buckets with 10 minimizer positions = 0.00141289% +buckets with 11 minimizer positions = 0.000904861% +buckets with 12 minimizer positions = 0.000592377% +buckets with 13 minimizer positions = 0.000416006% +buckets with 14 minimizer positions = 0.000352742% +buckets with 15 minimizer positions = 0.000222381% +buckets with 16 minimizer positions = 0.000207044% +max_bucket_size = 322 +=== step 7 (build sparse and skew index): 0.812373 [sec] (2.15939 [ns/kmer]) +=== total time: 14.1215 [sec] (37.5366 [ns/kmer]) +total index size: 346391727 [B] -- 346.392 [MB] +SPACE BREAKDOWN: + mphf: 0.403884 [bits/kmer] (2.91287 [bits/key]) -- 5.48307% + strings_offsets: 0.14346 [bits/kmer] -- 1.9476% + control_codewords: 4.15965 [bits/kmer] -- 56.4708% + mid_load_buckets: 0.396027 [bits/kmer] -- 5.3764% + begin_buckets_of_size: 5.69902e-06 [bits/kmer] -- 7.73691e-05% + strings: 2.26243 [bits/kmer] -- 30.7144% + skew_index: 0.000554293 [bits/kmer] -- 0.00752501% + weights: 3.91276e-06 [bits/kmer] -- 5.31191e-05% + -------------- + total: 7.36602 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 14.23 seconds +Peak RssAnon: 2732856 kB +Peak RssAnon: 2.61 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 22:59:24: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.55808 [sec] (5.09676 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.64.bin'... +=== step 2 (compute minimizer tuples): 1.02041 [sec] (1.141 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.51295 [sec] (9.51902 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 64 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 5.44198 [sec] (6.08511 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352764909451370.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.58631 [sec] (6.2465 [ns/kmer]) +=== step 6 (merging minimizers tuples): 13.0075 [sec] (14.5447 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 8383084 +bits_for_list_id = 23 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 14059268/126246665 (11.1363%) +num_buckets_in_skew_index 8266/126246665 (0.0065475%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 48164669/162006751 (29.73%) +num_minimizer_positions_of_buckets_in_skew_index 1662951/162006751 (1.02647%) +=== step 7.1 (build sparse index): 3.15056 [sec] (3.5229 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 2254325 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1183762 + partition = 2: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 373731 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 338406 + partition = 7: num kmers in buckets of size > 8192 and <= 36894: 388502 +num kmers in skew index = 6466768 (0.723101%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2254325 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2254325)... + built mphs[0] for 2254325 kmers; bits/key = 2.56001 + built positions[0] for 2254325 kmers; bits/key = 7.00015 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1183762 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1183762)... + built mphs[1] for 1183762 kmers; bits/key = 2.56081 + built positions[1] for 1183762 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 64 threads and 1 partitions (avg. partition size = 885561)... + built mphs[2] for 885561 kmers; bits/key = 2.56147 + built positions[2] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 64 threads and 1 partitions (avg. partition size = 591648)... + built mphs[3] for 591648 kmers; bits/key = 2.56263 + built positions[3] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 64 threads and 1 partitions (avg. partition size = 450833)... + built mphs[4] for 450833 kmers; bits/key = 2.42098 + built positions[4] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 373731 + building MPHF with 64 threads and 1 partitions (avg. partition size = 373731)... + built mphs[5] for 373731 kmers; bits/key = 2.56484 + built positions[5] for 373731 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 338406 + building MPHF with 64 threads and 1 partitions (avg. partition size = 338406)... + built mphs[6] for 338406 kmers; bits/key = 2.4226 + built positions[6] for 338406 kmers; bits/key = 13.001 + lower = 8192; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 388502 + building MPHF with 64 threads and 1 partitions (avg. partition size = 388502)... + built mphs[7] for 388502 kmers; bits/key = 2.56472 + built positions[7] for 388502 kmers; bits/key = 16.0009 +=== step 7.2 (build skew index): 3.55748 [sec] (3.9779 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 7.13154 [sec] (7.97434 [ns/kmer]) +=== total time: 45.2588 [sec] (50.6075 [ns/kmer]) +total index size: 1137030140 [B] -- 1137.03 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.98871% + strings_offsets: 0.333373 [bits/kmer] -- 3.2776% + control_codewords: 4.51733 [bits/kmer] -- 44.4128% + mid_load_buckets: 1.66956 [bits/kmer] -- 16.4145% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.35702e-05% + strings: 3.10303 [bits/kmer] -- 30.5079% + skew_index: 0.142237 [bits/kmer] -- 1.39842% + weights: 1.64596e-06 [bits/kmer] -- 1.61825e-05% + -------------- + total: 10.1712 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 45.39 seconds +Peak RssAnon: 6474952 kB +Peak RssAnon: 6.17 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 23:00:10: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz'... +read 1000000 sequences, 159860354 bases, 129860354 kmers +read 2000000 sequences, 345519042 bases, 285519042 kmers +read 3000000 sequences, 569210425 bases, 479210425 kmers +read 4000000 sequences, 848332212 bases, 728332212 kmers +read 5000000 sequences, 1226889961 bases, 1076889961 kmers +read 6000000 sequences, 1800462808 bases, 1620462808 kmers +read 7000000 sequences, 1906975392 bases, 1696975392 kmers +read 8000000 sequences, 1964117272 bases, 1724117272 kmers +read 9000000 sequences, 2021346703 bases, 1751346703 kmers +read 10000000 sequences, 2078777105 bases, 1778777105 kmers +read 11000000 sequences, 2136245853 bases, 1806245853 kmers +read 12000000 sequences, 2193864516 bases, 1833864516 kmers +read 13000000 sequences, 2251713140 bases, 1861713140 kmers +read 14000000 sequences, 2309685311 bases, 1889685311 kmers +read 15000000 sequences, 2367830861 bases, 1917830861 kmers +read 16000000 sequences, 2426185107 bases, 1946185107 kmers +read 17000000 sequences, 2484756357 bases, 1974756357 kmers +read 18000000 sequences, 2543560790 bases, 2003560790 kmers +read 19000000 sequences, 2602544828 bases, 2032544828 kmers +read 20000000 sequences, 2661829332 bases, 2061829332 kmers +read 21000000 sequences, 2721408473 bases, 2091408473 kmers +read 22000000 sequences, 2781228842 bases, 2121228842 kmers +read 23000000 sequences, 2841415119 bases, 2151415119 kmers +read 24000000 sequences, 2901936379 bases, 2181936379 kmers +read 25000000 sequences, 2962750749 bases, 2212750749 kmers +read 26000000 sequences, 3023914429 bases, 2243914429 kmers +read 27000000 sequences, 3085556058 bases, 2275556058 kmers +read 28000000 sequences, 3147523815 bases, 2307523815 kmers +read 29000000 sequences, 3209891758 bases, 2339891758 kmers +read 30000000 sequences, 3272761181 bases, 2372761181 kmers +read 31000000 sequences, 3336150965 bases, 2406150965 kmers +read 32000000 sequences, 3400254734 bases, 2440254734 kmers +read 33000000 sequences, 3464886783 bases, 2474886783 kmers +read 34000000 sequences, 3530247184 bases, 2510247184 kmers +read 35000000 sequences, 3596273843 bases, 2546273843 kmers +read 36000000 sequences, 3663044813 bases, 2583044813 kmers +read 37000000 sequences, 3730743513 bases, 2620743513 kmers +read 38000000 sequences, 3799297920 bases, 2659297920 kmers +read 39000000 sequences, 3869022100 bases, 2699022100 kmers +read 40000000 sequences, 3939899906 bases, 2739899906 kmers +read 41000000 sequences, 4011944353 bases, 2781944353 kmers +read 42000000 sequences, 4085447760 bases, 2825447760 kmers +read 43000000 sequences, 4160667187 bases, 2870667187 kmers +read 44000000 sequences, 4237696486 bases, 2917696486 kmers +read 45000000 sequences, 4316730755 bases, 2966730755 kmers +read 46000000 sequences, 4398064724 bases, 3018064724 kmers +read 47000000 sequences, 4482251464 bases, 3072251464 kmers +read 48000000 sequences, 4569570617 bases, 3129570617 kmers +read 49000000 sequences, 4660631625 bases, 3190631625 kmers +read 50000000 sequences, 4756246344 bases, 3256246344 kmers +read 51000000 sequences, 4856753463 bases, 3326753463 kmers +read 52000000 sequences, 4964398717 bases, 3404398717 kmers +read 53000000 sequences, 5079791551 bases, 3489791551 kmers +read 54000000 sequences, 5205070836 bases, 3585070836 kmers +read 55000000 sequences, 5343495625 bases, 3693495625 kmers +read 55207753 sequences, 5374353539 bases, 3718120949 kmers +num_kmers 3718120949 +cost: 2.0 + 0.890898 [bits/kmer] +max string length = 17920 +num bits per_absolute_offset = 33 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 17.4107 [sec] (4.68267 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.113.bin'... +=== step 2 (compute minimizer tuples): 6.33697 [sec] (1.70435 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 3 (merging minimizer tuples): 53.641 [sec] (14.4269 [ns/kmer]) +num_minimizers = 544808214 +num_minimizer_positions = 661139039 +num_super_kmers = 661139039 +building minimizers MPHF with 64 threads and 182 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 21.7536 [sec] (5.85069 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 22.4586 [sec] (6.04031 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773352810314179414.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 6 (merging minimizers tuples): 104.069 [sec] (27.9896 [ns/kmer]) +num_bits_per_offset = 33 +max_list_id = 39936749 +bits_for_list_id = 26 +num_bits_for_control = 34 +num_buckets_larger_than_1_not_in_skew_index 51666891/544808214 (9.4835%) +num_buckets_in_skew_index 108291/544808214 (0.0198769%) +max_bucket_size 81171 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 149056975/661139039 (22.5455%) +num_minimizer_positions_of_buckets_in_skew_index 19049032/661139039 (2.88124%) +=== step 7.1 (build sparse index): 11.3316 [sec] (3.04766 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 28872109 + partition = 1: num kmers in buckets of size > 128 and <= 256: 21161002 + partition = 2: num kmers in buckets of size > 256 and <= 512: 14619521 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 8967010 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 5825185 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3889571 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2412718 + partition = 7: num kmers in buckets of size > 8192 and <= 81171: 2510316 +num kmers in skew index = 88257432 (2.37371%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 28872109 + building MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[0] for 28872109 kmers; bits/key = 2.53953 + built positions[0] for 28872109 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21161002 + building MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21161002 kmers; bits/key = 2.665 + built positions[1] for 21161002 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 14619521 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 14619521 kmers; bits/key = 2.58618 + built positions[2] for 14619521 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 8967010 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 8967010 kmers; bits/key = 2.51981 + built positions[3] for 8967010 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 5825185 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 5825185 kmers; bits/key = 2.55049 + built positions[4] for 5825185 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3889571 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3889571 kmers; bits/key = 2.9515 + built positions[5] for 3889571 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2412718 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2412718)... + built mphs[6] for 2412718 kmers; bits/key = 2.41709 + built positions[6] for 2412718 kmers; bits/key = 13.0002 + lower = 8192; upper = 81171; num_bits_per_pos = 17; num_kmers_in_partition = 2510316 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2510316)... + built mphs[7] for 2510316 kmers; bits/key = 2.55991 + built positions[7] for 2510316 kmers; bits/key = 17.0001 +=== step 7.2 (build skew index): 14.7325 [sec] (3.96234 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 90.4966% +buckets with 2 minimizer positions = 7.33042% +buckets with 3 minimizer positions = 1.04104% +buckets with 4 minimizer positions = 0.358078% +buckets with 5 minimizer positions = 0.190444% +buckets with 6 minimizer positions = 0.119606% +buckets with 7 minimizer positions = 0.0815559% +buckets with 8 minimizer positions = 0.059031% +buckets with 9 minimizer positions = 0.0448457% +buckets with 10 minimizer positions = 0.0349758% +buckets with 11 minimizer positions = 0.0279273% +buckets with 12 minimizer positions = 0.022929% +buckets with 13 minimizer positions = 0.0189582% +buckets with 14 minimizer positions = 0.0159649% +buckets with 15 minimizer positions = 0.0135407% +buckets with 16 minimizer positions = 0.0117506% +max_bucket_size = 81171 +=== step 7 (build sparse and skew index): 27.7064 [sec] (7.45173 [ns/kmer]) +=== total time: 253.376 [sec] (68.1463 [ns/kmer]) +total index size: 4810783166 [B] -- 4810.78 [MB] +SPACE BREAKDOWN: + mphf: 0.415447 [bits/kmer] (2.83528 [bits/key]) -- 4.01359% + strings_offsets: 0.300083 [bits/kmer] -- 2.89907% + control_codewords: 4.98195 [bits/kmer] -- 48.1301% + mid_load_buckets: 1.32295 [bits/kmer] -- 12.7809% + begin_buckets_of_size: 5.76635e-07 [bits/kmer] -- 5.57082e-06% + strings: 2.8909 [bits/kmer] -- 27.9287% + skew_index: 0.439676 [bits/kmer] -- 4.24766% + weights: 3.95899e-07 [bits/kmer] -- 3.82474e-06% + -------------- + total: 10.351 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 253.82 seconds +Peak RssAnon: 20536516 kB +Peak RssAnon: 19.59 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:04:24: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz'... +read 1000000 sequences, 260758668 bases, 230758668 kmers +read 2000000 sequences, 549832064 bases, 489832064 kmers +read 2057242 sequences, 564182460 bases, 502465200 kmers +num_kmers 502465200 +cost: 2.0 + 0.245658 [bits/kmer] +max string length = 31415 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 15 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.45559 [sec] (2.8969 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.680128 [sec] (1.35358 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.bin' +=== step 3 (merging minimizer tuples): 4.99394 [sec] (9.93887 [ns/kmer]) +num_minimizers = 86163506 +num_minimizer_positions = 94463730 +num_super_kmers = 98209779 +building minimizers MPHF with 64 threads and 29 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.83602 [sec] (7.63441 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353064145196831.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.47943 [sec] (6.92471 [ns/kmer]) +=== step 6 (merging minimizers tuples): 13.9628 [sec] (27.7887 [ns/kmer]) +num_bits_per_offset = 30 +max_list_id = 1866277 +bits_for_list_id = 21 +num_bits_for_control = 31 +num_buckets_larger_than_1_not_in_skew_index 2988605/86163506 (3.46853%) +num_buckets_in_skew_index 5810/86163506 (0.00674299%) +max_bucket_size 68577 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 10013861/94463730 (10.6007%) +num_minimizer_positions_of_buckets_in_skew_index 1280778/94463730 (1.35584%) +=== step 7.1 (build sparse index): 1.24851 [sec] (2.48477 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 1643143 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1178275 + partition = 2: num kmers in buckets of size > 256 and <= 512: 956038 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 605533 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 425543 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 382297 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 254872 + partition = 7: num kmers in buckets of size > 8192 and <= 68577: 624986 +num kmers in skew index = 6070687 (1.20818%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 1643143 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1643143)... + built mphs[0] for 1643143 kmers; bits/key = 2.56038 + built positions[0] for 1643143 kmers; bits/key = 7.0002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1178275 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1178275)... + built mphs[1] for 1178275 kmers; bits/key = 2.41806 + built positions[1] for 1178275 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 956038 + building MPHF with 64 threads and 1 partitions (avg. partition size = 956038)... + built mphs[2] for 956038 kmers; bits/key = 2.56129 + built positions[2] for 956038 kmers; bits/key = 9.00035 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 605533 + building MPHF with 64 threads and 1 partitions (avg. partition size = 605533)... + built mphs[3] for 605533 kmers; bits/key = 2.41961 + built positions[3] for 605533 kmers; bits/key = 10.0006 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 425543 + building MPHF with 64 threads and 1 partitions (avg. partition size = 425543)... + built mphs[4] for 425543 kmers; bits/key = 2.42126 + built positions[4] for 425543 kmers; bits/key = 11.0009 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 382297 + building MPHF with 64 threads and 1 partitions (avg. partition size = 382297)... + built mphs[5] for 382297 kmers; bits/key = 2.42153 + built positions[5] for 382297 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 254872 + building MPHF with 64 threads and 1 partitions (avg. partition size = 254872)... + built mphs[6] for 254872 kmers; bits/key = 2.56712 + built positions[6] for 254872 kmers; bits/key = 13.0013 + lower = 8192; upper = 68577; num_bits_per_pos = 17; num_kmers_in_partition = 624986 + building MPHF with 64 threads and 1 partitions (avg. partition size = 624986)... + built mphs[7] for 624986 kmers; bits/key = 2.41943 + built positions[7] for 624986 kmers; bits/key = 17.0005 +=== step 7.2 (build skew index): 3.27287 [sec] (6.51362 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.5247% +buckets with 2 minimizer positions = 2.16597% +buckets with 3 minimizer positions = 0.599947% +buckets with 4 minimizer positions = 0.247865% +buckets with 5 minimizer positions = 0.129293% +buckets with 6 minimizer positions = 0.0768388% +buckets with 7 minimizer positions = 0.0512653% +buckets with 8 minimizer positions = 0.0353688% +buckets with 9 minimizer positions = 0.0260667% +buckets with 10 minimizer positions = 0.0199121% +buckets with 11 minimizer positions = 0.0157805% +buckets with 12 minimizer positions = 0.0127513% +buckets with 13 minimizer positions = 0.0102503% +buckets with 14 minimizer positions = 0.00880535% +buckets with 15 minimizer positions = 0.0074347% +buckets with 16 minimizer positions = 0.00615574% +max_bucket_size = 68577 +=== step 7 (build sparse and skew index): 4.74516 [sec] (9.44377 [ns/kmer]) +=== total time: 33.1531 [sec] (65.9809 [ns/kmer]) +total index size: 566181081 [B] -- 566.181 [MB] +SPACE BREAKDOWN: + mphf: 0.487029 [bits/kmer] (2.84013 [bits/key]) -- 5.40276% + strings_offsets: 0.144419 [bits/kmer] -- 1.60208% + control_codewords: 5.31593 [bits/kmer] -- 58.9712% + mid_load_buckets: 0.597885 [bits/kmer] -- 6.63251% + begin_buckets_of_size: 4.26696e-06 [bits/kmer] -- 4.73347e-05% + strings: 2.24566 [bits/kmer] -- 24.9118% + skew_index: 0.223525 [bits/kmer] -- 2.47963% + weights: 2.92956e-06 [bits/kmer] -- 3.24984e-05% + -------------- + total: 9.01445 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 33.28 seconds +Peak RssAnon: 4721328 kB +Peak RssAnon: 4.50 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:04:57: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz'... +read 582860 sequences, 1167885005 bases, 1150399205 kmers +num_kmers 1150399205 +cost: 2.0 + 0.0303995 [bits/kmer] +max string length = 111973 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 20 +=== step 1 (encode strings): 2.66881 [sec] (2.3199 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.35027 [sec] (1.17374 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 11.227 [sec] (9.75922 [ns/kmer]) +num_minimizers = 209937048 +num_minimizer_positions = 213990360 +num_super_kmers = 222970482 +building minimizers MPHF with 64 threads and 70 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 8.98992 [sec] (7.81461 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353097437578532.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 8.00843 [sec] (6.96144 [ns/kmer]) +=== step 6 (merging minimizers tuples): 33.7842 [sec] (29.3674 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 1586083 +bits_for_list_id = 21 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 2035626/209937048 (0.969636%) +num_buckets_in_skew_index 2517/209937048 (0.00119893%) +max_bucket_size 5316 +log2_max_bucket_size 13 +num_partitions in skew index 7 +num_minimizer_positions_of_buckets_larger_than_1 5736623/213990360 (2.68079%) +num_minimizer_positions_of_buckets_in_skew_index 354832/213990360 (0.165817%) +=== step 7.1 (build sparse index): 2.04958 [sec] (1.78163 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 846431 + partition = 1: num kmers in buckets of size > 128 and <= 256: 463901 + partition = 2: num kmers in buckets of size > 256 and <= 512: 298968 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 116879 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 120430 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 42891 + partition = 6: num kmers in buckets of size > 4096 and <= 5316: 9813 +num kmers in skew index = 1899313 (0.1651%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 846431 + building MPHF with 64 threads and 1 partitions (avg. partition size = 846431)... + built mphs[0] for 846431 kmers; bits/key = 2.41876 + built positions[0] for 846431 kmers; bits/key = 7.00042 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 463901 + building MPHF with 64 threads and 1 partitions (avg. partition size = 463901)... + built mphs[1] for 463901 kmers; bits/key = 2.42059 + built positions[1] for 463901 kmers; bits/key = 8.00074 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 298968 + building MPHF with 64 threads and 1 partitions (avg. partition size = 298968)... + built mphs[2] for 298968 kmers; bits/key = 2.42332 + built positions[2] for 298968 kmers; bits/key = 9.0012 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 116879 + building MPHF with 64 threads and 1 partitions (avg. partition size = 116879)... + built mphs[3] for 116879 kmers; bits/key = 2.57716 + built positions[3] for 116879 kmers; bits/key = 10.0031 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 120430 + building MPHF with 64 threads and 1 partitions (avg. partition size = 120430)... + built mphs[4] for 120430 kmers; bits/key = 2.43408 + built positions[4] for 120430 kmers; bits/key = 11.0027 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 42891 + building MPHF with 64 threads and 1 partitions (avg. partition size = 42891)... + built mphs[5] for 42891 kmers; bits/key = 2.46579 + built positions[5] for 42891 kmers; bits/key = 12.0089 + lower = 4096; upper = 5316; num_bits_per_pos = 13; num_kmers_in_partition = 9813 + building MPHF with 64 threads and 1 partitions (avg. partition size = 9813)... + built mphs[6] for 9813 kmers; bits/key = 2.48487 + built positions[6] for 9813 kmers; bits/key = 13.0374 +=== step 7.2 (build skew index): 1.05428 [sec] (0.916444 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.0292% +buckets with 2 minimizer positions = 0.755504% +buckets with 3 minimizer positions = 0.0986639% +buckets with 4 minimizer positions = 0.0387669% +buckets with 5 minimizer positions = 0.0208944% +buckets with 6 minimizer positions = 0.0129548% +buckets with 7 minimizer positions = 0.00864497% +buckets with 8 minimizer positions = 0.00604515% +buckets with 9 minimizer positions = 0.00458137% +buckets with 10 minimizer positions = 0.00347628% +buckets with 11 minimizer positions = 0.00277321% +buckets with 12 minimizer positions = 0.00222448% +buckets with 13 minimizer positions = 0.00183484% +buckets with 14 minimizer positions = 0.00150569% +buckets with 15 minimizer positions = 0.00123418% +buckets with 16 minimizer positions = 0.00103317% +max_bucket_size = 5316 +=== step 7 (build sparse and skew index): 3.59452 [sec] (3.12459 [ns/kmer]) +=== total time: 69.6232 [sec] (60.5209 [ns/kmer]) +total index size: 1246156075 [B] -- 1246.16 [MB] +SPACE BREAKDOWN: + mphf: 0.514173 [bits/kmer] (2.81753 [bits/key]) -- 5.93329% + strings_offsets: 0.100021 [bits/kmer] -- 1.15419% + control_codewords: 5.8397 [bits/kmer] -- 67.3871% + mid_load_buckets: 0.154586 [bits/kmer] -- 1.78384% + begin_buckets_of_size: 1.8637e-06 [bits/kmer] -- 2.15061e-05% + strings: 2.0304 [bits/kmer] -- 23.4298% + skew_index: 0.0270208 [bits/kmer] -- 0.311806% + weights: 1.27956e-06 [bits/kmer] -- 1.47654e-05% + -------------- + total: 8.6659 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 69.77 seconds +Peak RssAnon: 8420504 kB +Peak RssAnon: 8.03 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:06:07: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.26594 [sec] (2.89979 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.101.bin'... +=== step 2 (compute minimizer tuples): 3.21118 [sec] (1.28156 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 62.0903 [sec] (24.7799 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 64 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 19.3613 [sec] (7.72695 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.7321 [sec] (7.47584 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353167224952796.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 74.4429 [sec] (29.7097 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 8750986 +bits_for_list_id = 24 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 14266506/462224926 (3.08649%) +num_buckets_in_skew_index 60557/462224926 (0.0131012%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 53165974/511201278 (10.4002%) +num_minimizer_positions_of_buckets_in_skew_index 10137441/511201278 (1.98306%) +=== step 7.1 (build sparse index): 6.62725 [sec] (2.64489 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 14957205 + partition = 1: num kmers in buckets of size > 128 and <= 256: 10906495 + partition = 2: num kmers in buckets of size > 256 and <= 512: 7473094 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1593261 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 729770 + partition = 7: num kmers in buckets of size > 8192 and <= 22085: 506148 +num kmers in skew index = 43578595 (1.73919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 14957205 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[0] for 14957205 kmers; bits/key = 2.56583 + built positions[0] for 14957205 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10906495 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[1] for 10906495 kmers; bits/key = 2.61744 + built positions[1] for 10906495 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 7473094 kmers; bits/key = 2.65359 + built positions[2] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 4774535 kmers; bits/key = 2.75085 + built positions[3] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[4] for 2638087 kmers; bits/key = 2.55989 + built positions[4] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1593261 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1593261)... + built mphs[5] for 1593261 kmers; bits/key = 2.56041 + built positions[5] for 1593261 kmers; bits/key = 12.0002 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 729770 + building MPHF with 64 threads and 1 partitions (avg. partition size = 729770)... + built mphs[6] for 729770 kmers; bits/key = 2.56195 + built positions[6] for 729770 kmers; bits/key = 13.0005 + lower = 8192; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 506148 + building MPHF with 64 threads and 1 partitions (avg. partition size = 506148)... + built mphs[7] for 506148 kmers; bits/key = 2.42048 + built positions[7] for 506148 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 8.79966 [sec] (3.51189 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 16.6559 [sec] (6.64725 [ns/kmer]) +=== total time: 201.76 [sec] (80.5209 [ns/kmer]) +total index size: 3135788878 [B] -- 3135.79 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.2262% + strings_offsets: 0.153147 [bits/kmer] -- 1.52966% + control_codewords: 6.08754 [bits/kmer] -- 60.8038% + mid_load_buckets: 0.678982 [bits/kmer] -- 6.78183% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 8.54649e-06% + strings: 2.24545 [bits/kmer] -- 22.4281% + skew_index: 0.323422 [bits/kmer] -- 3.23041% + weights: 5.87466e-07 [bits/kmer] -- 5.86774e-06% + -------------- + total: 10.0118 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 202.09 seconds +Peak RssAnon: 19241048 kB +Peak RssAnon: 18.35 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:09:29: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz'... +read 1000000 sequences, 289026795 bases, 259026795 kmers +read 1645464 sequences, 425569105 bases, 376205185 kmers +num_kmers 376205185 +cost: 2.0 + 0.262431 [bits/kmer] +max string length = 234900 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 18 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.11759 [sec] (2.9707 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.626972 [sec] (1.66657 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.bin' +=== step 3 (merging minimizer tuples): 3.50409 [sec] (9.31432 [ns/kmer]) +num_minimizers = 61900353 +num_minimizer_positions = 66937395 +num_super_kmers = 69800288 +building minimizers MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.90586 [sec] (7.72415 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353369333485336.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.58012 [sec] (6.85827 [ns/kmer]) +=== step 6 (merging minimizers tuples): 5.73968 [sec] (15.2568 [ns/kmer]) +num_bits_per_offset = 29 +max_list_id = 2788316 +bits_for_list_id = 22 +num_bits_for_control = 30 +num_buckets_larger_than_1_not_in_skew_index 3624138/61900353 (5.85479%) +num_buckets_in_skew_index 39/61900353 (6.30045e-05%) +max_bucket_size 536 +log2_max_bucket_size 10 +num_partitions in skew index 4 +num_minimizer_positions_of_buckets_larger_than_1 8656439/66937395 (12.9321%) +num_minimizer_positions_of_buckets_in_skew_index 4780/66937395 (0.007141%) +=== step 7.1 (build sparse index): 0.987218 [sec] (2.62415 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 13913 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2852 + partition = 2: num kmers in buckets of size > 256 and <= 512: 3765 + partition = 3: num kmers in buckets of size > 512 and <= 536: 1800 +num kmers in skew index = 22330 (0.00593559%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 13913 + building MPHF with 64 threads and 1 partitions (avg. partition size = 13913)... + built mphs[0] for 13913 kmers; bits/key = 2.42421 + built positions[0] for 13913 kmers; bits/key = 7.02422 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2852 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2852)... + built mphs[1] for 2852 kmers; bits/key = 3.01823 + built positions[1] for 2852 kmers; bits/key = 8.12342 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 3765 + building MPHF with 64 threads and 1 partitions (avg. partition size = 3765)... + built mphs[2] for 3765 kmers; bits/key = 2.85578 + built positions[2] for 3765 kmers; bits/key = 9.09429 + lower = 512; upper = 536; num_bits_per_pos = 10; num_kmers_in_partition = 1800 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1800)... + built mphs[3] for 1800 kmers; bits/key = 3.48444 + built positions[3] for 1800 kmers; bits/key = 10.2044 +=== step 7.2 (build skew index): 0.039684 [sec] (0.105485 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 94.1451% +buckets with 2 minimizer positions = 4.50452% +buckets with 3 minimizer positions = 0.894247% +buckets with 4 minimizer positions = 0.259036% +buckets with 5 minimizer positions = 0.0977103% +buckets with 6 minimizer positions = 0.0440321% +buckets with 7 minimizer positions = 0.0222034% +buckets with 8 minimizer positions = 0.0119789% +buckets with 9 minimizer positions = 0.00695473% +buckets with 10 minimizer positions = 0.00425684% +buckets with 11 minimizer positions = 0.00288528% +buckets with 12 minimizer positions = 0.00173505% +buckets with 13 minimizer positions = 0.00116639% +buckets with 14 minimizer positions = 0.000827136% +buckets with 15 minimizer positions = 0.000639738% +buckets with 16 minimizer positions = 0.000463648% +max_bucket_size = 536 +=== step 7 (build sparse and skew index): 1.19676 [sec] (3.18114 [ns/kmer]) +=== total time: 17.6711 [sec] (46.9719 [ns/kmer]) +total index size: 398891769 [B] -- 398.892 [MB] +SPACE BREAKDOWN: + mphf: 0.472086 [bits/kmer] (2.86915 [bits/key]) -- 5.56546% + strings_offsets: 0.14346 [bits/kmer] -- 1.69126% + control_codewords: 4.93617 [bits/kmer] -- 58.1928% + mid_load_buckets: 0.667288 [bits/kmer] -- 7.8667% + begin_buckets_of_size: 5.69902e-06 [bits/kmer] -- 6.71861e-05% + strings: 2.26243 [bits/kmer] -- 26.672% + skew_index: 0.000988652 [bits/kmer] -- 0.0116553% + weights: 3.91276e-06 [bits/kmer] -- 4.61278e-05% + -------------- + total: 8.48243 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 17.84 seconds +Peak RssAnon: 3374668 kB +Peak RssAnon: 3.22 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:09:47: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.56365 [sec] (5.10299 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.64.bin'... +=== step 2 (compute minimizer tuples): 1.32295 [sec] (1.4793 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.705 [sec] (11.9701 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 64 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.1792 [sec] (6.90946 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353387189481513.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.90867 [sec] (7.72514 [ns/kmer]) +=== step 6 (merging minimizers tuples): 28.3676 [sec] (31.72 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 10738776 +bits_for_list_id = 24 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 18123968/143418843 (12.6371%) +num_buckets_in_skew_index 15172/143418843 (0.0105788%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 65664193/193511241 (33.933%) +num_minimizer_positions_of_buckets_in_skew_index 2567345/193511241 (1.32672%) +=== step 7.1 (build sparse index): 4.1144 [sec] (4.60064 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4240400 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1914946 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 475654 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 388509 + partition = 7: num kmers in buckets of size > 8192 and <= 30655: 459571 +num kmers in skew index = 9919638 (1.10919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4240400 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4240400 kmers; bits/key = 2.84282 + built positions[0] for 4240400 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1914946 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1914946)... + built mphs[1] for 1914946 kmers; bits/key = 2.56017 + built positions[1] for 1914946 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1106165)... + built mphs[2] for 1106165 kmers; bits/key = 2.56093 + built positions[2] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 64 threads and 1 partitions (avg. partition size = 771672)... + built mphs[3] for 771672 kmers; bits/key = 2.41893 + built positions[3] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 64 threads and 1 partitions (avg. partition size = 562721)... + built mphs[4] for 562721 kmers; bits/key = 2.56295 + built positions[4] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 475654 + building MPHF with 64 threads and 1 partitions (avg. partition size = 475654)... + built mphs[5] for 475654 kmers; bits/key = 2.56365 + built positions[5] for 475654 kmers; bits/key = 12.0008 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 388509 + building MPHF with 64 threads and 1 partitions (avg. partition size = 388509)... + built mphs[6] for 388509 kmers; bits/key = 2.42185 + built positions[6] for 388509 kmers; bits/key = 13.0008 + lower = 8192; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 459571 + building MPHF with 64 threads and 1 partitions (avg. partition size = 459571)... + built mphs[7] for 459571 kmers; bits/key = 2.4208 + built positions[7] for 459571 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 3.72671 [sec] (4.16713 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 8.38385 [sec] (9.37466 [ns/kmer]) +=== total time: 66.4309 [sec] (74.2817 [ns/kmer]) +total index size: 1287008500 [B] -- 1287.01 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.93723% + strings_offsets: 0.333373 [bits/kmer] -- 2.89565% + control_codewords: 5.13178 [bits/kmer] -- 44.5743% + mid_load_buckets: 2.27616 [bits/kmer] -- 19.7706% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.08235e-05% + strings: 3.10303 [bits/kmer] -- 26.9527% + skew_index: 0.215227 [bits/kmer] -- 1.86944% + weights: 1.64596e-06 [bits/kmer] -- 1.42967e-05% + -------------- + total: 11.5129 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 66.56 seconds +Peak RssAnon: 7860832 kB +Peak RssAnon: 7.50 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:10:53: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz'... +read 1000000 sequences, 159860354 bases, 129860354 kmers +read 2000000 sequences, 345519042 bases, 285519042 kmers +read 3000000 sequences, 569210425 bases, 479210425 kmers +read 4000000 sequences, 848332212 bases, 728332212 kmers +read 5000000 sequences, 1226889961 bases, 1076889961 kmers +read 6000000 sequences, 1800462808 bases, 1620462808 kmers +read 7000000 sequences, 1906975392 bases, 1696975392 kmers +read 8000000 sequences, 1964117272 bases, 1724117272 kmers +read 9000000 sequences, 2021346703 bases, 1751346703 kmers +read 10000000 sequences, 2078777105 bases, 1778777105 kmers +read 11000000 sequences, 2136245853 bases, 1806245853 kmers +read 12000000 sequences, 2193864516 bases, 1833864516 kmers +read 13000000 sequences, 2251713140 bases, 1861713140 kmers +read 14000000 sequences, 2309685311 bases, 1889685311 kmers +read 15000000 sequences, 2367830861 bases, 1917830861 kmers +read 16000000 sequences, 2426185107 bases, 1946185107 kmers +read 17000000 sequences, 2484756357 bases, 1974756357 kmers +read 18000000 sequences, 2543560790 bases, 2003560790 kmers +read 19000000 sequences, 2602544828 bases, 2032544828 kmers +read 20000000 sequences, 2661829332 bases, 2061829332 kmers +read 21000000 sequences, 2721408473 bases, 2091408473 kmers +read 22000000 sequences, 2781228842 bases, 2121228842 kmers +read 23000000 sequences, 2841415119 bases, 2151415119 kmers +read 24000000 sequences, 2901936379 bases, 2181936379 kmers +read 25000000 sequences, 2962750749 bases, 2212750749 kmers +read 26000000 sequences, 3023914429 bases, 2243914429 kmers +read 27000000 sequences, 3085556058 bases, 2275556058 kmers +read 28000000 sequences, 3147523815 bases, 2307523815 kmers +read 29000000 sequences, 3209891758 bases, 2339891758 kmers +read 30000000 sequences, 3272761181 bases, 2372761181 kmers +read 31000000 sequences, 3336150965 bases, 2406150965 kmers +read 32000000 sequences, 3400254734 bases, 2440254734 kmers +read 33000000 sequences, 3464886783 bases, 2474886783 kmers +read 34000000 sequences, 3530247184 bases, 2510247184 kmers +read 35000000 sequences, 3596273843 bases, 2546273843 kmers +read 36000000 sequences, 3663044813 bases, 2583044813 kmers +read 37000000 sequences, 3730743513 bases, 2620743513 kmers +read 38000000 sequences, 3799297920 bases, 2659297920 kmers +read 39000000 sequences, 3869022100 bases, 2699022100 kmers +read 40000000 sequences, 3939899906 bases, 2739899906 kmers +read 41000000 sequences, 4011944353 bases, 2781944353 kmers +read 42000000 sequences, 4085447760 bases, 2825447760 kmers +read 43000000 sequences, 4160667187 bases, 2870667187 kmers +read 44000000 sequences, 4237696486 bases, 2917696486 kmers +read 45000000 sequences, 4316730755 bases, 2966730755 kmers +read 46000000 sequences, 4398064724 bases, 3018064724 kmers +read 47000000 sequences, 4482251464 bases, 3072251464 kmers +read 48000000 sequences, 4569570617 bases, 3129570617 kmers +read 49000000 sequences, 4660631625 bases, 3190631625 kmers +read 50000000 sequences, 4756246344 bases, 3256246344 kmers +read 51000000 sequences, 4856753463 bases, 3326753463 kmers +read 52000000 sequences, 4964398717 bases, 3404398717 kmers +read 53000000 sequences, 5079791551 bases, 3489791551 kmers +read 54000000 sequences, 5205070836 bases, 3585070836 kmers +read 55000000 sequences, 5343495625 bases, 3693495625 kmers +read 55207753 sequences, 5374353539 bases, 3718120949 kmers +num_kmers 3718120949 +cost: 2.0 + 0.890898 [bits/kmer] +max string length = 17920 +num bits per_absolute_offset = 33 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 16.8933 [sec] (4.54349 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.124.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.133.bin'... +=== step 2 (compute minimizer tuples): 9.31138 [sec] (2.50432 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +=== step 3 (merging minimizer tuples): 59.5137 [sec] (16.0064 [ns/kmer]) +num_minimizers = 619508590 +num_minimizer_positions = 790834640 +num_super_kmers = 819080133 +building minimizers MPHF with 64 threads and 207 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 25.261 [sec] (6.79404 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 26.9123 [sec] (7.23816 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773353453765299752.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +=== step 6 (merging minimizers tuples): 134.502 [sec] (36.1746 [ns/kmer]) +num_bits_per_offset = 33 +max_list_id = 62946870 +bits_for_list_id = 26 +num_bits_for_control = 34 +num_buckets_larger_than_1_not_in_skew_index 81104016/619508590 (13.0917%) +num_buckets_in_skew_index 149851/619508590 (0.0241887%) +max_bucket_size 71241 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 225181159/790834640 (28.4739%) +num_minimizer_positions_of_buckets_in_skew_index 27398758/790834640 (3.46454%) +=== step 7.1 (build sparse index): 15.9703 [sec] (4.29526 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 35558914 + partition = 1: num kmers in buckets of size > 128 and <= 256: 26726484 + partition = 2: num kmers in buckets of size > 256 and <= 512: 19151665 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 12830346 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 8265693 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 5225188 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 3257832 + partition = 7: num kmers in buckets of size > 8192 and <= 71241: 4228970 +num kmers in skew index = 115245092 (3.09955%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 35558914 + building MPHF with 64 threads and 12 partitions (avg. partition size = 3000000)... + built mphs[0] for 35558914 kmers; bits/key = 2.57422 + built positions[0] for 35558914 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26726484 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26726484 kmers; bits/key = 2.5656 + built positions[1] for 26726484 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19151665 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19151665 kmers; bits/key = 2.60993 + built positions[2] for 19151665 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 12830346 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 12830346 kmers; bits/key = 2.65495 + built positions[3] for 12830346 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 8265693 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 8265693 kmers; bits/key = 2.59459 + built positions[4] for 8265693 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 5225188 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 5225188 kmers; bits/key = 2.63154 + built positions[5] for 5225188 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 3257832 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 3257832 kmers; bits/key = 3.31153 + built positions[6] for 3257832 kmers; bits/key = 13.0001 + lower = 8192; upper = 71241; num_bits_per_pos = 17; num_kmers_in_partition = 4228970 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[7] for 4228970 kmers; bits/key = 2.84935 + built positions[7] for 4228970 kmers; bits/key = 17.0001 +=== step 7.2 (build skew index): 15.5756 [sec] (4.18909 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 86.8841% +buckets with 2 minimizer positions = 10.1608% +buckets with 3 minimizer positions = 1.5937% +buckets with 4 minimizer positions = 0.465965% +buckets with 5 minimizer positions = 0.227622% +buckets with 6 minimizer positions = 0.138122% +buckets with 7 minimizer positions = 0.0932371% +buckets with 8 minimizer positions = 0.0672932% +buckets with 9 minimizer positions = 0.0507686% +buckets with 10 minimizer positions = 0.0394317% +buckets with 11 minimizer positions = 0.0314958% +buckets with 12 minimizer positions = 0.025655% +buckets with 13 minimizer positions = 0.0213997% +buckets with 14 minimizer positions = 0.017952% +buckets with 15 minimizer positions = 0.0152708% +buckets with 16 minimizer positions = 0.0132381% +max_bucket_size = 71241 +=== step 7 (build sparse and skew index): 33.6418 [sec] (9.04805 [ns/kmer]) +=== total time: 306.035 [sec] (82.309 [ns/kmer]) +total index size: 5544019788 [B] -- 5544.02 [MB] +SPACE BREAKDOWN: + mphf: 0.472233 [bits/kmer] (2.83421 [bits/key]) -- 3.95881% + strings_offsets: 0.300083 [bits/kmer] -- 2.51565% + control_codewords: 5.66504 [bits/kmer] -- 47.491% + mid_load_buckets: 1.99858 [bits/kmer] -- 16.7545% + begin_buckets_of_size: 5.76635e-07 [bits/kmer] -- 4.83404e-06% + strings: 2.8909 [bits/kmer] -- 24.2349% + skew_index: 0.601813 [bits/kmer] -- 5.04511% + weights: 3.95899e-07 [bits/kmer] -- 3.31889e-06% + -------------- + total: 11.9286 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 306.53 seconds +Peak RssAnon: 20230700 kB +Peak RssAnon: 19.29 GB +================================================== + diff --git a/benchmarks/rss_anon.k63.log b/benchmarks/rss_anon.k63.log new file mode 100644 index 0000000..36ab9a0 --- /dev/null +++ b/benchmarks/rss_anon.k63.log @@ -0,0 +1,2331 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 23:22:15: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz'... +read 954555 sequences, 615768068 bases, 556585658 kmers +num_kmers 556585658 +cost: 2.0 + 0.212662 [bits/kmer] +max string length = 46783 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 16 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.74024 [sec] (3.12664 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.337428 [sec] (0.606246 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.44362 [sec] (2.5937 [ns/kmer]) +num_minimizers = 24306363 +num_minimizer_positions = 28148872 +num_super_kmers = 28148872 +building minimizers MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 1.35895 [sec] (2.44158 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354135649824283.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 1.06611 [sec] (1.91545 [ns/kmer]) +=== step 6 (merging minimizers tuples): 2.21267 [sec] (3.97543 [ns/kmer]) +num_bits_per_offset = 30 +max_list_id = 548266 +bits_for_list_id = 20 +num_bits_for_control = 31 +num_buckets_larger_than_1_not_in_skew_index 957832/24306363 (3.94066%) +num_buckets_in_skew_index 3271/24306363 (0.0134574%) +max_bucket_size 414759 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 3653173/28148872 (12.978%) +num_minimizer_positions_of_buckets_in_skew_index 1150439/28148872 (4.08698%) +=== step 7.1 (build sparse index): 0.377633 [sec] (0.678481 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 3558021 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2129998 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1247229 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1130328 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1022110 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 790746 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 666283 + partition = 7: num kmers in buckets of size > 8192 and <= 414759: 3762004 +num kmers in skew index = 14306719 (2.57044%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 3558021 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 3558021 kmers; bits/key = 3.06724 + built positions[0] for 3558021 kmers; bits/key = 7.0001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2129998 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2129998)... + built mphs[1] for 2129998 kmers; bits/key = 2.56007 + built positions[1] for 2129998 kmers; bits/key = 8.00016 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1247229 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1247229)... + built mphs[2] for 1247229 kmers; bits/key = 2.56071 + built positions[2] for 1247229 kmers; bits/key = 9.00028 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1130328 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1130328)... + built mphs[3] for 1130328 kmers; bits/key = 2.56088 + built positions[3] for 1130328 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1022110 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1022110)... + built mphs[4] for 1022110 kmers; bits/key = 2.41826 + built positions[4] for 1022110 kmers; bits/key = 11.0004 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 790746 + building MPHF with 64 threads and 1 partitions (avg. partition size = 790746)... + built mphs[5] for 790746 kmers; bits/key = 2.4189 + built positions[5] for 790746 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 666283 + building MPHF with 64 threads and 1 partitions (avg. partition size = 666283)... + built mphs[6] for 666283 kmers; bits/key = 2.5622 + built positions[6] for 666283 kmers; bits/key = 13.0005 + lower = 8192; upper = 414759; num_bits_per_pos = 19; num_kmers_in_partition = 3762004 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[7] for 3762004 kmers; bits/key = 2.9235 + built positions[7] for 3762004 kmers; bits/key = 19.0001 +=== step 7.2 (build skew index): 4.71936 [sec] (8.47912 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.0459% +buckets with 2 minimizer positions = 2.25565% +buckets with 3 minimizer positions = 0.699874% +buckets with 4 minimizer positions = 0.315284% +buckets with 5 minimizer positions = 0.173597% +buckets with 6 minimizer positions = 0.108305% +buckets with 7 minimizer positions = 0.0716932% +buckets with 8 minimizer positions = 0.0520522% +buckets with 9 minimizer positions = 0.0388458% +buckets with 10 minimizer positions = 0.0301485% +buckets with 11 minimizer positions = 0.0240019% +buckets with 12 minimizer positions = 0.0204144% +buckets with 13 minimizer positions = 0.0165265% +buckets with 14 minimizer positions = 0.013984% +buckets with 15 minimizer positions = 0.0120051% +buckets with 16 minimizer positions = 0.00972996% +max_bucket_size = 414759 +=== step 7 (build sparse and skew index): 5.1736 [sec] (9.29524 [ns/kmer]) +=== total time: 13.3326 [sec] (23.9543 [ns/kmer]) +total index size: 308878216 [B] -- 308.878 [MB] +SPACE BREAKDOWN: + mphf: 0.130318 [bits/kmer] (2.98412 [bits/key]) -- 2.93534% + strings_offsets: 0.115799 [bits/kmer] -- 2.60831% + control_codewords: 1.35379 [bits/kmer] -- 30.4933% + mid_load_buckets: 0.196907 [bits/kmer] -- 4.43522% + begin_buckets_of_size: 3.85206e-06 [bits/kmer] -- 8.67656e-05% + strings: 2.21266 [bits/kmer] -- 49.8391% + skew_index: 0.430136 [bits/kmer] -- 9.68858% + weights: 2.6447e-06 [bits/kmer] -- 5.95704e-05% + -------------- + total: 4.43961 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 13.43 seconds +Peak RssAnon: 1470024 kB +Peak RssAnon: 1.40 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 23:22:29: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz'... +read 155784 sequences, 1164909275 bases, 1155250667 kmers +num_kmers 1155250667 +cost: 2.0 + 0.0167212 [bits/kmer] +max string length = 261876 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 18 +num bits per_string_id = 18 +=== step 1 (encode strings): 2.79124 [sec] (2.41614 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.49185 [sec] (0.425752 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.bin' +=== step 3 (merging minimizer tuples): 2.78586 [sec] (2.41148 [ns/kmer]) +num_minimizers = 55464592 +num_minimizer_positions = 56006004 +num_super_kmers = 56006004 +building minimizers MPHF with 64 threads and 19 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.62902 [sec] (2.27571 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354149091905546.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.02545 [sec] (1.75325 [ns/kmer]) +=== step 6 (merging minimizers tuples): 4.42841 [sec] (3.83329 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 140487 +bits_for_list_id = 18 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 208120/55464592 (0.37523%) +num_buckets_in_skew_index 490/55464592 (0.000883447%) +max_bucket_size 794 +log2_max_bucket_size 10 +num_partitions in skew index 4 +num_minimizer_positions_of_buckets_larger_than_1 687113/56006004 (1.22686%) +num_minimizer_positions_of_buckets_in_skew_index 62909/56006004 (0.112325%) +=== step 7.1 (build sparse index): 0.463921 [sec] (0.401576 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 604884 + partition = 1: num kmers in buckets of size > 128 and <= 256: 417805 + partition = 2: num kmers in buckets of size > 256 and <= 512: 221219 + partition = 3: num kmers in buckets of size > 512 and <= 794: 50229 +num kmers in skew index = 1294137 (0.112022%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 604884 + building MPHF with 64 threads and 1 partitions (avg. partition size = 604884)... + built mphs[0] for 604884 kmers; bits/key = 2.41964 + built positions[0] for 604884 kmers; bits/key = 7.00061 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 417805 + building MPHF with 64 threads and 1 partitions (avg. partition size = 417805)... + built mphs[1] for 417805 kmers; bits/key = 2.42142 + built positions[1] for 417805 kmers; bits/key = 8.00082 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 221219 + building MPHF with 64 threads and 1 partitions (avg. partition size = 221219)... + built mphs[2] for 221219 kmers; bits/key = 2.42569 + built positions[2] for 221219 kmers; bits/key = 9.00147 + lower = 512; upper = 794; num_bits_per_pos = 10; num_kmers_in_partition = 50229 + building MPHF with 64 threads and 1 partitions (avg. partition size = 50229)... + built mphs[3] for 50229 kmers; bits/key = 2.45946 + built positions[3] for 50229 kmers; bits/key = 10.0073 +=== step 7.2 (build skew index): 0.71437 [sec] (0.618368 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.6239% +buckets with 2 minimizer positions = 0.253291% +buckets with 3 minimizer positions = 0.0530717% +buckets with 4 minimizer positions = 0.0227569% +buckets with 5 minimizer positions = 0.012325% +buckets with 6 minimizer positions = 0.00748405% +buckets with 7 minimizer positions = 0.00509695% +buckets with 8 minimizer positions = 0.00359148% +buckets with 9 minimizer positions = 0.00270803% +buckets with 10 minimizer positions = 0.0021383% +buckets with 11 minimizer positions = 0.00159201% +buckets with 12 minimizer positions = 0.00125666% +buckets with 13 minimizer positions = 0.00109619% +buckets with 14 minimizer positions = 0.000941141% +buckets with 15 minimizer positions = 0.000795102% +buckets with 16 minimizer positions = 0.000685122% +max_bucket_size = 794 +=== step 7 (build sparse and skew index): 1.29619 [sec] (1.122 [ns/kmer]) +=== total time: 16.448 [sec] (14.2376 [ns/kmer]) +total index size: 551471063 [B] -- 551.471 [MB] +SPACE BREAKDOWN: + mphf: 0.138428 [bits/kmer] (2.88327 [bits/key]) -- 3.62483% + strings_offsets: 0.0958242 [bits/kmer] -- 2.50922% + control_codewords: 1.53635 [bits/kmer] -- 40.2303% + mid_load_buckets: 0.0184383 [bits/kmer] -- 0.482819% + begin_buckets_of_size: 1.85587e-06 [bits/kmer] -- 4.85973e-05% + strings: 2.01672 [bits/kmer] -- 52.8092% + skew_index: 0.0131204 [bits/kmer] -- 0.343567% + weights: 1.27418e-06 [bits/kmer] -- 3.33653e-05% + -------------- + total: 3.81888 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 16.54 seconds +Peak RssAnon: 2393784 kB +Peak RssAnon: 2.28 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 23:22:45: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.80963 [sec] (2.45718 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.07204 [sec] (0.386836 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 6.93808 [sec] (2.50353 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 64 threads and 41 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 5.15477 [sec] (1.86005 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354165643423020.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.89515 [sec] (1.76636 [ns/kmer]) +=== step 6 (merging minimizers tuples): 11.457 [sec] (4.13415 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 1804032 +bits_for_list_id = 21 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 3097190/122838669 (2.52135%) +num_buckets_in_skew_index 28203/122838669 (0.0229594%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 12724460/140756047 (9.04008%) +num_minimizer_positions_of_buckets_in_skew_index 8318311/140756047 (5.90974%) +=== step 7.1 (build sparse index): 1.67258 [sec] (0.603534 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 25196923 + partition = 1: num kmers in buckets of size > 128 and <= 256: 21919654 + partition = 2: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 14085569 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 9296403 + partition = 7: num kmers in buckets of size > 8192 and <= 147936: 20255122 +num kmers in skew index = 145458128 (5.2487%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 25196923 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 25196923 kmers; bits/key = 2.56001 + built positions[0] for 25196923 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21919654 + building MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21919654 kmers; bits/key = 2.60671 + built positions[1] for 21919654 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19634878 kmers; bits/key = 2.55594 + built positions[2] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 18051454 kmers; bits/key = 2.60116 + built positions[3] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17018125 kmers; bits/key = 2.58264 + built positions[4] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14085569 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[5] for 14085569 kmers; bits/key = 2.54674 + built positions[5] for 14085569 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 9296403 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[6] for 9296403 kmers; bits/key = 2.72206 + built positions[6] for 9296403 kmers; bits/key = 13 + lower = 8192; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 20255122 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[7] for 20255122 kmers; bits/key = 2.57505 + built positions[7] for 20255122 kmers; bits/key = 18 +=== step 7.2 (build skew index): 16.944 [sec] (6.11405 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 18.9282 [sec] (6.83004 [ns/kmer]) +=== total time: 55.2549 [sec] (19.9381 [ns/kmer]) +total index size: 1647878160 [B] -- 1647.88 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.63218% + strings_offsets: 0.11255 [bits/kmer] -- 2.36601% + control_codewords: 1.46273 [bits/kmer] -- 30.7492% + mid_load_buckets: 0.146928 [bits/kmer] -- 3.08869% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.62633e-05% + strings: 2.11826 [bits/kmer] -- 44.5296% + skew_index: 0.791283 [bits/kmer] -- 16.6342% + weights: 5.31156e-07 [bits/kmer] -- 1.11659e-05% + -------------- + total: 4.75695 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 55.52 seconds +Peak RssAnon: 7262092 kB +Peak RssAnon: 6.93 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 23:23:41: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz'... +read 800231 sequences, 462130202 bases, 412515880 kmers +num_kmers 412515880 +cost: 2.0 + 0.240545 [bits/kmer] +max string length = 490374 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 19 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.19316 [sec] (2.89239 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.317057 [sec] (0.768593 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.03205 [sec] (2.50183 [ns/kmer]) +num_minimizers = 18448739 +num_minimizer_positions = 20311554 +num_super_kmers = 20311554 +building minimizers MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 1.10855 [sec] (2.68729 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354221176352543.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 0.779128 [sec] (1.88872 [ns/kmer]) +=== step 6 (merging minimizers tuples): 1.59312 [sec] (3.86196 [ns/kmer]) +num_bits_per_offset = 29 +max_list_id = 877290 +bits_for_list_id = 20 +num_bits_for_control = 30 +num_buckets_larger_than_1_not_in_skew_index 1211725/18448739 (6.56806%) +num_buckets_in_skew_index 16/18448739 (8.67268e-05%) +max_bucket_size 489 +log2_max_bucket_size 9 +num_partitions in skew index 3 +num_minimizer_positions_of_buckets_larger_than_1 3072206/20311554 (15.1254%) +num_minimizer_positions_of_buckets_in_skew_index 2350/20311554 (0.0115698%) +=== step 7.1 (build sparse index): 0.298088 [sec] (0.72261 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 15469 + partition = 1: num kmers in buckets of size > 128 and <= 256: 14628 + partition = 2: num kmers in buckets of size > 256 and <= 489: 2787 +num kmers in skew index = 32884 (0.00797157%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 15469 + building MPHF with 64 threads and 1 partitions (avg. partition size = 15469)... + built mphs[0] for 15469 kmers; bits/key = 2.55168 + built positions[0] for 15469 kmers; bits/key = 7.02101 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 14628 + building MPHF with 64 threads and 1 partitions (avg. partition size = 14628)... + built mphs[1] for 14628 kmers; bits/key = 2.56166 + built positions[1] for 14628 kmers; bits/key = 8.02406 + lower = 256; upper = 489; num_bits_per_pos = 9; num_kmers_in_partition = 2787 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2787)... + built mphs[2] for 2787 kmers; bits/key = 3.06566 + built positions[2] for 2787 kmers; bits/key = 9.11661 +=== step 7.2 (build skew index): 0.038214 [sec] (0.0926364 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 93.4318% +buckets with 2 minimizer positions = 4.75528% +buckets with 3 minimizer positions = 1.08358% +buckets with 4 minimizer positions = 0.365494% +buckets with 5 minimizer positions = 0.158802% +buckets with 6 minimizer positions = 0.080412% +buckets with 7 minimizer positions = 0.0434393% +buckets with 8 minimizer positions = 0.0264408% +buckets with 9 minimizer positions = 0.0161962% +buckets with 10 minimizer positions = 0.0109113% +buckets with 11 minimizer positions = 0.00805475% +buckets with 12 minimizer positions = 0.00528491% +buckets with 13 minimizer positions = 0.00364795% +buckets with 14 minimizer positions = 0.00271563% +buckets with 15 minimizer positions = 0.00196219% +buckets with 16 minimizer positions = 0.00142015% +max_bucket_size = 489 +=== step 7 (build sparse and skew index): 0.388598 [sec] (0.942019 [ns/kmer]) +=== total time: 6.41166 [sec] (15.5428 [ns/kmer]) +total index size: 209086404 [B] -- 209.086 [MB] +SPACE BREAKDOWN: + mphf: 0.136459 [bits/kmer] (3.05124 [bits/key]) -- 3.36533% + strings_offsets: 0.119202 [bits/kmer] -- 2.93974% + control_codewords: 1.34168 [bits/kmer] -- 33.0881% + mid_load_buckets: 0.215978 [bits/kmer] -- 5.32641% + begin_buckets_of_size: 5.19738e-06 [bits/kmer] -- 0.000128177% + strings: 2.24055 [bits/kmer] -- 55.2559% + skew_index: 0.000982963 [bits/kmer] -- 0.0242417% + weights: 3.56835e-06 [bits/kmer] -- 8.80019e-05% + -------------- + total: 4.05485 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 6.51 seconds +Peak RssAnon: 1081740 kB +Peak RssAnon: 1.03 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 23:23:47: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.2009 [sec] (4.7222 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.818614 [sec] (0.53683 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.71675 [sec] (3.74892 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.00677 [sec] (1.97178 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354227704249751.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.42505 [sec] (2.24608 [ns/kmer]) +=== step 6 (merging minimizers tuples): 8.26592 [sec] (5.42062 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 5263874 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 9999764/61951224 (16.1414%) +num_buckets_in_skew_index 36489/61951224 (0.0588996%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 47622255/105337248 (45.2093%) +num_minimizer_positions_of_buckets_in_skew_index 5800022/105337248 (5.50615%) +=== step 7.1 (build sparse index): 2.4385 [sec] (1.59912 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 32078766 + partition = 1: num kmers in buckets of size > 128 and <= 256: 16092632 + partition = 2: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2981930 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2815305 + partition = 7: num kmers in buckets of size > 8192 and <= 144478: 7418130 +num kmers in skew index = 75796687 (4.97059%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 32078766 + building MPHF with 64 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 32078766 kmers; bits/key = 2.51437 + built positions[0] for 32078766 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16092632 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 16092632 kmers; bits/key = 2.65398 + built positions[1] for 16092632 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 8174536 kmers; bits/key = 2.61889 + built positions[2] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3454318 kmers; bits/key = 3.14686 + built positions[3] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[4] for 2781070 kmers; bits/key = 2.55987 + built positions[4] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2981930 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2981930)... + built mphs[5] for 2981930 kmers; bits/key = 2.55978 + built positions[5] for 2981930 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2815305 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2815305)... + built mphs[6] for 2815305 kmers; bits/key = 2.55984 + built positions[6] for 2815305 kmers; bits/key = 13.0001 + lower = 8192; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 7418130 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 7418130 kmers; bits/key = 2.72795 + built positions[7] for 7418130 kmers; bits/key = 18.0001 +=== step 7.2 (build skew index): 13.1522 [sec] (8.62496 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 15.8855 [sec] (10.4174 [ns/kmer]) +=== total time: 44.3195 [sec] (29.0638 [ns/kmer]) +total index size: 1294767218 [B] -- 1294.77 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.70902% + strings_offsets: 0.274587 [bits/kmer] -- 4.04242% + control_codewords: 1.34067 [bits/kmer] -- 19.737% + mid_load_buckets: 0.99935 [bits/kmer] -- 14.7122% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.06987e-05% + strings: 3.35283 [bits/kmer] -- 49.3596% + skew_index: 0.709128 [bits/kmer] -- 10.4396% + weights: 9.65307e-07 [bits/kmer] -- 1.4211e-05% + -------------- + total: 6.79265 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 44.58 seconds +Peak RssAnon: 5699240 kB +Peak RssAnon: 5.44 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +2026-03-12 23:24:32: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz'... +read 1000000 sequences, 123618042 bases, 61618042 kmers +read 2000000 sequences, 248006699 bases, 124006699 kmers +read 3000000 sequences, 373165849 bases, 187165849 kmers +read 4000000 sequences, 499216806 bases, 251216806 kmers +read 5000000 sequences, 626504126 bases, 316504126 kmers +read 6000000 sequences, 754844978 bases, 382844978 kmers +read 7000000 sequences, 884611558 bases, 450611558 kmers +read 8000000 sequences, 1015719906 bases, 519719906 kmers +read 9000000 sequences, 1148384128 bases, 590384128 kmers +read 10000000 sequences, 1283305186 bases, 663305186 kmers +read 11000000 sequences, 1420392618 bases, 738392618 kmers +read 12000000 sequences, 1559905103 bases, 815905103 kmers +read 13000000 sequences, 1702344045 bases, 896344045 kmers +read 14000000 sequences, 1848153905 bases, 980153905 kmers +read 15000000 sequences, 1998487611 bases, 1068487611 kmers +read 16000000 sequences, 2153589528 bases, 1161589528 kmers +read 17000000 sequences, 2314472162 bases, 1260472162 kmers +read 18000000 sequences, 2483331066 bases, 1367331066 kmers +read 19000000 sequences, 2661730312 bases, 1483730312 kmers +read 20000000 sequences, 2852409810 bases, 1612409810 kmers +read 21000000 sequences, 3060194564 bases, 1758194564 kmers +read 22000000 sequences, 3290140238 bases, 1926140238 kmers +read 23000000 sequences, 3552570970 bases, 2126570970 kmers +read 24000000 sequences, 3863888905 bases, 2375888905 kmers +read 25000000 sequences, 4253358029 bases, 2703358029 kmers +read 26000000 sequences, 4780487647 bases, 3168487647 kmers +read 27000000 sequences, 5604484526 bases, 3930484526 kmers +read 28000000 sequences, 5925952935 bases, 4189952935 kmers +read 29000000 sequences, 6039783917 bases, 4241783917 kmers +read 30000000 sequences, 6153634902 bases, 4293634902 kmers +read 31000000 sequences, 6267684053 bases, 4345684053 kmers +read 32000000 sequences, 6381788267 bases, 4397788267 kmers +read 33000000 sequences, 6496092541 bases, 4450092541 kmers +read 34000000 sequences, 6610456809 bases, 4502456809 kmers +read 35000000 sequences, 6725025608 bases, 4555025608 kmers +read 36000000 sequences, 6839697388 bases, 4607697388 kmers +read 37000000 sequences, 6954566139 bases, 4660566139 kmers +read 38000000 sequences, 7069620814 bases, 4713620814 kmers +read 39000000 sequences, 7184856392 bases, 4766856392 kmers +read 40000000 sequences, 7300352498 bases, 4820352498 kmers +read 41000000 sequences, 7415987203 bases, 4873987203 kmers +read 42000000 sequences, 7531875755 bases, 4927875755 kmers +read 43000000 sequences, 7647987237 bases, 4981987237 kmers +read 44000000 sequences, 7764325565 bases, 5036325565 kmers +read 45000000 sequences, 7880919196 bases, 5090919196 kmers +read 46000000 sequences, 7997748943 bases, 5145748943 kmers +read 47000000 sequences, 8114852221 bases, 5200852221 kmers +read 48000000 sequences, 8232292777 bases, 5256292777 kmers +read 49000000 sequences, 8349993383 bases, 5311993383 kmers +read 50000000 sequences, 8468086161 bases, 5368086161 kmers +read 51000000 sequences, 8586456588 bases, 5424456588 kmers +read 52000000 sequences, 8705279881 bases, 5481279881 kmers +read 53000000 sequences, 8824571697 bases, 5538571697 kmers +read 54000000 sequences, 8944259928 bases, 5596259928 kmers +read 55000000 sequences, 9064361649 bases, 5654361649 kmers +read 56000000 sequences, 9185024212 bases, 5713024212 kmers +read 57000000 sequences, 9306137968 bases, 5772137968 kmers +read 58000000 sequences, 9427875971 bases, 5831875971 kmers +read 59000000 sequences, 9550182119 bases, 5892182119 kmers +read 59568965 sequences, 9620061299 bases, 5926785469 kmers +num_kmers 5926785469 +cost: 2.0 + 1.2463 [bits/kmer] +max string length = 27681 +num bits per_absolute_offset = 34 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 27.7719 [sec] (4.68583 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.82.bin'... +=== step 2 (compute minimizer tuples): 6.51406 [sec] (1.09909 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 36.0692 [sec] (6.0858 [ns/kmer]) +num_minimizers = 274429863 +num_minimizer_positions = 406062800 +num_super_kmers = 406062800 +building minimizers MPHF with 64 threads and 92 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 11.7587 [sec] (1.98399 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354272303682221.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 13.6257 [sec] (2.29901 [ns/kmer]) +=== step 6 (merging minimizers tuples): 47.7857 [sec] (8.06266 [ns/kmer]) +num_bits_per_offset = 34 +max_list_id = 37640387 +bits_for_list_id = 26 +num_bits_for_control = 35 +num_buckets_larger_than_1_not_in_skew_index 51299487/274429863 (18.6931%) +num_buckets_in_skew_index 131946/274429863 (0.04808%) +max_bucket_size 176220 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 146053805/406062800 (35.9683%) +num_minimizer_positions_of_buckets_in_skew_index 37010565/406062800 (9.11449%) +=== step 7.1 (build sparse index): 9.0767 [sec] (1.53147 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 83316513 + partition = 1: num kmers in buckets of size > 128 and <= 256: 74231522 + partition = 2: num kmers in buckets of size > 256 and <= 512: 67271777 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 57125353 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 46013184 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 37921473 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 28349062 + partition = 7: num kmers in buckets of size > 8192 and <= 176220: 62824185 +num kmers in skew index = 457053069 (7.71165%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 83316513 + building MPHF with 64 threads and 28 partitions (avg. partition size = 3000000)... + built mphs[0] for 83316513 kmers; bits/key = 2.55669 + built positions[0] for 83316513 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 74231522 + building MPHF with 64 threads and 25 partitions (avg. partition size = 3000000)... + built mphs[1] for 74231522 kmers; bits/key = 2.57033 + built positions[1] for 74231522 kmers; bits/key = 8 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 67271777 + building MPHF with 64 threads and 23 partitions (avg. partition size = 3000000)... + built mphs[2] for 67271777 kmers; bits/key = 2.54468 + built positions[2] for 67271777 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 57125353 + building MPHF with 64 threads and 20 partitions (avg. partition size = 3000000)... + built mphs[3] for 57125353 kmers; bits/key = 2.53252 + built positions[3] for 57125353 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 46013184 + building MPHF with 64 threads and 16 partitions (avg. partition size = 3000000)... + built mphs[4] for 46013184 kmers; bits/key = 2.53117 + built positions[4] for 46013184 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 37921473 + building MPHF with 64 threads and 13 partitions (avg. partition size = 3000000)... + built mphs[5] for 37921473 kmers; bits/key = 2.56414 + built positions[5] for 37921473 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 28349062 + building MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[6] for 28349062 kmers; bits/key = 2.56358 + built positions[6] for 28349062 kmers; bits/key = 13 + lower = 8192; upper = 176220; num_bits_per_pos = 18; num_kmers_in_partition = 62824185 + building MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... + built mphs[7] for 62824185 kmers; bits/key = 2.55204 + built positions[7] for 62824185 kmers; bits/key = 18 +=== step 7.2 (build skew index): 59.5719 [sec] (10.0513 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 81.2588% +buckets with 2 minimizer positions = 13.7158% +buckets with 3 minimizer positions = 2.82467% +buckets with 4 minimizer positions = 0.819903% +buckets with 5 minimizer positions = 0.371916% +buckets with 6 minimizer positions = 0.218785% +buckets with 7 minimizer positions = 0.143859% +buckets with 8 minimizer positions = 0.101665% +buckets with 9 minimizer positions = 0.0751737% +buckets with 10 minimizer positions = 0.0573965% +buckets with 11 minimizer positions = 0.0451926% +buckets with 12 minimizer positions = 0.0368258% +buckets with 13 minimizer positions = 0.0299698% +buckets with 14 minimizer positions = 0.025193% +buckets with 15 minimizer positions = 0.0214499% +buckets with 16 minimizer positions = 0.0183803% +max_bucket_size = 176220 +=== step 7 (build sparse and skew index): 69.7098 [sec] (11.7618 [ns/kmer]) +=== total time: 213.235 [sec] (35.9782 [ns/kmer]) +total index size: 5431161974 [B] -- 5431.16 [MB] +SPACE BREAKDOWN: + mphf: 0.131165 [bits/kmer] (2.83273 [bits/key]) -- 1.78918% + strings_offsets: 0.273643 [bits/kmer] -- 3.73268% + control_codewords: 1.62062 [bits/kmer] -- 22.1063% + mid_load_buckets: 0.837862 [bits/kmer] -- 11.429% + begin_buckets_of_size: 3.61748e-07 [bits/kmer] -- 4.93449e-06% + strings: 3.2463 [bits/kmer] -- 44.2818% + skew_index: 1.22142 [bits/kmer] -- 16.661% + weights: 2.48364e-07 [bits/kmer] -- 3.38786e-06% + -------------- + total: 7.33101 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir +Wall-clock: 213.66 seconds +Peak RssAnon: 17533688 kB +Peak RssAnon: 16.72 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:28:05: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz'... +read 954555 sequences, 615768068 bases, 556585658 kmers +num_kmers 556585658 +cost: 2.0 + 0.212662 [bits/kmer] +max string length = 46783 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 16 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.44943 [sec] (2.60415 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.415712 [sec] (0.746897 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.87098 [sec] (3.36154 [ns/kmer]) +num_minimizers = 29275778 +num_minimizer_positions = 34590805 +num_super_kmers = 36307176 +building minimizers MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 1.68562 [sec] (3.02851 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354485981115104.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 1.3209 [sec] (2.37323 [ns/kmer]) +=== step 6 (merging minimizers tuples): 3.06287 [sec] (5.50296 [ns/kmer]) +num_bits_per_offset = 30 +max_list_id = 660184 +bits_for_list_id = 20 +num_bits_for_control = 31 +num_buckets_larger_than_1_not_in_skew_index 1189512/29275778 (4.06313%) +num_buckets_in_skew_index 5032/29275778 (0.0171883%) +max_bucket_size 815743 +log2_max_bucket_size 20 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 4643594/34590805 (13.4244%) +num_minimizer_positions_of_buckets_in_skew_index 1865977/34590805 (5.39443%) +=== step 7.1 (build sparse index): 0.503995 [sec] (0.905512 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4850737 + partition = 1: num kmers in buckets of size > 128 and <= 256: 3133092 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1886121 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1113398 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 945186 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1019479 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 681501 + partition = 7: num kmers in buckets of size > 8192 and <= 815743: 5508330 +num kmers in skew index = 19137844 (3.43844%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4850737 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4850737 kmers; bits/key = 2.71418 + built positions[0] for 4850737 kmers; bits/key = 7.00007 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 3133092 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3133092 kmers; bits/key = 3.15324 + built positions[1] for 3133092 kmers; bits/key = 8.00011 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1886121 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1886121)... + built mphs[2] for 1886121 kmers; bits/key = 2.5602 + built positions[2] for 1886121 kmers; bits/key = 9.00018 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1113398 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1113398)... + built mphs[3] for 1113398 kmers; bits/key = 2.56094 + built positions[3] for 1113398 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 945186 + building MPHF with 64 threads and 1 partitions (avg. partition size = 945186)... + built mphs[4] for 945186 kmers; bits/key = 2.56129 + built positions[4] for 945186 kmers; bits/key = 11.0003 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1019479 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1019479)... + built mphs[5] for 1019479 kmers; bits/key = 2.56115 + built positions[5] for 1019479 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 681501 + building MPHF with 64 threads and 1 partitions (avg. partition size = 681501)... + built mphs[6] for 681501 kmers; bits/key = 2.41941 + built positions[6] for 681501 kmers; bits/key = 13.0005 + lower = 8192; upper = 815743; num_bits_per_pos = 20; num_kmers_in_partition = 5508330 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[7] for 5508330 kmers; bits/key = 2.59546 + built positions[7] for 5508330 kmers; bits/key = 20.0001 +=== step 7.2 (build skew index): 5.64635 [sec] (10.1446 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 95.9197% +buckets with 2 minimizer positions = 2.25505% +buckets with 3 minimizer positions = 0.743143% +buckets with 4 minimizer positions = 0.33953% +buckets with 5 minimizer positions = 0.188914% +buckets with 6 minimizer positions = 0.116602% +buckets with 7 minimizer positions = 0.0799535% +buckets with 8 minimizer positions = 0.0565963% +buckets with 9 minimizer positions = 0.0427691% +buckets with 10 minimizer positions = 0.0324774% +buckets with 11 minimizer positions = 0.0259498% +buckets with 12 minimizer positions = 0.0206382% +buckets with 13 minimizer positions = 0.017038% +buckets with 14 minimizer positions = 0.0148143% +buckets with 15 minimizer positions = 0.0124232% +buckets with 16 minimizer positions = 0.0107973% +max_bucket_size = 815743 +=== step 7 (build sparse and skew index): 6.23714 [sec] (11.2061 [ns/kmer]) +=== total time: 16.0427 [sec] (28.8234 [ns/kmer]) +total index size: 345440542 [B] -- 345.441 [MB] +SPACE BREAKDOWN: + mphf: 0.151285 [bits/kmer] (2.87621 [bits/key]) -- 3.04695% + strings_offsets: 0.115799 [bits/kmer] -- 2.33224% + control_codewords: 1.63057 [bits/kmer] -- 32.8403% + mid_load_buckets: 0.250291 [bits/kmer] -- 5.04096% + begin_buckets_of_size: 3.85206e-06 [bits/kmer] -- 7.75821e-05% + strings: 2.21266 [bits/kmer] -- 44.564% + skew_index: 0.604527 [bits/kmer] -- 12.1754% + weights: 2.6447e-06 [bits/kmer] -- 5.32653e-05% + -------------- + total: 4.96514 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 16.13 seconds +Peak RssAnon: 1509148 kB +Peak RssAnon: 1.44 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:28:22: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz'... +read 155784 sequences, 1164909275 bases, 1155250667 kmers +num_kmers 1155250667 +cost: 2.0 + 0.0167212 [bits/kmer] +max string length = 261876 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 18 +num bits per_string_id = 18 +=== step 1 (encode strings): 2.63507 [sec] (2.28095 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.665638 [sec] (0.576185 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.bin' +=== step 3 (merging minimizer tuples): 3.66864 [sec] (3.17563 [ns/kmer]) +num_minimizers = 68497878 +num_minimizer_positions = 69282395 +num_super_kmers = 73080845 +building minimizers MPHF with 64 threads and 23 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.07425 [sec] (2.66111 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354502129247878.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.63184 [sec] (2.27815 [ns/kmer]) +=== step 6 (merging minimizers tuples): 5.90969 [sec] (5.1155 [ns/kmer]) +num_bits_per_offset = 31 +max_list_id = 202837 +bits_for_list_id = 18 +num_bits_for_control = 32 +num_buckets_larger_than_1_not_in_skew_index 296941/68497878 (0.433504%) +num_buckets_in_skew_index 787/68497878 (0.00114894%) +max_bucket_size 1509 +log2_max_bucket_size 11 +num_partitions in skew index 5 +num_minimizer_positions_of_buckets_larger_than_1 973386/69282395 (1.40495%) +num_minimizer_positions_of_buckets_in_skew_index 108859/69282395 (0.157124%) +=== step 7.1 (build sparse index): 0.64748 [sec] (0.560467 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 854803 + partition = 1: num kmers in buckets of size > 128 and <= 256: 508726 + partition = 2: num kmers in buckets of size > 256 and <= 512: 353088 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 143157 + partition = 4: num kmers in buckets of size > 1024 and <= 1509: 45334 +num kmers in skew index = 1905108 (0.164909%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 854803 + building MPHF with 64 threads and 1 partitions (avg. partition size = 854803)... + built mphs[0] for 854803 kmers; bits/key = 2.5616 + built positions[0] for 854803 kmers; bits/key = 7.00044 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 508726 + building MPHF with 64 threads and 1 partitions (avg. partition size = 508726)... + built mphs[1] for 508726 kmers; bits/key = 2.42032 + built positions[1] for 508726 kmers; bits/key = 8.00066 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 353088 + building MPHF with 64 threads and 1 partitions (avg. partition size = 353088)... + built mphs[2] for 353088 kmers; bits/key = 2.56521 + built positions[2] for 353088 kmers; bits/key = 9.00091 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 143157 + building MPHF with 64 threads and 1 partitions (avg. partition size = 143157)... + built mphs[3] for 143157 kmers; bits/key = 2.4309 + built positions[3] for 143157 kmers; bits/key = 10.0026 + lower = 1024; upper = 1509; num_bits_per_pos = 11; num_kmers_in_partition = 45334 + building MPHF with 64 threads and 1 partitions (avg. partition size = 45334)... + built mphs[4] for 45334 kmers; bits/key = 2.46173 + built positions[4] for 45334 kmers; bits/key = 11.0074 +=== step 7.2 (build skew index): 1.05916 [sec] (0.916822 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.5653% +buckets with 2 minimizer positions = 0.296122% +buckets with 3 minimizer positions = 0.0602092% +buckets with 4 minimizer positions = 0.0251672% +buckets with 5 minimizer positions = 0.0136895% +buckets with 6 minimizer positions = 0.00842362% +buckets with 7 minimizer positions = 0.00572864% +buckets with 8 minimizer positions = 0.00416071% +buckets with 9 minimizer positions = 0.00301323% +buckets with 10 minimizer positions = 0.00248183% +buckets with 11 minimizer positions = 0.00184093% +buckets with 12 minimizer positions = 0.00158691% +buckets with 13 minimizer positions = 0.00117084% +buckets with 14 minimizer positions = 0.00105697% +buckets with 15 minimizer positions = 0.000887619% +buckets with 16 minimizer positions = 0.000756228% +max_bucket_size = 1509 +=== step 7 (build sparse and skew index): 1.86127 [sec] (1.61114 [ns/kmer]) +=== total time: 20.4464 [sec] (17.6987 [ns/kmer]) +total index size: 610013389 [B] -- 610.013 [MB] +SPACE BREAKDOWN: + mphf: 0.16806 [bits/kmer] (2.83441 [bits/key]) -- 3.97842% + strings_offsets: 0.0958242 [bits/kmer] -- 2.26841% + control_codewords: 1.89737 [bits/kmer] -- 44.9157% + mid_load_buckets: 0.0261201 [bits/kmer] -- 0.618333% + begin_buckets_of_size: 1.85587e-06 [bits/kmer] -- 4.39335e-05% + strings: 2.01672 [bits/kmer] -- 47.7411% + skew_index: 0.0201901 [bits/kmer] -- 0.477952% + weights: 1.27418e-06 [bits/kmer] -- 3.01633e-05% + -------------- + total: 4.22428 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 20.55 seconds +Peak RssAnon: 3736512 kB +Peak RssAnon: 3.56 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:28:42: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.80814 [sec] (2.45664 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.47058 [sec] (0.530642 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 9.20648 [sec] (3.32206 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 64 threads and 50 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.26281 [sec] (2.25987 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354522690016211.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.2835 [sec] (2.26733 [ns/kmer]) +=== step 6 (merging minimizers tuples): 17.9343 [sec] (6.4714 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 2392820 +bits_for_list_id = 22 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 4082749/149769567 (2.72602%) +num_buckets_in_skew_index 35781/149769567 (0.0238907%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 16495406/173272792 (9.51991%) +num_minimizer_positions_of_buckets_in_skew_index 11126349/173272792 (6.42129%) +=== step 7.1 (build sparse index): 2.23867 [sec] (0.807801 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 26742724 + partition = 1: num kmers in buckets of size > 128 and <= 256: 24475836 + partition = 2: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 15443443 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 12315344 + partition = 7: num kmers in buckets of size > 8192 and <= 284250: 28690575 +num kmers in skew index = 166030448 (5.99103%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26742724 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 26742724 kmers; bits/key = 2.56429 + built positions[0] for 26742724 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 24475836 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 24475836 kmers; bits/key = 2.62316 + built positions[1] for 24475836 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[2] for 21113117 kmers; bits/key = 2.6904 + built positions[2] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 19260150 kmers; bits/key = 2.59757 + built positions[3] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17989259 kmers; bits/key = 2.53715 + built positions[4] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 15443443 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[5] for 15443443 kmers; bits/key = 2.69254 + built positions[5] for 15443443 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 12315344 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[6] for 12315344 kmers; bits/key = 2.67895 + built positions[6] for 12315344 kmers; bits/key = 13 + lower = 8192; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 28690575 + building MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[7] for 28690575 kmers; bits/key = 2.55295 + built positions[7] for 28690575 kmers; bits/key = 19 +=== step 7.2 (build skew index): 20.3408 [sec] (7.33976 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 23.0008 [sec] (8.29958 [ns/kmer]) +=== total time: 70.9666 [sec] (25.6075 [ns/kmer]) +total index size: 1839839296 [B] -- 1839.84 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.87406% + strings_offsets: 0.11255 [bits/kmer] -- 2.11915% + control_codewords: 1.78341 [bits/kmer] -- 33.579% + mid_load_buckets: 0.19047 [bits/kmer] -- 3.58627% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.45665e-05% + strings: 2.11826 [bits/kmer] -- 39.8836% + skew_index: 0.95376 [bits/kmer] -- 17.9579% + weights: 5.31156e-07 [bits/kmer] -- 1.00009e-05% + -------------- + total: 5.31109 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 71.26 seconds +Peak RssAnon: 7519272 kB +Peak RssAnon: 7.17 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:29:53: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz'... +read 800231 sequences, 462130202 bases, 412515880 kmers +num_kmers 412515880 +cost: 2.0 + 0.240545 [bits/kmer] +max string length = 490374 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 19 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.11738 [sec] (2.7087 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.47132 [sec] (1.14255 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.36725 [sec] (3.31441 [ns/kmer]) +num_minimizers = 21966429 +num_minimizer_positions = 25033260 +num_super_kmers = 26367160 +building minimizers MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 1.29985 [sec] (3.15104 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354593964091768.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 0.989663 [sec] (2.39909 [ns/kmer]) +=== step 6 (merging minimizers tuples): 2.11013 [sec] (5.11526 [ns/kmer]) +num_bits_per_offset = 29 +max_list_id = 1207312 +bits_for_list_id = 21 +num_bits_for_control = 30 +num_buckets_larger_than_1_not_in_skew_index 1786727/21966429 (8.1339%) +num_buckets_in_skew_index 26/21966429 (0.000118362%) +max_bucket_size 442 +log2_max_bucket_size 9 +num_partitions in skew index 3 +num_minimizer_positions_of_buckets_larger_than_1 4850053/25033260 (19.3744%) +num_minimizer_positions_of_buckets_in_skew_index 3531/25033260 (0.0141052%) +=== step 7.1 (build sparse index): 0.446954 [sec] (1.08348 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 26291 + partition = 1: num kmers in buckets of size > 128 and <= 256: 10791 + partition = 2: num kmers in buckets of size > 256 and <= 442: 19798 +num kmers in skew index = 56880 (0.0137886%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26291 + building MPHF with 64 threads and 1 partitions (avg. partition size = 26291)... + built mphs[0] for 26291 kmers; bits/key = 2.35214 + built positions[0] for 26291 kmers; bits/key = 7.0132 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10791 + building MPHF with 64 threads and 1 partitions (avg. partition size = 10791)... + built mphs[1] for 10791 kmers; bits/key = 2.61255 + built positions[1] for 10791 kmers; bits/key = 8.0304 + lower = 256; upper = 442; num_bits_per_pos = 9; num_kmers_in_partition = 19798 + building MPHF with 64 threads and 1 partitions (avg. partition size = 19798)... + built mphs[2] for 19798 kmers; bits/key = 2.5247 + built positions[2] for 19798 kmers; bits/key = 9.01909 +=== step 7.2 (build skew index): 0.0534 [sec] (0.12945 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 91.866% +buckets with 2 minimizer positions = 5.49617% +buckets with 3 minimizer positions = 1.46125% +buckets with 4 minimizer positions = 0.538881% +buckets with 5 minimizer positions = 0.252859% +buckets with 6 minimizer positions = 0.135052% +buckets with 7 minimizer positions = 0.0790661% +buckets with 8 minimizer positions = 0.0489884% +buckets with 9 minimizer positions = 0.031712% +buckets with 10 minimizer positions = 0.0217832% +buckets with 11 minimizer positions = 0.0157968% +buckets with 12 minimizer positions = 0.0113628% +buckets with 13 minimizer positions = 0.00857672% +buckets with 14 minimizer positions = 0.00628231% +buckets with 15 minimizer positions = 0.00510779% +buckets with 16 minimizer positions = 0.00396969% +max_bucket_size = 442 +=== step 7 (build sparse and skew index): 0.573345 [sec] (1.38987 [ns/kmer]) +=== total time: 7.92894 [sec] (19.2209 [ns/kmer]) +total index size: 229841550 [B] -- 229.842 [MB] +SPACE BREAKDOWN: + mphf: 0.157467 [bits/kmer] (2.95713 [bits/key]) -- 3.53273% + strings_offsets: 0.119202 [bits/kmer] -- 2.67427% + control_codewords: 1.5975 [bits/kmer] -- 35.8395% + mid_load_buckets: 0.340961 [bits/kmer] -- 7.64939% + begin_buckets_of_size: 5.19738e-06 [bits/kmer] -- 0.000116602% + strings: 2.24055 [bits/kmer] -- 50.2662% + skew_index: 0.00167864 [bits/kmer] -- 0.0376599% + weights: 3.56835e-06 [bits/kmer] -- 8.00552e-05% + -------------- + total: 4.45736 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 8.02 seconds +Peak RssAnon: 1339116 kB +Peak RssAnon: 1.28 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:30:01: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.20269 [sec] (4.72337 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.07252 [sec] (0.703337 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 7.29204 [sec] (4.78197 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 64 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.27801 [sec] (2.14965 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354601996505215.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.22848 [sec] (2.77294 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.9966 [sec] (7.21133 [ns/kmer]) +num_bits_per_offset = 32 +max_list_id = 6384394 +bits_for_list_id = 23 +num_bits_for_control = 33 +num_buckets_larger_than_1_not_in_skew_index 12190335/69577229 (17.5206%) +num_buckets_in_skew_index 86973/69577229 (0.125002%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 57511599/126350163 (45.5176%) +num_minimizer_positions_of_buckets_in_skew_index 11538643/126350163 (9.13227%) +=== step 7.1 (build sparse index): 3.12428 [sec] (2.04884 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 77399356 + partition = 1: num kmers in buckets of size > 128 and <= 256: 26776297 + partition = 2: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2527517 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2420664 + partition = 7: num kmers in buckets of size > 8192 and <= 245177: 10756294 +num kmers in skew index = 143622784 (9.41848%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 77399356 + building MPHF with 64 threads and 26 partitions (avg. partition size = 3000000)... + built mphs[0] for 77399356 kmers; bits/key = 2.54863 + built positions[0] for 77399356 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26776297 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26776297 kmers; bits/key = 2.54559 + built positions[1] for 26776297 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 13974034 kmers; bits/key = 2.56375 + built positions[2] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 6504754 kmers; bits/key = 2.78902 + built positions[3] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 3263868 kmers; bits/key = 3.30619 + built positions[4] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2527517 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2527517)... + built mphs[5] for 2527517 kmers; bits/key = 2.55992 + built positions[5] for 2527517 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2420664 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2420664)... + built mphs[6] for 2420664 kmers; bits/key = 2.55997 + built positions[6] for 2420664 kmers; bits/key = 13.0001 + lower = 8192; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 10756294 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[7] for 10756294 kmers; bits/key = 2.64819 + built positions[7] for 10756294 kmers; bits/key = 18 +=== step 7.2 (build skew index): 21.593 [sec] (14.1602 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 25.0858 [sec] (16.4507 [ns/kmer]) +=== total time: 59.1561 [sec] (38.7933 [ns/kmer]) +total index size: 1481048960 [B] -- 1481.05 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.7032% + strings_offsets: 0.274587 [bits/kmer] -- 3.53397% + control_codewords: 1.5057 [bits/kmer] -- 19.3786% + mid_load_buckets: 1.20688 [bits/kmer] -- 15.5327% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.80953e-05% + strings: 3.35283 [bits/kmer] -- 43.1513% + skew_index: 1.2976 [bits/kmer] -- 16.7002% + weights: 9.65307e-07 [bits/kmer] -- 1.24236e-05% + -------------- + total: 7.76993 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 59.42 seconds +Peak RssAnon: 9646128 kB +Peak RssAnon: 9.20 GB +================================================== + +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +2026-03-12 23:31:01: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz'... +read 1000000 sequences, 123618042 bases, 61618042 kmers +read 2000000 sequences, 248006699 bases, 124006699 kmers +read 3000000 sequences, 373165849 bases, 187165849 kmers +read 4000000 sequences, 499216806 bases, 251216806 kmers +read 5000000 sequences, 626504126 bases, 316504126 kmers +read 6000000 sequences, 754844978 bases, 382844978 kmers +read 7000000 sequences, 884611558 bases, 450611558 kmers +read 8000000 sequences, 1015719906 bases, 519719906 kmers +read 9000000 sequences, 1148384128 bases, 590384128 kmers +read 10000000 sequences, 1283305186 bases, 663305186 kmers +read 11000000 sequences, 1420392618 bases, 738392618 kmers +read 12000000 sequences, 1559905103 bases, 815905103 kmers +read 13000000 sequences, 1702344045 bases, 896344045 kmers +read 14000000 sequences, 1848153905 bases, 980153905 kmers +read 15000000 sequences, 1998487611 bases, 1068487611 kmers +read 16000000 sequences, 2153589528 bases, 1161589528 kmers +read 17000000 sequences, 2314472162 bases, 1260472162 kmers +read 18000000 sequences, 2483331066 bases, 1367331066 kmers +read 19000000 sequences, 2661730312 bases, 1483730312 kmers +read 20000000 sequences, 2852409810 bases, 1612409810 kmers +read 21000000 sequences, 3060194564 bases, 1758194564 kmers +read 22000000 sequences, 3290140238 bases, 1926140238 kmers +read 23000000 sequences, 3552570970 bases, 2126570970 kmers +read 24000000 sequences, 3863888905 bases, 2375888905 kmers +read 25000000 sequences, 4253358029 bases, 2703358029 kmers +read 26000000 sequences, 4780487647 bases, 3168487647 kmers +read 27000000 sequences, 5604484526 bases, 3930484526 kmers +read 28000000 sequences, 5925952935 bases, 4189952935 kmers +read 29000000 sequences, 6039783917 bases, 4241783917 kmers +read 30000000 sequences, 6153634902 bases, 4293634902 kmers +read 31000000 sequences, 6267684053 bases, 4345684053 kmers +read 32000000 sequences, 6381788267 bases, 4397788267 kmers +read 33000000 sequences, 6496092541 bases, 4450092541 kmers +read 34000000 sequences, 6610456809 bases, 4502456809 kmers +read 35000000 sequences, 6725025608 bases, 4555025608 kmers +read 36000000 sequences, 6839697388 bases, 4607697388 kmers +read 37000000 sequences, 6954566139 bases, 4660566139 kmers +read 38000000 sequences, 7069620814 bases, 4713620814 kmers +read 39000000 sequences, 7184856392 bases, 4766856392 kmers +read 40000000 sequences, 7300352498 bases, 4820352498 kmers +read 41000000 sequences, 7415987203 bases, 4873987203 kmers +read 42000000 sequences, 7531875755 bases, 4927875755 kmers +read 43000000 sequences, 7647987237 bases, 4981987237 kmers +read 44000000 sequences, 7764325565 bases, 5036325565 kmers +read 45000000 sequences, 7880919196 bases, 5090919196 kmers +read 46000000 sequences, 7997748943 bases, 5145748943 kmers +read 47000000 sequences, 8114852221 bases, 5200852221 kmers +read 48000000 sequences, 8232292777 bases, 5256292777 kmers +read 49000000 sequences, 8349993383 bases, 5311993383 kmers +read 50000000 sequences, 8468086161 bases, 5368086161 kmers +read 51000000 sequences, 8586456588 bases, 5424456588 kmers +read 52000000 sequences, 8705279881 bases, 5481279881 kmers +read 53000000 sequences, 8824571697 bases, 5538571697 kmers +read 54000000 sequences, 8944259928 bases, 5596259928 kmers +read 55000000 sequences, 9064361649 bases, 5654361649 kmers +read 56000000 sequences, 9185024212 bases, 5713024212 kmers +read 57000000 sequences, 9306137968 bases, 5772137968 kmers +read 58000000 sequences, 9427875971 bases, 5831875971 kmers +read 59000000 sequences, 9550182119 bases, 5892182119 kmers +read 59568965 sequences, 9620061299 bases, 5926785469 kmers +num_kmers 5926785469 +cost: 2.0 + 1.2463 [bits/kmer] +max string length = 27681 +num bits per_absolute_offset = 34 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 26.6761 [sec] (4.50093 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.90.bin'... +=== step 2 (compute minimizer tuples): 10.8136 [sec] (1.82452 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 38.6367 [sec] (6.519 [ns/kmer]) +num_minimizers = 295344565 +num_minimizer_positions = 485764487 +num_super_kmers = 507036670 +building minimizers MPHF with 64 threads and 99 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 12.8853 [sec] (2.17409 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 16.5311 [sec] (2.78922 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1773354661433141535.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 70.2684 [sec] (11.8561 [ns/kmer]) +num_bits_per_offset = 34 +max_list_id = 51830759 +bits_for_list_id = 26 +num_bits_for_control = 35 +num_buckets_larger_than_1_not_in_skew_index 76095635/295344565 (25.765%) +num_buckets_in_skew_index 164193/295344565 (0.0555937%) +max_bucket_size 265182 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 218504008/485764487 (44.9815%) +num_minimizer_positions_of_buckets_in_skew_index 48175742/485764487 (9.91751%) +=== step 7.1 (build sparse index): 12.9437 [sec] (2.18394 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 91237805 + partition = 1: num kmers in buckets of size > 128 and <= 256: 81089595 + partition = 2: num kmers in buckets of size > 256 and <= 512: 71244043 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 64999827 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 55340829 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 44026748 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 33883659 + partition = 7: num kmers in buckets of size > 8192 and <= 265182: 85368523 +num kmers in skew index = 527191029 (8.89506%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 91237805 + building MPHF with 64 threads and 31 partitions (avg. partition size = 3000000)... + built mphs[0] for 91237805 kmers; bits/key = 2.54472 + built positions[0] for 91237805 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 81089595 + building MPHF with 64 threads and 28 partitions (avg. partition size = 3000000)... + built mphs[1] for 81089595 kmers; bits/key = 2.56263 + built positions[1] for 81089595 kmers; bits/key = 8 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 71244043 + building MPHF with 64 threads and 24 partitions (avg. partition size = 3000000)... + built mphs[2] for 71244043 kmers; bits/key = 2.56439 + built positions[2] for 71244043 kmers; bits/key = 9 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 64999827 + building MPHF with 64 threads and 22 partitions (avg. partition size = 3000000)... + built mphs[3] for 64999827 kmers; bits/key = 2.54652 + built positions[3] for 64999827 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 55340829 + building MPHF with 64 threads and 19 partitions (avg. partition size = 3000000)... + built mphs[4] for 55340829 kmers; bits/key = 2.53877 + built positions[4] for 55340829 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 44026748 + building MPHF with 64 threads and 15 partitions (avg. partition size = 3000000)... + built mphs[5] for 44026748 kmers; bits/key = 2.53894 + built positions[5] for 44026748 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 33883659 + building MPHF with 64 threads and 12 partitions (avg. partition size = 3000000)... + built mphs[6] for 33883659 kmers; bits/key = 2.5671 + built positions[6] for 33883659 kmers; bits/key = 13 + lower = 8192; upper = 265182; num_bits_per_pos = 19; num_kmers_in_partition = 85368523 + building MPHF with 64 threads and 29 partitions (avg. partition size = 3000000)... + built mphs[7] for 85368523 kmers; bits/key = 2.56049 + built positions[7] for 85368523 kmers; bits/key = 19 +=== step 7.2 (build skew index): 70.9716 [sec] (11.9747 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 74.1794% +buckets with 2 minimizer positions = 17.5493% +buckets with 3 minimizer positions = 4.9171% +buckets with 4 minimizer positions = 1.47166% +buckets with 5 minimizer positions = 0.573611% +buckets with 6 minimizer positions = 0.301038% +buckets with 7 minimizer positions = 0.190827% +buckets with 8 minimizer positions = 0.133018% +buckets with 9 minimizer positions = 0.0984907% +buckets with 10 minimizer positions = 0.0753777% +buckets with 11 minimizer positions = 0.0590937% +buckets with 12 minimizer positions = 0.0475793% +buckets with 13 minimizer positions = 0.0389636% +buckets with 14 minimizer positions = 0.0324661% +buckets with 15 minimizer positions = 0.0273305% +buckets with 16 minimizer positions = 0.0232809% +max_bucket_size = 265182 +=== step 7 (build sparse and skew index): 85.3278 [sec] (14.397 [ns/kmer]) +=== total time: 261.139 [sec] (44.0608 [ns/kmer]) +total index size: 6028576020 [B] -- 6028.58 [MB] +SPACE BREAKDOWN: + mphf: 0.141443 [bits/kmer] (2.83839 [bits/key]) -- 1.73819% + strings_offsets: 0.273643 [bits/kmer] -- 3.36278% + control_codewords: 1.74413 [bits/kmer] -- 21.4335% + mid_load_buckets: 1.25348 [bits/kmer] -- 15.404% + begin_buckets_of_size: 3.61748e-07 [bits/kmer] -- 4.44549e-06% + strings: 3.2463 [bits/kmer] -- 39.8936% + skew_index: 1.4784 [bits/kmer] -- 18.168% + weights: 2.48364e-07 [bits/kmer] -- 3.05213e-06% + -------------- + total: 8.1374 [bits/kmer] +[TRACKER] Launching: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical + +================================================== + TRUE MEMORY USAGE REPORT (mmap ignored) +================================================== +Command: ./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir --canonical +Wall-clock: 261.68 seconds +Peak RssAnon: 21235956 kB +Peak RssAnon: 20.25 GB +================================================== + diff --git a/script/rss_anon.py b/script/rss_anon.py new file mode 100644 index 0000000..7632806 --- /dev/null +++ b/script/rss_anon.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +import subprocess +import time +import sys +import re + +def main(): + if len(sys.argv) < 2: + print("Usage: python3 track_rss.py [args...]") + sys.exit(1) + + cmd = sys.argv[1:] + print(f"[TRACKER] Launching: {' '.join(cmd)}") + + # Start the process + start_time = time.time() + proc = subprocess.Popen(cmd) + pid = proc.pid + + max_rss_anon_kb = 0 + + # Regex to find the RssAnon line in the status file + rss_anon_pattern = re.compile(r'RssAnon:\s+(\d+)\s+kB') + + # Poll the process status while it is running + while proc.poll() is None: + try: + with open(f"/proc/{pid}/status", "r") as f: + content = f.read() + match = rss_anon_pattern.search(content) + if match: + current_rss_anon = int(match.group(1)) + if current_rss_anon > max_rss_anon_kb: + max_rss_anon_kb = current_rss_anon + except FileNotFoundError: + # The process finished and the /proc/[pid] directory is gone + break + except Exception as e: + pass + + # Check every 0.1 seconds to catch spikes without burning CPU + time.sleep(0.1) + + # Calculate elapsed time + elapsed_time = time.time() - start_time + + # Convert peak RssAnon to Gigabytes + max_rss_anon_gb = max_rss_anon_kb / (1024 * 1024) + + print("\n" + "="*50) + print(" TRUE MEMORY USAGE REPORT (mmap ignored)") + print("="*50) + print(f"Command: {' '.join(cmd)}") + print(f"Wall-clock: {elapsed_time:.2f} seconds") + print(f"Peak RssAnon: {max_rss_anon_kb} kB") + print(f"Peak RssAnon: {max_rss_anon_gb:.2f} GB") + print("="*50 + "\n") + + sys.exit(proc.returncode) + +if __name__ == "__main__": + main() diff --git a/script/sweep-m.py b/script/sweep-m.py new file mode 100644 index 0000000..2110f92 --- /dev/null +++ b/script/sweep-m.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +import os +import subprocess +import sys +from pathlib import Path + +if len(sys.argv) < 4: + print("Usage: python3 sweep-m.py ") + sys.exit(1) + +log_label = sys.argv[1] +datasets_dir = Path(sys.argv[2]).resolve() +index_dir = Path(sys.argv[3]).resolve() +tmp_dir = datasets_dir / "tmp_dir" +results_dir = Path(f"results-{log_label}") +threads = 16 +g = 16 + +# 1. Target datasets +datasets = ["human", "se"] + +# 2. Define the sweeps for m (minimizer length) +m_sweeps_k31 = { + "human": [17, 19, 21, 23, 25], + "se": [17, 19, 21, 23, 25] +} + +m_sweeps_k63 = { + "human": [21, 23, 25, 27, 29], + "se": [23, 25, 27, 29, 31] +} + +# --- Utilities --- +def run_cmd(cmd, cwd=None, append_to=None): + print(f"[RUN] {' '.join(cmd)}") + if append_to: + with open(append_to, "a") as f: + subprocess.run(cmd, cwd=cwd, stdout=f, stderr=f, check=True) + else: + subprocess.run(cmd, cwd=cwd, check=True) + +def build_project(max_k63: bool): + flag = "On" if max_k63 else "Off" + print(f"\n=== Building SSHASH (MAX_KMER_LENGTH_63={flag}) ===\n") + run_cmd([ + "cmake", "..", + "-DCMAKE_BUILD_TYPE=Release", + "-DCMAKE_CXX_COMPILER=/usr/bin/g++", + "-DSSHASH_USE_ARCH_NATIVE=On", + "-DSSHASH_USE_SANITIZERS=Off", + f"-DSSHASH_USE_MAX_KMER_LENGTH_63={flag}" + ]) + run_cmd(["make", "-j"]) + +def build_sshash(k, canonical, dataset, m_val): + # Differentiate results dir by m_val + mode_dir = results_dir / f"m{m_val}" / f"k{k}" + mode_dir.mkdir(parents=True, exist_ok=True) + + mode = "canon" if canonical else "regular" + log_file = mode_dir / f"{mode}-build.log" + json_file = mode_dir / f"{mode}-build.json" + time_file = mode_dir / f"{mode}-build.time.log" + + input_file = datasets_dir / f"{dataset}.k{k}.eulertigs.fa.gz" + + # Append m_val to the output filename + output_file = index_dir / f"{dataset}.k{k}.m{m_val}" + if canonical: + output_file = str(output_file) + ".canon" + + print(f"\n>>> Building {dataset} (k={k}, m={m_val}, mode={mode})\n") + + # Clean tmp directory (should be empty after each build anyway) + subprocess.run(f"rm -rf {tmp_dir}/*", shell=True, check=True) + + cmd = [ + "/usr/bin/time", "-v", "-a", "-o", str(time_file), + "./sshash", "build", + "-i", str(input_file), + "-k", str(k), + "-m", str(m_val), + "-g", str(g), + "-t", str(threads), + "--verbose", + "-d", str(tmp_dir), + "-o", f"{output_file}.sshash" + ] + if canonical: + cmd.append("--canonical") + + # Append stdout to .log, stderr to .json + with open(log_file, "a") as log, open(json_file, "a") as js: + subprocess.run(cmd, stdout=log, stderr=js, check=True) + +def run_bench(k, canonical, dataset, m_val, runs=3): + """Run SSHASH benchmark for a specific dataset and m.""" + mode = "canon" if canonical else "regular" + + # Store results in the specific m_val / k folder + out_dir = results_dir / f"m{m_val}" / f"k{k}" + out_dir.mkdir(parents=True, exist_ok=True) + log_file = out_dir / f"{mode}-bench.log" + json_file = out_dir / f"{mode}-bench.json" + + # Match the new index naming scheme that includes m_val + suffix = f".k{k}.m{m_val}.canon.sshash" if canonical else f".k{k}.m{m_val}.sshash" + index_path = index_dir / f"{dataset}{suffix}" + + print(f"\n>>> Benchmarking {dataset} (k={k}, m={m_val}, mode={mode})\n") + for i in range(runs): + print(f" ==> run {i+1}/{runs}") + cmd = ["./sshash", "bench", "-i", str(index_path)] + # Append stdout to .log, stderr to .json + with open(log_file, "a") as log, open(json_file, "a") as js: + subprocess.run(cmd, stdout=log, stderr=js, check=True) + + +# --- Main pipeline --- +index_dir.mkdir(parents=True, exist_ok=True) +results_dir.mkdir(parents=True, exist_ok=True) +tmp_dir.mkdir(parents=True, exist_ok=True) + +print("\n=======================================================") +print(" STARTING SWEEP FOR m (MINIMIZER LENGTH) ") +print("=======================================================\n") + +# --- k = 31 Sweep --- +build_project(max_k63=False) + +for dataset in datasets: + for current_m in m_sweeps_k31[dataset]: + # Regular + build_sshash(31, False, dataset, current_m) + run_bench(31, False, dataset, current_m) + + # Canonical + build_sshash(31, True, dataset, current_m) + run_bench(31, True, dataset, current_m) + +# --- k = 63 Sweep --- +build_project(max_k63=True) + +for dataset in datasets: + for current_m in m_sweeps_k63[dataset]: + # Regular + build_sshash(63, False, dataset, current_m) + run_bench(63, False, dataset, current_m) + + # Canonical + build_sshash(63, True, dataset, current_m) + run_bench(63, True, dataset, current_m) + +# Restore default compilation at the end +print("\nRestoring default compilation (max_k63=False)...") +build_project(max_k63=False) + +print("\n All SSHash indexes built and benchmarked successfully across varying 'm' values. \n") diff --git a/script/sweep-min-l.py b/script/sweep-min-l.py new file mode 100644 index 0000000..f16729a --- /dev/null +++ b/script/sweep-min-l.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 + +import os +import re +import subprocess +import sys +from pathlib import Path + +if len(sys.argv) < 4: + print("Usage: python3 sweep-min-l.py ") + sys.exit(1) + +log_label = sys.argv[1] +datasets_dir = Path(sys.argv[2]).resolve() +index_dir = Path(sys.argv[3]).resolve() +tmp_dir = datasets_dir / "tmp_dir" +results_dir = Path(f"results-{log_label}") +threads = 16 +g = 16 + +# 1. Target only human and se datasets +datasets = [ + "human", "se" +] + +m_values_k31 = { + "human": 21, "se": 21 +} + +m_values_k63 = { + "human": 25, "se": 31 +} + +# Values of min_l to benchmark +l_values_to_test = [4, 5, 6, 7, 8] + +# Assuming script is run from the `build` directory +constants_hpp_path = Path("../include/constants.hpp").resolve() + +# --- Utilities --- +def run_cmd(cmd, cwd=None, append_to=None): + print(f"[RUN] {' '.join(cmd)}") + if append_to: + with open(append_to, "a") as f: + subprocess.run(cmd, cwd=cwd, stdout=f, stderr=f, check=True) + else: + subprocess.run(cmd, cwd=cwd, check=True) + +def update_constants_hpp(min_l): + """ + Updates the min_l and max_l values in include/constants.hpp. + max_l is set to min_l + 7 to satisfy the static_assert constraints. + """ + if not constants_hpp_path.exists(): + print(f"Error: Could not find {constants_hpp_path}") + sys.exit(1) + + with open(constants_hpp_path, 'r') as f: + content = f.read() + + # Update min_l + content = re.sub(r'constexpr uint64_t min_l = \d+;', + f'constexpr uint64_t min_l = {min_l};', + content) + + # Update max_l safely + max_l = min_l + 7 + content = re.sub(r'constexpr uint64_t max_l = \d+;', + f'constexpr uint64_t max_l = {max_l};', + content) + + with open(constants_hpp_path, 'w') as f: + f.write(content) + + print(f"\n[CONFIG] Updated {constants_hpp_path.name} -> min_l = {min_l}, max_l = {max_l}") + +def build_project(max_k63: bool): + flag = "On" if max_k63 else "Off" + print(f"\n=== Building SSHASH (MAX_KMER_LENGTH_63={flag}) ===\n") + run_cmd([ + "cmake", "..", + "-DCMAKE_BUILD_TYPE=Release", + "-DCMAKE_CXX_COMPILER=/usr/bin/g++", + "-DSSHASH_USE_ARCH_NATIVE=On", + "-DSSHASH_USE_SANITIZERS=Off", + f"-DSSHASH_USE_MAX_KMER_LENGTH_63={flag}" + ]) + run_cmd(["make", "-j"]) + +def build_sshash(k, canonical, m_values, l_val): + # Differentiate results dir by l_val + mode_dir = results_dir / f"l{l_val}" / f"k{k}" + mode_dir.mkdir(parents=True, exist_ok=True) + + mode = "canon" if canonical else "regular" + log_file = mode_dir / f"{mode}-build.log" + json_file = mode_dir / f"{mode}-build.json" + time_file = mode_dir / f"{mode}-build.time.log" + + for dataset in datasets: + m_val = m_values[dataset] + input_file = datasets_dir / f"{dataset}.k{k}.eulertigs.fa.gz" + + # 2. Append l_val to the output filename + output_file = index_dir / f"{dataset}.k{k}.l{l_val}" + if canonical: + output_file = str(output_file) + ".canon" + + print(f"\n>>> Building {dataset} (k={k}, m={m_val}, l={l_val}, mode={mode})\n") + + # Clean tmp directory (should be empty after each build anyway) + subprocess.run(f"rm -rf {tmp_dir}/*", shell=True, check=True) + + cmd = [ + "/usr/bin/time", "-v", "-a", "-o", str(time_file), + "./sshash", "build", + "-i", str(input_file), + "-k", str(k), + "-m", str(m_val), + "-g", str(g), + "-t", str(threads), + "--verbose", + "-d", str(tmp_dir), + "-o", f"{output_file}.sshash" + ] + if canonical: + cmd.append("--canonical") + + # Append stdout to .log, stderr to .json + with open(log_file, "a") as log, open(json_file, "a") as js: + subprocess.run(cmd, stdout=log, stderr=js, check=True) + +def run_bench(k, canonical, l_val, runs=3): + """Run SSHASH benchmark for all datasets.""" + mode = "canon" if canonical else "regular" + + # Store results in the specific l_val / k folder + out_dir = results_dir / f"l{l_val}" / f"k{k}" + out_dir.mkdir(parents=True, exist_ok=True) + log_file = out_dir / f"{mode}-bench.log" + json_file = out_dir / f"{mode}-bench.json" + + for dataset in datasets: + # Match the new index naming scheme that includes l_val + suffix = f".k{k}.l{l_val}.canon.sshash" if canonical else f".k{k}.l{l_val}.sshash" + index_path = index_dir / f"{dataset}{suffix}" + + print(f"\n>>> Benchmarking {dataset} (k={k}, l={l_val}, mode={mode})\n") + for i in range(runs): + print(f" ==> run {i+1}/{runs}") + cmd = ["./sshash", "bench", "-i", str(index_path)] + # Append stdout to .log, stderr to .json + with open(log_file, "a") as log, open(json_file, "a") as js: + subprocess.run(cmd, stdout=log, stderr=js, check=True) + + +# --- Main pipeline --- +index_dir.mkdir(parents=True, exist_ok=True) +results_dir.mkdir(parents=True, exist_ok=True) + +for current_l in l_values_to_test: + print(f"\n=======================================================") + print(f" STARTING SWEEP FOR min_l = {current_l}") + print(f"=======================================================\n") + + # Update the header file + update_constants_hpp(current_l) + + # Build and benchmark for k = 31 (Regular) + build_project(max_k63=False) + build_sshash(31, False, m_values_k31, current_l) + run_bench(31, False, current_l) + + # Build and benchmark for k = 31 (Canonical) + build_sshash(31, True, m_values_k31, current_l) + run_bench(31, True, current_l) + + # Build and benchmark for k = 63 (Regular) + build_project(max_k63=True) + build_sshash(63, False, m_values_k63, current_l) + run_bench(63, False, current_l) + + # Build and benchmark for k = 63 (Canonical) + build_sshash(63, True, m_values_k63, current_l) + run_bench(63, True, current_l) + +# Restore default constants file at the end +print("\nRestoring default constants.hpp (min_l = 6)...") +update_constants_hpp(6) +build_project(max_k63=False) + +print("\n All SSHash indexes built and benchmarked successfully across varying 'min_l' values. \n") \ No newline at end of file From b34177127aa4d066e03a45faf324c7f312e0d09c Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Mon, 16 Mar 2026 21:54:41 +0100 Subject: [PATCH 2/9] more scripts --- script/plot-trade-off-l.py | 175 +++++++++++++++++++++++++++++++++++ script/plot-trade-off-m.py | 182 +++++++++++++++++++++++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 script/plot-trade-off-l.py create mode 100644 script/plot-trade-off-m.py diff --git a/script/plot-trade-off-l.py b/script/plot-trade-off-l.py new file mode 100644 index 0000000..d6ceeca --- /dev/null +++ b/script/plot-trade-off-l.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 + +import os +import sys +import json +import re +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.ticker import MultipleLocator + +def parse_results(results_dir): + """ + Traverses the results directory and extracts space and time metrics. + Calculates bits/k-mer from bytes and num_kmers. + Extracts dataset name from filenames. + """ + results_dir = Path(results_dir) + data = [] + + # Regex to extract l and k from folder names + l_pattern = re.compile(r'l(\d+)') + k_pattern = re.compile(r'k(\d+)') + + for l_dir in results_dir.glob('l*'): + l_match = l_pattern.search(l_dir.name) + if not l_match: continue + l_val = int(l_match.group(1)) + + for k_dir in l_dir.glob('k*'): + k_match = k_pattern.search(k_dir.name) + if not k_match: continue + k_val = int(k_match.group(1)) + + for mode in ['regular', 'canon']: + build_json = k_dir / f"{mode}-build.json" + bench_json = k_dir / f"{mode}-bench.json" + + if not build_json.exists() or not bench_json.exists(): + continue + + # 1. Parse Build JSON for Space (bits/k-mer) + space_dict = {} + with open(build_json, 'r') as f: + for line in f: + try: + j = json.loads(line) + + # Extract dataset name from "/mnt/.../human.k31.eulertigs.fa.gz" + filename = os.path.basename(j.get("input_filename", "")) + ds = filename.split('.')[0] if filename else "unknown" + + if "index_size_in_bytes" in j and "num_kmers" in j: + bytes_size = float(j["index_size_in_bytes"]) + num_kmers = float(j["num_kmers"]) + # Calculate bits per k-mer + bits_per_kmer = (bytes_size * 8.0) / num_kmers + space_dict[ds] = bits_per_kmer + except json.JSONDecodeError: + continue + + # 2. Parse Bench JSON for Query Time (ns/kmer) + time_dict = {} + count_dict = {} + with open(bench_json, 'r') as f: + for line in f: + try: + j = json.loads(line) + + # Extract dataset name from ".../human.k31.l4.sshash" + filename = os.path.basename(j.get("index_filename", "")) + ds = filename.split('.')[0] if filename else "unknown" + + # Use positive lookup time + t_str = j.get("positive lookup (avg_nanosec_per_kmer)") + if t_str is not None: + t = float(t_str) + time_dict[ds] = time_dict.get(ds, 0.0) + t + count_dict[ds] = count_dict.get(ds, 0) + 1 + except json.JSONDecodeError: + continue + + # 3. Combine and store + for ds in space_dict.keys(): + if ds in time_dict and count_dict[ds] > 0: + # Average the benchmark runs + avg_time = time_dict[ds] / count_dict[ds] + data.append({ + 'Dataset': ds, + 'k': k_val, + 'l': l_val, + 'Mode': mode, + 'Space (bits/k-mer)': space_dict[ds], + 'Query Time (ns/k-mer)': avg_time + }) + + return pd.DataFrame(data) + +def plot_tradeoff(df, output_img="tradeoff_plot_l.png"): + """ + Generates a space-time trade-off plot. + Different lines for datasets/k/modes, points vary by 'l'. + """ + if df.empty: + print("No data parsed! Please check the JSON keys in the script.") + return + + # Enforce categorical order so the legend is populated exactly how we want: + # Human before SE, and regular before canon + df['Dataset'] = pd.Categorical(df['Dataset'], categories=['human', 'se'], ordered=True) + df['Mode'] = pd.Categorical(df['Mode'], categories=['regular', 'canon'], ordered=True) + df = df.sort_values(by=['Dataset', 'k', 'Mode']) + + plt.figure(figsize=(10, 8)) + plt.style.use('seaborn-v0_8-whitegrid') + + # Group by Dataset, k, and Mode with sort=False to preserve our categorical ordering + groups = df.groupby(['Dataset', 'k', 'Mode'], sort=False) + + for (dataset, k, mode), group in groups: + # Sort by l to make the line connect logically + group = group.sort_values(by='l') + + label = f"{'Human' if dataset == 'human' else 'SE'} (k={k}, {mode})" + + # Color logic: Red for Human, Blue for SE. Darker if canonical. + if dataset == 'human': + color = 'firebrick' if mode == 'canon' else 'lightcoral' + else: # se + color = 'royalblue' if mode == 'canon' else 'lightskyblue' + + # Marker logic: Circle for k=31, Square for k=63 + marker = 'o' if k == 31 else 's' + + # Plot line and scatter (linewidth=2.5 for thicker lines) + plt.plot(group['Space (bits/k-mer)'], group['Query Time (ns/k-mer)'], + linestyle='-', color=color, alpha=0.7, linewidth=2.5) + plt.scatter(group['Space (bits/k-mer)'], group['Query Time (ns/k-mer)'], + label=label, color=color, marker=marker, s=80, edgecolor='k', zorder=5) + + # Annotate ALL points with 'l' values + for _, row in group.iterrows(): + current_l = row['l'] + + plt.annotate(f"{int(current_l)}", + (row['Space (bits/k-mer)'], row['Query Time (ns/k-mer)']), + textcoords="offset points", + xytext=(5,5), + ha='left', fontsize=10) + + # Force x-axis ticks to be 1 unit (1 bit) apart + plt.gca().xaxis.set_major_locator(MultipleLocator(1)) + + # Make axis labels bold + plt.xlabel("Index Space (bits/k-mer)", fontsize=12, fontweight='bold') + plt.ylabel("Positive Lookup Time (ns/k-mer)", fontsize=12, fontweight='bold') + plt.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left') + plt.tight_layout() + + plt.savefig(output_img, dpi=300, bbox_inches='tight') + print(f"Plot saved successfully as '{output_img}'!") + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python3 plot_tradeoff_l.py ") + sys.exit(1) + + results_directory = sys.argv[1] + df = parse_results(results_directory) + + print("Extracted Data:") + print(df.to_string(index=False)) + + plot_tradeoff(df, output_img="sshash_tradeoff_l.png") + \ No newline at end of file diff --git a/script/plot-trade-off-m.py b/script/plot-trade-off-m.py new file mode 100644 index 0000000..651efb8 --- /dev/null +++ b/script/plot-trade-off-m.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 + +import os +import sys +import json +import re +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.ticker import MultipleLocator + +def parse_results(results_dir): + """ + Traverses the results directory and extracts space and time metrics. + Calculates bits/k-mer from bytes and num_kmers. + Extracts dataset name from filenames. + """ + results_dir = Path(results_dir) + data = [] + + # Regex to extract m and k from folder names + m_pattern = re.compile(r'm(\d+)') + k_pattern = re.compile(r'k(\d+)') + + for m_dir in results_dir.glob('m*'): + m_match = m_pattern.search(m_dir.name) + if not m_match: continue + m_val = int(m_match.group(1)) + + for k_dir in m_dir.glob('k*'): + k_match = k_pattern.search(k_dir.name) + if not k_match: continue + k_val = int(k_match.group(1)) + + for mode in ['regular', 'canon']: + build_json = k_dir / f"{mode}-build.json" + bench_json = k_dir / f"{mode}-bench.json" + + if not build_json.exists() or not bench_json.exists(): + continue + + # 1. Parse Build JSON for Space (bits/k-mer) + space_dict = {} + with open(build_json, 'r') as f: + for line in f: + try: + j = json.loads(line) + + # Extract dataset name from "/mnt/.../human.k31.eulertigs.fa.gz" + filename = os.path.basename(j.get("input_filename", "")) + ds = filename.split('.')[0] if filename else "unknown" + + if "index_size_in_bytes" in j and "num_kmers" in j: + bytes_size = float(j["index_size_in_bytes"]) + num_kmers = float(j["num_kmers"]) + # Calculate bits per k-mer + bits_per_kmer = (bytes_size * 8.0) / num_kmers + space_dict[ds] = bits_per_kmer + except json.JSONDecodeError: + continue + + # 2. Parse Bench JSON for Query Time (ns/kmer) + time_dict = {} + count_dict = {} + with open(bench_json, 'r') as f: + for line in f: + try: + j = json.loads(line) + + # Extract dataset name from ".../human.k31.m17.sshash" + filename = os.path.basename(j.get("index_filename", "")) + ds = filename.split('.')[0] if filename else "unknown" + + # Use positive lookup time + t_str = j.get("positive lookup (avg_nanosec_per_kmer)") + if t_str is not None: + t = float(t_str) + time_dict[ds] = time_dict.get(ds, 0.0) + t + count_dict[ds] = count_dict.get(ds, 0) + 1 + except json.JSONDecodeError: + continue + + # 3. Combine and store + for ds in space_dict.keys(): + if ds in time_dict and count_dict[ds] > 0: + # Average the 3 benchmark runs + avg_time = time_dict[ds] / count_dict[ds] + data.append({ + 'Dataset': ds, + 'k': k_val, + 'm': m_val, + 'Mode': mode, + 'Space (bits/k-mer)': space_dict[ds], + 'Query Time (ns/k-mer)': avg_time + }) + + return pd.DataFrame(data) + +def plot_tradeoff(df, output_img="tradeoff_plot.png"): + """ + Generates a space-time trade-off plot. + Different lines for datasets/k/modes, points vary by 'm'. + """ + if df.empty: + print("No data parsed! Please check the JSON keys in the script.") + return + + # Enforce categorical order so the legend is populated exactly how we want: + # Human before SE, and regular before canon + df['Dataset'] = pd.Categorical(df['Dataset'], categories=['human', 'se'], ordered=True) + df['Mode'] = pd.Categorical(df['Mode'], categories=['regular', 'canon'], ordered=True) + df = df.sort_values(by=['Dataset', 'k', 'Mode']) + + plt.figure(figsize=(10, 8)) + plt.style.use('seaborn-v0_8-whitegrid') + + # Group by Dataset, k, and Mode with sort=False to preserve our categorical ordering + groups = df.groupby(['Dataset', 'k', 'Mode'], sort=False) + + for (dataset, k, mode), group in groups: + # Sort by m to make the line connect logically + group = group.sort_values(by='m') + + label = f"{'Human' if dataset == 'human' else 'SE'} (k={k}, {mode})" + + # Color logic: Red for Human, Blue for SE. Darker if canonical. + if dataset == 'human': + color = 'firebrick' if mode == 'canon' else 'lightcoral' + else: # se + color = 'royalblue' if mode == 'canon' else 'lightskyblue' + + # Marker logic: Circle for k=31, Square for k=63 + marker = 'o' if k == 31 else 's' + + # Plot line and scatter (added linewidth=2.5 for thicker lines) + plt.plot(group['Space (bits/k-mer)'], group['Query Time (ns/k-mer)'], + linestyle='-', color=color, alpha=0.7, linewidth=2.5) + plt.scatter(group['Space (bits/k-mer)'], group['Query Time (ns/k-mer)'], + label=label, color=color, marker=marker, s=80, edgecolor='k', zorder=5) + + # Find min and max m for this group to filter annotations + min_m = group['m'].min() + max_m = group['m'].max() + + # Annotate points with 'm' values + for _, row in group.iterrows(): + current_m = row['m'] + + # If k=63, ONLY annotate if m is the smallest or largest in this sweep + if row['k'] == 63 and current_m not in (min_m, max_m): + continue + + plt.annotate(f"{int(current_m)}", + (row['Space (bits/k-mer)'], row['Query Time (ns/k-mer)']), + textcoords="offset points", + xytext=(5,5), + ha='left', fontsize=10) + + # Force x-axis ticks to be 1 unit (1 bit) apart + plt.gca().xaxis.set_major_locator(MultipleLocator(1)) + + # Make axis labels bold + plt.xlabel("Index Space (bits/k-mer)", fontsize=12, fontweight='bold') + plt.ylabel("Positive Lookup Time (ns/k-mer)", fontsize=12, fontweight='bold') + plt.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left') + plt.tight_layout() + + plt.savefig(output_img, dpi=300, bbox_inches='tight') + print(f"Plot saved successfully as '{output_img}'!") + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python3 plot_tradeoff.py ") + sys.exit(1) + + results_directory = sys.argv[1] + df = parse_results(results_directory) + + print("Extracted Data:") + print(df.to_string(index=False)) + + plot_tradeoff(df, output_img="sshash_tradeoff_m.png") From 5f72925d8fc0a1963bc279e4ab8b6f1b4fec5dfc Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Tue, 17 Mar 2026 22:15:30 +0100 Subject: [PATCH 3/9] minor --- script/plot-trade-off-l.py | 30 ++++++++++++++++++++---------- script/plot-trade-off-m.py | 29 ++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/script/plot-trade-off-l.py b/script/plot-trade-off-l.py index d6ceeca..8b56ea2 100644 --- a/script/plot-trade-off-l.py +++ b/script/plot-trade-off-l.py @@ -96,7 +96,7 @@ def parse_results(results_dir): return pd.DataFrame(data) -def plot_tradeoff(df, output_img="tradeoff_plot_l.png"): +def plot_tradeoff(df, output_img="tradeoff_plot_l.pdf"): """ Generates a space-time trade-off plot. Different lines for datasets/k/modes, points vary by 'l'. @@ -111,7 +111,7 @@ def plot_tradeoff(df, output_img="tradeoff_plot_l.png"): df['Mode'] = pd.Categorical(df['Mode'], categories=['regular', 'canon'], ordered=True) df = df.sort_values(by=['Dataset', 'k', 'Mode']) - plt.figure(figsize=(10, 8)) + plt.figure(figsize=(5, 10)) plt.style.use('seaborn-v0_8-whitegrid') # Group by Dataset, k, and Mode with sort=False to preserve our categorical ordering @@ -152,11 +152,22 @@ def plot_tradeoff(df, output_img="tradeoff_plot_l.png"): plt.gca().xaxis.set_major_locator(MultipleLocator(1)) # Make axis labels bold - plt.xlabel("Index Space (bits/k-mer)", fontsize=12, fontweight='bold') - plt.ylabel("Positive Lookup Time (ns/k-mer)", fontsize=12, fontweight='bold') - plt.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left') + plt.xlabel("Index Space (bits/k-mer)", fontsize=10 + # , fontweight='bold' + ) + plt.ylabel("Positive Lookup Time (ns/k-mer)", fontsize=10 + # , fontweight='bold' + ) + + # Legend modifications: Move above plot, centered, multi-column + plt.legend(title="Configuration", + bbox_to_anchor=(0.5, 1.02), + loc='lower center', + ncol=2, + borderaxespad=0.) + plt.tight_layout() - + plt.savefig(output_img, dpi=300, bbox_inches='tight') print(f"Plot saved successfully as '{output_img}'!") @@ -167,9 +178,8 @@ def plot_tradeoff(df, output_img="tradeoff_plot_l.png"): results_directory = sys.argv[1] df = parse_results(results_directory) - + print("Extracted Data:") print(df.to_string(index=False)) - - plot_tradeoff(df, output_img="sshash_tradeoff_l.png") - \ No newline at end of file + + plot_tradeoff(df, output_img="sshash_tradeoff_l.pdf") diff --git a/script/plot-trade-off-m.py b/script/plot-trade-off-m.py index 651efb8..0a1d079 100644 --- a/script/plot-trade-off-m.py +++ b/script/plot-trade-off-m.py @@ -96,7 +96,7 @@ def parse_results(results_dir): return pd.DataFrame(data) -def plot_tradeoff(df, output_img="tradeoff_plot.png"): +def plot_tradeoff(df, output_img="tradeoff_plot.pdf"): """ Generates a space-time trade-off plot. Different lines for datasets/k/modes, points vary by 'm'. @@ -111,7 +111,7 @@ def plot_tradeoff(df, output_img="tradeoff_plot.png"): df['Mode'] = pd.Categorical(df['Mode'], categories=['regular', 'canon'], ordered=True) df = df.sort_values(by=['Dataset', 'k', 'Mode']) - plt.figure(figsize=(10, 8)) + plt.figure(figsize=(5, 10)) plt.style.use('seaborn-v0_8-whitegrid') # Group by Dataset, k, and Mode with sort=False to preserve our categorical ordering @@ -160,11 +160,22 @@ def plot_tradeoff(df, output_img="tradeoff_plot.png"): plt.gca().xaxis.set_major_locator(MultipleLocator(1)) # Make axis labels bold - plt.xlabel("Index Space (bits/k-mer)", fontsize=12, fontweight='bold') - plt.ylabel("Positive Lookup Time (ns/k-mer)", fontsize=12, fontweight='bold') - plt.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left') + plt.xlabel("Index Space (bits/k-mer)", fontsize=10 + # , fontweight='bold' + ) + plt.ylabel("Positive Lookup Time (ns/k-mer)", fontsize=10 + # , fontweight='bold' + ) + + # Legend modifications: Move above plot, centered, multi-column + plt.legend(title="Configuration", + bbox_to_anchor=(0.5, 1.02), + loc='lower center', + ncol=2, + borderaxespad=0.) + plt.tight_layout() - + plt.savefig(output_img, dpi=300, bbox_inches='tight') print(f"Plot saved successfully as '{output_img}'!") @@ -175,8 +186,8 @@ def plot_tradeoff(df, output_img="tradeoff_plot.png"): results_directory = sys.argv[1] df = parse_results(results_directory) - + print("Extracted Data:") print(df.to_string(index=False)) - - plot_tradeoff(df, output_img="sshash_tradeoff_m.png") + + plot_tradeoff(df, output_img="sshash_tradeoff_m.pdf") From 6c7eb756b975d60d4fe2263ac0bd8f8085fb587f Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Fri, 20 Mar 2026 11:06:52 +0100 Subject: [PATCH 4/9] minor --- benchmarks/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 7c265aa..4217c8f 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -25,5 +25,5 @@ Queries were run using one thread, instead. The results can be exported to CSV format with - python3 ../script/print_csv.py ../benchmarks/results-10-11-25/k31 - python3 ../script/print_csv.py ../benchmarks/results-10-11-25/k63 + python3 ../script/print_csv.py ../benchmarks/results-21-01-26/k31 + python3 ../script/print_csv.py ../benchmarks/results-21-01-26/k63 From ab3253d2b11d1f75c16a718124c7c256f0736e0b Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Fri, 20 Mar 2026 12:01:02 +0100 Subject: [PATCH 5/9] modified query script --- benchmarks/README.md | 2 +- ...{streaming-query-high-hit.py => streaming-query.py} | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) rename script/{streaming-query-high-hit.py => streaming-query.py} (86%) diff --git a/benchmarks/README.md b/benchmarks/README.md index 4217c8f..5e65e01 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -10,7 +10,7 @@ To run the benchmarks, from within the `build` directory, run python3 ../script/build.py python3 ../script/bench.py - python3 ../script/streaming-query-high-hit.py + python3 ../script/streaming-query.py where `` should be replaced by a suitable basename, e.g., the current date. diff --git a/script/streaming-query-high-hit.py b/script/streaming-query.py similarity index 86% rename from script/streaming-query-high-hit.py rename to script/streaming-query.py index 25008d5..3f90993 100644 --- a/script/streaming-query-high-hit.py +++ b/script/streaming-query.py @@ -24,7 +24,11 @@ "cod", "kestrel", "human", "ncbi-virus", "se", "hprc" ] -queries = {"cod":"SRR12858649", "kestrel":"SRR11449743_1", "human":"SRR5833294", "ncbi-virus":"ncbi-queries", "se":"SRR27871075_1", "hprc":"SRR5833294"} +# high hit +# queries = {"cod":"SRR12858649", "kestrel":"SRR11449743_1", "human":"SRR5833294", "ncbi-virus":"ncbi-queries", "se":"SRR27871075_1", "hprc":"SRR5833294"} + +# low hit +queries = {"cod":"SRR11449743_1", "kestrel":"SRR12858649", "human":"SRR5901135_1", "ncbi-virus":"SRR5833294", "se":"SRR5833294", "hprc":"SRR5901135_1"} # ------------------------------ # Utility functions @@ -55,8 +59,8 @@ def run_bench(k, canonical, runs = 1): mode = "canon" if canonical else "regular" out_dir = results_dir / f"k{k}" out_dir.mkdir(parents=True, exist_ok=True) - log_file = out_dir / f"{mode}-streaming-queries-high-hit.log" - json_file = out_dir / f"{mode}-streaming-queries-high-hit.json" + log_file = out_dir / f"{mode}-streaming-queries.log" + json_file = out_dir / f"{mode}-streaming-queries.json" for dataset in datasets: suffix = f".k{k}.canon.sshash" if canonical else f".k{k}.sshash" From 56325ae5c8a09f65fd79eb3ff59a72e976f8787c Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Fri, 20 Mar 2026 12:29:51 +0100 Subject: [PATCH 6/9] added script to mix fastq files --- script/mix_fastq_files.py | 67 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 script/mix_fastq_files.py diff --git a/script/mix_fastq_files.py b/script/mix_fastq_files.py new file mode 100644 index 0000000..fb949f2 --- /dev/null +++ b/script/mix_fastq_files.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 + +import gzip +import random +import argparse + +def fastq_reader(file_path): + """ + Generator that yields one complete FASTQ read (4 lines) at a time + from a gzipped file. + """ + with gzip.open(file_path, 'rt') as f: + while True: + line1 = f.readline() + if not line1: + break # EOF + line2 = f.readline() + line3 = f.readline() + line4 = f.readline() + + # Yield the entire 4-line read as a single string + yield line1 + line2 + line3 + line4 + +def mix_fastq(file1, file2, output_file): + print(f"Mixing reads from:\n - {file1}\n - {file2}") + print(f"Writing mixed reads to: {output_file} ...") + + # Initialize generators for both files + iter1 = fastq_reader(file1) + iter2 = fastq_reader(file2) + + # Keep track of active iterators + active_iters = [iter1, iter2] + + written_count = 0 + + with gzip.open(output_file, 'wt') as out: + while active_iters: + # Pick a random index from the currently active iterators + idx = random.randrange(len(active_iters)) + + try: + # Fetch the next read from the randomly chosen file + read = next(active_iters[idx]) + out.write(read) + written_count += 1 + + # Optional: print progress every 1M reads + if written_count % 1_000_000 == 0: + print(f"Processed {written_count:,} reads...") + + except StopIteration: + # If the chosen file is exhausted, remove it from the active pool + active_iters.pop(idx) + + print(f"Done! Successfully mixed {written_count:,} total reads into {output_file}.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Mix two gzipped FASTQ files with uniform probability.") + parser.add_argument("-1", "--file1", required=True, help="Path to the first compressed fastq file (.fastq.gz)") + parser.add_argument("-2", "--file2", required=True, help="Path to the second compressed fastq file (.fastq.gz)") + parser.add_argument("-o", "--output", required=True, help="Path to the output mixed fastq file (.fastq.gz)") + + args = parser.parse_args() + + mix_fastq(args.file1, args.file2, args.output) + \ No newline at end of file From a59ae39c50baabc966e41c52ab542befb4bf806c Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Fri, 20 Mar 2026 12:46:31 +0100 Subject: [PATCH 7/9] added script to mix fastq files --- script/mix_fastq_files.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/mix_fastq_files.py b/script/mix_fastq_files.py index fb949f2..33e549f 100644 --- a/script/mix_fastq_files.py +++ b/script/mix_fastq_files.py @@ -45,6 +45,9 @@ def mix_fastq(file1, file2, output_file): out.write(read) written_count += 1 + if written_count == 3_000_000: + break + # Optional: print progress every 1M reads if written_count % 1_000_000 == 0: print(f"Processed {written_count:,} reads...") @@ -64,4 +67,3 @@ def mix_fastq(file1, file2, output_file): args = parser.parse_args() mix_fastq(args.file1, args.file2, args.output) - \ No newline at end of file From 7ad398a8c12cb562c33bd5462e4e869d0c67e37e Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Fri, 20 Mar 2026 13:34:47 +0100 Subject: [PATCH 8/9] ready for mixed workloads --- .../k31/canon-streaming-queries.json | 6 ++ .../k31/canon-streaming-queries.log | 66 +++++++++++++++++++ .../k31/regular-streaming-queries.json | 6 ++ .../k31/regular-streaming-queries.log | 66 +++++++++++++++++++ .../k63/canon-streaming-queries.json | 6 ++ .../k63/canon-streaming-queries.log | 66 +++++++++++++++++++ .../k63/regular-streaming-queries.json | 6 ++ .../k63/regular-streaming-queries.log | 66 +++++++++++++++++++ script/streaming-query.py | 5 +- 9 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.json create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.log create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.json create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.log create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.json create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.log create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.json create mode 100644 benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.log diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.json b/benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.json new file mode 100644 index 0000000..f87c9e8 --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "695737535", "num_positive_kmers": "4754204", "num_negative_kmers": "690972341", "num_invalid_kmers": "10990", "num_searches": "3225641", "num_extensions": "1528563", "elapsed_millisec": "60246"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "163287360", "num_positive_kmers": "790414", "num_negative_kmers": "162496946", "num_invalid_kmers": "0", "num_searches": "540136", "num_extensions": "250278", "elapsed_millisec": "5077"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "395433242", "num_positive_kmers": "1134", "num_negative_kmers": "395408021", "num_invalid_kmers": "24087", "num_searches": "694", "num_extensions": "440", "elapsed_millisec": "36039"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "1452096", "num_negative_kmers": "1567493879", "num_invalid_kmers": "1029011", "num_searches": "643932", "num_extensions": "808164", "elapsed_millisec": "132169"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "63384101", "num_negative_kmers": "1505561874", "num_invalid_kmers": "1029011", "num_searches": "9728422", "num_extensions": "53655679", "elapsed_millisec": "167203"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "395433242", "num_positive_kmers": "2384", "num_negative_kmers": "395406771", "num_invalid_kmers": "24087", "num_searches": "1360", "num_extensions": "1024", "elapsed_millisec": "39411"} diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.log b/benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.log new file mode 100644 index 0000000..ab2a329 --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k31/canon-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2026-03-20 12:14:20: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2026-03-20 12:15:20: DONE +==== query report: +num_kmers = 695737535 +num_positive_kmers = 4754204 (0.683333%) +num_negative_kmers = 690972341 (99.3151%) +num_invalid_kmers = 10990 (0.00157962%) +num_searches = 3225641/4754204 (67.8482%) +num_extensions = 1528563/4754204 (32.1518%) +elapsed = 60.246 sec / 1.0041 min / 86.593 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2026-03-20 12:15:26: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2026-03-20 12:15:31: DONE +==== query report: +num_kmers = 163287360 +num_positive_kmers = 790414 (0.484063%) +num_negative_kmers = 162496946 (99.5159%) +num_invalid_kmers = 0 (0%) +num_searches = 540136/790414 (68.3358%) +num_extensions = 250278/790414 (31.6642%) +elapsed = 5.077 sec / 0.0846167 min / 31.0924 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:15:44: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:16:20: DONE +==== query report: +num_kmers = 395433242 +num_positive_kmers = 1134 (0.000286774%) +num_negative_kmers = 395408021 (99.9936%) +num_invalid_kmers = 24087 (0.00609129%) +num_searches = 694/1134 (61.1993%) +num_extensions = 440/1134 (38.8007%) +elapsed = 36.039 sec / 0.60065 min / 91.138 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:16:21: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:18:34: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1452096 (0.0924917%) +num_negative_kmers = 1567493879 (99.842%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 643932/1452096 (44.345%) +num_extensions = 808164/1452096 (55.655%) +elapsed = 132.169 sec / 2.20282 min / 84.1854 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:18:40: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:21:27: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 63384101 (4.03727%) +num_negative_kmers = 1505561874 (95.8972%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 9728422/63384101 (15.3484%) +num_extensions = 53655679/63384101 (84.6516%) +elapsed = 167.203 sec / 2.78672 min / 106.5 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:21:50: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:22:29: DONE +==== query report: +num_kmers = 395433242 +num_positive_kmers = 2384 (0.000602883%) +num_negative_kmers = 395406771 (99.9933%) +num_invalid_kmers = 24087 (0.00609129%) +num_searches = 1360/2384 (57.047%) +num_extensions = 1024/2384 (42.953%) +elapsed = 39.411 sec / 0.65685 min / 99.6654 ns/kmer diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.json b/benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.json new file mode 100644 index 0000000..a1d979d --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "695737535", "num_positive_kmers": "4754204", "num_negative_kmers": "690972341", "num_invalid_kmers": "10990", "num_searches": "3225641", "num_extensions": "1528563", "elapsed_millisec": "87893"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "163287360", "num_positive_kmers": "790414", "num_negative_kmers": "162496946", "num_invalid_kmers": "0", "num_searches": "540136", "num_extensions": "250278", "elapsed_millisec": "7115"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "395433242", "num_positive_kmers": "1134", "num_negative_kmers": "395408021", "num_invalid_kmers": "24087", "num_searches": "694", "num_extensions": "440", "elapsed_millisec": "54044"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "1452096", "num_negative_kmers": "1567493879", "num_invalid_kmers": "1029011", "num_searches": "643932", "num_extensions": "808164", "elapsed_millisec": "188208"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "63384101", "num_negative_kmers": "1505561874", "num_invalid_kmers": "1029011", "num_searches": "9728422", "num_extensions": "53655679", "elapsed_millisec": "243017"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "395433242", "num_positive_kmers": "2384", "num_negative_kmers": "395406771", "num_invalid_kmers": "24087", "num_searches": "1360", "num_extensions": "1024", "elapsed_millisec": "57925"} diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.log b/benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.log new file mode 100644 index 0000000..c93a9d4 --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k31/regular-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2026-03-20 12:03:34: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2026-03-20 12:05:02: DONE +==== query report: +num_kmers = 695737535 +num_positive_kmers = 4754204 (0.683333%) +num_negative_kmers = 690972341 (99.3151%) +num_invalid_kmers = 10990 (0.00157962%) +num_searches = 3225641/4754204 (67.8482%) +num_extensions = 1528563/4754204 (32.1518%) +elapsed = 87.893 sec / 1.46488 min / 126.331 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2026-03-20 12:05:03: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2026-03-20 12:05:10: DONE +==== query report: +num_kmers = 163287360 +num_positive_kmers = 790414 (0.484063%) +num_negative_kmers = 162496946 (99.5159%) +num_invalid_kmers = 0 (0%) +num_searches = 540136/790414 (68.3358%) +num_extensions = 250278/790414 (31.6642%) +elapsed = 7.115 sec / 0.118583 min / 43.5735 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:05:11: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:06:05: DONE +==== query report: +num_kmers = 395433242 +num_positive_kmers = 1134 (0.000286774%) +num_negative_kmers = 395408021 (99.9936%) +num_invalid_kmers = 24087 (0.00609129%) +num_searches = 694/1134 (61.1993%) +num_extensions = 440/1134 (38.8007%) +elapsed = 54.044 sec / 0.900733 min / 136.67 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:06:05: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:09:13: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1452096 (0.0924917%) +num_negative_kmers = 1567493879 (99.842%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 643932/1452096 (44.345%) +num_extensions = 808164/1452096 (55.655%) +elapsed = 188.208 sec / 3.1368 min / 119.88 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:09:14: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:13:17: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 63384101 (4.03727%) +num_negative_kmers = 1505561874 (95.8972%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 9728422/63384101 (15.3484%) +num_extensions = 53655679/63384101 (84.6516%) +elapsed = 243.017 sec / 4.05028 min / 154.79 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:13:19: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:14:17: DONE +==== query report: +num_kmers = 395433242 +num_positive_kmers = 2384 (0.000602883%) +num_negative_kmers = 395406771 (99.9933%) +num_invalid_kmers = 24087 (0.00609129%) +num_searches = 1360/2384 (57.047%) +num_extensions = 1024/2384 (42.953%) +elapsed = 57.925 sec / 0.965417 min / 146.485 ns/kmer diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.json b/benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.json new file mode 100644 index 0000000..ee951fd --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "461383839", "num_positive_kmers": "756097", "num_negative_kmers": "460616752", "num_invalid_kmers": "10990", "num_searches": "613034", "num_extensions": "143063", "elapsed_millisec": "22347"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "97972416", "num_positive_kmers": "41066", "num_negative_kmers": "97931350", "num_invalid_kmers": "0", "num_searches": "21367", "num_extensions": "19699", "elapsed_millisec": "2727"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "322085785", "num_positive_kmers": "8", "num_negative_kmers": "322073557", "num_invalid_kmers": "12220", "num_searches": "8", "num_extensions": "0", "elapsed_millisec": "13201"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "21769", "num_negative_kmers": "477122927", "num_invalid_kmers": "673778", "num_searches": "15569", "num_extensions": "6200", "elapsed_millisec": "43117"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "9842483", "num_negative_kmers": "467302213", "num_invalid_kmers": "673778", "num_searches": "5417015", "num_extensions": "4425468", "elapsed_millisec": "52375"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "322085785", "num_positive_kmers": "21", "num_negative_kmers": "322073544", "num_invalid_kmers": "12220", "num_searches": "15", "num_extensions": "6", "elapsed_millisec": "16596"} diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.log b/benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.log new file mode 100644 index 0000000..6c630ff --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k63/canon-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2026-03-20 12:26:35: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2026-03-20 12:26:57: DONE +==== query report: +num_kmers = 461383839 +num_positive_kmers = 756097 (0.163876%) +num_negative_kmers = 460616752 (99.8337%) +num_invalid_kmers = 10990 (0.00238196%) +num_searches = 613034/756097 (81.0788%) +num_extensions = 143063/756097 (18.9212%) +elapsed = 22.347 sec / 0.37245 min / 48.4347 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2026-03-20 12:27:00: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2026-03-20 12:27:03: DONE +==== query report: +num_kmers = 97972416 +num_positive_kmers = 41066 (0.0419159%) +num_negative_kmers = 97931350 (99.9581%) +num_invalid_kmers = 0 (0%) +num_searches = 21367/41066 (52.0309%) +num_extensions = 19699/41066 (47.9691%) +elapsed = 2.727 sec / 0.04545 min / 27.8344 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:27:11: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:27:24: DONE +==== query report: +num_kmers = 322085785 +num_positive_kmers = 8 (2.48381e-06%) +num_negative_kmers = 322073557 (99.9962%) +num_invalid_kmers = 12220 (0.00379402%) +num_searches = 8/8 (100%) +num_extensions = 0/8 (0%) +elapsed = 13.201 sec / 0.220017 min / 40.986 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:27:25: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:28:08: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 21769 (0.00455591%) +num_negative_kmers = 477122927 (99.8544%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 15569/21769 (71.5191%) +num_extensions = 6200/21769 (28.4809%) +elapsed = 43.117 sec / 0.718617 min / 90.2372 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:28:14: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:29:07: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 9842483 (2.05988%) +num_negative_kmers = 467302213 (97.7991%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 5417015/9842483 (55.0371%) +num_extensions = 4425468/9842483 (44.9629%) +elapsed = 52.375 sec / 0.872917 min / 109.613 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:29:31: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:29:48: DONE +==== query report: +num_kmers = 322085785 +num_positive_kmers = 21 (6.52e-06%) +num_negative_kmers = 322073544 (99.9962%) +num_invalid_kmers = 12220 (0.00379402%) +num_searches = 15/21 (71.4286%) +num_extensions = 6/21 (28.5714%) +elapsed = 16.596 sec / 0.2766 min / 51.5266 ns/kmer diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.json b/benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.json new file mode 100644 index 0000000..a1f91b4 --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "461383839", "num_positive_kmers": "756097", "num_negative_kmers": "460616752", "num_invalid_kmers": "10990", "num_searches": "613034", "num_extensions": "143063", "elapsed_millisec": "28164"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "97972416", "num_positive_kmers": "41066", "num_negative_kmers": "97931350", "num_invalid_kmers": "0", "num_searches": "21367", "num_extensions": "19699", "elapsed_millisec": "3165"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "322085785", "num_positive_kmers": "8", "num_negative_kmers": "322073557", "num_invalid_kmers": "12220", "num_searches": "8", "num_extensions": "0", "elapsed_millisec": "17717"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "21769", "num_negative_kmers": "477122927", "num_invalid_kmers": "673778", "num_searches": "15569", "num_extensions": "6200", "elapsed_millisec": "52468"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "9842483", "num_negative_kmers": "467302213", "num_invalid_kmers": "673778", "num_searches": "5417015", "num_extensions": "4425468", "elapsed_millisec": "66099"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz", "num_kmers": "322085785", "num_positive_kmers": "21", "num_negative_kmers": "322073544", "num_invalid_kmers": "12220", "num_searches": "15", "num_extensions": "6", "elapsed_millisec": "22897"} diff --git a/benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.log b/benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.log new file mode 100644 index 0000000..4b79062 --- /dev/null +++ b/benchmarks/results-20-03-26-streaming-low-hit/k63/regular-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2026-03-20 12:22:44: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2026-03-20 12:23:12: DONE +==== query report: +num_kmers = 461383839 +num_positive_kmers = 756097 (0.163876%) +num_negative_kmers = 460616752 (99.8337%) +num_invalid_kmers = 10990 (0.00238196%) +num_searches = 613034/756097 (81.0788%) +num_extensions = 143063/756097 (18.9212%) +elapsed = 28.164 sec / 0.4694 min / 61.0425 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2026-03-20 12:23:14: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2026-03-20 12:23:17: DONE +==== query report: +num_kmers = 97972416 +num_positive_kmers = 41066 (0.0419159%) +num_negative_kmers = 97931350 (99.9581%) +num_invalid_kmers = 0 (0%) +num_searches = 21367/41066 (52.0309%) +num_extensions = 19699/41066 (47.9691%) +elapsed = 3.165 sec / 0.05275 min / 32.305 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:23:24: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:23:42: DONE +==== query report: +num_kmers = 322085785 +num_positive_kmers = 8 (2.48381e-06%) +num_negative_kmers = 322073557 (99.9962%) +num_invalid_kmers = 12220 (0.00379402%) +num_searches = 8/8 (100%) +num_extensions = 0/8 (0%) +elapsed = 17.717 sec / 0.295283 min / 55.0071 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:23:43: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:24:35: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 21769 (0.00455591%) +num_negative_kmers = 477122927 (99.8544%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 15569/21769 (71.5191%) +num_extensions = 6200/21769 (28.4809%) +elapsed = 52.468 sec / 0.874467 min / 109.807 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2026-03-20 12:24:41: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2026-03-20 12:25:47: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 9842483 (2.05988%) +num_negative_kmers = 467302213 (97.7991%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 5417015/9842483 (55.0371%) +num_extensions = 4425468/9842483 (44.9629%) +elapsed = 66.099 sec / 1.10165 min / 138.335 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz +2026-03-20 12:26:10: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5901135_1.fastq.gz'... +2026-03-20 12:26:33: DONE +==== query report: +num_kmers = 322085785 +num_positive_kmers = 21 (6.52e-06%) +num_negative_kmers = 322073544 (99.9962%) +num_invalid_kmers = 12220 (0.00379402%) +num_searches = 15/21 (71.4286%) +num_extensions = 6/21 (28.5714%) +elapsed = 22.897 sec / 0.381617 min / 71.0898 ns/kmer diff --git a/script/streaming-query.py b/script/streaming-query.py index 3f90993..e62dc08 100644 --- a/script/streaming-query.py +++ b/script/streaming-query.py @@ -28,7 +28,10 @@ # queries = {"cod":"SRR12858649", "kestrel":"SRR11449743_1", "human":"SRR5833294", "ncbi-virus":"ncbi-queries", "se":"SRR27871075_1", "hprc":"SRR5833294"} # low hit -queries = {"cod":"SRR11449743_1", "kestrel":"SRR12858649", "human":"SRR5901135_1", "ncbi-virus":"SRR5833294", "se":"SRR5833294", "hprc":"SRR5901135_1"} +# queries = {"cod":"SRR11449743_1", "kestrel":"SRR12858649", "human":"SRR5901135_1", "ncbi-virus":"SRR5833294", "se":"SRR5833294", "hprc":"SRR5901135_1"} + +# mixed hit +queries = {"cod":"cod-queries.mixed", "kestrel":"kestrel-queries.mixed", "human":"human-queries.mixed", "ncbi-virus":"ncbi-virus-queries.mixed", "se":"se-queries.mixed", "hprc":"hprc-queries.mixed"} # ------------------------------ # Utility functions From 66b1ae8b095a2f5531cf329c3fa4e57d51975dc2 Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Fri, 20 Mar 2026 15:16:37 +0100 Subject: [PATCH 9/9] added results for mixed workloads --- .../k31/canon-streaming-queries.json | 6 ++ .../k31/canon-streaming-queries.log | 66 +++++++++++++++++++ .../k31/regular-streaming-queries.json | 6 ++ .../k31/regular-streaming-queries.log | 66 +++++++++++++++++++ .../k63/canon-streaming-queries.json | 6 ++ .../k63/canon-streaming-queries.log | 66 +++++++++++++++++++ .../k63/regular-streaming-queries.json | 6 ++ .../k63/regular-streaming-queries.log | 66 +++++++++++++++++++ 8 files changed, 288 insertions(+) create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.json create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.log create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.json create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.log create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.json create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.log create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.json create mode 100644 benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.log diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.json b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.json new file mode 100644 index 0000000..714068f --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz", "num_kmers": "262507590", "num_positive_kmers": "98556426", "num_negative_kmers": "163948665", "num_invalid_kmers": "2499", "num_searches": "5229421", "num_extensions": "93327005", "elapsed_millisec": "15540"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz", "num_kmers": "262492170", "num_positive_kmers": "108354359", "num_negative_kmers": "154135318", "num_invalid_kmers": "2493", "num_searches": "2966871", "num_extensions": "105387488", "elapsed_millisec": "10337"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz", "num_kmers": "323959757", "num_positive_kmers": "63702317", "num_negative_kmers": "260188961", "num_invalid_kmers": "68479", "num_searches": "4424811", "num_extensions": "59277506", "elapsed_millisec": "29351"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz", "num_kmers": "146679963", "num_positive_kmers": "14108777", "num_negative_kmers": "132468434", "num_invalid_kmers": "102752", "num_searches": "647007", "num_extensions": "13461770", "elapsed_millisec": "11552"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz", "num_kmers": "220771194", "num_positive_kmers": "149704544", "num_negative_kmers": "71008273", "num_invalid_kmers": "58377", "num_searches": "42474888", "num_extensions": "107229656", "elapsed_millisec": "35125"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz", "num_kmers": "323975232", "num_positive_kmers": "65791107", "num_negative_kmers": "258115646", "num_invalid_kmers": "68479", "num_searches": "6001013", "num_extensions": "59790094", "elapsed_millisec": "32780"} diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.log b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.log new file mode 100644 index 0000000..745073a --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/canon-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz +2026-03-20 13:43:02: performing queries from file '/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz'... +2026-03-20 13:43:18: DONE +==== query report: +num_kmers = 262507590 +num_positive_kmers = 98556426 (37.5442%) +num_negative_kmers = 163948665 (62.4548%) +num_invalid_kmers = 2499 (0.000951972%) +num_searches = 5229421/98556426 (5.30602%) +num_extensions = 93327005/98556426 (94.694%) +elapsed = 15.54 sec / 0.259 min / 59.1983 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz +2026-03-20 13:43:18: performing queries from file '/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz'... +2026-03-20 13:43:29: DONE +==== query report: +num_kmers = 262492170 +num_positive_kmers = 108354359 (41.2791%) +num_negative_kmers = 154135318 (58.72%) +num_invalid_kmers = 2493 (0.000949743%) +num_searches = 2966871/108354359 (2.73812%) +num_extensions = 105387488/108354359 (97.2619%) +elapsed = 10.337 sec / 0.172283 min / 39.3802 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz +2026-03-20 13:43:30: performing queries from file '/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz'... +2026-03-20 13:43:59: DONE +==== query report: +num_kmers = 323959757 +num_positive_kmers = 63702317 (19.6637%) +num_negative_kmers = 260188961 (80.3152%) +num_invalid_kmers = 68479 (0.0211381%) +num_searches = 4424811/63702317 (6.94608%) +num_extensions = 59277506/63702317 (93.0539%) +elapsed = 29.351 sec / 0.489183 min / 90.6008 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz +2026-03-20 13:44:00: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz'... +2026-03-20 13:44:11: DONE +==== query report: +num_kmers = 146679963 +num_positive_kmers = 14108777 (9.61875%) +num_negative_kmers = 132468434 (90.3112%) +num_invalid_kmers = 102752 (0.0700518%) +num_searches = 647007/14108777 (4.58585%) +num_extensions = 13461770/14108777 (95.4142%) +elapsed = 11.552 sec / 0.192533 min / 78.7565 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz +2026-03-20 13:44:12: performing queries from file '/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz'... +2026-03-20 13:44:47: DONE +==== query report: +num_kmers = 220771194 +num_positive_kmers = 149704544 (67.8098%) +num_negative_kmers = 71008273 (32.1637%) +num_invalid_kmers = 58377 (0.0264423%) +num_searches = 42474888/149704544 (28.3725%) +num_extensions = 107229656/149704544 (71.6275%) +elapsed = 35.125 sec / 0.585417 min / 159.101 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz +2026-03-20 13:44:49: performing queries from file '/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz'... +2026-03-20 13:45:22: DONE +==== query report: +num_kmers = 323975232 +num_positive_kmers = 65791107 (20.3074%) +num_negative_kmers = 258115646 (79.6714%) +num_invalid_kmers = 68479 (0.0211371%) +num_searches = 6001013/65791107 (9.12131%) +num_extensions = 59790094/65791107 (90.8787%) +elapsed = 32.78 sec / 0.546333 min / 101.181 ns/kmer diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.json b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.json new file mode 100644 index 0000000..81344bc --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz", "num_kmers": "262507590", "num_positive_kmers": "98556426", "num_negative_kmers": "163948665", "num_invalid_kmers": "2499", "num_searches": "5229421", "num_extensions": "93327005", "elapsed_millisec": "21708"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz", "num_kmers": "262492170", "num_positive_kmers": "108354359", "num_negative_kmers": "154135318", "num_invalid_kmers": "2493", "num_searches": "2966871", "num_extensions": "105387488", "elapsed_millisec": "13878"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz", "num_kmers": "323959757", "num_positive_kmers": "63702317", "num_negative_kmers": "260188961", "num_invalid_kmers": "68479", "num_searches": "4424811", "num_extensions": "59277506", "elapsed_millisec": "41867"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz", "num_kmers": "146679963", "num_positive_kmers": "14108777", "num_negative_kmers": "132468434", "num_invalid_kmers": "102752", "num_searches": "647007", "num_extensions": "13461770", "elapsed_millisec": "16269"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz", "num_kmers": "220771194", "num_positive_kmers": "149704544", "num_negative_kmers": "71008273", "num_invalid_kmers": "58377", "num_searches": "42474888", "num_extensions": "107229656", "elapsed_millisec": "40939"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz", "num_kmers": "323975232", "num_positive_kmers": "65791107", "num_negative_kmers": "258115646", "num_invalid_kmers": "68479", "num_searches": "6001013", "num_extensions": "59790094", "elapsed_millisec": "45968"} diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.log b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.log new file mode 100644 index 0000000..e1b7592 --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k31/regular-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz +2026-03-20 13:39:57: performing queries from file '/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz'... +2026-03-20 13:40:19: DONE +==== query report: +num_kmers = 262507590 +num_positive_kmers = 98556426 (37.5442%) +num_negative_kmers = 163948665 (62.4548%) +num_invalid_kmers = 2499 (0.000951972%) +num_searches = 5229421/98556426 (5.30602%) +num_extensions = 93327005/98556426 (94.694%) +elapsed = 21.708 sec / 0.3618 min / 82.6948 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz +2026-03-20 13:40:19: performing queries from file '/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz'... +2026-03-20 13:40:33: DONE +==== query report: +num_kmers = 262492170 +num_positive_kmers = 108354359 (41.2791%) +num_negative_kmers = 154135318 (58.72%) +num_invalid_kmers = 2493 (0.000949743%) +num_searches = 2966871/108354359 (2.73812%) +num_extensions = 105387488/108354359 (97.2619%) +elapsed = 13.878 sec / 0.2313 min / 52.8701 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz +2026-03-20 13:40:34: performing queries from file '/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz'... +2026-03-20 13:41:16: DONE +==== query report: +num_kmers = 323959757 +num_positive_kmers = 63702317 (19.6637%) +num_negative_kmers = 260188961 (80.3152%) +num_invalid_kmers = 68479 (0.0211381%) +num_searches = 4424811/63702317 (6.94608%) +num_extensions = 59277506/63702317 (93.0539%) +elapsed = 41.867 sec / 0.697783 min / 129.235 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz +2026-03-20 13:41:16: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz'... +2026-03-20 13:41:33: DONE +==== query report: +num_kmers = 146679963 +num_positive_kmers = 14108777 (9.61875%) +num_negative_kmers = 132468434 (90.3112%) +num_invalid_kmers = 102752 (0.0700518%) +num_searches = 647007/14108777 (4.58585%) +num_extensions = 13461770/14108777 (95.4142%) +elapsed = 16.269 sec / 0.27115 min / 110.915 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz +2026-03-20 13:41:33: performing queries from file '/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz'... +2026-03-20 13:42:14: DONE +==== query report: +num_kmers = 220771194 +num_positive_kmers = 149704544 (67.8098%) +num_negative_kmers = 71008273 (32.1637%) +num_invalid_kmers = 58377 (0.0264423%) +num_searches = 42474888/149704544 (28.3725%) +num_extensions = 107229656/149704544 (71.6275%) +elapsed = 40.939 sec / 0.682317 min / 185.436 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz +2026-03-20 13:42:16: performing queries from file '/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz'... +2026-03-20 13:43:02: DONE +==== query report: +num_kmers = 323975232 +num_positive_kmers = 65791107 (20.3074%) +num_negative_kmers = 258115646 (79.6714%) +num_invalid_kmers = 68479 (0.0211371%) +num_searches = 6001013/65791107 (9.12131%) +num_extensions = 59790094/65791107 (90.8787%) +elapsed = 45.968 sec / 0.766133 min / 141.887 ns/kmer diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.json b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.json new file mode 100644 index 0000000..8aa6772 --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz", "num_kmers": "166507590", "num_positive_kmers": "49473240", "num_negative_kmers": "117031851", "num_invalid_kmers": "2499", "num_searches": "25712107", "num_extensions": "23761133", "elapsed_millisec": "9022"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz", "num_kmers": "166492170", "num_positive_kmers": "60243103", "num_negative_kmers": "106246574", "num_invalid_kmers": "2493", "num_searches": "30776452", "num_extensions": "29466651", "elapsed_millisec": "8412"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz", "num_kmers": "228455205", "num_positive_kmers": "18070078", "num_negative_kmers": "210335568", "num_invalid_kmers": "49559", "num_searches": "9771968", "num_extensions": "8298110", "elapsed_millisec": "12141"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz", "num_kmers": "50683541", "num_positive_kmers": "10232359", "num_negative_kmers": "40375010", "num_invalid_kmers": "76172", "num_searches": "5432493", "num_extensions": "4799866", "elapsed_millisec": "4382"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz", "num_kmers": "125047006", "num_positive_kmers": "97817106", "num_negative_kmers": "27181268", "num_invalid_kmers": "48632", "num_searches": "61475221", "num_extensions": "36341885", "elapsed_millisec": "46414"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz", "num_kmers": "228470682", "num_positive_kmers": "19314643", "num_negative_kmers": "209106480", "num_invalid_kmers": "49559", "num_searches": "10625970", "num_extensions": "8688673", "elapsed_millisec": "15015"} diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.log b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.log new file mode 100644 index 0000000..04c6a45 --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/canon-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz +2026-03-20 13:47:27: performing queries from file '/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz'... +2026-03-20 13:47:36: DONE +==== query report: +num_kmers = 166507590 +num_positive_kmers = 49473240 (29.7123%) +num_negative_kmers = 117031851 (70.2862%) +num_invalid_kmers = 2499 (0.00150083%) +num_searches = 25712107/49473240 (51.9717%) +num_extensions = 23761133/49473240 (48.0283%) +elapsed = 9.022 sec / 0.150367 min / 54.1837 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz +2026-03-20 13:47:36: performing queries from file '/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz'... +2026-03-20 13:47:45: DONE +==== query report: +num_kmers = 166492170 +num_positive_kmers = 60243103 (36.1837%) +num_negative_kmers = 106246574 (63.8148%) +num_invalid_kmers = 2493 (0.00149737%) +num_searches = 30776452/60243103 (51.0871%) +num_extensions = 29466651/60243103 (48.9129%) +elapsed = 8.412 sec / 0.1402 min / 50.5249 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz +2026-03-20 13:47:46: performing queries from file '/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz'... +2026-03-20 13:47:58: DONE +==== query report: +num_kmers = 228455205 +num_positive_kmers = 18070078 (7.90968%) +num_negative_kmers = 210335568 (92.0686%) +num_invalid_kmers = 49559 (0.0216931%) +num_searches = 9771968/18070078 (54.0782%) +num_extensions = 8298110/18070078 (45.9218%) +elapsed = 12.141 sec / 0.20235 min / 53.1439 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz +2026-03-20 13:47:58: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz'... +2026-03-20 13:48:02: DONE +==== query report: +num_kmers = 50683541 +num_positive_kmers = 10232359 (20.1887%) +num_negative_kmers = 40375010 (79.661%) +num_invalid_kmers = 76172 (0.150289%) +num_searches = 5432493/10232359 (53.0913%) +num_extensions = 4799866/10232359 (46.9087%) +elapsed = 4.382 sec / 0.0730333 min / 86.458 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz +2026-03-20 13:48:03: performing queries from file '/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz'... +2026-03-20 13:48:49: DONE +==== query report: +num_kmers = 125047006 +num_positive_kmers = 97817106 (78.2243%) +num_negative_kmers = 27181268 (21.7368%) +num_invalid_kmers = 48632 (0.038891%) +num_searches = 61475221/97817106 (62.8471%) +num_extensions = 36341885/97817106 (37.1529%) +elapsed = 46.414 sec / 0.773567 min / 371.172 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz +2026-03-20 13:48:52: performing queries from file '/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz'... +2026-03-20 13:49:07: DONE +==== query report: +num_kmers = 228470682 +num_positive_kmers = 19314643 (8.45388%) +num_negative_kmers = 209106480 (91.5244%) +num_invalid_kmers = 49559 (0.0216916%) +num_searches = 10625970/19314643 (55.0151%) +num_extensions = 8688673/19314643 (44.9849%) +elapsed = 15.015 sec / 0.25025 min / 65.7196 ns/kmer diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.json b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.json new file mode 100644 index 0000000..3216a5a --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz", "num_kmers": "166507590", "num_positive_kmers": "49473240", "num_negative_kmers": "117031851", "num_invalid_kmers": "2499", "num_searches": "25712107", "num_extensions": "23761133", "elapsed_millisec": "11506"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz", "num_kmers": "166492170", "num_positive_kmers": "60243103", "num_negative_kmers": "106246574", "num_invalid_kmers": "2493", "num_searches": "30776452", "num_extensions": "29466651", "elapsed_millisec": "10546"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz", "num_kmers": "228455205", "num_positive_kmers": "18070078", "num_negative_kmers": "210335568", "num_invalid_kmers": "49559", "num_searches": "9771968", "num_extensions": "8298110", "elapsed_millisec": "15751"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz", "num_kmers": "50683541", "num_positive_kmers": "10232359", "num_negative_kmers": "40375010", "num_invalid_kmers": "76172", "num_searches": "5432493", "num_extensions": "4799866", "elapsed_millisec": "5302"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz", "num_kmers": "125047006", "num_positive_kmers": "97817106", "num_negative_kmers": "27181268", "num_invalid_kmers": "48632", "num_searches": "61475221", "num_extensions": "36341885", "elapsed_millisec": "45459"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz", "num_kmers": "228470682", "num_positive_kmers": "19314643", "num_negative_kmers": "209106480", "num_invalid_kmers": "49559", "num_searches": "10625970", "num_extensions": "8688673", "elapsed_millisec": "19500"} diff --git a/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.log b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.log new file mode 100644 index 0000000..4ab44c8 --- /dev/null +++ b/benchmarks/results-results-20-03-26-streaming-mixed-hit/k63/regular-streaming-queries.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz +2026-03-20 13:45:35: performing queries from file '/mnt/hd2/pibiri/DNA/queries/cod-queries.mixed.fastq.gz'... +2026-03-20 13:45:47: DONE +==== query report: +num_kmers = 166507590 +num_positive_kmers = 49473240 (29.7123%) +num_negative_kmers = 117031851 (70.2862%) +num_invalid_kmers = 2499 (0.00150083%) +num_searches = 25712107/49473240 (51.9717%) +num_extensions = 23761133/49473240 (48.0283%) +elapsed = 11.506 sec / 0.191767 min / 69.102 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz +2026-03-20 13:45:47: performing queries from file '/mnt/hd2/pibiri/DNA/queries/kestrel-queries.mixed.fastq.gz'... +2026-03-20 13:45:57: DONE +==== query report: +num_kmers = 166492170 +num_positive_kmers = 60243103 (36.1837%) +num_negative_kmers = 106246574 (63.8148%) +num_invalid_kmers = 2493 (0.00149737%) +num_searches = 30776452/60243103 (51.0871%) +num_extensions = 29466651/60243103 (48.9129%) +elapsed = 10.546 sec / 0.175767 min / 63.3423 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz +2026-03-20 13:45:58: performing queries from file '/mnt/hd2/pibiri/DNA/queries/human-queries.mixed.fastq.gz'... +2026-03-20 13:46:14: DONE +==== query report: +num_kmers = 228455205 +num_positive_kmers = 18070078 (7.90968%) +num_negative_kmers = 210335568 (92.0686%) +num_invalid_kmers = 49559 (0.0216931%) +num_searches = 9771968/18070078 (54.0782%) +num_extensions = 8298110/18070078 (45.9218%) +elapsed = 15.751 sec / 0.262517 min / 68.9457 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz +2026-03-20 13:46:14: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-virus-queries.mixed.fastq.gz'... +2026-03-20 13:46:19: DONE +==== query report: +num_kmers = 50683541 +num_positive_kmers = 10232359 (20.1887%) +num_negative_kmers = 40375010 (79.661%) +num_invalid_kmers = 76172 (0.150289%) +num_searches = 5432493/10232359 (53.0913%) +num_extensions = 4799866/10232359 (46.9087%) +elapsed = 5.302 sec / 0.0883667 min / 104.61 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz +2026-03-20 13:46:20: performing queries from file '/mnt/hd2/pibiri/DNA/queries/se-queries.mixed.fastq.gz'... +2026-03-20 13:47:05: DONE +==== query report: +num_kmers = 125047006 +num_positive_kmers = 97817106 (78.2243%) +num_negative_kmers = 27181268 (21.7368%) +num_invalid_kmers = 48632 (0.038891%) +num_searches = 61475221/97817106 (62.8471%) +num_extensions = 36341885/97817106 (37.1529%) +elapsed = 45.459 sec / 0.75765 min / 363.535 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz +2026-03-20 13:47:07: performing queries from file '/mnt/hd2/pibiri/DNA/queries/hprc-queries.mixed.fastq.gz'... +2026-03-20 13:47:27: DONE +==== query report: +num_kmers = 228470682 +num_positive_kmers = 19314643 (8.45388%) +num_negative_kmers = 209106480 (91.5244%) +num_invalid_kmers = 49559 (0.0216916%) +num_searches = 10625970/19314643 (55.0151%) +num_extensions = 8688673/19314643 (44.9849%) +elapsed = 19.5 sec / 0.325 min / 85.3501 ns/kmer