From f5b487d7ded8d5ec1351382d4abcfd78a31178bd Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Fri, 13 Feb 2026 13:43:02 +0100 Subject: [PATCH 01/11] improve examples --- Cargo.lock | 892 +++++++++++------------------------ Cargo.toml | 9 +- README.md | 159 ++++--- examples/databento_replay.rs | 324 +++++++++++++ examples/hello_world.rs | 138 ------ examples/sensor_test.rs | 188 ++++++++ src/components.rs | 2 + src/index.rs | 8 + tests/index_tests.rs | 27 ++ 9 files changed, 919 insertions(+), 828 deletions(-) create mode 100644 examples/databento_replay.rs delete mode 100644 examples/hello_world.rs create mode 100644 examples/sensor_test.rs diff --git a/Cargo.lock b/Cargo.lock index 362f79a..1ea9913 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,21 +11,27 @@ dependencies = [ "memchr", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.13" @@ -33,34 +39,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] -name = "anyhow" -version = "1.0.101" +name = "anstyle-parse" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] [[package]] -name = "arc-swap" -version = "1.8.1" +name = "anstyle-query" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ded5f9a03ac8f24d1b8a25101ee812cd32cdc8c50a4c50237de2c4915850e73" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "rustversion", + "windows-sys", ] [[package]] -name = "assert_no_alloc" -version = "1.1.2" +name = "anstyle-wincon" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55ca83137a482d61d916ceb1eba52a684f98004f18e0cafea230fe5579c178a3" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] [[package]] -name = "atomic" -version = "0.6.1" +name = "assert_no_alloc" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" -dependencies = [ - "bytemuck", -] +checksum = "55ca83137a482d61d916ceb1eba52a684f98004f18e0cafea230fe5579c178a3" [[package]] name = "autocfg" @@ -68,12 +79,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "bitflags" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" - [[package]] name = "bumpalo" version = "3.19.1" @@ -113,6 +118,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ 
-122,30 +129,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "chacha20" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" -dependencies = [ - "cfg-if", - "cpufeatures", - "rand_core", -] - -[[package]] -name = "chrono" -version = "0.4.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - [[package]] name = "ciborium" version = "0.2.2" @@ -180,6 +163,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -188,30 +172,35 @@ version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", ] [[package]] -name = "clap_lex" -version = "0.7.7" +name = "clap_derive" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "core-foundation-sys" -version = "0.8.7" +name = "clap_lex" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] -name = 
"cpufeatures" -version = "0.3.0" +name = "colorchoice" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" -dependencies = [ - "libc", -] +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "criterion" @@ -291,10 +280,64 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] -name = "dyn-clone" -version = "1.0.20" +name = "csv" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "dbn" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e5aa5489abf84f8dde3b736c13dffa9f99ea0fc95d06c7719007d40339e63fb" +dependencies = [ + "csv", + "dbn-macros", + "fallible-streaming-iterator", + "itoa", + "json-writer", + "num_enum", + "oval", + "thiserror", + "time", + "zstd", +] + +[[package]] +name = "dbn-macros" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94fa2649cf276e4ae91232d1d1d318367d1b6df1b15d1175493c213575a6ddc" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "deranged" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" +dependencies = [ + 
"powerfmt", +] [[package]] name = "either" @@ -309,15 +352,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] -name = "erased-serde" -version = "0.4.9" +name = "fallible-streaming-iterator" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3" -dependencies = [ - "serde", - "serde_core", - "typeid", -] +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "find-msvc-tools" @@ -325,24 +363,16 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - [[package]] name = "getrandom" -version = "0.4.1" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "rand_core", "wasip2", - "wasip3", ] [[package]] @@ -356,15 +386,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "foldhash", -] - [[package]] name = "hashbrown" version = "0.16.1" @@ -383,36 +404,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "id-arena" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - [[package]] name = "indexmap" version = "2.13.0" @@ -420,9 +411,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown 0.16.1", - "serde", - "serde_core", + "hashbrown", ] [[package]] @@ -436,6 +425,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.10.5" @@ -451,6 +446,16 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + [[package]] name = "js-sys" version = "0.3.85" @@ -462,10 +467,14 @@ dependencies = [ ] [[package]] -name = "leb128fmt" -version = "0.1.0" +name = "json-writer" +version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +checksum = "279046e6427c19c86f93df06fe9dc90c32b43f4a2a85bb3083d579e4a1e7ef03" +dependencies = [ + "itoa", + "ryu", +] [[package]] name = "libc" @@ -473,21 +482,6 @@ version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" -[[package]] -name = "lock_api" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - [[package]] name = "memchr" version = "2.8.0" @@ -504,13 +498,10 @@ dependencies = [ ] [[package]] -name = "nom" -version = "8.0.0" +name = "num-conv" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" -dependencies = [ - "memchr", -] +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" [[package]] name = "num-traits" @@ -521,12 +512,40 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "once_cell" version = "1.21.3" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "oorandom" version = "11.1.5" @@ -534,27 +553,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] -name = "parking_lot" -version = "0.12.5" +name = "oval" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" -dependencies = [ - "lock_api", - "parking_lot_core", -] +checksum = "135cef32720c6746450d910890b0b69bcba2bbf6f85c9f4583df13fe415de828" [[package]] -name = "parking_lot_core" -version = "0.9.12" +name = "pkg-config" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-link", -] +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plotters" @@ -585,13 +593,18 @@ dependencies = [ ] [[package]] -name = "prettyplease" -version = "0.2.37" +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "proc-macro2", - "syn", + 
"toml_edit", ] [[package]] @@ -618,23 +631,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "rand" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" -dependencies = [ - "chacha20", - "getrandom", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" - [[package]] name = "rayon" version = "1.11.0" @@ -655,15 +651,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "redox_syscall" -version = "0.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" -dependencies = [ - "bitflags", -] - [[package]] name = "regex" version = "1.12.3" @@ -699,23 +686,12 @@ version = "0.1.0" dependencies = [ "assert_no_alloc", "bytemuck", + "clap", "criterion", "crossbeam-skiplist", - "crossbeam-utils", - "libc", + "dbn", "memmap2", - "rand", - "spdlog-rs", - "thiserror", -] - -[[package]] -name = "rustc_version" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" -dependencies = [ - "semver", + "zstd", ] [[package]] @@ -739,18 +715,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" - 
[[package]] name = "serde" version = "1.0.228" @@ -761,15 +725,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde_buf" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc948de1bbead18a61be0b33182636603ea0239ca2577b9704fc39eba900e4e5" -dependencies = [ - "serde_core", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -790,15 +745,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_fmt" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497af288b3b95d067a23a4f749f2861121ffcb2f6d8379310dcda040c345ed" -dependencies = [ - "serde_core", -] - [[package]] name = "serde_json" version = "1.0.149" @@ -819,203 +765,113 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "smallvec" -version = "1.15.1" +name = "strsim" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] -name = "spdlog-internal" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c4ffbdbc9f2d819ffb53ef00a253f524ba7bfd7a3aa8dcd50789b9b27be550" -dependencies = [ - "nom", - "strum", - "strum_macros", - "thiserror", -] - -[[package]] -name = "spdlog-macros" -version = "0.3.0" +name = "syn" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b87a8a157696b61e2a87ed9753da2afb2a27c1e7490786fddf3e71d7e0c3b69e" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", - "spdlog-internal", - "syn", -] - -[[package]] -name = "spdlog-rs" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "2c3a3480e91f3f57be460a227415f08975bc39f667c5eff18a76b2dfbeb2f09b" -dependencies = [ - "arc-swap", - "atomic", - "bytemuck", - "chrono", - "dyn-clone", - "is-terminal", - "libc", - "once_cell", - "parking_lot", - "rustc_version", - "spdlog-macros", - "thiserror", - "value-bag", - "winapi", + "unicode-ident", ] [[package]] -name = "strum" -version = "0.27.2" +name = "thiserror" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "strum_macros", + "thiserror-impl", ] [[package]] -name = "strum_macros" -version = "0.27.2" +name = "thiserror-impl" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ - "heck", "proc-macro2", "quote", "syn", ] [[package]] -name = "sval" -version = "2.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1aaf178a50bbdd86043fce9bf0a5867007d9b382db89d1c96ccae4601ff1ff9" - -[[package]] -name = "sval_buffer" -version = "2.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89273e48f03807ebf51c4d81c52f28d35ffa18a593edf97e041b52de143df89" -dependencies = [ - "sval", - "sval_ref", -] - -[[package]] -name = "sval_dynamic" -version = "2.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0430f4e18e7eba21a49d10d25a8dec3ce0e044af40b162347e99a8e3c3ced864" -dependencies = [ - "sval", -] - -[[package]] -name = "sval_fmt" -version = "2.17.0" +name = "time" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "835f51b9d7331b9d7fc48fc716c02306fa88c4a076b1573531910c91a525882d" +checksum = 
"743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ + "deranged", "itoa", - "ryu", - "sval", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", ] [[package]] -name = "sval_json" -version = "2.17.0" +name = "time-core" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13cbfe3ef406ee2366e7e8ab3678426362085fa9eaedf28cb878a967159dced3" -dependencies = [ - "itoa", - "ryu", - "sval", -] +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] -name = "sval_nested" -version = "2.17.0" +name = "time-macros" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b20358af4af787c34321a86618c3cae12eabdd0e9df22cd9dd2c6834214c518" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ - "sval", - "sval_buffer", - "sval_ref", + "num-conv", + "time-core", ] [[package]] -name = "sval_ref" -version = "2.17.0" +name = "tinytemplate" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5e500f8eb2efa84f75e7090f7fc43f621b9f8b6cde571c635b3855f97b332a" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ - "sval", + "serde", + "serde_json", ] [[package]] -name = "sval_serde" -version = "2.17.0" +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2032ae39b11dcc6c18d5fbc50a661ea191cac96484c59ccf49b002261ca2c1" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ "serde_core", - "sval", - "sval_nested", ] [[package]] -name = "syn" -version = "2.0.114" +name = "toml_edit" +version = "0.23.10+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = 
"84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "thiserror" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", ] [[package]] -name = "tinytemplate" -version = "1.2.1" +name = "toml_parser" +version = "1.0.8+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +checksum = "0742ff5ff03ea7e67c8ae6c93cac239e0d9784833362da3f9a9c1da8dfefcbdc" dependencies = [ - "serde", - "serde_json", + "winnow", ] -[[package]] -name = "typeid" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" - [[package]] name = "unicode-ident" version = "1.0.23" @@ -1023,47 +879,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" [[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - -[[package]] -name = "value-bag" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba6f5989077681266825251a52748b8c1d8a4ad098cc37e440103d0ea717fc0" -dependencies = [ - "value-bag-serde1", - "value-bag-sval2", -] - -[[package]] -name = 
"value-bag-serde1" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16530907bfe2999a1773ca5900a65101e092c70f642f25cc23ca0c43573262c5" -dependencies = [ - "erased-serde", - "serde_buf", - "serde_core", - "serde_fmt", -] - -[[package]] -name = "value-bag-sval2" -version = "1.12.0" +name = "utf8parse" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d00ae130edd690eaa877e4f40605d534790d1cf1d651e7685bd6a144521b251f" -dependencies = [ - "sval", - "sval_buffer", - "sval_dynamic", - "sval_fmt", - "sval_json", - "sval_ref", - "sval_serde", -] +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "walkdir" @@ -1084,15 +903,6 @@ dependencies = [ "wit-bindgen", ] -[[package]] -name = "wasip3" -version = "0.4.0+wasi-0.3.0-rc-2026-01-06" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" -dependencies = [ - "wit-bindgen", -] - [[package]] name = "wasm-bindgen" version = "0.2.108" @@ -1138,40 +948,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap", - "wasm-encoder", - "wasmparser", -] - -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap", - "semver", -] - 
[[package]] name = "web-sys" version = "0.3.85" @@ -1182,22 +958,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.11" @@ -1207,71 +967,12 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] 
-name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -1282,115 +983,70 @@ dependencies = [ ] [[package]] -name = "wit-bindgen" -version = "0.51.0" +name = "winnow" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ - "wit-bindgen-rust-macro", + "memchr", ] [[package]] -name = "wit-bindgen-core" +name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" -dependencies = [ - "anyhow", - "heck", - "wit-parser", -] +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] -name = "wit-bindgen-rust" -version = "0.51.0" +name = "zerocopy" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ - "anyhow", - "heck", - "indexmap", - "prettyplease", - "syn", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", + "zerocopy-derive", ] [[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" +name = "zerocopy-derive" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ - "anyhow", - "prettyplease", "proc-macro2", "quote", "syn", - "wit-bindgen-core", - "wit-bindgen-rust", ] [[package]] -name = "wit-component" -version = "0.244.0" +name = "zmij" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags", - "indexmap", - "log", - "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] +checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" [[package]] -name = "wit-parser" -version = "0.244.0" +name = "zstd" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ - "anyhow", - "id-arena", - "indexmap", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", + "zstd-safe", ] [[package]] -name = "zerocopy" -version = "0.8.39" +name = "zstd-safe" +version = "7.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" dependencies = [ - "zerocopy-derive", + "zstd-sys", ] [[package]] -name = "zerocopy-derive" -version = "0.8.39" +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ - "proc-macro2", - "quote", - 
"syn", + "cc", + "pkg-config", ] - -[[package]] -name = "zmij" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" diff --git a/Cargo.toml b/Cargo.toml index feb95fc..48c3ac4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,18 +6,17 @@ description = "A wait-free, cache-friendly state synchronization engine for HFT. authors = ["Your Name"] [dependencies] -spdlog-rs = "0.5.2" -rand = "0.10.0-rc.6" bytemuck = {version = "1.25.0", features = ["derive"]} memmap2 = "0.9.9" -thiserror = "2.0.18" -crossbeam-utils = "0.8.21" crossbeam-skiplist = "0.1" -libc = "0.2" # Needed for mlock (memory pinning) and sched_setaffinity +clap = { version = "4.5.57", features = ["derive"] } [dev-dependencies] assert_no_alloc = { version = "1.1.2" } criterion = { version = "0.5", features = ["html_reports"] } +dbn = { version = "0.48.0" } +zstd = "0.13" +clap = { version = "4.0", features = ["derive"] } [lib] bench = false # We use the 'benches/' directory diff --git a/README.md b/README.md index fcecf5a..8b3520e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Roda -Ultra-high-performance, low-latency state computer for real-time analytics and trading systems. Roda lets you build +Ultra-high-performance, low-latency state computer for real-time analytics and event-driven systems. Roda lets you build deterministic streaming pipelines with cache-friendly dataflows, wait-free reads, and explicit memory bounds—ideal for -HFT, market microstructure research, telemetry, and any workload where microseconds matter. +IoT, telemetry, industrial automation, and any workload where microseconds matter. > Status: Early design and API preview. Examples and tests illustrate the intended DX. Expect rapid iteration and > breaking changes. @@ -85,134 +85,159 @@ roda-state = { path = "." 
} Run the example: ```bash -cargo run --example hello_world +cargo run --example sensor_test ``` -## Example: From Ticks to OHLC to Trading Signals +## Example: From Sensor Readings to Summaries to Alerts -Below is a trimmed version of `examples/hello_world.rs` that demonstrates a two-stage pipeline: aggregate ticks into OHLC candles, then derive a simple momentum signal via a sliding window. +Below is a trimmed version of `examples/sensor_test.rs` that demonstrates a two-stage pipeline: aggregate raw sensor readings into statistical summaries, then derive alerts when anomalies are detected via a sliding window. ```rust use bytemuck::{Pod, Zeroable}; use roda_state::components::{Engine, Index, Store, StoreOptions, StoreReader}; use roda_state::{Aggregator, RodaEngine, Window}; +use std::thread; +use std::time::Duration; #[repr(C)] #[derive(Clone, Copy, Default, Pod, Zeroable)] -struct Tick { - symbol: u64, - price: f64, +struct Reading { + sensor_id: u64, + value: f64, timestamp: u64, } +impl Reading { + fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { sensor_id, value, timestamp } + } +} + + #[repr(C)] #[derive(Clone, Copy, Default, Pod, Zeroable)] -struct OHLC { - symbol: u64, - open: f64, - high: f64, - low: f64, - close: f64, +struct Summary { + sensor_id: u64, + min: f64, + max: f64, + avg: f64, + count: u64, timestamp: u64, } #[repr(C)] #[derive(Clone, Copy, Default, Pod, Zeroable)] -struct Signal { - symbol: u64, +struct Alert { + sensor_id: u64, timestamp: u64, - direction: i32, - size: u32, + severity: i32, + _pad0: i32, } #[derive(Clone, Copy, PartialEq, Eq, Hash, Pod, Zeroable)] #[repr(C)] -struct TimeKey { - symbol: u64, +struct SensorKey { + sensor_id: u64, timestamp: u64, } fn main() { let engine = RodaEngine::new(); - // Allocate bounded stores (explicit memory profile) - let tick_store = engine.store::(StoreOptions { - name: "ticks", + // 1. 
Allocate bounded stores + let mut reading_store = engine.store::(StoreOptions { + name: "readings", size: 1_000_000, in_memory: true, }); - let tick_reader = tick_store.reader(); - let mut ohlc_store = engine.store::(StoreOptions { - name: "ohlc", + let reading_reader = reading_store.reader(); + + let mut summary_store = engine.store::(StoreOptions { + name: "summaries", size: 10_000, in_memory: true, }); - let ohlc_reader = ohlc_store.reader(); - let mut signal_store = engine.store::(StoreOptions { - name: "signals", + let summary_reader = summary_store.reader(); + + let mut alert_store = engine.store::(StoreOptions { + name: "alerts", size: 10_000, in_memory: true, }); + let alert_reader_for_print = alert_store.reader(); - // Index to locate candles by (symbol, time) - let ohlc_index = ohlc_store.direct_index::(); + let summary_index = summary_store.direct_index::(); - // Declare pipelines - let mut ohlc_pipeline: Aggregator = Aggregator::new(); - let mut strategy_pipeline: Window = Window::new(); + // 2. Declare pipelines + let summary_pipeline: Aggregator = Aggregator::new(); + let alert_pipeline: Window = Window::new(); - // Worker 1: aggregate ticks -> OHLC and maintain index + // 3. 
Worker 1: aggregate readings -> summaries and maintain index engine.run_worker(move || { - tick_reader.next(); - ohlc_pipeline - .from(&tick_reader) - .to(&mut ohlc_store) - .partition_by(|t| TimeKey { - symbol: t.symbol, - timestamp: t.timestamp / 100_000 + reading_reader.next(); + summary_pipeline + .from(&reading_reader) + .to(&mut summary_store) + .partition_by(|r| SensorKey { + sensor_id: r.sensor_id, + timestamp: r.timestamp / 100_000 }) - .reduce(|i, t, c| { + .reduce(|i, r, s| { if i == 0 { - c.open = t.price; - c.high = t.price; - c.low = t.price; - c.close = t.price; - c.symbol = t.symbol; - c.timestamp = (t.timestamp / 100_000) * 100_000; + *s = Summary { + sensor_id: r.sensor_id, + min: r.value, max: r.value, avg: r.value, count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + }; } else { - c.high = c.high.max(t.price); - c.low = c.low.min(t.price); - c.close = t.price; + s.min = s.min.min(r.value); + s.max = s.max.max(r.value); + s.avg = (s.avg * s.count as f64 + r.value) / (s.count + 1) as f64; + s.count += 1; } }); - ohlc_index.compute(|c| TimeKey { - symbol: c.symbol, - timestamp: c.timestamp / 100_000 + summary_index.compute(|s| SensorKey { + sensor_id: s.sensor_id, + timestamp: s.timestamp / 100_000 }); }); - // Worker 2: 2-bar momentum signal + // 4. Worker 2: alert on average jumps engine.run_worker(move || { - ohlc_reader.next(); - strategy_pipeline - .from(&ohlc_reader) - .to(&mut signal_store) + summary_reader.next(); + alert_pipeline + .from(&summary_reader) + .to(&mut alert_store) .reduce(2, |w| { - let prev = w[0]; - let cur = w[1]; - (cur.close > prev.close).then(|| Signal { - symbol: cur.symbol, + let (prev, cur) = (w[0], w[1]); + (cur.avg > prev.avg * 1.5).then(|| Alert { + sensor_id: cur.sensor_id, timestamp: cur.timestamp, - direction: 1, - size: ((cur.close - prev.close) as u32).min(100) + severity: 1, + ..Default::default() }) }); }); + + // 5. 
Data Ingestion + reading_store.push(Reading::from(1, 10.0, 10_000)); + reading_store.push(Reading::from(1, 12.0, 20_000)); + reading_store.push(Reading::from(1, 20.0, 110_000)); + reading_store.push(Reading::from(1, 22.0, 120_000)); + + thread::sleep(Duration::from_millis(100)); + + // 6. Print Results + while alert_reader_for_print.next() { + if let Some(a) = alert_reader_for_print.get() { + println!("{:?}", a); + } + } } ``` -Explore the full example in `examples/hello_world.rs` for more context. +Explore the full example in `examples/sensor_test.rs` for more context. ## Contributing diff --git a/examples/databento_replay.rs b/examples/databento_replay.rs new file mode 100644 index 0000000..c788bd8 --- /dev/null +++ b/examples/databento_replay.rs @@ -0,0 +1,324 @@ +use std::collections::HashMap; +use std::path::PathBuf; +use std::time::Instant; + +use bytemuck::{Pod, Zeroable}; +use clap::Parser; +use dbn::decode::{DbnDecoder as Decoder, DbnMetadata, DecodeRecordRef}; +use dbn::enums::{Action, rtype, Side, SType}; +use dbn::record::MboMsg; +use dbn::Record; +use dbn::SymbolIndex; + +// Use your specific high-level API modules +use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::{RodaEngine, Window}; + +// ============================================================================== +// 1. DATA CONTRACT +// ============================================================================== + +/// The normalized "Top of Book" update. +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct BboUpdate { + pub ts: u64, + pub instrument_id: u32, + pub _pad0: u32, + pub bid_px: i64, + pub ask_px: i64, + pub bid_sz: u32, + pub ask_sz: u32, +} + +/// The output signal generated by the strategy. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct TradeSignal { + pub ts: u64, + pub instrument_id: u32, + pub signal_strength: f32, // > 0.8 is Buy + pub is_buy: i32, // 1 = Buy, -1 = Sell + pub _pad0: i32, +} + +struct OrderInfo { + price: i64, + size: u32, + is_bid: bool, +} + +#[derive(Parser)] +struct Args { + #[arg(long)] + file: PathBuf, + #[arg(long, default_value = "NVDA")] + symbol: String, +} + +// ============================================================================== +// 2. THE PIPELINE IMPLEMENTATION +// ============================================================================== + +fn main() -> Result<(), Box> { + let args = Args::parse(); + println!("[System] Booting Roda Showcase (Declarative Mode)..."); + + let engine = RodaEngine::new(); + + // A. RESOURCES + // -------------------------------------------------------------------------- + // 1. Market Data Store (The "River" of BBO updates) + let mut market_store = engine.store::(StoreOptions { + name: "market_data", + size: 10_000_000, + in_memory: true, + }); + let market_reader = market_store.reader(); + + // We create an index to look up BBO by Instrument ID + let _market_index = market_store.direct_index::(); + + // 2. Signal Store (The Output of our Strategy) + let mut signal_store = engine.store::(StoreOptions { + name: "signals", + size: 10_000, + in_memory: true, + }); + // We don't read signals in this example, but we could + let _signal_reader = signal_store.reader(); + + // B. PIPELINES + // -------------------------------------------------------------------------- + // Strategy Pipeline: Look at the last 1 tick (Current) to calculate imbalance + // In a real strategy, we might look at a window of 5. + let strategy_pipeline: Window = Window::new(); + + // C. 
WORKERS + // -------------------------------------------------------------------------- + + // --- WORKER 1: STRATEGY ENGINE --- + // This runs in a background thread managed by RodaEngine + engine.run_worker(move || { + // 1. Drive the reader + market_reader.next(); + + // 2. Execute Pipeline + strategy_pipeline + .from(&market_reader) + .to(&mut signal_store) + .reduce(1, |window| { + // Window size 1 means window[0] is the current item + let bbo = &window[0]; + + // Logic: Calculate Book Imbalance + let bid_vol = bbo.bid_sz as f32; + let ask_vol = bbo.ask_sz as f32; + let total_vol = bid_vol + ask_vol; + + if total_vol > 0.0 { + let imbalance = (bid_vol - ask_vol) / total_vol; + + // Signal Logic: Strong Buy Imbalance (> 0.8) + if imbalance > 0.8 { + return Some(TradeSignal { + ts: bbo.ts, + instrument_id: bbo.instrument_id, + signal_strength: imbalance, + is_buy: 1, + _pad0: 0, + }); + } + } + None + }); + }); + + // --- WORKER 2: FEED HANDLER (The Data Source) --- + // Since this reads from a File (Zstd) and not a Roda Store, + // we run it as the "Driver" on the main thread (or a separate spawn). + // It acts as the Producer for 'market_store'. + + println!("[Writer] Starting Feed Handler for {:?}...", args.file); + let start = Instant::now(); + let mut count = 0u64; + + // 1. Setup Decoder + let mut decoder = Decoder::from_zstd_file(&args.file)?; + + // 2. 
Resolve Symbology + let metadata = decoder.metadata(); + // Prefer robust mapping using metadata symbol map for the start date + let date = metadata.start().date(); + let pit_map = metadata.symbol_map_for_date(date)?; + // Find instrument_id for the requested symbol + let mut target_id_opt = pit_map + .inner() + .iter() + .find_map(|(iid, sym)| if sym == &args.symbol { Some(*iid) } else { None }); + + if target_id_opt.is_none() { + // Fallback: resolve via mappings depending on stype_in/out + target_id_opt = match (metadata.stype_in, metadata.stype_out) { + (Some(SType::RawSymbol), SType::InstrumentId) => metadata + .mappings + .iter() + .find(|m| m.raw_symbol == args.symbol) + .and_then(|m| m.intervals.first()) + .and_then(|i| i.symbol.parse::().ok()), + (Some(SType::InstrumentId), SType::RawSymbol) => metadata + .mappings + .iter() + .find_map(|m| { + if m.intervals.iter().any(|iv| iv.symbol == args.symbol) { + m.raw_symbol.parse::().ok() + } else { + None + } + }), + _ => None, + }; + } + + // Final fallback: if still not found, try instrument defs; if still not found, pick first symbol in map + let (target_id, resolved_symbol) = if let Some(id) = target_id_opt { + (id, args.symbol.clone()) + } else { + let mut resolver = Decoder::from_zstd_file(&args.file)?; + let mut found: Option = None; + while let Some(rec) = resolver.decode_record_ref()? { + if rec.header().rtype == rtype::INSTRUMENT_DEF { + if let Ok(def) = rec.try_get::() { + if let Ok(sym) = dbn::record::c_chars_to_str(&def.raw_symbol) { + if sym == args.symbol { + found = Some(def.hd.instrument_id); + break; + } + } + } + } + } + if let Some(id) = found { + (id, args.symbol.clone()) + } else { + // Last resort: pick first available symbol from the map + if let Some((iid, sym)) = pit_map.inner().iter().next() { + eprintln!( + "[Writer] Warning: symbol '{}' not found. 
Falling back to '{}' (iid={}).", + args.symbol, sym, iid + ); + (*iid, sym.clone()) + } else { + panic!("Symbol not found and no mappings available") + } + } + }; + + println!("[Writer] Mapped {} -> ID {}", resolved_symbol, target_id); + + // 3. Local State (Order Book Reconstruction) + let mut book = HashMap::::new(); + let mut last_bbo = (0i64, i64::MAX); // Bid, Ask + + // 4. Hot Loop + while let Some(record) = decoder.decode_record_ref()? { + if record.header().rtype != rtype::MBO { + continue; + } + + let msg = record.get::().unwrap(); + if msg.hd.instrument_id != target_id { + continue; + } + + let action = Action::try_from(msg.action as u8).unwrap_or(Action::None); + let side = Side::try_from(msg.side as u8).unwrap_or(Side::None); + + // Update Local Book + let mut changed = false; + match action { + Action::Add => { + book.insert( + msg.order_id, + OrderInfo { + price: msg.price, + size: msg.size, + is_bid: side == Side::Bid, + }, + ); + changed = true; + } + Action::Cancel | Action::Fill => { + book.remove(&msg.order_id); + changed = true; + } + Action::Modify => { + if let Some(o) = book.get_mut(&msg.order_id) { + o.price = msg.price; + o.size = msg.size; + changed = true; + } + } + _ => {} + } + + // Compute BBO and Push to Roda Store + if changed { + let (bid, ask, b_sz, a_sz) = compute_bbo(&book); + + if bid != last_bbo.0 || ask != last_bbo.1 { + let update = BboUpdate { + ts: msg.hd.ts_event, + instrument_id: target_id, + bid_px: bid, + ask_px: ask, + bid_sz: b_sz, + ask_sz: a_sz, + _pad0: 0, + }; + market_store.push(update); + last_bbo = (bid, ask); + count += 1; + } + } + } + + let duration = start.elapsed(); + println!( + "[Writer] Finished! 
Pushed {} updates in {:?}", + count, duration + ); + + Ok(()) +} + +fn compute_bbo(book: &HashMap) -> (i64, i64, u32, u32) { + let mut best_bid = 0; + let mut bid_sz = 0; + let mut best_ask = i64::MAX; + let mut ask_sz = 0; + + for info in book.values() { + if info.is_bid { + if info.price > best_bid { + best_bid = info.price; + bid_sz = info.size; + } else if info.price == best_bid { + bid_sz += info.size; + } + } else { + if info.price < best_ask { + best_ask = info.price; + ask_sz = info.size; + } else if info.price == best_ask { + ask_sz += info.size; + } + } + } + + if best_ask == i64::MAX { + best_ask = 0; + } + + (best_bid, best_ask, bid_sz, ask_sz) +} \ No newline at end of file diff --git a/examples/hello_world.rs b/examples/hello_world.rs deleted file mode 100644 index 0868940..0000000 --- a/examples/hello_world.rs +++ /dev/null @@ -1,138 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::components::{Engine, Index, Store, StoreOptions, StoreReader}; -use roda_state::{Aggregator, RodaEngine, Window}; -use std::cmp::min; -// ============================================================================== -// 1. 
DATA CONTRACT -// ============================================================================== - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct Tick { - pub symbol: u64, - pub price: f64, - pub timestamp: u64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct OHLC { - pub symbol: u64, - pub open: f64, - pub high: f64, - pub low: f64, - pub close: f64, - pub timestamp: u64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct TimeKey { - pub symbol: u64, - pub timestamp: u64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct Signal { - pub symbol: u64, - pub timestamp: u64, - pub direction: i32, - pub size: u32, -} - -// ============================================================================== -// 2. DECLARATIVE PIPELINE EXAMPLE -// ============================================================================== - -fn main() { - let engine = RodaEngine::new(); - - // A. RESOURCES - let tick_store = engine.store::(StoreOptions { - name: "ticks", - size: 1_000_000, - in_memory: true, - }); - let tick_reader = tick_store.reader(); - let mut ohlc_store = engine.store::(StoreOptions { - name: "ohlc", - size: 10_000, - in_memory: true, - }); - let ohlc_reader = ohlc_store.reader(); - let mut simple_strategy = engine.store::(StoreOptions { - name: "simple_strategy", - size: 10_000, - in_memory: true, - }); - - // The Index tracks where specific candles live in the ring buffer - let ohlc_index = ohlc_store.direct_index::(); - - // B. PIPELINE - let ohlc_pipeline: Aggregator = Aggregator::new(); - let simple_strategy_pipeline: Window = Window::new(); - - // C. WORKER - engine.run_worker(move || { - tick_reader.next(); - - // 1. 
PARTITION: Map the Tick to a Candle ID (Construct the Key) - ohlc_pipeline - .from(&tick_reader) - .to(&mut ohlc_store) - .partition_by(|tick| TimeKey { - symbol: tick.symbol, - timestamp: tick.timestamp / 100_000, - }) - .reduce(|index, tick, candle| { - if index == 0 { - // Init (First tick in bucket) - candle.open = tick.price; - candle.high = tick.price; - candle.low = tick.price; - candle.close = tick.price; - - // Set Identity - candle.symbol = tick.symbol; - candle.timestamp = (tick.timestamp / 100_000) * 100_000; - } else { - // Update - candle.high = tick.price.max(candle.high); - candle.low = tick.price.min(candle.low); - candle.close = tick.price; - } - }); - - // 3. INDEX: Ensure the new candle is discoverable - // Note: Input is 'candle' (OHLC), not 'tick' - ohlc_index.compute(|candle| TimeKey { - symbol: candle.symbol, - timestamp: candle.timestamp / 100_000, - }); - }); - - engine.run_worker(move || { - ohlc_reader.next(); - - simple_strategy_pipeline - .from(&ohlc_reader) - .to(&mut simple_strategy) - .reduce(2, |candle| { - let cur = candle[1]; - let prev = candle[0]; - - if cur.close > prev.close { - return Some(Signal { - symbol: cur.symbol, - timestamp: cur.timestamp, - direction: 1, - size: min(100, (cur.close - prev.close) as u32), - }); - } - - None - }) - }); -} diff --git a/examples/sensor_test.rs b/examples/sensor_test.rs new file mode 100644 index 0000000..04b0724 --- /dev/null +++ b/examples/sensor_test.rs @@ -0,0 +1,188 @@ +use bytemuck::{Pod, Zeroable}; +use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions, StoreReader}; +use roda_state::{Aggregator, RodaEngine, Window}; +use std::thread; +use std::time::Duration; + +/// Raw sensor reading +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Reading { + pub sensor_id: u64, + pub value: f64, + pub timestamp: u64, +} + +impl Reading { + pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { + sensor_id, + value, + 
timestamp, + } + } +} + +/// Statistical summary of readings for a time window +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Summary { + pub sensor_id: u64, + pub min: f64, + pub max: f64, + pub avg: f64, + pub count: u64, + pub timestamp: u64, +} + +/// Key used for partitioning and indexing summaries +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SensorKey { + pub sensor_id: u64, + pub timestamp: u64, +} + +/// Alert generated when an anomaly is detected +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Alert { + pub sensor_id: u64, + pub timestamp: u64, + pub severity: i32, + pub _pad0: i32, +} + +fn main() { + let engine = RodaEngine::new(); + + // 1. SETUP STORES + // Stores are bounded, pre-allocated buffers for your state. + let mut reading_store = engine.store::(StoreOptions { + name: "readings", + size: 1000, + in_memory: true, + }); + let reading_reader = reading_store.reader(); + + let mut summary_store = engine.store::(StoreOptions { + name: "summaries", + size: 100, + in_memory: true, + }); + let summary_reader = summary_store.reader(); + + let mut alert_store = engine.store::(StoreOptions { + name: "alerts", + size: 100, + in_memory: true, + }); + let alert_reader_for_print = alert_store.reader(); + + // Secondary index to look up summaries by sensor and time + let summary_index = summary_store.direct_index::(); + let summary_index_reader = summary_index.reader(); + + // 2. DEFINE PIPELINES + let summary_pipeline: Aggregator = Aggregator::new(); + let alert_pipeline: Window = Window::new(); + + // 3. 
WORKER: Aggregate readings into summaries + engine.run_worker(move || { + reading_reader.next(); // Wait for data + + summary_pipeline + .from(&reading_reader) + .to(&mut summary_store) + .partition_by(|r| SensorKey { + sensor_id: r.sensor_id, + timestamp: r.timestamp / 100_000, + }) + .reduce(|idx, r, s| { + if idx == 0 { + *s = Summary { + sensor_id: r.sensor_id, + min: r.value, + max: r.value, + avg: r.value, + count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + }; + } else { + s.min = s.min.min(r.value); + s.max = s.max.max(r.value); + s.avg = (s.avg * s.count as f64 + r.value) / (s.count + 1) as f64; + s.count += 1; + } + }); + + // Update the index so summaries can be found by key + summary_index.compute(|s| SensorKey { + sensor_id: s.sensor_id, + timestamp: s.timestamp / 100_000, + }); + }); + + // 4. WORKER: Detect anomalies from summaries + engine.run_worker(move || { + summary_reader.next(); // Wait for data + + alert_pipeline + .from(&summary_reader) + .to(&mut alert_store) + .reduce(2, |window| { + let (prev, cur) = (window[0], window[1]); + + // Alert if average value jumps by more than 50% + if cur.avg > prev.avg * 1.5 { + Some(Alert { + sensor_id: cur.sensor_id, + timestamp: cur.timestamp, + severity: 1, + ..Default::default() + }) + } else { + None + } + }); + }); + + // 5. INGEST DATA + println!("Pushing sensor readings..."); + let readings = [ + Reading::from(1, 10.0, 10_000), + Reading::from(1, 12.0, 20_000), + Reading::from(1, 12.0, 30_000), + Reading::from(1, 12.0, 40_000), + Reading::from(1, 20.0, 110_000), // Average jump here + Reading::from(1, 22.0, 120_000), + Reading::from(1, 22.0, 220_000), + Reading::from(1, 22.0, 320_000), + ]; + + for r in readings { + reading_store.push(r); + } + + // Give workers a moment to process + thread::sleep(Duration::from_millis(100)); + + // 6. 
DISPLAY RESULTS + println!("\nSummaries in Index:"); + for (_, summary) in summary_index_reader.iter() { + println!( + "Sensor {} at {}: Avg={:.2}, Count={}", + summary.sensor_id, summary.timestamp, summary.avg, summary.count + ); + } + + println!("\nAlerts Detected:"); + while alert_reader_for_print.next() { + if let Some(alert) = alert_reader_for_print.get() { + println!( + "ALERT: Sensor {} anomaly at {}", + alert.sensor_id, alert.timestamp + ); + } + } +} diff --git a/src/components.rs b/src/components.rs index d99deb3..b9213c5 100644 --- a/src/components.rs +++ b/src/components.rs @@ -45,9 +45,11 @@ pub trait Index { type Reader: IndexReader; fn compute(&self, key_fn: impl FnOnce(&State) -> Key); fn reader(&self) -> Self::Reader; + fn iter(&self) -> impl Iterator + '_; } pub trait IndexReader { fn with(&self, key: &Key, handler: impl FnOnce(&State) -> R) -> Option; fn get(&self, key: &Key) -> Option; + fn iter(&self) -> impl Iterator + '_; } diff --git a/src/index.rs b/src/index.rs index 808b99a..a748c38 100644 --- a/src/index.rs +++ b/src/index.rs @@ -32,6 +32,10 @@ where map: self.map.clone(), } } + + fn iter(&self) -> impl Iterator + '_ { + self.map.iter().map(|entry| (*entry.key(), *entry.value())) + } } impl IndexReader for DirectIndexReader @@ -46,4 +50,8 @@ where fn get(&self, key: &Key) -> Option { self.map.get(key).map(|entry| *entry.value()) } + + fn iter(&self) -> impl Iterator + '_ { + self.map.iter().map(|entry| (*entry.key(), *entry.value())) + } } diff --git a/tests/index_tests.rs b/tests/index_tests.rs index d9660ee..f7e3b3b 100644 --- a/tests/index_tests.rs +++ b/tests/index_tests.rs @@ -265,3 +265,30 @@ fn test_multiple_workers_reading_index_only_original_computes() { assert_eq!(reader1.get(&10), Some(1)); assert_eq!(reader2.get(&20), Some(2)); } + +#[test] +fn test_index_iterator() { + let engine = RodaEngine::new(); + let mut store = engine.store::(StoreOptions { + name: "test", + size: 1024, + in_memory: true, + }); + let index = 
store.direct_index::(); + + for i in 0..5 { + store.push(i); + index.compute(|&x| x * 2); + } + + let reader = index.reader(); + let items: Vec<_> = reader.iter().collect(); + + assert_eq!(items.len(), 5); + let expected = vec![(0, 0), (2, 1), (4, 2), (6, 3), (8, 4)]; + assert_eq!(items, expected); + + // Test Index::iter too + let items_from_index: Vec<_> = index.iter().collect(); + assert_eq!(items_from_index, expected); +} From 5cea96826151bfeec83e99ea3cd19e36213a2f1e Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Fri, 13 Feb 2026 15:04:50 +0100 Subject: [PATCH 02/11] improve benchmarking by introducing latency measurer --- Cargo.lock | 93 ++++++++++++++ Cargo.toml | 1 + benches/comprehensive_bench.rs | 29 ++++- benches/store_bench.rs | 35 +++++- examples/databento_replay.rs | 107 ++++------------ examples/sensor_test.rs | 2 +- src/components.rs | 2 +- src/engine.rs | 40 ++++-- src/lib.rs | 1 + src/measure/latency_measurer.rs | 208 ++++++++++++++++++++++++++++++++ src/measure/mod.rs | 2 + tests/aggregator_tests.rs | 2 +- tests/index_tests.rs | 20 +-- tests/window_tests.rs | 12 +- 14 files changed, 434 insertions(+), 120 deletions(-) create mode 100644 src/measure/latency_measurer.rs create mode 100644 src/measure/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 1ea9913..e0ecff9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aho-corasick" version = "1.1.4" @@ -79,6 +85,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "bumpalo" version = "3.19.1" @@ -105,6 +117,12 @@ dependencies = [ "syn", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cast" version = "0.3.0" @@ -202,6 +220,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "criterion" version = "0.5.1" @@ -238,6 +265,15 @@ dependencies = [ "itertools", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -363,6 +399,16 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = 
"flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -392,6 +438,20 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64", + "byteorder", + "crossbeam-channel", + "flate2", + "nom", + "num-traits", +] + [[package]] name = "heck" version = "0.5.0" @@ -497,6 +557,32 @@ dependencies = [ "libc", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-conv" version = "0.2.0" @@ -690,6 +776,7 @@ dependencies = [ "criterion", "crossbeam-skiplist", "dbn", + "hdrhistogram", "memmap2", "zstd", ] @@ -764,6 +851,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "strsim" version = "0.11.1" diff --git a/Cargo.toml b/Cargo.toml index 48c3ac4..925166b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ bytemuck = {version = "1.25.0", features = ["derive"]} memmap2 = "0.9.9" crossbeam-skiplist = "0.1" clap = { version = "4.5.57", features = ["derive"] } +hdrhistogram = "7.5" [dev-dependencies] assert_no_alloc = { version = "1.1.2" } diff --git a/benches/comprehensive_bench.rs b/benches/comprehensive_bench.rs index a976741..47af6c8 100644 --- a/benches/comprehensive_bench.rs +++ b/benches/comprehensive_bench.rs @@ -1,6 +1,7 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, black_box, criterion_group, criterion_main}; use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions, StoreReader}; +use roda_state::measure::LatencyMeasurer; use roda_state::{Aggregator, RodaEngine, Window}; #[derive(Clone, Copy, Zeroable, Pod, Default)] @@ -42,8 +43,10 @@ fn bench_index(c: &mut Criterion) { let index = store.direct_index::(); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("index_compute_10k", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); let reader = store.reader(); let index = store.direct_index::(); while reader.next() { @@ -51,6 +54,7 @@ fn bench_index(c: &mut Criterion) { } }); }); + println!("index_compute_10k latency:{}", measurer.format_stats()); // Pre-compute index for lookup bench let reader = store.reader(); @@ -59,14 +63,18 @@ fn bench_index(c: &mut Criterion) { } let index_reader = index.reader(); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("index_lookup", |b| { let mut i = 0u32; b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(index_reader.get(&(i % 10000))); i += 1; }); }); + println!("index_lookup latency:{}", 
measurer.format_stats()); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("index_incremental_compute", |b| { let mut i = 10000u32; let reader = store.reader(); @@ -76,6 +84,7 @@ fn bench_index(c: &mut Criterion) { } b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); store.push(RawData { id: i, value: i as f64, @@ -86,6 +95,7 @@ fn bench_index(c: &mut Criterion) { i += 1; }); }); + println!("index_incremental_compute latency:{}", measurer.format_stats()); group.finish(); } @@ -109,11 +119,13 @@ fn bench_aggregator(c: &mut Criterion) { let source_reader = source.reader(); let aggregator: Aggregator = Aggregator::new(); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function( format!("aggregator_reduce_step_{}_partitions", num_partitions), |b| { let mut i = 0u32; b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); source.push(RawData { id: i % num_partitions, value: 1.0, @@ -133,6 +145,11 @@ fn bench_aggregator(c: &mut Criterion) { }); }, ); + println!( + "aggregator_reduce_step_{}_partitions latency:{}", + num_partitions, + measurer.format_stats() + ); } group.finish(); @@ -158,9 +175,11 @@ fn bench_window(c: &mut Criterion) { let window: Window = Window::new(); for window_size in [10, 100] { + let mut measurer = LatencyMeasurer::new(1000); group.bench_function(format!("window_reduce_size_{}", window_size), |b| { let mut i = 0u32; b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); source.push(RawData { id: i, value: i as f64, @@ -181,6 +200,11 @@ fn bench_window(c: &mut Criterion) { i += 1; }); }); + println!( + "window_reduce_size_{} latency:{}", + window_size, + measurer.format_stats() + ); } group.finish(); @@ -213,9 +237,11 @@ fn bench_mixed(c: &mut Criterion) { let aggregator: Aggregator = Aggregator::new(); let window: Window = Window::new(); - group.bench_function("mixed_push_agg_window", |b| { + let mut measurer = LatencyMeasurer::new(1000); + 
group.bench_function("mixed_pipeline", |b| { let mut i = 0u32; b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); // Push to S1 s1.push(RawData { id: i % 10, @@ -250,6 +276,7 @@ fn bench_mixed(c: &mut Criterion) { i += 1; }); }); + println!("mixed_pipeline latency:{}", measurer.format_stats()); group.finish(); } diff --git a/benches/store_bench.rs b/benches/store_bench.rs index 48ad295..988898a 100644 --- a/benches/store_bench.rs +++ b/benches/store_bench.rs @@ -2,7 +2,9 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, Throughput, criterion_group, criterion_main}; use roda_state::RodaEngine; use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::measure::LatencyMeasurer; use std::hint::black_box; +use std::time::Instant; #[derive(Clone, Copy, Zeroable, Pod)] #[repr(C)] @@ -11,7 +13,8 @@ struct LargeState { } fn bench_push(c: &mut Criterion) { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); + engine.enable_latency_stats(true); let mut group = c.benchmark_group("push"); // 1GB buffer to ensure we don't overflow during benchmarking @@ -23,13 +26,16 @@ fn bench_push(c: &mut Criterion) { }); group.throughput(Throughput::Elements(1)); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("push_u64", |b| { let mut val = 0u64; b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); store_u64.push(black_box(val)); val += 1; }); }); + println!("push_u64 latency:{}", measurer.format_stats()); let mut store_large = engine.store::(StoreOptions { name: "bench_push_large", @@ -37,18 +43,22 @@ fn bench_push(c: &mut Criterion) { in_memory: true, }); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("push_128b", |b| { let val = LargeState { data: [42; 16] }; b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); store_large.push(black_box(val)); }); }); + println!("push_128b latency:{}", measurer.format_stats()); group.finish(); } fn 
bench_fetch(c: &mut Criterion) { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); + engine.enable_latency_stats(true); let mut group = c.benchmark_group("fetch"); let size = 1024 * 1024 * 100; // 100MB @@ -65,17 +75,23 @@ fn bench_fetch(c: &mut Criterion) { let reader = store.reader(); group.throughput(Throughput::Elements(1)); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_at_u64", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_at(black_box(5000))); }); }); + println!("get_at_u64 latency:{}", measurer.format_stats()); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_last_u64", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_last()); }); }); + println!("get_last_u64 latency:{}", measurer.format_stats()); let mut store_large = engine.store::(StoreOptions { name: "bench_fetch_large", @@ -87,25 +103,32 @@ fn bench_fetch(c: &mut Criterion) { } let reader_large = store_large.reader(); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_at_128b", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader_large.get_at(black_box(5000))); }); }); + println!("get_at_128b latency:{}", measurer.format_stats()); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("next_get_u64", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); if reader.next() { black_box(reader.get()); } }); }); + println!("next_get_u64 latency:{}", measurer.format_stats()); group.finish(); } fn bench_window(c: &mut Criterion) { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); + engine.enable_latency_stats(true); let mut group = c.benchmark_group("window"); let size = 1024 * 1024 * 100; // 100MB @@ -122,17 +145,23 @@ fn bench_window(c: &mut Criterion) { let reader = store.reader(); group.throughput(Throughput::Elements(1)); + let 
mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_window_10", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_window::<10>(black_box(5000))); }); }); + println!("get_window_10 latency:{}", measurer.format_stats()); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_window_100", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_window::<100>(black_box(5000))); }); }); + println!("get_window_100 latency:{}", measurer.format_stats()); group.finish(); } diff --git a/examples/databento_replay.rs b/examples/databento_replay.rs index c788bd8..e6804e5 100644 --- a/examples/databento_replay.rs +++ b/examples/databento_replay.rs @@ -4,14 +4,13 @@ use std::time::Instant; use bytemuck::{Pod, Zeroable}; use clap::Parser; -use dbn::decode::{DbnDecoder as Decoder, DbnMetadata, DecodeRecordRef}; -use dbn::enums::{Action, rtype, Side, SType}; +use dbn::decode::{DbnDecoder as Decoder, DecodeRecordRef}; +use dbn::enums::{Action, rtype, Side}; use dbn::record::MboMsg; use dbn::Record; -use dbn::SymbolIndex; // Use your specific high-level API modules -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::components::{Engine, Index, Store, StoreOptions, StoreReader}; use roda_state::{RodaEngine, Window}; // ============================================================================== @@ -52,8 +51,6 @@ struct OrderInfo { struct Args { #[arg(long)] file: PathBuf, - #[arg(long, default_value = "NVDA")] - symbol: String, } // ============================================================================== @@ -64,7 +61,7 @@ fn main() -> Result<(), Box> { let args = Args::parse(); println!("[System] Booting Roda Showcase (Declarative Mode)..."); - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); // A. 
RESOURCES // -------------------------------------------------------------------------- @@ -77,7 +74,8 @@ fn main() -> Result<(), Box> { let market_reader = market_store.reader(); // We create an index to look up BBO by Instrument ID - let _market_index = market_store.direct_index::(); + // We will use this for partitioning later + // let _market_index = market_store.direct_index::(); // 2. Signal Store (The Output of our Strategy) let mut signal_store = engine.store::(StoreOptions { @@ -134,6 +132,12 @@ fn main() -> Result<(), Box> { }); }); + // 3. Partitioning: we can use direct index to partition by symbol + let market_index = market_store.direct_index::(); + engine.run_worker(move || { + market_index.compute(|bbo| bbo.instrument_id); + }); + // --- WORKER 2: FEED HANDLER (The Data Source) --- // Since this reads from a File (Zstd) and not a Roda Store, // we run it as the "Driver" on the main thread (or a separate spawn). @@ -146,79 +150,9 @@ fn main() -> Result<(), Box> { // 1. Setup Decoder let mut decoder = Decoder::from_zstd_file(&args.file)?; - // 2. 
Resolve Symbology - let metadata = decoder.metadata(); - // Prefer robust mapping using metadata symbol map for the start date - let date = metadata.start().date(); - let pit_map = metadata.symbol_map_for_date(date)?; - // Find instrument_id for the requested symbol - let mut target_id_opt = pit_map - .inner() - .iter() - .find_map(|(iid, sym)| if sym == &args.symbol { Some(*iid) } else { None }); - - if target_id_opt.is_none() { - // Fallback: resolve via mappings depending on stype_in/out - target_id_opt = match (metadata.stype_in, metadata.stype_out) { - (Some(SType::RawSymbol), SType::InstrumentId) => metadata - .mappings - .iter() - .find(|m| m.raw_symbol == args.symbol) - .and_then(|m| m.intervals.first()) - .and_then(|i| i.symbol.parse::().ok()), - (Some(SType::InstrumentId), SType::RawSymbol) => metadata - .mappings - .iter() - .find_map(|m| { - if m.intervals.iter().any(|iv| iv.symbol == args.symbol) { - m.raw_symbol.parse::().ok() - } else { - None - } - }), - _ => None, - }; - } - - // Final fallback: if still not found, try instrument defs; if still not found, pick first symbol in map - let (target_id, resolved_symbol) = if let Some(id) = target_id_opt { - (id, args.symbol.clone()) - } else { - let mut resolver = Decoder::from_zstd_file(&args.file)?; - let mut found: Option = None; - while let Some(rec) = resolver.decode_record_ref()? { - if rec.header().rtype == rtype::INSTRUMENT_DEF { - if let Ok(def) = rec.try_get::() { - if let Ok(sym) = dbn::record::c_chars_to_str(&def.raw_symbol) { - if sym == args.symbol { - found = Some(def.hd.instrument_id); - break; - } - } - } - } - } - if let Some(id) = found { - (id, args.symbol.clone()) - } else { - // Last resort: pick first available symbol from the map - if let Some((iid, sym)) = pit_map.inner().iter().next() { - eprintln!( - "[Writer] Warning: symbol '{}' not found. 
Falling back to '{}' (iid={}).", - args.symbol, sym, iid - ); - (*iid, sym.clone()) - } else { - panic!("Symbol not found and no mappings available") - } - } - }; - - println!("[Writer] Mapped {} -> ID {}", resolved_symbol, target_id); - // 3. Local State (Order Book Reconstruction) - let mut book = HashMap::::new(); - let mut last_bbo = (0i64, i64::MAX); // Bid, Ask + let mut books = HashMap::>::new(); + let mut last_bbos = HashMap::::new(); // instrument_id -> (Bid, Ask) // 4. Hot Loop while let Some(record) = decoder.decode_record_ref()? { @@ -227,9 +161,10 @@ fn main() -> Result<(), Box> { } let msg = record.get::().unwrap(); - if msg.hd.instrument_id != target_id { - continue; - } + let instrument_id = msg.hd.instrument_id; + + let book = books.entry(instrument_id).or_default(); + let last_bbo = last_bbos.entry(instrument_id).or_insert((0i64, i64::MAX)); let action = Action::try_from(msg.action as u8).unwrap_or(Action::None); let side = Side::try_from(msg.side as u8).unwrap_or(Side::None); @@ -264,12 +199,12 @@ fn main() -> Result<(), Box> { // Compute BBO and Push to Roda Store if changed { - let (bid, ask, b_sz, a_sz) = compute_bbo(&book); + let (bid, ask, b_sz, a_sz) = compute_bbo(book); if bid != last_bbo.0 || ask != last_bbo.1 { let update = BboUpdate { ts: msg.hd.ts_event, - instrument_id: target_id, + instrument_id, bid_px: bid, ask_px: ask, bid_sz: b_sz, @@ -277,7 +212,7 @@ fn main() -> Result<(), Box> { _pad0: 0, }; market_store.push(update); - last_bbo = (bid, ask); + *last_bbo = (bid, ask); count += 1; } } diff --git a/examples/sensor_test.rs b/examples/sensor_test.rs index 04b0724..51b178a 100644 --- a/examples/sensor_test.rs +++ b/examples/sensor_test.rs @@ -54,7 +54,7 @@ pub struct Alert { } fn main() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); // 1. SETUP STORES // Stores are bounded, pre-allocated buffers for your state. 
diff --git a/src/components.rs b/src/components.rs index b9213c5..9f0049f 100644 --- a/src/components.rs +++ b/src/components.rs @@ -8,7 +8,7 @@ pub struct StoreOptions { } pub trait Engine { - fn run_worker(&self, runnable: impl FnMut() + Send + 'static); + fn run_worker(&mut self, runnable: impl FnMut() + Send + 'static); fn store(&self, options: StoreOptions) -> impl Store + 'static; } diff --git a/src/engine.rs b/src/engine.rs index fe57bf1..9b685d6 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,23 +1,40 @@ use crate::components::{Engine, Store, StoreOptions}; +use crate::measure::latency_measurer::LatencyMeasurer; use crate::store::StoreJournal; use bytemuck::Pod; use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::thread; +use std::time::Instant; pub struct RodaEngine { root_path: &'static str, running: Arc, + enable_latency_stats: bool, + worker_handlers: Vec>, } impl Engine for RodaEngine { - fn run_worker(&self, mut runnable: impl FnMut() + Send + 'static) { + fn run_worker(&mut self, mut runnable: impl FnMut() + Send + 'static) { + let worker_id = self.worker_handlers.len(); let running = self.running.clone(); - thread::spawn(move || { - while running.load(std::sync::atomic::Ordering::Relaxed) { - runnable(); + let enable_latency_stats = self.enable_latency_stats; + let handler = thread::spawn(move || { + if enable_latency_stats { + let mut measurer = LatencyMeasurer::new(1000); + while running.load(std::sync::atomic::Ordering::Relaxed) { + let instant = Instant::now(); + runnable(); + measurer.measure(instant.elapsed()); + } + println!("[Worker:{}]{}", worker_id, measurer.format_stats()); + } else { + while running.load(std::sync::atomic::Ordering::Relaxed) { + runnable(); + } } }); + self.worker_handlers.push(handler); } fn store(&self, options: StoreOptions) -> impl Store + 'static { @@ -30,6 +47,8 @@ impl RodaEngine { Self { root_path: "data", running: Arc::new(AtomicBool::new(true)), + enable_latency_stats: false, + worker_handlers: 
vec![], } } @@ -37,8 +56,14 @@ impl RodaEngine { Self { root_path, running: Arc::new(AtomicBool::new(true)), + enable_latency_stats: false, + worker_handlers: vec![], } } + + pub fn enable_latency_stats(&mut self, enable: bool) { + self.enable_latency_stats = enable; + } } impl Default for RodaEngine { @@ -46,10 +71,3 @@ impl Default for RodaEngine { Self::new() } } - -impl Drop for RodaEngine { - fn drop(&mut self) { - self.running - .store(false, std::sync::atomic::Ordering::Relaxed); - } -} diff --git a/src/lib.rs b/src/lib.rs index df35bc3..a23200e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ pub mod index; mod storage; pub mod store; pub mod window; +pub mod measure; pub use crate::aggregator::Aggregator; pub use crate::engine::RodaEngine; diff --git a/src/measure/latency_measurer.rs b/src/measure/latency_measurer.rs new file mode 100644 index 0000000..af9cb4e --- /dev/null +++ b/src/measure/latency_measurer.rs @@ -0,0 +1,208 @@ +use hdrhistogram::Histogram; +use std::time::{Duration, Instant}; + +#[derive(Debug, Clone, Default)] +pub struct LatencyStats { + pub count: u64, + pub min: u64, + pub max: u64, + pub mean: f64, + pub p50: u64, + pub p90: u64, + pub p99: u64, + pub p999: u64, + pub p9999: u64, +} + +pub struct LatencyMeasurerGuard<'a> { + measurer: &'a mut LatencyMeasurer, + start: Option, +} + +impl Drop for LatencyMeasurerGuard<'_> { + fn drop(&mut self) { + if let Some(start) = self.start { + self.measurer.measure_local(start.elapsed()); + } + } +} + +/// A latency measurer that uses hdrhistogram. 
+pub struct LatencyMeasurer { + histogram: Histogram, + sum: u64, + step_instant: Instant, + sample_rate: u64, + step: u64, +} + +impl LatencyMeasurer { + pub fn new(sample_rate: u64) -> Self { + assert!(sample_rate > 0, "sample_rate must be positive"); + // Range: 1ns to 1,000s (1,000,000,000,000 ns) + // 3 significant figures + let histogram = Histogram::::new_with_bounds(1, 1_000_000_000_000, 3).unwrap(); + Self { + histogram, + sum: 0, + sample_rate, + step_instant: Instant::now(), + step: 0, + } + } + + pub fn measure(&mut self, duration: Duration) { + self.step += 1; + if self.step % self.sample_rate != 0 { + return; + } + + self.measure_local(duration); + } + + fn measure_local(&mut self, duration: Duration) { + let count = self.sample_rate; + let nanos = duration.as_nanos() as u64; + let nanos = nanos.clamp(1, 1_000_000_000_000); + + self.histogram.record_n(nanos, count).unwrap(); + self.sum += nanos; + } + + pub fn measure_with_guard(&mut self) -> LatencyMeasurerGuard<'_> { + self.step += 1; + if self.step % self.sample_rate != 0 { + return LatencyMeasurerGuard { + measurer: self, + start: None, + }; + } + LatencyMeasurerGuard { + measurer: self, + start: Some(Instant::now()), + } + } + + pub fn step_measure(&mut self) { + self.step += 1; + if self.step % self.sample_rate != 0 { + return; + } + let elapsed = self.step_instant.elapsed(); + self.measure(elapsed); + self.step_instant = Instant::now(); + } + + pub fn reset(&mut self) { + self.histogram.reset(); + self.sum = 0; + } + + pub fn get_stats(&self) -> LatencyStats { + let count = self.histogram.len(); + if count == 0 { + return LatencyStats::default(); + } + + LatencyStats { + count, + min: self.histogram.min(), + max: self.histogram.max(), + mean: self.histogram.mean(), + p50: self.histogram.value_at_quantile(0.5), + p90: self.histogram.value_at_quantile(0.9), + p99: self.histogram.value_at_quantile(0.99), + p999: self.histogram.value_at_quantile(0.999), + p9999: 
self.histogram.value_at_quantile(0.9999), + } + } + + pub fn format_stats(&self) -> String { + let stats = self.get_stats(); + if stats.count == 0 { + return "No stats collected yet".into(); + } + + format!( + "count={},\tmin={},\tmax={},\tmean={},\tp50={},\tp90={},\tp99={},\tp999={},\tp9999={}", + Self::format_count(stats.count), + Self::format_duration(stats.min as f64), + Self::format_duration(stats.max as f64), + Self::format_duration(stats.mean), + Self::format_duration(stats.p50 as f64), + Self::format_duration(stats.p90 as f64), + Self::format_duration(stats.p99 as f64), + Self::format_duration(stats.p999 as f64), + Self::format_duration(stats.p9999 as f64), + ) + } + + fn format_count(count: u64) -> String { + if count < 1000 { + count.to_string() + } else if count < 1_000_000 { + let val = count as f64 / 1000.0; + if val == val.floor() { + format!("{:.0}k", val) + } else { + format!("{:.1}k", val) + } + } else { + let val = count as f64 / 1_000_000.0; + if val == val.floor() { + format!("{:.0}M", val) + } else { + format!("{:.1}M", val) + } + } + } + + fn format_duration(nanos: f64) -> String { + if nanos < 1000.0 { + if nanos == nanos.floor() { + format!("{:.0}ns", nanos) + } else { + format!("{:.1}ns", nanos) + } + } else if nanos < 1_000_000.0 { + let val = nanos / 1000.0; + if val == val.floor() { + format!("{:.0}us", val) + } else { + format!("{:.1}us", val) + } + } else if nanos < 1_000_000_000.0 { + let val = nanos / 1_000_000.0; + if val == val.floor() { + format!("{:.0}ms", val) + } else { + let s = format!("{:.2}ms", val); + if s.ends_with("0ms") { + format!("{:.1}ms", val) + } else { + s + } + } + } else { + let val = nanos / 1_000_000_000.0; + if val == val.floor() { + format!("{:.0}s", val) + } else { + let s = format!("{:.2}s", val); + if s.ends_with("0s") { + format!("{:.1}s", val) + } else { + s + } + } + } + } + + pub fn is_outlier(&self, duration: Duration) -> bool { + let stats = self.get_stats(); + if stats.count < 100 { + return false; 
+ } + duration.as_nanos() as u64 > stats.p999 + } +} diff --git a/src/measure/mod.rs b/src/measure/mod.rs new file mode 100644 index 0000000..69c1e95 --- /dev/null +++ b/src/measure/mod.rs @@ -0,0 +1,2 @@ +pub mod latency_measurer; +pub use latency_measurer::{LatencyMeasurer, LatencyStats}; \ No newline at end of file diff --git a/tests/aggregator_tests.rs b/tests/aggregator_tests.rs index c0337cb..406c1dd 100644 --- a/tests/aggregator_tests.rs +++ b/tests/aggregator_tests.rs @@ -363,7 +363,7 @@ fn test_aggregator_large_index() { #[test] fn test_aggregator_worker_large() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut source = engine.store::(StoreOptions { name: "source", size: 2000, diff --git a/tests/index_tests.rs b/tests/index_tests.rs index f7e3b3b..2b11097 100644 --- a/tests/index_tests.rs +++ b/tests/index_tests.rs @@ -13,7 +13,7 @@ struct ComplexKey { #[test] fn test_index_multiple_values() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -38,7 +38,7 @@ fn test_index_multiple_values() { #[test] fn test_multiple_indices_on_same_store() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -62,7 +62,7 @@ fn test_multiple_indices_on_same_store() { #[test] fn test_index_complex_key() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -95,7 +95,7 @@ fn test_index_complex_key() { #[test] fn test_index_shallow_clone_sharing() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -114,7 +114,7 @@ fn test_index_shallow_clone_sharing() { #[test] fn test_index_collision_overwrite() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); 
let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -136,7 +136,7 @@ fn test_index_collision_overwrite() { #[test] fn test_index_not_found() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -154,7 +154,7 @@ fn test_index_not_found() { #[test] fn test_concurrent_push_and_index() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -187,7 +187,7 @@ fn test_concurrent_push_and_index() { #[test] fn test_run_worker_with_multiple_stores() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store_u32 = engine.store::(StoreOptions { name: "test", size: 1024, @@ -240,7 +240,7 @@ fn test_run_worker_with_multiple_stores() { #[test] fn test_multiple_workers_reading_index_only_original_computes() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, @@ -268,7 +268,7 @@ fn test_multiple_workers_reading_index_only_original_computes() { #[test] fn test_index_iterator() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut store = engine.store::(StoreOptions { name: "test", size: 1024, diff --git a/tests/window_tests.rs b/tests/window_tests.rs index 16f173e..c81de3d 100644 --- a/tests/window_tests.rs +++ b/tests/window_tests.rs @@ -18,7 +18,7 @@ pub struct Analysis { #[test] fn test_window_filling_and_sliding() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut source = engine.store::(StoreOptions { name: "source", size: 10, @@ -74,7 +74,7 @@ fn test_window_filling_and_sliding() { #[test] fn test_window_size_one() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut source = engine.store::(StoreOptions { name: "source", size: 10, @@ -124,7 +124,7 @@ fn 
test_window_size_one() { #[test] fn test_window_large_sliding() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut source = engine.store::(StoreOptions { name: "source", size: 100, @@ -181,7 +181,7 @@ fn test_window_large_sliding() { #[test] fn test_window_worker_large() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut source = engine.store::(StoreOptions { name: "source", size: 2000, @@ -232,7 +232,7 @@ fn test_window_worker_large() { #[test] fn test_window_max_value() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut source = engine.store::(StoreOptions { name: "source", size: 10, @@ -275,7 +275,7 @@ fn test_window_max_value() { fn test_window_all_none_until_full() { use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let mut source = engine.store::(StoreOptions { name: "source", size: 10, From f2acafceebf99f846f4051a2eecf35e2275c4141 Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sat, 14 Feb 2026 18:34:58 +0100 Subject: [PATCH 03/11] refactorings. remove traits and move implementations. 
create new store type named SlotStore --- Cargo.lock | 497 +++++++++++++++++- Cargo.toml | 6 +- benches/comprehensive_bench.rs | 38 +- benches/store_bench.rs | 26 +- examples/databento_replay.rs | 259 --------- examples/databento_replay/README.md | 38 ++ examples/databento_replay/book_level_entry.rs | 11 + examples/databento_replay/importer.rs | 46 ++ examples/databento_replay/light_mbo_entry.rs | 57 ++ examples/databento_replay/main.rs | 170 ++++++ .../{sensor_test.rs => sensor_test/main.rs} | 13 +- src/aggregator.rs | 47 +- src/components.rs | 55 +- src/direct_index.rs | 123 +++++ src/engine.rs | 96 +++- src/index.rs | 57 -- src/journal_store.rs | 185 +++++++ src/lib.rs | 12 +- src/measure/latency_measurer.rs | 3 +- src/measure/mod.rs | 2 +- src/op_counter.rs | 31 ++ src/slot_store.rs | 90 ++++ src/storage/journal_mmap.rs | 335 ++++++++++++ src/storage/mmap_journal.rs | 134 ----- src/storage/mod.rs | 3 +- src/storage/slot_mmap.rs | 348 ++++++++++++ src/store.rs | 137 ----- src/window.rs | 26 +- tests/aggregator_tests.rs | 83 +-- tests/comprehensive_tests.rs | 57 +- tests/index_tests.rs | 164 +++++- tests/journal_tests.rs | 17 +- tests/logic_tests.rs | 17 +- tests/push_read_tests.rs | 53 +- tests/store_no_alloc_tests.rs | 44 +- tests/window_tests.rs | 39 +- 36 files changed, 2384 insertions(+), 935 deletions(-) delete mode 100644 examples/databento_replay.rs create mode 100644 examples/databento_replay/README.md create mode 100644 examples/databento_replay/book_level_entry.rs create mode 100644 examples/databento_replay/importer.rs create mode 100644 examples/databento_replay/light_mbo_entry.rs create mode 100644 examples/databento_replay/main.rs rename examples/{sensor_test.rs => sensor_test/main.rs} (92%) create mode 100644 src/direct_index.rs delete mode 100644 src/index.rs create mode 100644 src/journal_store.rs create mode 100644 src/op_counter.rs create mode 100644 src/slot_store.rs create mode 100644 src/storage/journal_mmap.rs delete mode 100644 
src/storage/mmap_journal.rs create mode 100644 src/storage/slot_mmap.rs delete mode 100644 src/store.rs diff --git a/Cargo.lock b/Cargo.lock index e0ecff9..3ab1b68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,24 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anes" version = "0.1.6" @@ -73,12 +91,30 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "arc-swap" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ded5f9a03ac8f24d1b8a25101ee812cd32cdc8c50a4c50237de2c4915850e73" +dependencies = [ + "rustversion", +] + [[package]] name = "assert_no_alloc" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55ca83137a482d61d916ceb1eba52a684f98004f18e0cafea230fe5579c178a3" +[[package]] +name = "atomic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" +dependencies = [ + "bytemuck", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -91,6 +127,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + [[package]] name = "bumpalo" version = "3.19.1" @@ -147,6 +189,19 @@ version = 
"1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -220,6 +275,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "crc32fast" version = "1.5.0" @@ -231,25 +292,24 @@ dependencies = [ [[package]] name = "criterion" -version = "0.5.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ + "alloca", "anes", "cast", "ciborium", "clap", "criterion-plot", - "is-terminal", "itertools", "num-traits", - "once_cell", "oorandom", + "page_size", "plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "walkdir", @@ -257,9 +317,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.5.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", "itertools", @@ -375,6 +435,12 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "dyn-clone" 
+version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "either" version = "1.15.0" @@ -387,6 +453,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "erased-serde" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3" +dependencies = [ + "serde", + "serde_core", + "typeid", +] + [[package]] name = "fallible-streaming-iterator" version = "0.1.9" @@ -448,7 +525,7 @@ dependencies = [ "byteorder", "crossbeam-channel", "flate2", - "nom", + "nom 7.1.3", "num-traits", ] @@ -464,6 +541,30 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "indexmap" version = "2.13.0" @@ -493,9 +594,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" -version = "0.10.5" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -542,6 +643,21 @@ version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + [[package]] name = "memchr" version = "2.8.0" @@ -583,6 +699,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "num-conv" version = "0.2.0" @@ -644,6 +769,39 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135cef32720c6746450d910890b0b69bcba2bbf6f85c9f4583df13fe415de828" +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" 
+dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -737,6 +895,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.12.3" @@ -778,7 +945,16 @@ dependencies = [ "dbn", "hdrhistogram", "memmap2", - "zstd", + "spdlog-rs", +] + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", ] [[package]] @@ -802,6 +978,18 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.228" @@ -812,6 +1000,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_buf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc948de1bbead18a61be0b33182636603ea0239ca2577b9704fc39eba900e4e5" +dependencies = [ + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -832,6 +1029,15 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_fmt" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e497af288b3b95d067a23a4f749f2861121ffcb2f6d8379310dcda040c345ed" +dependencies = [ + "serde_core", +] + [[package]] name = "serde_json" 
version = "1.0.149" @@ -857,12 +1063,163 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "spdlog-internal" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c4ffbdbc9f2d819ffb53ef00a253f524ba7bfd7a3aa8dcd50789b9b27be550" +dependencies = [ + "nom 8.0.0", + "strum", + "strum_macros", + "thiserror", +] + +[[package]] +name = "spdlog-macros" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b87a8a157696b61e2a87ed9753da2afb2a27c1e7490786fddf3e71d7e0c3b69e" +dependencies = [ + "proc-macro2", + "quote", + "spdlog-internal", + "syn", +] + +[[package]] +name = "spdlog-rs" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c3a3480e91f3f57be460a227415f08975bc39f667c5eff18a76b2dfbeb2f09b" +dependencies = [ + "arc-swap", + "atomic", + "bytemuck", + "chrono", + "dyn-clone", + "is-terminal", + "libc", + "once_cell", + "parking_lot", + "rustc_version", + "spdlog-macros", + "thiserror", + "value-bag", + "winapi", +] + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sval" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1aaf178a50bbdd86043fce9bf0a5867007d9b382db89d1c96ccae4601ff1ff9" + +[[package]] +name = "sval_buffer" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89273e48f03807ebf51c4d81c52f28d35ffa18a593edf97e041b52de143df89" +dependencies = [ + "sval", + "sval_ref", +] + +[[package]] +name = "sval_dynamic" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0430f4e18e7eba21a49d10d25a8dec3ce0e044af40b162347e99a8e3c3ced864" +dependencies = [ + "sval", +] + +[[package]] +name = "sval_fmt" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "835f51b9d7331b9d7fc48fc716c02306fa88c4a076b1573531910c91a525882d" +dependencies = [ + "itoa", + "ryu", + "sval", +] + +[[package]] +name = "sval_json" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13cbfe3ef406ee2366e7e8ab3678426362085fa9eaedf28cb878a967159dced3" +dependencies = [ + "itoa", + "ryu", + "sval", +] + +[[package]] +name = "sval_nested" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b20358af4af787c34321a86618c3cae12eabdd0e9df22cd9dd2c6834214c518" +dependencies = [ + "sval", + "sval_buffer", + "sval_ref", +] + +[[package]] +name = "sval_ref" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5e500f8eb2efa84f75e7090f7fc43f621b9f8b6cde571c635b3855f97b332a" +dependencies = [ + "sval", +] + +[[package]] +name = "sval_serde" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ca2032ae39b11dcc6c18d5fbc50a661ea191cac96484c59ccf49b002261ca2c1" +dependencies = [ + "serde_core", + "sval", + "sval_nested", +] + [[package]] name = "syn" version = "2.0.114" @@ -965,6 +1322,12 @@ dependencies = [ "winnow", ] +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "unicode-ident" version = "1.0.23" @@ -977,6 +1340,43 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "value-bag" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba6f5989077681266825251a52748b8c1d8a4ad098cc37e440103d0ea717fc0" +dependencies = [ + "value-bag-serde1", + "value-bag-sval2", +] + +[[package]] +name = "value-bag-serde1" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16530907bfe2999a1773ca5900a65101e092c70f642f25cc23ca0c43573262c5" +dependencies = [ + "erased-serde", + "serde_buf", + "serde_core", + "serde_fmt", +] + +[[package]] +name = "value-bag-sval2" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00ae130edd690eaa877e4f40605d534790d1cf1d651e7685bd6a144521b251f" +dependencies = [ + "sval", + "sval_buffer", + "sval_dynamic", + "sval_fmt", + "sval_json", + "sval_ref", + "sval_serde", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -1051,6 +1451,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = 
"0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -1060,12 +1476,71 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies 
= [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.61.2" diff --git a/Cargo.toml b/Cargo.toml index 925166b..9d6b5e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,17 +6,17 @@ description = "A wait-free, cache-friendly state synchronization engine for HFT. authors = ["Your Name"] [dependencies] -bytemuck = {version = "1.25.0", features = ["derive"]} +bytemuck = { version = "1.25.0", features = ["derive"] } memmap2 = "0.9.9" crossbeam-skiplist = "0.1" clap = { version = "4.5.57", features = ["derive"] } hdrhistogram = "7.5" +spdlog-rs = "0.5.2" [dev-dependencies] assert_no_alloc = { version = "1.1.2" } -criterion = { version = "0.5", features = ["html_reports"] } +criterion = { version = "0.8.2", features = ["html_reports"] } dbn = { version = "0.48.0" } -zstd = "0.13" clap = { version = "4.0", features = ["derive"] } [lib] diff --git a/benches/comprehensive_bench.rs b/benches/comprehensive_bench.rs index 47af6c8..56e4b6d 100644 --- a/benches/comprehensive_bench.rs +++ b/benches/comprehensive_bench.rs @@ -1,8 +1,7 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, black_box, criterion_group, criterion_main}; -use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions, StoreReader}; use roda_state::measure::LatencyMeasurer; -use roda_state::{Aggregator, RodaEngine, Window}; +use roda_state::{Aggregator, JournalStoreOptions, RodaEngine, Window}; #[derive(Clone, Copy, Zeroable, Pod, Default)] #[repr(C)] @@ -26,7 +25,7 @@ fn bench_index(c: &mut Criterion) { let mut group = c.benchmark_group("index"); let size = 16 * 1024 * 1024 * 1024; - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "bench_index_store", size, in_memory: true, @@ -34,7 +33,7 @@ fn bench_index(c: &mut Criterion) { // Fill data for i in 0..10000 { - store.push(RawData { + store.append(RawData { id: i as u32, value: i as f64, ..Default::default() @@ -85,7 +84,7 @@ fn bench_index(c: &mut 
Criterion) { b.iter(|| { let _latency_guard = measurer.measure_with_guard(); - store.push(RawData { + store.append(RawData { id: i, value: i as f64, ..Default::default() @@ -95,7 +94,10 @@ fn bench_index(c: &mut Criterion) { i += 1; }); }); - println!("index_incremental_compute latency:{}", measurer.format_stats()); + println!( + "index_incremental_compute latency:{}", + measurer.format_stats() + ); group.finish(); } @@ -105,12 +107,12 @@ fn bench_aggregator(c: &mut Criterion) { let mut group = c.benchmark_group("aggregator"); for num_partitions in [10, 100, 1000] { - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "bench_agg_source", size: 8 * 1024 * 1024 * 1024, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "bench_agg_target", size: 8 * 1024 * 1024 * 1024, in_memory: true, @@ -126,7 +128,7 @@ fn bench_aggregator(c: &mut Criterion) { let mut i = 0u32; b.iter(|| { let _latency_guard = measurer.measure_with_guard(); - source.push(RawData { + source.append(RawData { id: i % num_partitions, value: 1.0, ..Default::default() @@ -136,7 +138,7 @@ fn bench_aggregator(c: &mut Criterion) { .from(&source_reader) .to(&mut target) .partition_by(|r| r.id) - .reduce(|_idx, r, s| { + .reduce(|_idx, r, s, _keep| { s.id = r.id; s.sum += r.value; s.count += 1; @@ -160,12 +162,12 @@ fn bench_window(c: &mut Criterion) { let mut group = c.benchmark_group("window_component"); let size = 8 * 1024 * 1024 * 1024; - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "bench_window_source", size, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "bench_window_target", size, in_memory: true, @@ -180,7 +182,7 @@ fn bench_window(c: &mut Criterion) { let mut i = 0u32; 
b.iter(|| { let _latency_guard = measurer.measure_with_guard(); - source.push(RawData { + source.append(RawData { id: i, value: i as f64, ..Default::default() @@ -215,17 +217,17 @@ fn bench_mixed(c: &mut Criterion) { let mut group = c.benchmark_group("mixed_pipeline"); let size = 8 * 1024 * 1024 * 1024; - let mut s1 = engine.store::(StoreOptions { + let mut s1 = engine.new_journal_store::(JournalStoreOptions { name: "mixed_s1", size, in_memory: true, }); - let mut s2 = engine.store::(StoreOptions { + let mut s2 = engine.new_journal_store::(JournalStoreOptions { name: "mixed_s2", size, in_memory: true, }); - let mut s3 = engine.store::(StoreOptions { + let mut s3 = engine.new_journal_store::(JournalStoreOptions { name: "mixed_s3", size, in_memory: true, @@ -243,7 +245,7 @@ fn bench_mixed(c: &mut Criterion) { b.iter(|| { let _latency_guard = measurer.measure_with_guard(); // Push to S1 - s1.push(RawData { + s1.append(RawData { id: i % 10, value: 1.0, ..Default::default() @@ -255,7 +257,7 @@ fn bench_mixed(c: &mut Criterion) { .from(&r1) .to(&mut s2) .partition_by(|r| r.id) - .reduce(|_idx, r, s| { + .reduce(|_idx, r, s, _keep| { s.id = r.id; s.sum += r.value; s.count += 1; diff --git a/benches/store_bench.rs b/benches/store_bench.rs index 988898a..f0ad6fe 100644 --- a/benches/store_bench.rs +++ b/benches/store_bench.rs @@ -1,10 +1,8 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, Throughput, criterion_group, criterion_main}; -use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; use roda_state::measure::LatencyMeasurer; +use roda_state::{JournalStoreOptions, RodaEngine}; use std::hint::black_box; -use std::time::Instant; #[derive(Clone, Copy, Zeroable, Pod)] #[repr(C)] @@ -15,11 +13,11 @@ struct LargeState { fn bench_push(c: &mut Criterion) { let mut engine = RodaEngine::new(); engine.enable_latency_stats(true); - let mut group = c.benchmark_group("push"); + let mut group = c.benchmark_group("append"); // 
1GB buffer to ensure we don't overflow during benchmarking let size = 16 * 1024 * 1024 * 1024; - let mut store_u64 = engine.store::(StoreOptions { + let mut store_u64 = engine.new_journal_store::(JournalStoreOptions { name: "bench_push_u64", size, in_memory: true, @@ -31,13 +29,13 @@ fn bench_push(c: &mut Criterion) { let mut val = 0u64; b.iter(|| { let _latency_guard = measurer.measure_with_guard(); - store_u64.push(black_box(val)); + store_u64.append(black_box(val)); val += 1; }); }); println!("push_u64 latency:{}", measurer.format_stats()); - let mut store_large = engine.store::(StoreOptions { + let mut store_large = engine.new_journal_store::(JournalStoreOptions { name: "bench_push_large", size, in_memory: true, @@ -48,7 +46,7 @@ fn bench_push(c: &mut Criterion) { let val = LargeState { data: [42; 16] }; b.iter(|| { let _latency_guard = measurer.measure_with_guard(); - store_large.push(black_box(val)); + store_large.append(black_box(val)); }); }); println!("push_128b latency:{}", measurer.format_stats()); @@ -62,7 +60,7 @@ fn bench_fetch(c: &mut Criterion) { let mut group = c.benchmark_group("fetch"); let size = 1024 * 1024 * 100; // 100MB - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "bench_fetch", size, in_memory: true, @@ -70,7 +68,7 @@ fn bench_fetch(c: &mut Criterion) { // Pre-fill some data for i in 0..10000 { - store.push(i as u64); + store.append(i as u64); } let reader = store.reader(); @@ -93,13 +91,13 @@ fn bench_fetch(c: &mut Criterion) { }); println!("get_last_u64 latency:{}", measurer.format_stats()); - let mut store_large = engine.store::(StoreOptions { + let mut store_large = engine.new_journal_store::(JournalStoreOptions { name: "bench_fetch_large", size, in_memory: true, }); for _ in 0..10000 { - store_large.push(LargeState { data: [42; 16] }); + store_large.append(LargeState { data: [42; 16] }); } let reader_large = store_large.reader(); @@ -132,7 +130,7 @@ fn 
bench_window(c: &mut Criterion) { let mut group = c.benchmark_group("window"); let size = 1024 * 1024 * 100; // 100MB - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "bench_window", size, in_memory: true, @@ -140,7 +138,7 @@ fn bench_window(c: &mut Criterion) { // Pre-fill some data for i in 0..10000 { - store.push(i as u64); + store.append(i as u64); } let reader = store.reader(); diff --git a/examples/databento_replay.rs b/examples/databento_replay.rs deleted file mode 100644 index e6804e5..0000000 --- a/examples/databento_replay.rs +++ /dev/null @@ -1,259 +0,0 @@ -use std::collections::HashMap; -use std::path::PathBuf; -use std::time::Instant; - -use bytemuck::{Pod, Zeroable}; -use clap::Parser; -use dbn::decode::{DbnDecoder as Decoder, DecodeRecordRef}; -use dbn::enums::{Action, rtype, Side}; -use dbn::record::MboMsg; -use dbn::Record; - -// Use your specific high-level API modules -use roda_state::components::{Engine, Index, Store, StoreOptions, StoreReader}; -use roda_state::{RodaEngine, Window}; - -// ============================================================================== -// 1. DATA CONTRACT -// ============================================================================== - -/// The normalized "Top of Book" update. -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct BboUpdate { - pub ts: u64, - pub instrument_id: u32, - pub _pad0: u32, - pub bid_px: i64, - pub ask_px: i64, - pub bid_sz: u32, - pub ask_sz: u32, -} - -/// The output signal generated by the strategy. 
-#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct TradeSignal { - pub ts: u64, - pub instrument_id: u32, - pub signal_strength: f32, // > 0.8 is Buy - pub is_buy: i32, // 1 = Buy, -1 = Sell - pub _pad0: i32, -} - -struct OrderInfo { - price: i64, - size: u32, - is_bid: bool, -} - -#[derive(Parser)] -struct Args { - #[arg(long)] - file: PathBuf, -} - -// ============================================================================== -// 2. THE PIPELINE IMPLEMENTATION -// ============================================================================== - -fn main() -> Result<(), Box> { - let args = Args::parse(); - println!("[System] Booting Roda Showcase (Declarative Mode)..."); - - let mut engine = RodaEngine::new(); - - // A. RESOURCES - // -------------------------------------------------------------------------- - // 1. Market Data Store (The "River" of BBO updates) - let mut market_store = engine.store::(StoreOptions { - name: "market_data", - size: 10_000_000, - in_memory: true, - }); - let market_reader = market_store.reader(); - - // We create an index to look up BBO by Instrument ID - // We will use this for partitioning later - // let _market_index = market_store.direct_index::(); - - // 2. Signal Store (The Output of our Strategy) - let mut signal_store = engine.store::(StoreOptions { - name: "signals", - size: 10_000, - in_memory: true, - }); - // We don't read signals in this example, but we could - let _signal_reader = signal_store.reader(); - - // B. PIPELINES - // -------------------------------------------------------------------------- - // Strategy Pipeline: Look at the last 1 tick (Current) to calculate imbalance - // In a real strategy, we might look at a window of 5. - let strategy_pipeline: Window = Window::new(); - - // C. 
WORKERS - // -------------------------------------------------------------------------- - - // --- WORKER 1: STRATEGY ENGINE --- - // This runs in a background thread managed by RodaEngine - engine.run_worker(move || { - // 1. Drive the reader - market_reader.next(); - - // 2. Execute Pipeline - strategy_pipeline - .from(&market_reader) - .to(&mut signal_store) - .reduce(1, |window| { - // Window size 1 means window[0] is the current item - let bbo = &window[0]; - - // Logic: Calculate Book Imbalance - let bid_vol = bbo.bid_sz as f32; - let ask_vol = bbo.ask_sz as f32; - let total_vol = bid_vol + ask_vol; - - if total_vol > 0.0 { - let imbalance = (bid_vol - ask_vol) / total_vol; - - // Signal Logic: Strong Buy Imbalance (> 0.8) - if imbalance > 0.8 { - return Some(TradeSignal { - ts: bbo.ts, - instrument_id: bbo.instrument_id, - signal_strength: imbalance, - is_buy: 1, - _pad0: 0, - }); - } - } - None - }); - }); - - // 3. Partitioning: we can use direct index to partition by symbol - let market_index = market_store.direct_index::(); - engine.run_worker(move || { - market_index.compute(|bbo| bbo.instrument_id); - }); - - // --- WORKER 2: FEED HANDLER (The Data Source) --- - // Since this reads from a File (Zstd) and not a Roda Store, - // we run it as the "Driver" on the main thread (or a separate spawn). - // It acts as the Producer for 'market_store'. - - println!("[Writer] Starting Feed Handler for {:?}...", args.file); - let start = Instant::now(); - let mut count = 0u64; - - // 1. Setup Decoder - let mut decoder = Decoder::from_zstd_file(&args.file)?; - - // 3. Local State (Order Book Reconstruction) - let mut books = HashMap::>::new(); - let mut last_bbos = HashMap::::new(); // instrument_id -> (Bid, Ask) - - // 4. Hot Loop - while let Some(record) = decoder.decode_record_ref()? 
{ - if record.header().rtype != rtype::MBO { - continue; - } - - let msg = record.get::().unwrap(); - let instrument_id = msg.hd.instrument_id; - - let book = books.entry(instrument_id).or_default(); - let last_bbo = last_bbos.entry(instrument_id).or_insert((0i64, i64::MAX)); - - let action = Action::try_from(msg.action as u8).unwrap_or(Action::None); - let side = Side::try_from(msg.side as u8).unwrap_or(Side::None); - - // Update Local Book - let mut changed = false; - match action { - Action::Add => { - book.insert( - msg.order_id, - OrderInfo { - price: msg.price, - size: msg.size, - is_bid: side == Side::Bid, - }, - ); - changed = true; - } - Action::Cancel | Action::Fill => { - book.remove(&msg.order_id); - changed = true; - } - Action::Modify => { - if let Some(o) = book.get_mut(&msg.order_id) { - o.price = msg.price; - o.size = msg.size; - changed = true; - } - } - _ => {} - } - - // Compute BBO and Push to Roda Store - if changed { - let (bid, ask, b_sz, a_sz) = compute_bbo(book); - - if bid != last_bbo.0 || ask != last_bbo.1 { - let update = BboUpdate { - ts: msg.hd.ts_event, - instrument_id, - bid_px: bid, - ask_px: ask, - bid_sz: b_sz, - ask_sz: a_sz, - _pad0: 0, - }; - market_store.push(update); - *last_bbo = (bid, ask); - count += 1; - } - } - } - - let duration = start.elapsed(); - println!( - "[Writer] Finished! 
Pushed {} updates in {:?}", - count, duration - ); - - Ok(()) -} - -fn compute_bbo(book: &HashMap) -> (i64, i64, u32, u32) { - let mut best_bid = 0; - let mut bid_sz = 0; - let mut best_ask = i64::MAX; - let mut ask_sz = 0; - - for info in book.values() { - if info.is_bid { - if info.price > best_bid { - best_bid = info.price; - bid_sz = info.size; - } else if info.price == best_bid { - bid_sz += info.size; - } - } else { - if info.price < best_ask { - best_ask = info.price; - ask_sz = info.size; - } else if info.price == best_ask { - ask_sz += info.size; - } - } - } - - if best_ask == i64::MAX { - best_ask = 0; - } - - (best_bid, best_ask, bid_sz, ask_sz) -} \ No newline at end of file diff --git a/examples/databento_replay/README.md b/examples/databento_replay/README.md new file mode 100644 index 0000000..b130ecc --- /dev/null +++ b/examples/databento_replay/README.md @@ -0,0 +1,38 @@ +# Liquidity Monitor + +This example demonstrates a market data replay system using the Roda engine. It processes raw Market-By-Order (MBO) data to perform real-time liquidity analysis. + +## Overview + +The "Liquidity Monitor" goes beyond simple price tracking. It focuses on three main objectives: + +### 1. Reconstruct the Aggregate Book (Level 2) +Convert the raw stream of individual orders (MBO) into a consolidated map of **Price → Total Volume**. +* **Why useful?** This is what exchanges actually sell as "Level 2 Data." You are building it from scratch from the most granular data available. + +### 2. Calculate "Order Book Imbalance" +Measure the ratio of buy vs. sell pressure in the book. + +**Formula:** +$$Imbalance = \frac{Bid\ Vol - Ask\ Vol}{Bid\ Vol + Ask\ Vol}$$ + +* **Why useful?** This is a primary signal for predicting short-term price movement. A positive value indicates buy pressure. + +### 3. Detect "Liquidity Voids" +Monitor the book for sudden drops in available volume. 
+* **Condition:** If the total volume at the Top 5 levels drops by 50% in < 1ms, trigger an alert. +* **Why useful?** This predicts "Flash Crashes" and high-volatility events where price might slip significantly. + +## Usage + +To run the replay, provide the path to a Zstandard-compressed Databento MBO file (the importer opens it with `from_zstd_file`): + +```bash +cargo run --example databento_replay -- --file path/to/your/data.dbn.zst +``` + +## Architecture + +- `main.rs`: Sets up the Roda engine, market data store, and the processing pipeline. +- `importer.rs`: Handles reading and decoding the Databento MBO file. +- `light_mbo_entry.rs`: Defines the compact data structure for storing MBO records in the Roda store. diff --git a/examples/databento_replay/book_level_entry.rs b/examples/databento_replay/book_level_entry.rs new file mode 100644 index 0000000..4f52ee5 --- /dev/null +++ b/examples/databento_replay/book_level_entry.rs @@ -0,0 +1,11 @@ +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct BookLevelEntry { + pub symbol: u64, // or instrument_id + pub price: i64, + pub volume: u64, // "size" is also common + pub side: u8, // raw MBO side byte: b'B'=Bid, b'A'=Ask + pub _pad: [u8; 7], +} diff --git a/examples/databento_replay/importer.rs b/examples/databento_replay/importer.rs new file mode 100644 index 0000000..f903a84 --- /dev/null +++ b/examples/databento_replay/importer.rs @@ -0,0 +1,46 @@ +use std::error::Error; +use std::path::PathBuf; +use std::time::Instant; + +use dbn::Record; +use dbn::decode::{DbnDecoder as Decoder, DecodeRecordRef}; +use dbn::enums::rtype; +use dbn::record::MboMsg; +use spdlog::prelude::*; + +// Use your specific high-level API modules +use crate::light_mbo_entry::LightMboEntry; +use roda_state::components::Appendable; +use roda_state::measure::latency_measurer::LatencyMeasurer; + +pub fn import_mbo_file( + file: PathBuf, + market_store: &mut impl Appendable, +) -> Result<(), Box> { + info!("[Writer] Starting Feed Handler for {:?}...", file); + let mut
latency_measurer = LatencyMeasurer::new(1); + let start = Instant::now(); + let mut count = 0u64; + + // 1. Setup Decoder + let mut decoder = Decoder::from_zstd_file(&file)?; + + // 3. Hot Loop + while let Some(record) = decoder.decode_record_ref()? { + let _latency_guard = latency_measurer.measure_with_guard(); + if record.header().rtype == rtype::MBO { + let msg = record.get::().unwrap(); + market_store.append(LightMboEntry::from(msg)); + count += 1; + } + } + + let duration = start.elapsed(); + info!( + "[Writer] Finished! Pushed {} updates in {:?}", + count, duration + ); + // info!("[Writer] Store size: {}", market_store.size()); + info!("[Latency/Import]{}", latency_measurer.format_stats()); + Ok(()) +} diff --git a/examples/databento_replay/light_mbo_entry.rs b/examples/databento_replay/light_mbo_entry.rs new file mode 100644 index 0000000..6b26ac2 --- /dev/null +++ b/examples/databento_replay/light_mbo_entry.rs @@ -0,0 +1,57 @@ +use bytemuck::{Pod, Zeroable}; +use dbn::record::MboMsg; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct LightMboEntry { + /// 1. The Event Timestamp (UNIX nanos). + /// Essential for detecting "Flash Crash" speed or latency. + pub ts: u64, + + /// 2. The Unique Order ID. + /// Critical for linking a 'Cancel' message back to the original 'Add'. + pub order_id: u64, + + /// 3. The Price. + /// Signed integer (fixed precision, usually 1e-9). + pub price: i64, + + /// 4. The Size (Quantity). + pub size: u32, + + // --- PACKING SECTION (32-Bit Alignment) --- + /// 5. The Instrument ID (from Header). + /// Needed if your store contains multiple symbols (e.g., MSFT and AAPL). + pub instrument_id: u32, + + // --- PACKING SECTION (8-Bit Alignment) --- + /// 6. Action (Add='A', Cancel='C', Modify='M', etc.) + /// We store as u8 to match the raw byte. + pub action: u8, + + /// 7. Side (Bid='B', Ask='A'). + pub side: u8, + + /// 8. Explicit Padding. + /// We have used: 8+8+8+4+4+1+1 = 34 bytes. 
+ /// The next multiple of 8 (for u64 alignment) is 40. + /// So we need 6 bytes of padding. + pub _pad: [u8; 6], +} + +impl From<&MboMsg> for LightMboEntry { + fn from(msg: &MboMsg) -> Self { + Self { + ts: msg.hd.ts_event, + order_id: msg.order_id, + price: msg.price, + size: msg.size, + instrument_id: msg.hd.instrument_id, + // Cast char (i8) to u8 directly. + // 'A' is 65, 'B' is 66, etc. + action: msg.action as u8, + side: msg.side as u8, + _pad: [0; 6], + } + } +} diff --git a/examples/databento_replay/main.rs b/examples/databento_replay/main.rs new file mode 100644 index 0000000..1afcdcc --- /dev/null +++ b/examples/databento_replay/main.rs @@ -0,0 +1,170 @@ +use clap::Parser; +use spdlog::kv::Key; +use spdlog::prelude::*; +use std::path::PathBuf; +use std::thread::sleep; +use std::time::Duration; +// Use your specific high-level API modules +use roda_state::JournalStoreOptions; +use roda_state::components::{Appendable, IterativeReadable}; +use roda_state::{Aggregator, DirectIndex, RodaEngine}; + +mod book_level_entry; +mod importer; +mod light_mbo_entry; + +use crate::book_level_entry::BookLevelEntry; +use importer::import_mbo_file; +use light_mbo_entry::LightMboEntry; + +#[derive(Parser)] +struct Args { + #[arg(long)] + file: PathBuf, +} + +// ============================================================================== +// 2. THE PIPELINE IMPLEMENTATION +// ============================================================================== + +fn main() -> Result<(), Box> { + let args = Args::parse(); + let mut engine = RodaEngine::new(); + engine.enable_latency_stats(true); + info!("[System] Booting Roda Data Bento Replay..."); + + // 1. 
Market Data Store (The "River" of MBO updates) + let mut market_store = engine.new_journal_store::(JournalStoreOptions { + name: "market_data", + size: 30000000 * size_of::(), + in_memory: true, + }); + + let mut market_book_store = engine.new_journal_store::(JournalStoreOptions { + name: "market_book", + size: 30000000 * size_of::(), + in_memory: true, + }); + + let market_book_store_reader = market_book_store.reader(); + let final_reader = market_book_store.reader(); + let market_book_store_index = market_book_store.direct_index(); + let market_book_store_index_reader = market_book_store_index.reader(); + let market_book_store_index_reader2 = market_book_store_index.reader(); + + let mut market_book_aggregator: Aggregator = + Aggregator::new(); + + let market_reader = market_store.reader(); + + // Prepare Book Level + engine.run_worker(move || { + if market_reader.next() { + market_book_aggregator + .from(&market_reader) + .to(&mut market_book_store) + .partition_by(|entry| (entry.instrument_id, entry.side, entry.price)) + .reduce(|_, entry, book, keep| { + book.side = entry.side; + book.price = entry.price; + book.symbol = entry.instrument_id as u64; + match entry.action { + // Add: New liquidity + b'A' => { + book.volume = book.volume.saturating_add(entry.size as u64); + } + // Cancel, Fill, or Trade: Remove liquidity + // Note: Check your feed docs. Usually 'F' is the one that reduces the book. + b'C' | b'F' | b'T' => { + book.volume = book.volume.saturating_sub(entry.size as u64); + } + // Clear Book: Wipe level + b'R' => { + book.volume = 0; + } + // Modify: This is tricky without order-id tracking. + // For a showcase, if you don't have 'old_size', ignoring it is + // safer than guessing, but your book will slowly drift. 
+ b'M' | b'N' => {} + + _ => {} + } + + if book.volume == 0 { + market_book_store_index.delete(&(book.side, book.price)); + *keep = false; + } + }); + market_book_store_index.compute(|entry| (entry.side, entry.price)); + } + }); + + // Prepare Weighted L5 and OB Imbalance + engine.run_worker(move || { + if market_book_store_reader.next() { + // 1. Get Bids: Everything <= (b'B', MAX) + // We go REV to get Highest Price first + // 1. Get Bids (Highest Bids first) + // Range: From (b'B', 0) to (b'B', i64::MAX) + let bids = market_book_store_index_reader + .range(( + std::ops::Bound::Included(&(66, 0)), + std::ops::Bound::Included(&(66, i64::MAX)), + )) + .rev() // Start at highest price + .take(5); + + // --- 2. GET ASKS (Lowest prices first) --- + // Range: From (b'A', 0) to (b'A', i64::MAX) + let asks = market_book_store_index_reader + .range(( + std::ops::Bound::Included((65, 0)), + std::ops::Bound::Included((65, i64::MAX)), + )) + .take(5); // Already starts at lowest price + + let mut bid_vol = 0.0; + let mut ask_vol = 0.0; + + // 3. Sum Bids + for (i, (_key, state)) in bids.enumerate() { + let weight = 1.0 - (i as f64 * 0.2); + bid_vol += state.volume as f64 * weight; + } + + // 4. Sum Asks + for (i, (_key, state)) in asks.enumerate() { + let weight = 1.0 - (i as f64 * 0.2); + ask_vol += state.volume as f64 * weight; + } + + // 5. 
Compute Final Imbalance + let total_vol = bid_vol + ask_vol; + if total_vol > 0.0 { + let imbalance = (bid_vol - ask_vol) / total_vol; + if imbalance > 0.95 { + println!( + "Imbalance: {:.2} (B: {:.0}, A: {:.0})", + imbalance, bid_vol, ask_vol + ); + println!("{:?}", market_book_store_index_reader.size()); + } + } + } + }); + + import_mbo_file(args.file, &mut market_store)?; + + info!("[System] Waiting for all workers to finish..."); + + engine.await_idle(Duration::from_mins(100)); + + info!( + "[System] Book Size: {}", + market_book_store_index_reader2.size() + ); + + info!("[System] Done!"); + + Ok(()) +} diff --git a/examples/sensor_test.rs b/examples/sensor_test/main.rs similarity index 92% rename from examples/sensor_test.rs rename to examples/sensor_test/main.rs index 51b178a..8a8dc06 100644 --- a/examples/sensor_test.rs +++ b/examples/sensor_test/main.rs @@ -1,5 +1,6 @@ use bytemuck::{Pod, Zeroable}; -use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions, StoreReader}; +use roda_state::JournalStoreOptions; +use roda_state::components::{Appendable, IterativeReadable}; use roda_state::{Aggregator, RodaEngine, Window}; use std::thread; use std::time::Duration; @@ -58,21 +59,21 @@ fn main() { // 1. SETUP STORES // Stores are bounded, pre-allocated buffers for your state. 
- let mut reading_store = engine.store::(StoreOptions { + let mut reading_store = engine.new_journal_store::(JournalStoreOptions { name: "readings", size: 1000, in_memory: true, }); let reading_reader = reading_store.reader(); - let mut summary_store = engine.store::(StoreOptions { + let mut summary_store = engine.new_journal_store::(JournalStoreOptions { name: "summaries", size: 100, in_memory: true, }); let summary_reader = summary_store.reader(); - let mut alert_store = engine.store::(StoreOptions { + let mut alert_store = engine.new_journal_store::(JournalStoreOptions { name: "alerts", size: 100, in_memory: true, @@ -98,7 +99,7 @@ fn main() { sensor_id: r.sensor_id, timestamp: r.timestamp / 100_000, }) - .reduce(|idx, r, s| { + .reduce(|idx, r, s, _keep| { if idx == 0 { *s = Summary { sensor_id: r.sensor_id, @@ -161,7 +162,7 @@ fn main() { ]; for r in readings { - reading_store.push(r); + reading_store.append(r); } // Give workers a moment to process diff --git a/src/aggregator.rs b/src/aggregator.rs index 262c0c3..718c1df 100644 --- a/src/aggregator.rs +++ b/src/aggregator.rs @@ -1,4 +1,4 @@ -use crate::components::{Store, StoreReader}; +use crate::components::{Appendable, IterativeReadable}; use bytemuck::Pod; use std::cell::{Cell, RefCell}; use std::collections::HashMap; @@ -36,7 +36,7 @@ impl Default impl Aggregator { - pub fn from<'a, R: StoreReader>( + pub fn from<'a, R: IterativeReadable>( &'a self, reader: &'a R, ) -> AggregatorFrom<'a, InValue, OutValue, PartitionKey, R> { @@ -48,10 +48,6 @@ impl _partition_key: PhantomData, } } - - pub fn pipe(_source: impl Store, _target: impl Store) -> Self { - Self::new() - } } pub struct AggregatorFrom< @@ -59,7 +55,7 @@ pub struct AggregatorFrom< InValue: Pod + Send, OutValue: Pod + Send, PartitionKey, - R: StoreReader, + R: IterativeReadable, > { aggregator: &'a Aggregator, reader: &'a R, @@ -68,10 +64,10 @@ pub struct AggregatorFrom< _partition_key: PhantomData, } -impl<'a, InValue: Pod + Send, OutValue: Pod + 
Send, PartitionKey, R: StoreReader> +impl<'a, InValue: Pod + Send, OutValue: Pod + Send, PartitionKey, R: IterativeReadable> AggregatorFrom<'a, InValue, OutValue, PartitionKey, R> { - pub fn to<'b, S: Store>( + pub fn to<'b, S: Appendable>( self, store: &'b mut S, ) -> AggregatorTo<'a, 'b, InValue, OutValue, PartitionKey, R, S> { @@ -92,8 +88,8 @@ pub struct AggregatorTo< InValue: Pod + Send, OutValue: Pod + Send, PartitionKey, - R: StoreReader, - S: Store, + R: IterativeReadable, + S: Appendable, > { aggregator: &'a Aggregator, reader: &'a R, @@ -109,8 +105,8 @@ impl< InValue: Pod + Send, OutValue: Pod + Send, PartitionKey, - R: StoreReader, - S: Store, + R: IterativeReadable, + S: Appendable, > AggregatorTo<'a, 'b, InValue, OutValue, PartitionKey, R, S> { pub fn partition_by( @@ -157,28 +153,33 @@ where InValue: Pod + Send, OutValue: Pod + Send, PartitionKey: Hash + Eq + Send, - R: StoreReader, - S: Store, + R: IterativeReadable, + S: Appendable, F: Fn(&InValue) -> PartitionKey, { - pub fn reduce(self, mut update_fn: impl FnMut(u64, &InValue, &mut OutValue)) { + pub fn reduce(self, mut update_fn: impl FnMut(u64, &InValue, &mut OutValue, &mut bool)) { let mut states = self.aggregator.states.borrow_mut(); - let mut last_index = self.aggregator.last_index.get(); + let mut last_idx = self.aggregator.last_index.get(); let current_index = self.reader.get_index(); - if current_index > last_index { + if current_index > last_idx { if let Some(val) = self.reader.get() { let key = (self.key_fn)(&val); let (index, mut state) = states.get(&key).cloned().unwrap_or((0, OutValue::zeroed())); - update_fn(index, &val, &mut state); - self.store.push(state); + let mut keep = true; + update_fn(index, &val, &mut state, &mut keep); + if keep { + self.store.append(state); - states.insert(key, (index + 1, state)); + states.insert(key, (index + 1, state)); + } else { + states.remove(&key); + } } - last_index = current_index; - self.aggregator.last_index.set(last_index); + last_idx = 
current_index; + self.aggregator.last_index.set(last_idx); } } } diff --git a/src/components.rs b/src/components.rs index 9f0049f..7055323 100644 --- a/src/components.rs +++ b/src/components.rs @@ -1,55 +1,18 @@ -use crate::index::DirectIndex; use bytemuck::Pod; -pub struct StoreOptions { - pub name: &'static str, - pub size: usize, - pub in_memory: bool, +/// For structures where we append data to the end (Journals, Logs). +pub trait Appendable { + fn append(&mut self, state: State); } -pub trait Engine { - fn run_worker(&mut self, runnable: impl FnMut() + Send + 'static); - fn store(&self, options: StoreOptions) -> impl Store + 'static; +/// For structures where we update a specific "address" or "slot" (State Maps, Arrays). +pub trait Settable { + fn set(&mut self, at: usize, state: State); } -pub trait Store: Send { - type Reader: StoreReader; - fn push(&mut self, state: State); - fn reader(&self) -> Self::Reader; - fn direct_index(&self) -> DirectIndex; -} - -pub trait StoreReader: Send { +/// The base for anything that can be read. 
+pub trait IterativeReadable { fn next(&self) -> bool; - fn get_index(&self) -> usize; - - fn with(&self, handler: impl FnOnce(&State) -> R) -> Option - where - Self: Sized; - fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option - where - Self: Sized; - fn with_last(&self, handler: impl FnOnce(&State) -> R) -> Option - where - Self: Sized; - fn get(&self) -> Option; - fn get_at(&self, at: usize) -> Option; - fn get_last(&self) -> Option; - fn get_window(&self, at: usize) -> Option<&[State]> - where - Self: Sized; -} - -pub trait Index { - type Reader: IndexReader; - fn compute(&self, key_fn: impl FnOnce(&State) -> Key); - fn reader(&self) -> Self::Reader; - fn iter(&self) -> impl Iterator + '_; -} - -pub trait IndexReader { - fn with(&self, key: &Key, handler: impl FnOnce(&State) -> R) -> Option; - fn get(&self, key: &Key) -> Option; - fn iter(&self) -> impl Iterator + '_; + fn get_index(&self) -> usize; } diff --git a/src/direct_index.rs b/src/direct_index.rs new file mode 100644 index 0000000..923b124 --- /dev/null +++ b/src/direct_index.rs @@ -0,0 +1,123 @@ +use crate::components::IterativeReadable; +use bytemuck::Pod; +use crossbeam_skiplist::SkipMap; +use std::ops::Bound; +use std::sync::Arc; + +pub struct DirectIndex< + Key: Clone + Ord + Send, + State: Pod + Send, + StoreReader: IterativeReadable + 'static, +> { + pub(crate) map: Arc>, + pub reader: StoreReader, +} + +pub struct DirectIndexReader { + pub(crate) map: Arc>, +} + +impl DirectIndex +where + Key: Clone + Ord + Send + 'static, + State: Pod + Send, + StoreReader: IterativeReadable + 'static, +{ + pub fn compute(&self, key_fn: impl FnOnce(&State) -> Key) { + if self.reader.next() + && let Some(state) = self.reader.get() + { + let key = key_fn(&state); + self.map.insert(key.clone(), state); + } + } + pub fn delete(&self, key: &Key) { + self.map.remove(key); + } + + pub fn reader(&self) -> DirectIndexReader { + DirectIndexReader { + map: self.map.clone(), + } + } + + pub fn 
iter(&self) -> impl Iterator + '_ { + self.map + .iter() + .map(|entry| (entry.key().clone(), *entry.value())) + } + + pub fn size(&self) -> usize { + self.map.len() + } +} + +impl DirectIndexReader +where + Key: Clone + Ord + Send + 'static, // 'static or appropriate lifetime for the Map + State: Pod + Send, +{ + pub fn with(&self, key: &Key, handler: impl FnOnce(&State) -> R) -> Option { + self.map.get(key).map(|entry| handler(entry.value())) + } + + pub fn get(&self, key: &Key) -> Option { + self.map.get(key).map(|entry| *entry.value()) + } + + pub fn iter(&self) -> impl Iterator + '_ { + self.map + .iter() + .map(|entry| (entry.key().clone(), *entry.value())) + } + + // --- New Navigation Implementations --- + + /// Replicates lower_bound: starts at the first key >= provided key. + pub fn find_ge<'a>( + &'a self, + key: &'a Key, + ) -> impl DoubleEndedIterator + 'a { + self.map + .range((Bound::Included(key), Bound::Unbounded)) + .map(move |entry| (entry.key().clone(), *entry.value())) + } + + /// Iterates, in ascending key order, over every entry whose key is <= the provided key; + /// chain .rev() to start from the highest such key (e.g. to get the Best Bid). + pub fn find_le<'a>( + &'a self, + key: &'a Key, + ) -> impl DoubleEndedIterator + 'a { + self.map + .range((Bound::Unbounded, Bound::Included(key))) + .map(move |entry| (entry.key().clone(), *entry.value())) + } + + /// Standard range scan (e.g., for getting a specific slice of the book). + pub fn range<'a, R>(&'a self, range: R) -> impl DoubleEndedIterator + 'a + where + R: std::ops::RangeBounds + 'a, + { + self.map + .range(range) + .map(move |entry| (entry.key().clone(), *entry.value())) + } + + /// Efficiency helper to jump straight to the Best Bid or Best Ask. 
+ pub fn first_after(&self, key: &Key) -> Option<(Key, State)> { + self.map + .lower_bound(Bound::Included(key)) + .map(|e| (e.key().clone(), *e.value())) + } + + pub fn last_before(&self, key: &Key) -> Option<(Key, State)> { + // NOTE(review): crossbeam-skiplist documents upper_bound as returning the highest entry below the bound (here: <= key), so the extra prev() looks like it skips one entry - verify. + let entry = self.map.upper_bound(Bound::Included(key))?; + entry.prev().map(|e| (e.key().clone(), *e.value())) + } + + pub fn size(&self) -> usize { + self.map.len() + } +} diff --git a/src/engine.rs b/src/engine.rs index 9b685d6..f259355 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,21 +1,48 @@ -use crate::components::{Engine, Store, StoreOptions}; +use crate::journal_store::{JournalStore, JournalStoreOptions}; use crate::measure::latency_measurer::LatencyMeasurer; -use crate::store::StoreJournal; +use crate::op_counter::OpCounter; use bytemuck::Pod; +use spdlog::info; use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::thread; -use std::time::Instant; +use std::thread::sleep; +use std::time::{Duration, Instant}; pub struct RodaEngine { root_path: &'static str, running: Arc, enable_latency_stats: bool, worker_handlers: Vec>, + op_counter: Arc, } -impl Engine for RodaEngine { - fn run_worker(&mut self, mut runnable: impl FnMut() + Send + 'static) { +impl RodaEngine { + pub fn new() -> Self { + Self { + root_path: "data", + running: Arc::new(AtomicBool::new(true)), + enable_latency_stats: false, + worker_handlers: vec![], + op_counter: OpCounter::new(), + } + } + + pub fn new_with_root_path(root_path: &'static str) -> Self { + Self { + root_path, + running: Arc::new(AtomicBool::new(true)), + enable_latency_stats: false, + worker_handlers: vec![], + op_counter: OpCounter::new(), + } + } + + pub fn enable_latency_stats(&mut self, enable: bool) { + self.enable_latency_stats = enable; + } + + pub fn run_worker(&mut self, mut runnable: impl FnMut() + Send + 'static) { let worker_id = self.worker_handlers.len(); let running = self.running.clone(); let 
enable_latency_stats = self.enable_latency_stats; @@ -27,7 +54,7 @@ impl Engine for RodaEngine { runnable(); measurer.measure(instant.elapsed()); } - println!("[Worker:{}]{}", worker_id, measurer.format_stats()); + info!("[Latency/Worker:{}]{}", worker_id, measurer.format_stats()); } else { while running.load(std::sync::atomic::Ordering::Relaxed) { runnable(); @@ -37,33 +64,34 @@ impl Engine for RodaEngine { self.worker_handlers.push(handler); } - fn store(&self, options: StoreOptions) -> impl Store + 'static { - StoreJournal::new(self.root_path, options, size_of::()) - } -} - -impl RodaEngine { - pub fn new() -> Self { - Self { - root_path: "data", - running: Arc::new(AtomicBool::new(true)), - enable_latency_stats: false, - worker_handlers: vec![], - } + pub fn new_journal_store( + &self, + options: JournalStoreOptions, + ) -> JournalStore { + JournalStore::new( + self.root_path, + self.op_counter.clone(), + options, + size_of::(), + ) } - pub fn new_with_root_path(root_path: &'static str) -> Self { - Self { - root_path, - running: Arc::new(AtomicBool::new(true)), - enable_latency_stats: false, - worker_handlers: vec![], + pub fn await_idle(&self, timeout: Duration) { + let start = Instant::now(); + let mut last_op_count = self.op_counter.total_op_count(); + loop { + sleep(Duration::from_millis(100)); + let new_op_count = self.op_counter.total_op_count(); + if new_op_count == last_op_count { + break; + } + if start.elapsed() > timeout { + break; + } + println!("[OPC]{}", new_op_count); + last_op_count = new_op_count; } } - - pub fn enable_latency_stats(&mut self, enable: bool) { - self.enable_latency_stats = enable; - } } impl Default for RodaEngine { @@ -71,3 +99,13 @@ impl Default for RodaEngine { Self::new() } } + +impl Drop for RodaEngine { + fn drop(&mut self) { + self.running + .store(false, std::sync::atomic::Ordering::Relaxed); + for handler in self.worker_handlers.drain(..) 
{ + handler.join().unwrap(); + } + } +} diff --git a/src/index.rs b/src/index.rs deleted file mode 100644 index a748c38..0000000 --- a/src/index.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::components::{Index, IndexReader, StoreReader}; -use bytemuck::Pod; -use crossbeam_skiplist::SkipMap; -use std::sync::Arc; - -pub struct DirectIndex> { - pub(crate) map: Arc>, - pub reader: Reader, -} - -pub struct DirectIndexReader { - pub(crate) map: Arc>, -} - -impl> Index for DirectIndex -where - Key: Pod + Ord + Send, - Value: Pod + Send, -{ - type Reader = DirectIndexReader; - fn compute(&self, key_fn: impl FnOnce(&Value) -> Key) { - if self.reader.next() - && let Some(value) = self.reader.get() - { - let key = key_fn(&value); - self.map.insert(key, value); - } - } - - fn reader(&self) -> DirectIndexReader { - DirectIndexReader { - map: self.map.clone(), - } - } - - fn iter(&self) -> impl Iterator + '_ { - self.map.iter().map(|entry| (*entry.key(), *entry.value())) - } -} - -impl IndexReader for DirectIndexReader -where - Key: Pod + Ord + Send, - Value: Pod + Send, -{ - fn with(&self, key: &Key, handler: impl FnOnce(&Value) -> R) -> Option { - self.map.get(key).map(|entry| handler(entry.value())) - } - - fn get(&self, key: &Key) -> Option { - self.map.get(key).map(|entry| *entry.value()) - } - - fn iter(&self) -> impl Iterator + '_ { - self.map.iter().map(|entry| (*entry.key(), *entry.value())) - } -} diff --git a/src/journal_store.rs b/src/journal_store.rs new file mode 100644 index 0000000..228150e --- /dev/null +++ b/src/journal_store.rs @@ -0,0 +1,185 @@ +use crate::components::{Appendable, IterativeReadable}; +use crate::op_counter::OpCounter; +use crate::storage::journal_mmap::JournalMmap; +use bytemuck::Pod; +use std::cell::Cell; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering::Relaxed; + +pub struct JournalStoreOptions { + pub name: &'static str, + pub size: usize, + pub in_memory: bool, +} + +pub 
struct JournalStore { + storage: JournalMmap, + op_counter: Arc, + _marker: std::marker::PhantomData, +} + +pub struct StoreJournalReader { + next_index: Cell, + storage: JournalMmap, + op_count: Arc, + _marker: std::marker::PhantomData, +} + +impl JournalStore { + pub fn new( + root_path: &'static str, + op_counter: Arc, + option: JournalStoreOptions, + state_size: usize, + ) -> Self { + let total_size = option.size * state_size; + let storage = if option.in_memory { + JournalMmap::new(None, total_size).unwrap() + } else { + let path: PathBuf = format!("{}/{}.store", root_path, option.name).into(); + if path.exists() { + JournalMmap::load(path).unwrap() + } else { + JournalMmap::new(Some(path), total_size).unwrap() + } + }; + + Self { + op_counter, + storage, + _marker: Default::default(), + } + } + + pub fn append(&mut self, state: State) { + let size = size_of::(); + let current_pos = self.storage.get_write_index(); + assert!( + current_pos + size <= self.storage.len(), + "Store is full. 
Capacity: {}, Current position: {}, State size: {}", + self.storage.len(), + current_pos, + size + ); + self.storage.append(&state); + } + + pub fn reader(&self) -> StoreJournalReader { + StoreJournalReader { + op_count: self.op_counter.new_counter(), + next_index: Cell::new(0), + storage: self.storage.reader(), + _marker: Default::default(), + } + } + + pub fn direct_index( + &self, + ) -> crate::direct_index::DirectIndex> { + crate::direct_index::DirectIndex { + map: std::sync::Arc::new(crossbeam_skiplist::SkipMap::new()), + reader: self.reader(), + } + } + + pub fn size(&self) -> usize { + self.storage.get_write_index() / size_of::() + } +} + +impl Appendable for JournalStore { + fn append(&mut self, state: State) { + self.append(state); + } +} + +impl StoreJournalReader { + pub fn next(&self) -> bool { + let index_to_read = self.next_index.get(); + let offset = index_to_read * size_of::(); + let write_index = self.storage.get_write_index(); + + if offset + size_of::() > write_index { + return false; + } + + self.next_index.set(index_to_read + 1); + self.op_count.fetch_add(1, Relaxed); + + true + } + + pub fn get_index(&self) -> usize { + self.next_index.get() + } + + pub fn with(&self, handler: impl FnOnce(&State) -> R) -> Option { + let next_index = self.next_index.get(); + if next_index == 0 { + return None; + } + let current_index = next_index - 1; + let offset = current_index * size_of::(); + Some(handler(self.storage.read(offset))) + } + + pub fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option { + let offset = at * size_of::(); + let write_index = self.storage.get_write_index(); + if offset + size_of::() > write_index { + return None; + } + Some(handler(self.storage.read(offset))) + } + + pub fn with_last(&self, handler: impl FnOnce(&State) -> R) -> Option { + let write_index = self.storage.get_write_index(); + if write_index < size_of::() { + return None; + } + let offset = write_index - size_of::(); + 
Some(handler(self.storage.read(offset))) + } + + pub fn get(&self) -> Option { + self.with(|s| *s) + } + + pub fn get_at(&self, at: usize) -> Option { + self.with_at(at, |s| *s) + } + + pub fn get_last(&self) -> Option { + self.with_last(|s| *s) + } + + pub fn get_window(&self, at: usize) -> Option<&[State]> { + let offset = at * size_of::(); + let write_index = self.storage.get_write_index(); + if offset + size_of::() * N > write_index { + return None; + } + + Some(self.storage.read_window::(offset)) + } + + pub fn size(&self) -> usize { + self.storage.get_write_index() / size_of::() + } +} + +impl IterativeReadable for StoreJournalReader { + fn next(&self) -> bool { + self.next() + } + + fn get(&self) -> Option { + self.get() + } + + fn get_index(&self) -> usize { + self.get_index() + } +} diff --git a/src/lib.rs b/src/lib.rs index a23200e..27afdda 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,14 +1,16 @@ pub mod aggregator; pub mod components; +pub mod direct_index; pub mod engine; -pub mod index; +pub mod journal_store; +pub mod measure; +mod op_counter; +mod slot_store; mod storage; -pub mod store; pub mod window; -pub mod measure; pub use crate::aggregator::Aggregator; +pub use crate::direct_index::{DirectIndex, DirectIndexReader}; pub use crate::engine::RodaEngine; -pub use crate::index::{DirectIndex, DirectIndexReader}; -pub use crate::store::{StoreJournal, StoreJournalReader}; +pub use crate::journal_store::{JournalStore, JournalStoreOptions, StoreJournalReader}; pub use crate::window::Window; diff --git a/src/measure/latency_measurer.rs b/src/measure/latency_measurer.rs index af9cb4e..f879685 100644 --- a/src/measure/latency_measurer.rs +++ b/src/measure/latency_measurer.rs @@ -124,8 +124,7 @@ impl LatencyMeasurer { } format!( - "count={},\tmin={},\tmax={},\tmean={},\tp50={},\tp90={},\tp99={},\tp999={},\tp9999={}", - Self::format_count(stats.count), + "\tmin={},\tmax={},\tmean={},\tp50={},\tp90={},\tp99={},\tp999={},\tp9999={}", 
Self::format_duration(stats.min as f64), Self::format_duration(stats.max as f64), Self::format_duration(stats.mean), diff --git a/src/measure/mod.rs b/src/measure/mod.rs index 69c1e95..7dfaab5 100644 --- a/src/measure/mod.rs +++ b/src/measure/mod.rs @@ -1,2 +1,2 @@ pub mod latency_measurer; -pub use latency_measurer::{LatencyMeasurer, LatencyStats}; \ No newline at end of file +pub use latency_measurer::{LatencyMeasurer, LatencyStats}; diff --git a/src/op_counter.rs b/src/op_counter.rs new file mode 100644 index 0000000..e2ecb72 --- /dev/null +++ b/src/op_counter.rs @@ -0,0 +1,31 @@ +use std::sync::atomic::AtomicU64; +use std::sync::{Arc, Mutex}; + +pub struct OpCounter { + counters: Mutex>>, +} + +impl OpCounter { + pub fn new() -> Arc { + Arc::new(Self { + counters: Mutex::new(vec![]), + }) + } + + pub fn total_op_count(&self) -> u64 { + self.counters + .lock() + .unwrap() + .iter() + .map(|c| c.load(std::sync::atomic::Ordering::Relaxed)) + .sum() + } + + pub fn new_counter(&self) -> Arc { + let counter = Arc::new(AtomicU64::new(0)); + + self.counters.lock().unwrap().push(counter.clone()); + + counter + } +} diff --git a/src/slot_store.rs b/src/slot_store.rs new file mode 100644 index 0000000..5622008 --- /dev/null +++ b/src/slot_store.rs @@ -0,0 +1,90 @@ +use crate::components::Settable; +use crate::op_counter::OpCounter; +use crate::storage::slot_mmap::SlotMmap; +// Using the new SlotMmap logic +use bytemuck::Pod; +use std::path::PathBuf; +use std::sync::atomic::AtomicU64; +use std::sync::Arc; + +pub struct SlotStore { + storage: SlotMmap, + pub op_counter: Arc, + num_slots: usize, +} + +pub struct SlotStoreReader { + storage: SlotMmap, + op_count: Arc, +} + +pub struct SlotStoreOptions { + pub name: &'static str, + pub size: usize, + pub in_memory: bool, +} + +impl SlotStore { + pub fn new( + root_path: &'static str, + op_counter: Arc, + option: SlotStoreOptions, + ) -> Self { + let storage = if option.in_memory { + SlotMmap::new(None, option.size).unwrap() + 
} else { + let path: PathBuf = format!("{}/{}.store", root_path, option.name).into(); + if path.exists() { + SlotMmap::load(path).unwrap() + } else { + SlotMmap::new(Some(path), option.size).unwrap() + } + }; + + Self { + num_slots: option.size, + op_counter, + storage, + } + } + + /// Unique to SlotStore: Write to a specific slot instead of appending + pub fn update_at(&mut self, index: usize, state: State) { + self.storage.write(index, &state); + } + + pub fn reader(&self) -> SlotStoreReader { + SlotStoreReader { + op_count: self.op_counter.new_counter(), + storage: self.storage.reader(), + } + } + + pub fn size(&self) -> usize { + self.num_slots + } +} + +impl Settable for SlotStore { + fn set(&mut self, at: usize, state: State) { + self.update_at(at, state); + } +} + +impl SlotStoreReader { + /// Performs a consistent snapshot read with retry logic + pub fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option { + // Using 100 retries to ensure we get a consistent L5 snapshot + self.storage + .read_snapshot_with_retry(at, 100) + .map(|state| handler(&state)) + } + + pub fn get_at(&self, at: usize) -> Option { + self.with_at(at, |s| *s) + } + + pub fn size(&self) -> usize { + self.storage.num_slots() + } +} diff --git a/src/storage/journal_mmap.rs b/src/storage/journal_mmap.rs new file mode 100644 index 0000000..07271bc --- /dev/null +++ b/src/storage/journal_mmap.rs @@ -0,0 +1,335 @@ +use bytemuck::{Pod, Zeroable}; +use memmap2::{MmapMut, MmapOptions}; +use std::fs::OpenOptions; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::AtomicUsize; + +pub(crate) struct JournalMmap { + _mmap: Arc, + ptr: *mut u8, + len: usize, + write_index: Arc, + read_only: bool, +} + +impl JournalMmap { + /// CREATE: Creates a brand new file, truncating any existing data. 
+ pub(crate) fn new(path: Option, total_size: usize) -> Result { + let mut mmap = if let Some(p) = &path { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(p)?; + + file.set_len(total_size as u64)?; + unsafe { MmapOptions::new().map_mut(&file)? } + } else { + MmapOptions::new().len(total_size).map_anon()? + }; + + let ptr = mmap.as_mut_ptr(); + let len = mmap.len(); + Ok(Self { + _mmap: Arc::new(mmap), + ptr, + len, + write_index: Arc::new(Default::default()), + read_only: false, + }) + } + + /// OPEN: Loads an existing file and maps its current size. + pub(crate) fn load(path: PathBuf) -> Result { + let file = OpenOptions::new().read(true).write(true).open(&path)?; + + let mut mmap = unsafe { MmapOptions::new().map_mut(&file)? }; + + let ptr = mmap.as_mut_ptr(); + let len = mmap.len(); + Ok(Self { + _mmap: Arc::new(mmap), + ptr, + len, + write_index: Arc::new(Default::default()), + read_only: false, + }) + } + + // --- Bytemuck Methods --- + + /// 1. Read (Immutable) + /// + /// Casts bytes at offset to a reference of T. + pub(crate) fn read(&self, offset: usize) -> &T { + let end = offset + size_of::(); + assert!( + end <= self.len, + "Read crosses buffer boundary - alignment issue?" + ); + bytemuck::from_bytes(&self.slice()[offset..end]) + } + + pub(crate) fn read_window(&self, offset: usize) -> &[T] { + let end = offset + size_of::() * N; + assert!( + end <= self.len, + "Read crosses buffer boundary - alignment issue?" + ); + let bytes = &self.slice()[offset..end]; + + bytemuck::cast_slice(bytes) + } + + pub(crate) fn append(&mut self, state: &T) { + let current_pos = self.write_index.load(std::sync::atomic::Ordering::Relaxed); + let size = size_of::(); + let end = current_pos + size; + + let dest_slice = self.slice_mut(); + + // Check for boundary crossing + assert!( + end <= dest_slice.len(), + "Journal is full. Cannot append more data." 
+ ); + + // Perform the write + dest_slice[current_pos..end].copy_from_slice(bytemuck::bytes_of(state)); + + self.write_index + .store(end, std::sync::atomic::Ordering::Release); + } + + fn slice(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.ptr, self.len) } + } + + fn slice_mut(&mut self) -> &mut [u8] { + assert!(!self.read_only, "Cannot mutate read-only buffer"); + unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } + } + + pub(crate) fn get_write_index(&self) -> usize { + self.write_index.load(std::sync::atomic::Ordering::Acquire) + } + + pub(crate) fn len(&self) -> usize { + self.len + } + + pub(crate) fn reader(&self) -> JournalMmap { + JournalMmap { + _mmap: self._mmap.clone(), + ptr: self.ptr, + len: self.len, + write_index: self.write_index.clone(), + read_only: true, + } + } +} + +unsafe impl Send for JournalMmap {} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread; + use std::time::Duration; + + #[test] + fn test_new_anonymous() { + let size = 1024; + let journal = JournalMmap::new(None, size).unwrap(); + assert_eq!(journal.len(), size); + assert_eq!(journal.get_write_index(), 0); + } + + #[test] + fn test_append_and_read() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + let val: u32 = 0x12345678; + journal.append(&val); + assert_eq!(journal.get_write_index(), 4); + + let read_val: u32 = *journal.read(0); + assert_eq!(read_val, val); + } + + #[test] + fn test_append_multiple() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + journal.append(&10u64); + journal.append(&20u64); + assert_eq!(journal.get_write_index(), 16); + + assert_eq!(*journal.read::(0), 10); + assert_eq!(*journal.read::(8), 20); + } + + #[test] + fn test_read_window() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + journal.append(&1u32); + journal.append(&2u32); + journal.append(&3u32); + + let window: &[u32] = journal.read_window::(0); + assert_eq!(window, &[1, 2, 3]); + } + + #[test] + 
#[should_panic(expected = "Journal is full. Cannot append more data.")] + fn test_boundary_append() { + let mut journal = JournalMmap::new(None, 4).unwrap(); + journal.append(&1u32); + journal.append(&1u8); // This should panic + } + + #[test] + #[should_panic(expected = "Read crosses buffer boundary")] + fn test_boundary_read() { + let journal = JournalMmap::new(None, 4).unwrap(); + let _: &u64 = journal.read(0); // Should panic + } + + #[test] + #[should_panic(expected = "Read crosses buffer boundary")] + fn test_boundary_read_window() { + let mut journal = JournalMmap::new(None, 8).unwrap(); + journal.append(&1u32); + journal.append(&2u32); + let _: &[u32] = journal.read_window::(0); // Should panic + } + + #[test] + fn test_reader_concurrency() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + let reader = journal.reader(); + + let handle = thread::spawn(move || { + let mut last_idx = 0; + let mut count = 0; + while count < 10 { + let current_idx = reader.get_write_index(); + if current_idx > last_idx { + let val: u32 = *reader.read(last_idx); + assert_eq!(val, count); + last_idx = current_idx; + count += 1; + } + thread::yield_now(); + } + }); + + for i in 0..10u32 { + journal.append(&i); + thread::sleep(Duration::from_millis(1)); + } + + handle.join().unwrap(); + } + + #[test] + #[should_panic(expected = "Cannot mutate read-only buffer")] + fn test_reader_cannot_append() { + let journal = JournalMmap::new(None, 1024).unwrap(); + let mut reader = journal.reader(); + reader.append(&1u32); + } + + #[test] + fn test_file_backed() { + let path = std::env::temp_dir().join(format!("test_journal_{}.mmap", std::process::id())); + if path.exists() { + let _ = std::fs::remove_file(&path); + } + + { + let mut journal = JournalMmap::new(Some(path.clone()), 1024).unwrap(); + journal.append(&123u64); + } + + { + let journal = JournalMmap::load(path.clone()).unwrap(); + assert_eq!(journal.len(), 1024); + // write_index is not persisted + 
assert_eq!(journal.get_write_index(), 0); + assert_eq!(*journal.read::(0), 123u64); + } + + let _ = std::fs::remove_file(&path); + } + + #[repr(C)] + #[derive(Copy, Clone, Debug, Pod, Zeroable, PartialEq)] + struct LargeData { + a: u64, + b: u64, + c: u64, + d: u64, + } + + #[test] + fn test_reader_no_corruption() { + let mut journal = JournalMmap::new(None, 1024 * 1024).unwrap(); + let reader = journal.reader(); + + let handle = thread::spawn(move || { + let mut last_idx = 0; + while last_idx < 1000 * size_of::() { + let current_idx = reader.get_write_index(); + while last_idx < current_idx { + let data: LargeData = *reader.read(last_idx); + // Check if data is corrupted (a, b, c, d should all be equal to the same value) + assert_eq!( + data.a, data.b, + "Data corruption detected at index {}", + last_idx + ); + assert_eq!( + data.a, data.c, + "Data corruption detected at index {}", + last_idx + ); + assert_eq!( + data.a, data.d, + "Data corruption detected at index {}", + last_idx + ); + last_idx += size_of::(); + } + thread::yield_now(); + } + }); + + for i in 0..1000u64 { + let data = LargeData { + a: i, + b: i, + c: i, + d: i, + }; + journal.append(&data); + } + + handle.join().unwrap(); + } + + #[test] + fn test_immediate_read() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + let val: u64 = 0xDEADBEEFCAFEBABE; + journal.append(&val); + + // Data should be immediately available at the expected offset + let read_val: u64 = *journal.read(0); + assert_eq!(read_val, val); + + let val2: u64 = 0x1122334455667788; + journal.append(&val2); + assert_eq!(*journal.read::(8), val2); + } +} diff --git a/src/storage/mmap_journal.rs b/src/storage/mmap_journal.rs deleted file mode 100644 index 2d17cc0..0000000 --- a/src/storage/mmap_journal.rs +++ /dev/null @@ -1,134 +0,0 @@ -use bytemuck::Pod; -use memmap2::{MmapMut, MmapOptions}; -use std::fs::OpenOptions; -use std::path::PathBuf; -use std::sync::Arc; -use std::sync::atomic::AtomicUsize; - -pub(crate) struct 
MmapJournal { - _mmap: Arc, - ptr: *mut u8, - len: usize, - write_index: Arc, - read_only: bool, -} - -impl MmapJournal { - /// CREATE: Creates a brand new file, truncating any existing data. - pub fn new(path: Option, total_size: usize) -> Result { - let mut mmap = if let Some(p) = &path { - let file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open(p)?; - - file.set_len(total_size as u64)?; - unsafe { MmapOptions::new().map_mut(&file)? } - } else { - MmapOptions::new().len(total_size).map_anon()? - }; - - let ptr = mmap.as_mut_ptr(); - let len = mmap.len(); - Ok(Self { - _mmap: Arc::new(mmap), - ptr, - len, - write_index: Arc::new(Default::default()), - read_only: false, - }) - } - - /// OPEN: Loads an existing file and maps its current size. - pub fn load(path: PathBuf) -> Result { - let file = OpenOptions::new().read(true).write(true).open(&path)?; - - let mut mmap = unsafe { MmapOptions::new().map_mut(&file)? }; - - let ptr = mmap.as_mut_ptr(); - let len = mmap.len(); - Ok(Self { - _mmap: Arc::new(mmap), - ptr, - len, - write_index: Arc::new(Default::default()), - read_only: false, - }) - } - - // --- Bytemuck Methods --- - - /// 1. Read (Immutable) - /// - /// Casts bytes at offset to a reference of T. - pub fn read(&self, offset: usize) -> &T { - let end = offset + size_of::(); - assert!( - end <= self.len, - "Read crosses buffer boundary - alignment issue?" - ); - bytemuck::from_bytes(&self.slice()[offset..end]) - } - - pub(crate) fn read_window(&self, offset: usize) -> &[T] { - let end = offset + size_of::() * N; - assert!( - end <= self.len, - "Read crosses buffer boundary - alignment issue?" 
- ); - let bytes = &self.slice()[offset..end]; - - bytemuck::cast_slice(bytes) - } - - pub fn append(&mut self, state: &T) { - let current_pos = self.write_index.load(std::sync::atomic::Ordering::Relaxed); - let size = size_of::(); - let end = current_pos + size; - - let dest_slice = self.slice_mut(); - - // Check for boundary crossing - assert!( - end <= dest_slice.len(), - "Journal is full. Cannot append more data." - ); - - // Perform the write - dest_slice[current_pos..end].copy_from_slice(bytemuck::bytes_of(state)); - - self.write_index - .store(end, std::sync::atomic::Ordering::Release); - } - - fn slice(&self) -> &[u8] { - unsafe { std::slice::from_raw_parts(self.ptr, self.len) } - } - - fn slice_mut(&mut self) -> &mut [u8] { - assert!(!self.read_only, "Cannot mutate read-only buffer"); - unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } - } - - pub(crate) fn get_write_index(&self) -> usize { - self.write_index.load(std::sync::atomic::Ordering::Acquire) - } - - pub(crate) fn len(&self) -> usize { - self.len - } - - pub(crate) fn reader(&self) -> MmapJournal { - MmapJournal { - _mmap: self._mmap.clone(), - ptr: self.ptr, - len: self.len, - write_index: self.write_index.clone(), - read_only: true, - } - } -} - -unsafe impl Send for MmapJournal {} diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c83b146..075644a 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1 +1,2 @@ -pub mod mmap_journal; +pub mod journal_mmap; +pub mod slot_mmap; diff --git a/src/storage/slot_mmap.rs b/src/storage/slot_mmap.rs new file mode 100644 index 0000000..0700482 --- /dev/null +++ b/src/storage/slot_mmap.rs @@ -0,0 +1,348 @@ +use bytemuck::Pod; +use memmap2::{MmapMut, MmapOptions}; +use std::fs::OpenOptions; +use std::hint::spin_loop; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; + +pub struct SlotMmap { + _mmap: Arc, + ptr: *mut u8, + num_slots: usize, + slot_size: usize, + read_only: bool, + _marker: 
std::marker::PhantomData, +} + +impl SlotMmap { + pub fn new(path: Option, num_slots: usize) -> Result { + // We manually calculate the slot size. + // 8 bytes for version + T + padding to reach 64-byte alignment (cache line). + let slot_size = 8 + size_of::(); + + let mut mmap = if let Some(p) = path { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(p)?; + + file.set_len((num_slots * slot_size) as u64)?; + unsafe { MmapOptions::new().map_mut(&file)? } + } else { + MmapOptions::new().len(num_slots * slot_size).map_anon()? + }; + + Ok(Self { + ptr: mmap.as_mut_ptr(), + num_slots, + slot_size, + _mmap: Arc::new(mmap), + read_only: false, + _marker: std::marker::PhantomData, + }) + } + + /// OPEN: Loads an existing file and maps its current size. + pub fn load(path: PathBuf) -> Result { + let file = OpenOptions::new().read(true).write(true).open(&path)?; + + let mmap = unsafe { MmapOptions::new().map_mut(&file)? }; + + let len = mmap.len(); + + let slot_size = 8 + size_of::(); + let num_slots = len / slot_size; + Ok(Self { + ptr: mmap.as_ptr() as *mut u8, + num_slots, + slot_size, + _mmap: Arc::new(mmap), + read_only: false, + _marker: std::marker::PhantomData, + }) + } + + /// WRITER: Updates the specific slot by index. + pub fn write(&mut self, index: usize, state: &T) { + assert!(index < self.num_slots); + let offset = index * self.slot_size; + + unsafe { + let version_ptr = self.ptr.add(offset) as *const AtomicU64; + + // 1. Increment to ODD + (*version_ptr).fetch_add(1, Ordering::Relaxed); + std::sync::atomic::fence(Ordering::SeqCst); + + // 2. Copy data + let data_ptr = self.ptr.add(offset + 8); + std::ptr::copy_nonoverlapping( + bytemuck::bytes_of(state).as_ptr(), + data_ptr, + std::mem::size_of::(), + ); + + // 3. Increment to EVEN + std::sync::atomic::fence(Ordering::SeqCst); + (*version_ptr).fetch_add(1, Ordering::Relaxed); + } + } + + /// READER: Snapshot with spin-retry logic. 
+ pub fn read_snapshot_with_retry(&self, index: usize, max_retries: usize) -> Option { + assert!(index < self.num_slots); + let offset = index * self.slot_size; + + unsafe { + let version_ptr = self.ptr.add(offset) as *const AtomicU64; + let data_ptr = self.ptr.add(offset + 8); + + for _ in 0..max_retries { + let v1 = (*version_ptr).load(Ordering::Relaxed); + std::sync::atomic::fence(Ordering::SeqCst); + + if v1 % 2 == 0 { + let mut data: T = std::mem::zeroed(); + std::ptr::copy_nonoverlapping( + data_ptr, + &mut data as *mut T as *mut u8, + std::mem::size_of::(), + ); + + std::sync::atomic::fence(Ordering::SeqCst); + let v2 = (*version_ptr).load(Ordering::Relaxed); + if v1 == v2 { + return Some(data); + } + } + spin_loop(); + } + } + None + } + + pub fn reader(&self) -> Self { + Self { + _mmap: self._mmap.clone(), + ptr: self.ptr, + num_slots: self.num_slots, + slot_size: self.slot_size, + read_only: true, + _marker: std::marker::PhantomData, + } + } + + pub fn num_slots(&self) -> usize { + self.num_slots + } +} + +unsafe impl Send for SlotMmap {} +unsafe impl Sync for SlotMmap {} + +#[cfg(test)] +mod tests { + use super::*; + use bytemuck::Zeroable; + use std::thread; + + #[repr(C)] + #[derive(Copy, Clone, Debug, Pod, Zeroable, PartialEq)] + struct TestData { + a: u64, + b: u64, + c: u64, + d: u64, + } + + #[test] + fn test_new_anonymous() { + let mut slot_mmap = SlotMmap::::new(None, 10).unwrap(); + assert_eq!(slot_mmap.num_slots(), 10); + + let data = TestData { + a: 1, + b: 2, + c: 3, + d: 4, + }; + slot_mmap.write(0, &data); + + let read_data = slot_mmap.read_snapshot_with_retry(0, 10).unwrap(); + assert_eq!(data, read_data); + } + + #[test] + fn test_file_backed() { + let path = std::env::temp_dir().join(format!("test_slots_{}.mmap", std::process::id())); + if path.exists() { + let _ = std::fs::remove_file(&path); + } + + { + let mut slot_mmap = SlotMmap::::new(Some(path.clone()), 5).unwrap(); + slot_mmap.write( + 2, + &TestData { + a: 10, + b: 20, + c: 30, 
+ d: 40, + }, + ); + } + + { + let slot_mmap = SlotMmap::::load(path.clone()).unwrap(); + assert_eq!(slot_mmap.num_slots(), 5); + let data = slot_mmap.read_snapshot_with_retry(2, 10).unwrap(); + assert_eq!( + TestData { + a: 10, + b: 20, + c: 30, + d: 40 + }, + data + ); + } + + let _ = std::fs::remove_file(&path); + } + + #[test] + #[should_panic] + fn test_boundary_write() { + let mut slot_mmap = SlotMmap::::new(None, 5).unwrap(); + slot_mmap.write( + 5, + &TestData { + a: 1, + b: 2, + c: 3, + d: 4, + }, + ); + } + + #[test] + #[should_panic] + fn test_boundary_read() { + let slot_mmap = SlotMmap::::new(None, 5).unwrap(); + slot_mmap.read_snapshot_with_retry(5, 10); + } + + #[test] + fn test_multithreaded_consistency() { + let mut slot_mmap = SlotMmap::::new(None, 1).unwrap(); + let reader = slot_mmap.reader(); + + let writer_thread = thread::spawn(move || { + for i in 0..1_000_000 { + slot_mmap.write( + 0, + &TestData { + a: i, + b: i, + c: i, + d: i, + }, + ); + } + }); + + let reader_thread = thread::spawn(move || { + let mut success_count = 0; + for _ in 0..1_000_000 { + if let Some(data) = reader.read_snapshot_with_retry(0, 100) { + success_count += 1; + assert_eq!( + data.a, data.b, + "Data corruption detected! a: {}, b: {}", + data.a, data.b + ); + assert_eq!( + data.a, data.c, + "Data corruption detected! a: {}, b: {}", + data.a, data.c + ); + assert_eq!( + data.a, data.d, + "Data corruption detected! 
a: {}, b: {}", + data.a, data.d + ); + } + } + assert!(success_count > 0, "Reader thread made no successful reads"); + }); + + writer_thread.join().unwrap(); + reader_thread.join().unwrap(); + } + + #[test] + fn test_multiple_readers_consistency() { + let mut slot_mmap = SlotMmap::::new(None, 1).unwrap(); + + let mut readers = vec![]; + for _ in 0..4 { + readers.push(slot_mmap.reader()); + } + + let writer_thread = thread::spawn(move || { + for i in 0..1_000_000 { + slot_mmap.write( + 0, + &TestData { + a: i, + b: i, + c: i, + d: i, + }, + ); + } + }); + + let mut reader_threads = vec![]; + for reader in readers { + reader_threads.push(thread::spawn(move || { + let mut success_count = 0; + for _ in 0..1_000_000 { + if let Some(data) = reader.read_snapshot_with_retry(0, 100) { + success_count += 1; + assert_eq!(data.a, data.b); + assert_eq!(data.a, data.c); + assert_eq!(data.a, data.d); + } + } + assert!(success_count > 0, "Reader thread made no successful reads"); + })); + } + + writer_thread.join().unwrap(); + for t in reader_threads { + t.join().unwrap(); + } + } + + #[test] + fn test_reader_cloning() { + let mut slot_mmap = SlotMmap::::new(None, 10).unwrap(); + let reader1 = slot_mmap.reader(); + let reader2 = reader1.reader(); + + let data = TestData { + a: 1, + b: 2, + c: 3, + d: 4, + }; + slot_mmap.write(5, &data); + + assert_eq!(reader1.read_snapshot_with_retry(5, 10), Some(data)); + assert_eq!(reader2.read_snapshot_with_retry(5, 10), Some(data)); + } +} diff --git a/src/store.rs b/src/store.rs deleted file mode 100644 index 9a7e162..0000000 --- a/src/store.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::components::{Store, StoreOptions, StoreReader}; -use crate::index::DirectIndex; -use crate::storage::mmap_journal::MmapJournal; -use bytemuck::Pod; -use std::cell::Cell; -use std::path::PathBuf; - -pub struct StoreJournal { - storage: MmapJournal, -} - -pub struct StoreJournalReader { - next_index: Cell, - storage: MmapJournal, -} - -impl StoreJournal { - pub 
fn new(root_path: &'static str, option: StoreOptions, state_size: usize) -> Self { - let total_size = option.size * state_size; - let storage = if option.in_memory { - MmapJournal::new(None, total_size).unwrap() - } else { - let path: PathBuf = format!("{}/{}.store", root_path, option.name).into(); - if path.exists() { - MmapJournal::load(path).unwrap() - } else { - MmapJournal::new(Some(path), total_size).unwrap() - } - }; - - Self { storage } - } -} - -impl Store for StoreJournal { - type Reader = StoreJournalReader; - - fn push(&mut self, state: State) { - let size = size_of::(); - let current_pos = self.storage.get_write_index(); - assert!( - current_pos + size <= self.storage.len(), - "Store is full. Capacity: {}, Current position: {}, State size: {}", - self.storage.len(), - current_pos, - size - ); - self.storage.append(&state); - } - - fn reader(&self) -> StoreJournalReader { - StoreJournalReader { - next_index: Cell::new(0), - storage: self.storage.reader(), - } - } - - fn direct_index(&self) -> DirectIndex { - DirectIndex { - map: std::sync::Arc::new(crossbeam_skiplist::SkipMap::new()), - reader: StoreJournalReader { - next_index: Cell::new(0), - storage: self.storage.reader(), - }, - } - } -} - -impl StoreReader for StoreJournalReader { - fn next(&self) -> bool { - let index_to_read = self.next_index.get(); - let offset = index_to_read * size_of::(); - let write_index = self.storage.get_write_index(); - - if offset + size_of::() > write_index { - return false; - } - - self.next_index.set(index_to_read + 1); - - true - } - - fn get_index(&self) -> usize { - self.next_index.get() - } - - fn with(&self, handler: impl FnOnce(&State) -> R) -> Option { - let next_index = self.next_index.get(); - if next_index == 0 { - return None; - } - let current_index = next_index - 1; - let offset = current_index * size_of::(); - Some(handler(self.storage.read(offset))) - } - - fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option { - let offset = at * 
size_of::(); - let write_index = self.storage.get_write_index(); - if offset + size_of::() > write_index { - return None; - } - Some(handler(self.storage.read(offset))) - } - - fn with_last(&self, handler: impl FnOnce(&State) -> R) -> Option { - let write_index = self.storage.get_write_index(); - if write_index < size_of::() { - return None; - } - let offset = write_index - size_of::(); - Some(handler(self.storage.read(offset))) - } - - fn get(&self) -> Option { - self.with(|s| *s) - } - - fn get_at(&self, at: usize) -> Option { - self.with_at(at, |s| *s) - } - - fn get_last(&self) -> Option { - self.with_last(|s| *s) - } - - fn get_window(&self, at: usize) -> Option<&[State]> { - let offset = at * size_of::(); - let write_index = self.storage.get_write_index(); - if offset + size_of::() * N > write_index { - return None; - } - - Some(self.storage.read_window::(offset)) - } -} diff --git a/src/window.rs b/src/window.rs index 165aeea..c9a602c 100644 --- a/src/window.rs +++ b/src/window.rs @@ -1,4 +1,4 @@ -use crate::components::{Store, StoreReader}; +use crate::components::{Appendable, IterativeReadable}; use bytemuck::Pod; use std::cell::{Cell, RefCell}; use std::marker::PhantomData; @@ -28,7 +28,7 @@ impl Default for Window { } impl Window { - pub fn from<'a, R: StoreReader>( + pub fn from<'a, R: IterativeReadable>( &'a self, reader: &'a R, ) -> WindowFrom<'a, InValue, OutValue, R> { @@ -40,22 +40,26 @@ impl Window { } } - pub fn pipe(_source: impl StoreReader, _target: impl Store) -> Self { + pub fn pipe( + _source: impl IterativeReadable, + _target: impl Appendable, + ) -> Self { Self::new() } } -pub struct WindowFrom<'a, InValue: Pod + Send, OutValue: Pod + Send, R: StoreReader> { +pub struct WindowFrom<'a, InValue: Pod + Send, OutValue: Pod + Send, R: IterativeReadable> +{ window: &'a Window, reader: &'a R, _in: PhantomData, _out_v: PhantomData, } -impl<'a, InValue: Pod + Send, OutValue: Pod + Send, R: StoreReader> +impl<'a, InValue: Pod + Send, OutValue: Pod 
+ Send, R: IterativeReadable> WindowFrom<'a, InValue, OutValue, R> { - pub fn to<'b, S: Store>( + pub fn to<'b, S: Appendable>( self, store: &'b mut S, ) -> WindowTo<'a, 'b, InValue, OutValue, R, S> { @@ -74,8 +78,8 @@ pub struct WindowTo< 'b, InValue: Pod + Send, OutValue: Pod + Send, - R: StoreReader, - S: Store, + R: IterativeReadable, + S: Appendable, > { window: &'a Window, reader: &'a R, @@ -88,8 +92,8 @@ impl<'a, 'b, InValue, OutValue, R, S> WindowTo<'a, 'b, InValue, OutValue, R, S> where InValue: Pod + Send, OutValue: Pod + Send, - R: StoreReader, - S: Store, + R: IterativeReadable, + S: Appendable, { pub fn reduce( &mut self, @@ -110,7 +114,7 @@ where if buffer.len() == window_size as usize && let Some(out) = update_fn(&buffer) { - self.store.push(out); + self.store.append(out); } } last_index = current_index; diff --git a/tests/aggregator_tests.rs b/tests/aggregator_tests.rs index 406c1dd..73b450f 100644 --- a/tests/aggregator_tests.rs +++ b/tests/aggregator_tests.rs @@ -1,5 +1,6 @@ use bytemuck::{Pod, Zeroable}; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::JournalStoreOptions; +use roda_state::components::{Appendable, IterativeReadable}; use roda_state::{Aggregator, RodaEngine}; #[repr(C)] @@ -29,13 +30,13 @@ pub struct GroupKey { #[test] fn test_aggregator_count_and_sum() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 1024, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 1024, in_memory: true, @@ -52,7 +53,7 @@ fn test_aggregator_count_and_sum() { .from(&source_reader) .to(&mut target) .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { + .reduce(|index, reading, stats, _keep| { stats.sensor_id = reading.sensor_id; 
stats.count = (index + 1) as u32; stats.sum += reading.value; @@ -60,12 +61,12 @@ fn test_aggregator_count_and_sum() { }); // Push readings - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 10.0, ..Default::default() }); - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 20.0, ..Default::default() @@ -83,13 +84,13 @@ fn test_aggregator_count_and_sum() { #[test] fn test_aggregator_min_max_tracking() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 1024, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 1024, in_memory: true, @@ -106,7 +107,7 @@ fn test_aggregator_min_max_tracking() { .from(&source_reader) .to(&mut target) .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { + .reduce(|index, reading, stats, _keep| { if index == 0 { stats.min = reading.value; stats.max = reading.value; @@ -119,17 +120,17 @@ fn test_aggregator_min_max_tracking() { }); // Push readings - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 10.0, ..Default::default() }); - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 20.0, ..Default::default() }); - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 5.0, ..Default::default() @@ -146,13 +147,13 @@ fn test_aggregator_min_max_tracking() { #[test] fn test_aggregator_multiple_partitions() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 1024, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = 
engine.new_journal_store::(JournalStoreOptions { name: "target", size: 1024, in_memory: true, @@ -169,24 +170,24 @@ fn test_aggregator_multiple_partitions() { .from(&source_reader) .to(&mut target) .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { + .reduce(|index, reading, stats, _keep| { stats.sensor_id = reading.sensor_id; stats.count = (index + 1) as u32; }); }); // Push readings across partitions - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 1.0, ..Default::default() }); - source.push(SensorReading { + source.append(SensorReading { sensor_id: 2, value: 2.0, ..Default::default() }); - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 3.0, ..Default::default() @@ -207,13 +208,13 @@ fn test_aggregator_multiple_partitions() { #[test] fn test_aggregator_complex_key() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 1024, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 1024, in_memory: true, @@ -233,13 +234,13 @@ fn test_aggregator_complex_key() { sensor_id: r.sensor_id, group_id: (r.value / 10.0) as u16, }) - .reduce(|index, reading, stats| { + .reduce(|index, reading, stats, _keep| { stats.sensor_id = reading.sensor_id; stats.count = (index + 1) as u32; }); }); - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 15.0, ..Default::default() @@ -255,13 +256,13 @@ fn test_aggregator_complex_key() { #[test] fn test_aggregator_reset_behavior() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 10, in_memory: true, }); - let 
mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 10, in_memory: true, @@ -278,7 +279,7 @@ fn test_aggregator_reset_behavior() { .from(&source_reader) .to(&mut target) .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { + .reduce(|index, reading, stats, _keep| { stats.sensor_id = reading.sensor_id; stats.count = (index + 1) as u32; }); @@ -286,7 +287,7 @@ fn test_aggregator_reset_behavior() { // Push several readings for sensor 1 for i in 0..5 { - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: i as f64, ..Default::default() @@ -294,7 +295,7 @@ fn test_aggregator_reset_behavior() { } // Switch to sensor 2 - source.push(SensorReading { + source.append(SensorReading { sensor_id: 2, value: 100.0, ..Default::default() @@ -315,13 +316,13 @@ fn test_aggregator_reset_behavior() { #[test] fn test_aggregator_large_index() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 1024, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 1024, in_memory: true, @@ -337,14 +338,14 @@ fn test_aggregator_large_index() { .from(&source_reader) .to(&mut target) .partition_by(|r| r.sensor_id) - .reduce(|index, _reading, stats| { + .reduce(|index, _reading, stats, _keep| { stats.count = (index + 1) as u32; }); }); // Simulate 1000 items in one partition for i in 0..1000 { - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: i as f64, ..Default::default() @@ -364,12 +365,12 @@ fn test_aggregator_large_index() { #[test] fn test_aggregator_worker_large() { let mut engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut source = 
engine.new_journal_store::(JournalStoreOptions { name: "source", size: 2000, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 2000, in_memory: true, @@ -385,7 +386,7 @@ fn test_aggregator_worker_large() { .from(&source_reader) .to(&mut target) .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { + .reduce(|index, reading, stats, _keep| { stats.sensor_id = reading.sensor_id; stats.count = (index + 1) as u32; stats.sum += reading.value; @@ -393,7 +394,7 @@ fn test_aggregator_worker_large() { }); for _ in 0..1000 { - source.push(SensorReading { + source.append(SensorReading { sensor_id: 1, value: 1.0, ..Default::default() diff --git a/tests/comprehensive_tests.rs b/tests/comprehensive_tests.rs index 772a79c..48a50a9 100644 --- a/tests/comprehensive_tests.rs +++ b/tests/comprehensive_tests.rs @@ -1,12 +1,13 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions, StoreReader}; +use roda_state::components::{Appendable, IterativeReadable}; use std::sync::{Arc, Barrier}; use std::thread; #[test] fn test_store_reader_edge_cases() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "edge_cases", size: 1024, in_memory: true, @@ -26,7 +27,7 @@ fn test_store_reader_edge_cases() { // 4. get before next() assert_eq!(reader.get(), None); - store.push(42); + store.append(42); // 5. 
get before next() but after push assert_eq!(reader.get(), None); @@ -57,14 +58,14 @@ fn test_store_reader_edge_cases() { #[test] fn test_index_reader_with_and_get() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "index_with", size: 1024, in_memory: true, }); let index = store.direct_index::(); - store.push(123); + store.append(123); index.compute(|&v| v); let reader = index.reader(); @@ -77,16 +78,16 @@ fn test_index_reader_with_and_get() { #[test] fn test_store_full_capacity() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); let num_items = 10; - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "full_capacity", size: num_items, in_memory: true, }); for i in 0..num_items { - store.push(i as u64); + store.append(i as u64); } let reader = store.reader(); @@ -97,7 +98,7 @@ fn test_store_full_capacity() { assert!(!reader.next()); // This should panic if it exceeds capacity - // However, looking at store.rs: + // However, looking at journal_store: // self.storage.append(&state); // and MmapJournal::append // Let's see what happens if we push one more. 
@@ -106,26 +107,26 @@ fn test_store_full_capacity() { #[test] #[should_panic(expected = "Store is full")] fn test_store_overflow_panic() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "overflow", size: 1, in_memory: true, }); - store.push(1); - store.push(2); // Should panic here + store.append(1); + store.append(2); // Should panic here } #[test] fn test_store_concurrent_load() { let engine = Arc::new(RodaEngine::new()); - let store_options = StoreOptions { + let store_options = JournalStoreOptions { name: "concurrent_load", size: 1024 * 1024, in_memory: true, }; - let mut store = engine.store::(store_options); + let mut store = engine.new_journal_store::(store_options); let num_readers = 4; let num_pushes = 1000; @@ -163,7 +164,7 @@ fn test_store_concurrent_load() { barrier.wait(); for i in 1..=num_pushes { - store.push(i as u32); + store.append(i as u32); } let mut total_read = 0; @@ -176,8 +177,8 @@ fn test_store_concurrent_load() { #[test] fn test_index_load_and_edge_cases() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "index_edge", size: 1024 * 1024, in_memory: true, @@ -192,7 +193,7 @@ fn test_index_load_and_edge_cases() { // 2. Load test let num_items = 1000; for i in 0..num_items { - store.push(i as u64); + store.append(i as u64); index.compute(|&v| v); } @@ -201,10 +202,10 @@ fn test_index_load_and_edge_cases() { } // 3. 
Duplicate keys (overwrites) - store.push(100); // 1001st item + store.append(100); // 1001st item index.compute(|&v| v); // index the 100th -> 100 (key 100) - store.push(10000); // 1002nd item + store.append(10000); // 1002nd item index.compute(|_v| 100); // Force key 100 to map to value 10000 assert_eq!(index_reader.get(&100), Some(10000)); } @@ -212,7 +213,7 @@ fn test_index_load_and_edge_cases() { #[test] fn test_index_concurrent_compute() { let engine = Arc::new(RodaEngine::new()); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "index_concurrent", size: 1024 * 1024, in_memory: true, @@ -222,7 +223,7 @@ fn test_index_concurrent_compute() { let num_items = 5000; for i in 0..num_items { - store.push(i as u32); + store.append(i as u32); } let num_workers = 5; @@ -262,8 +263,8 @@ fn test_index_concurrent_compute() { #[test] fn test_index_reader_concurrent_get() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "index_read_concurrent", size: 1024 * 1024, in_memory: true, @@ -272,7 +273,7 @@ fn test_index_reader_concurrent_get() { let num_items = 1000; for i in 0..num_items { - store.push(i as u32); + store.append(i as u32); index.compute(|&v| v); } diff --git a/tests/index_tests.rs b/tests/index_tests.rs index 2b11097..d09b726 100644 --- a/tests/index_tests.rs +++ b/tests/index_tests.rs @@ -1,6 +1,6 @@ use bytemuck::{Pod, Zeroable}; use roda_state::RodaEngine; -use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions}; +use roda_state::journal_store::JournalStoreOptions; use std::thread; use std::time::Duration; @@ -14,7 +14,7 @@ struct ComplexKey { #[test] fn test_index_multiple_values() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = 
engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, @@ -22,7 +22,7 @@ fn test_index_multiple_values() { let index = store.direct_index::(); for i in 0..5 { - store.push(i); + store.append(i); } // Index them all @@ -39,7 +39,7 @@ fn test_index_multiple_values() { #[test] fn test_multiple_indices_on_same_store() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, @@ -48,7 +48,7 @@ fn test_multiple_indices_on_same_store() { let index_double = store.direct_index::(); let index_triple = store.direct_index::(); - store.push(10); + store.append(10); index_double.compute(|x| x * 2); index_triple.compute(|x| x * 3); @@ -63,14 +63,14 @@ fn test_multiple_indices_on_same_store() { #[test] fn test_index_complex_key() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, }); let index = store.direct_index::(); - store.push(100); + store.append(100); index.compute(|&val| ComplexKey { id: val, category: 1, @@ -96,7 +96,7 @@ fn test_index_complex_key() { #[test] fn test_index_shallow_clone_sharing() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, @@ -105,7 +105,7 @@ fn test_index_shallow_clone_sharing() { let clone1 = index.reader(); let clone2 = index.reader(); - store.push(42); + store.append(42); index.compute(|&x| x); assert_eq!(clone1.get(&42), Some(42)); @@ -115,7 +115,7 @@ fn test_index_shallow_clone_sharing() { #[test] fn test_index_collision_overwrite() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { 
name: "test", size: 1024, in_memory: true, @@ -123,8 +123,8 @@ fn test_index_collision_overwrite() { let index = store.direct_index::(); // Both 10 and 20 will map to key 1 - store.push(10); - store.push(20); + store.append(10); + store.append(20); index.compute(|_| 1); index.compute(|_| 1); @@ -137,14 +137,14 @@ fn test_index_collision_overwrite() { #[test] fn test_index_not_found() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, }); let index = store.direct_index::(); - store.push(10); + store.append(10); index.compute(|x| x + 1); let reader = index.reader(); @@ -155,7 +155,7 @@ fn test_index_not_found() { #[test] fn test_concurrent_push_and_index() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, @@ -172,7 +172,7 @@ fn test_concurrent_push_and_index() { // Push values from another thread (main thread) for i in 0..10 { - store.push(i); + store.append(i); // Give worker some time to process thread::sleep(Duration::from_millis(1)); } @@ -188,12 +188,12 @@ fn test_concurrent_push_and_index() { #[test] fn test_run_worker_with_multiple_stores() { let mut engine = RodaEngine::new(); - let mut store_u32 = engine.store::(StoreOptions { + let mut store_u32 = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, }); - let mut store_string = engine.store::<[u8; 16]>(StoreOptions { + let mut store_string = engine.new_journal_store::<[u8; 16]>(JournalStoreOptions { name: "test", size: 1024, in_memory: true, @@ -207,13 +207,13 @@ fn test_run_worker_with_multiple_stores() { let index_string_reader = index_string.reader(); for _ in 0..10 { - store_u32.push(100); + store_u32.append(100); } let mut pushed_u32 = false; engine.run_worker(move || { if 
!pushed_u32 { - store_u32.push(100); + store_u32.append(100); pushed_u32 = true; } index_u32.compute(|&x| x); @@ -224,7 +224,7 @@ fn test_run_worker_with_multiple_stores() { if !pushed_string { let mut bytes = [0u8; 16]; bytes[..5].copy_from_slice(b"hello"); - store_string.push(bytes); + store_string.append(bytes); pushed_string = true; } index_string.compute(|s: &[u8; 16]| s.iter().take_while(|&&b| b != 0).count()); @@ -241,7 +241,7 @@ fn test_run_worker_with_multiple_stores() { #[test] fn test_multiple_workers_reading_index_only_original_computes() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, @@ -251,8 +251,8 @@ fn test_multiple_workers_reading_index_only_original_computes() { let reader1 = index.reader(); let reader2 = index.reader(); - store.push(1); - store.push(2); + store.append(1); + store.append(2); // Only the original index can compute; shallow clones are read-only engine.run_worker(move || { @@ -269,7 +269,7 @@ fn test_multiple_workers_reading_index_only_original_computes() { #[test] fn test_index_iterator() { let mut engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, in_memory: true, @@ -277,7 +277,7 @@ fn test_index_iterator() { let index = store.direct_index::(); for i in 0..5 { - store.push(i); + store.append(i); index.compute(|&x| x * 2); } @@ -292,3 +292,115 @@ fn test_index_iterator() { let items_from_index: Vec<_> = index.iter().collect(); assert_eq!(items_from_index, expected); } + +#[repr(C)] +#[derive(Clone, Copy, Pod, Zeroable, Default, Debug, PartialEq)] +struct PriceLevel { + pub price: i64, + pub volume: u64, +} + +#[test] +fn test_index_navigation() { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { + name: "test_nav", 
+ size: 1024, + in_memory: true, + }); + + let index = store.direct_index::(); + let reader = index.reader(); + + // Push some data + store.append(PriceLevel { + price: 100, + volume: 10, + }); + store.append(PriceLevel { + price: 200, + volume: 20, + }); + store.append(PriceLevel { + price: 300, + volume: 30, + }); + + // Compute index + index.compute(|p| p.price); // for 100 + index.compute(|p| p.price); // for 200 + index.compute(|p| p.price); // for 300 + + // Test find_ge + { + let key = 150; + let mut it = reader.find_ge(&key); + assert_eq!(it.next().unwrap().0, 200); + assert_eq!(it.next().unwrap().0, 300); + assert!(it.next().is_none()); + } + + // Test find_le + { + let key = 250; + let mut it = reader.find_le(&key); + assert_eq!(it.next().unwrap().0, 100); + assert_eq!(it.next().unwrap().0, 200); + assert!(it.next().is_none()); + } + + // Test range + { + let mut it = reader.range(150..250); + assert_eq!(it.next().unwrap().0, 200); + assert!(it.next().is_none()); + } +} + +#[test] +fn test_index_navigation_rev() { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { + name: "test_nav_rev", + size: 1024, + in_memory: true, + }); + + let index = store.direct_index::(); + let reader = index.reader(); + + store.append(PriceLevel { + price: 100, + volume: 10, + }); + store.append(PriceLevel { + price: 200, + volume: 20, + }); + store.append(PriceLevel { + price: 300, + volume: 30, + }); + + index.compute(|p| p.price); + index.compute(|p| p.price); + index.compute(|p| p.price); + + // Test find_ge().rev() + { + let key = 150; + let mut it = reader.find_ge(&key).rev(); + assert_eq!(it.next().unwrap().0, 300); + assert_eq!(it.next().unwrap().0, 200); + assert!(it.next().is_none()); + } + + // Test find_le().rev() + { + let key = 250; + let mut it = reader.find_le(&key).rev(); + assert_eq!(it.next().unwrap().0, 200); + assert_eq!(it.next().unwrap().0, 100); + assert!(it.next().is_none()); + } +} diff --git 
a/tests/journal_tests.rs b/tests/journal_tests.rs index c81b68b..465ccc5 100644 --- a/tests/journal_tests.rs +++ b/tests/journal_tests.rs @@ -1,33 +1,34 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::components::{Appendable, IterativeReadable}; #[test] #[should_panic(expected = "Store is full")] fn test_journal_panic_when_full() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "full_test", size: 2, // Can hold only 2 u64 in_memory: true, }); - store.push(1); - store.push(2); - store.push(3); // This should panic + store.append(1); + store.append(2); + store.append(3); // This should panic } #[test] fn test_journal_no_circularity() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_circular_test", size: 2, in_memory: true, }); let reader = store.reader(); - store.push(1); - store.push(2); + store.append(1); + store.append(2); assert_eq!(reader.get_at(0), Some(1)); assert_eq!(reader.get_at(1), Some(2)); diff --git a/tests/logic_tests.rs b/tests/logic_tests.rs index 2bdeb39..7047a3f 100644 --- a/tests/logic_tests.rs +++ b/tests/logic_tests.rs @@ -1,10 +1,11 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::components::{Appendable, IterativeReadable}; #[test] fn test_reader_next_and_with_logic() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "logic_test", size: 1024, in_memory: true, @@ -16,7 +17,7 @@ fn test_reader_next_and_with_logic() { assert!(reader.with(|&x| x).is_none()); // Push one value - store.push(100); + store.append(100); 
// next() should now be true assert!(reader.next()); @@ -29,7 +30,7 @@ fn test_reader_next_and_with_logic() { assert_eq!(reader.with(|&x| x), Some(100)); // Push another value - store.push(200); + store.append(200); // next() should be true assert!(reader.next()); @@ -40,16 +41,16 @@ fn test_reader_next_and_with_logic() { #[test] fn test_reader_get_at_and_last() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "logic_test_2", size: 1024, in_memory: true, }); let reader = store.reader(); - store.push(10); - store.push(20); - store.push(30); + store.append(10); + store.append(20); + store.append(30); assert_eq!(reader.get_at(0), Some(10)); assert_eq!(reader.get_at(1), Some(20)); diff --git a/tests/push_read_tests.rs b/tests/push_read_tests.rs index 06d5521..a1ca783 100644 --- a/tests/push_read_tests.rs +++ b/tests/push_read_tests.rs @@ -1,17 +1,18 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::components::{Appendable, IterativeReadable}; #[test] fn test_push_then_read_single() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test1", size: 1024, in_memory: true, }); let reader = store.reader(); - store.push(42); + store.append(42); let res = reader.get_window::<1>(0).unwrap(); assert_eq!(res[0], 42); @@ -19,8 +20,8 @@ fn test_push_then_read_single() { #[test] fn test_multiple_push_read_in_order() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test2", size: 1024, in_memory: true, @@ -28,7 +29,7 @@ fn test_multiple_push_read_in_order() { let reader = store.reader(); 
for v in [1u32, 2, 3, 4, 5] { - store.push(v); + store.append(v); } let res = reader.get_window::<5>(0).unwrap(); @@ -39,8 +40,8 @@ fn test_multiple_push_read_in_order() { #[test] fn test_interleaved_push_and_read() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test3", size: 1024, in_memory: true, @@ -48,10 +49,10 @@ fn test_interleaved_push_and_read() { let reader = store.reader(); // Push values; verify FIFO order via get_window - store.push(10); - store.push(20); - store.push(30); - store.push(40); + store.append(10); + store.append(20); + store.append(30); + store.append(40); let res = reader.get_window::<4>(0).unwrap(); assert_eq!(res[0], 10); @@ -62,14 +63,14 @@ fn test_interleaved_push_and_read() { #[test] fn test_stores_are_isolated_by_type() { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); - let mut u_store = engine.store::(StoreOptions { + let mut u_store = engine.new_journal_store::(JournalStoreOptions { name: "u32", size: 1024, in_memory: true, }); - let mut i_store = engine.store::(StoreOptions { + let mut i_store = engine.new_journal_store::(JournalStoreOptions { name: "i64", size: 1024, in_memory: true, @@ -77,10 +78,10 @@ fn test_stores_are_isolated_by_type() { let u_reader = u_store.reader(); let i_reader = i_store.reader(); - u_store.push(1); - i_store.push(-1); - u_store.push(2); - i_store.push(-2); + u_store.append(1); + i_store.append(-1); + u_store.append(2); + i_store.append(-2); let u_res = u_reader.get_window::<2>(0).unwrap(); let i_res = i_reader.get_window::<2>(0).unwrap(); @@ -93,18 +94,18 @@ fn test_stores_are_isolated_by_type() { #[test] fn test_push_after_partial_reads() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: 
"test4", size: 1024, in_memory: true, }); let reader = store.reader(); - store.push(100); - store.push(200); - store.push(300); - store.push(400); + store.append(100); + store.append(200); + store.append(300); + store.append(400); let res = reader.get_window::<4>(0).unwrap(); assert_eq!(res[0], 100); diff --git a/tests/store_no_alloc_tests.rs b/tests/store_no_alloc_tests.rs index ed2aad1..f9c0d65 100644 --- a/tests/store_no_alloc_tests.rs +++ b/tests/store_no_alloc_tests.rs @@ -1,6 +1,6 @@ use assert_no_alloc::*; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::journal_store::JournalStoreOptions; #[cfg(debug_assertions)] #[global_allocator] @@ -8,27 +8,27 @@ static ALLOC: AllocDisabler = AllocDisabler; #[test] fn test_store_push_no_alloc() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_push", size: 1024, in_memory: true, }); assert_no_alloc(|| { - store.push(42); + store.append(42); }); } #[test] fn test_store_reader_next_no_alloc() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_next", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); assert_no_alloc(|| { @@ -38,13 +38,13 @@ fn test_store_reader_next_no_alloc() { #[test] fn test_store_reader_get_no_alloc() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); reader.next(); @@ -55,14 +55,14 @@ fn test_store_reader_get_no_alloc() { 
#[test] fn test_store_reader_get_window_no_alloc() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_window", size: 1024, in_memory: true, }); - store.push(42); - store.push(43); + store.append(42); + store.append(43); let reader = store.reader(); assert_no_alloc(|| { @@ -74,13 +74,13 @@ fn test_store_reader_get_window_no_alloc() { #[test] fn test_store_reader_get_at_no_alloc() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get_at", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); assert_no_alloc(|| { @@ -90,13 +90,13 @@ fn test_store_reader_get_at_no_alloc() { #[test] fn test_store_reader_get_last_no_alloc() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get_last", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); assert_no_alloc(|| { @@ -106,8 +106,8 @@ fn test_store_reader_get_last_no_alloc() { #[test] fn test_store_direct_index_allocations_allowed() { - let engine = RodaEngine::new(); - let store = engine.store::(StoreOptions { + let mut engine = RodaEngine::new(); + let store = engine.new_journal_store::(JournalStoreOptions { name: "direct_index_alloc", size: 1024, in_memory: true, diff --git a/tests/window_tests.rs b/tests/window_tests.rs index c81de3d..067c73a 100644 --- a/tests/window_tests.rs +++ b/tests/window_tests.rs @@ -1,5 +1,6 @@ use bytemuck::{Pod, Zeroable}; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::JournalStoreOptions; +use 
roda_state::components::{Appendable, IterativeReadable}; use roda_state::{RodaEngine, Window}; #[repr(C)] @@ -19,12 +20,12 @@ pub struct Analysis { #[test] fn test_window_filling_and_sliding() { let mut engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 10, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 10, in_memory: true, @@ -56,7 +57,7 @@ fn test_window_filling_and_sliding() { // Push data points for i in 1..=5 { - source.push(DataPoint { value: i as f64 }); + source.append(DataPoint { value: i as f64 }); } // Give some time for the worker to process @@ -75,12 +76,12 @@ fn test_window_filling_and_sliding() { #[test] fn test_window_size_one() { let mut engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 10, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 10, in_memory: true, @@ -107,7 +108,7 @@ fn test_window_size_one() { // Push values for v in [10.0, 20.0, 30.0] { - source.push(DataPoint { value: v }); + source.append(DataPoint { value: v }); } // Give some time for the worker to process @@ -125,12 +126,12 @@ fn test_window_size_one() { #[test] fn test_window_large_sliding() { let mut engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 100, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 100, in_memory: true, @@ -164,7 +165,7 @@ fn test_window_large_sliding() { // Push values 0..11 -> expect 
3 outputs for i in 0..12 { - source.push(DataPoint { value: i as f64 }); + source.append(DataPoint { value: i as f64 }); } // Give some time for the worker to process @@ -182,12 +183,12 @@ fn test_window_large_sliding() { #[test] fn test_window_worker_large() { let mut engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 2000, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 2000, in_memory: true, @@ -219,7 +220,7 @@ fn test_window_worker_large() { }); for i in 0..1000 { - source.push(DataPoint { value: i as f64 }); + source.append(DataPoint { value: i as f64 }); } // Give some time for the worker to process @@ -233,12 +234,12 @@ fn test_window_worker_large() { #[test] fn test_window_max_value() { let mut engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 10, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { name: "target", size: 10, in_memory: true, @@ -259,7 +260,7 @@ fn test_window_max_value() { // Push values: expect maxima per 3-sized window for v in [1.0, 3.0, 2.0, 5.0, 4.0] { - source.push(DataPoint { value: v }); + source.append(DataPoint { value: v }); } // Give some time for the worker to process @@ -276,12 +277,12 @@ fn test_window_all_none_until_full() { use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; let mut engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { + let mut source = engine.new_journal_store::(JournalStoreOptions { name: "source", size: 10, in_memory: true, }); - let mut target = engine.store::(StoreOptions { + let mut target = engine.new_journal_store::(JournalStoreOptions { 
name: "target", size: 10, in_memory: true, @@ -304,7 +305,7 @@ fn test_window_all_none_until_full() { }); for i in 0..5 { - source.push(DataPoint { value: i as f64 }); + source.append(DataPoint { value: i as f64 }); } // Give some time for the worker to process From 5a4f39509e557aaac0e3ec02c87e0a085983e6ba Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sat, 14 Feb 2026 22:24:25 +0100 Subject: [PATCH 04/11] fixes --- examples/databento_replay/book_level_top.rs | 74 +++++++++++++++++++++ examples/databento_replay/main.rs | 3 + src/engine.rs | 13 ++-- src/journal_store.rs | 3 +- src/lib.rs | 2 +- 5 files changed, 85 insertions(+), 10 deletions(-) create mode 100644 examples/databento_replay/book_level_top.rs diff --git a/examples/databento_replay/book_level_top.rs b/examples/databento_replay/book_level_top.rs new file mode 100644 index 0000000..893ae22 --- /dev/null +++ b/examples/databento_replay/book_level_top.rs @@ -0,0 +1,74 @@ +use crate::book_level_entry::BookLevelEntry; +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct BookLevelTopEntry { + pub size: u64, + pub price: i64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct BookLevelTop { + pub symbol: u64, // or instrument_id + pub asks: [BookLevelTopEntry; 5], + pub bids: [BookLevelTopEntry; 5], +} + +impl BookLevelTop { + pub(crate) fn adjust(&mut self, entry: BookLevelEntry) { + let levels = match entry.side { + b'A' => &mut self.asks, + b'B' => &mut self.bids, + _ => return, + }; + + if let Some(existing_idx) = levels.iter().position(|l| l.price == entry.price) { + if entry.volume == 0 { + for i in existing_idx..4 { + levels[i] = levels[i + 1]; + } + levels[4] = BookLevelTopEntry::default(); + } else { + levels[existing_idx].size = entry.volume; + } + return; + } + + if entry.volume > 0 { + // PASS ONLY THE SLICE: This avoids borrowing 'self' again + Self::insert_if_better(entry, levels); + } + } + + // 
Removed '&mut self' and changed to a static helper + fn insert_if_better(entry: BookLevelEntry, levels: &mut [BookLevelTopEntry; 5]) { + let is_ask = entry.side == b'A'; + + let pos = levels.iter().position(|l| { + if l.price == 0 { return true; } + if is_ask { entry.price < l.price } else { entry.price > l.price } + }); + + if let Some(i) = pos { + for j in (i + 1..5).rev() { + levels[j] = levels[j - 1]; + } + levels[i] = BookLevelTopEntry { + price: entry.price, + size: entry.volume, + }; + } + } +} + +impl From for BookLevelTop { + fn from(entry: BookLevelEntry) -> Self { + Self { + symbol: entry.symbol, + asks: [BookLevelTopEntry::default(); 5], + bids: [BookLevelTopEntry::default(); 5], + } + } +} diff --git a/examples/databento_replay/main.rs b/examples/databento_replay/main.rs index 1afcdcc..15e944f 100644 --- a/examples/databento_replay/main.rs +++ b/examples/databento_replay/main.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use clap::Parser; use spdlog::kv::Key; use spdlog::prelude::*; @@ -57,8 +58,10 @@ fn main() -> Result<(), Box> { let market_reader = market_store.reader(); + let mut map: BTreeMap = BTreeMap::new(); // Prepare Book Level engine.run_worker(move || { + map.clear(); if market_reader.next() { market_book_aggregator .from(&market_reader) diff --git a/src/engine.rs b/src/engine.rs index f259355..034bf8d 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,6 +1,7 @@ use crate::journal_store::{JournalStore, JournalStoreOptions}; use crate::measure::latency_measurer::LatencyMeasurer; use crate::op_counter::OpCounter; +use crate::slot_store::{SlotStore, SlotStoreOptions}; use bytemuck::Pod; use spdlog::info; use std::sync::Arc; @@ -68,12 +69,11 @@ impl RodaEngine { &self, options: JournalStoreOptions, ) -> JournalStore { - JournalStore::new( - self.root_path, - self.op_counter.clone(), - options, - size_of::(), - ) + JournalStore::new(self.root_path, self.op_counter.clone(), options) + } + + pub fn new_slot_store(&self, options: 
SlotStoreOptions) -> SlotStore { + SlotStore::new(self.root_path, self.op_counter.clone(), options) } pub fn await_idle(&self, timeout: Duration) { @@ -88,7 +88,6 @@ impl RodaEngine { if start.elapsed() > timeout { break; } - println!("[OPC]{}", new_op_count); last_op_count = new_op_count; } } diff --git a/src/journal_store.rs b/src/journal_store.rs index 228150e..cb972a5 100644 --- a/src/journal_store.rs +++ b/src/journal_store.rs @@ -32,9 +32,8 @@ impl JournalStore { root_path: &'static str, op_counter: Arc, option: JournalStoreOptions, - state_size: usize, ) -> Self { - let total_size = option.size * state_size; + let total_size = option.size * size_of::(); let storage = if option.in_memory { JournalMmap::new(None, total_size).unwrap() } else { diff --git a/src/lib.rs b/src/lib.rs index 27afdda..d448f2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ pub mod engine; pub mod journal_store; pub mod measure; mod op_counter; -mod slot_store; +pub mod slot_store; mod storage; pub mod window; From 4881c2150a23a04f754f41e5ee5795de5f58872e Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sat, 14 Feb 2026 23:48:03 +0100 Subject: [PATCH 05/11] new multistage engine --- src/lib.rs | 4 ++ src/new_engine.rs | 119 +++++++++++++++++++++++++++++++++++++++ src/stage.rs | 139 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+) create mode 100644 src/new_engine.rs create mode 100644 src/stage.rs diff --git a/src/lib.rs b/src/lib.rs index d448f2a..3263638 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,9 +8,13 @@ mod op_counter; pub mod slot_store; mod storage; pub mod window; +pub mod stage; +pub mod new_engine; pub use crate::aggregator::Aggregator; pub use crate::direct_index::{DirectIndex, DirectIndexReader}; pub use crate::engine::RodaEngine; pub use crate::journal_store::{JournalStore, JournalStoreOptions, StoreJournalReader}; +pub use crate::stage::{OutputCollector, Stage}; pub use crate::window::Window; +pub use 
crate::new_engine::NewEngine; diff --git a/src/new_engine.rs b/src/new_engine.rs new file mode 100644 index 0000000..9169135 --- /dev/null +++ b/src/new_engine.rs @@ -0,0 +1,119 @@ +use std::sync::mpsc::{channel, Sender, Receiver}; +use std::thread; +use bytemuck::Pod; +use crate::stage::Stage; + +/// A threaded pipeline engine that grows by adding stages. +/// Each stage runs in its own thread. +pub struct NewEngine { + input_tx: Sender, + output_rx: Receiver, +} + +impl NewEngine { + /// Adds a new stage to the pipeline. + /// This method consumes the current engine and returns a new one with the updated output type. + /// A new thread is spawned to run the provided stage. + pub fn add_stage + Send + 'static>( + self, + mut stage: S, + ) -> NewEngine { + let (next_tx, next_rx) = channel(); + let current_rx = self.output_rx; + + thread::spawn(move || { + while let Ok(data) = current_rx.recv() { + stage.process(data, &mut |out: NextOut| { + let _ = next_tx.send(out); + }); + } + }); + + NewEngine { + input_tx: self.input_tx, + output_rx: next_rx, + } + } + + /// Sends data into the start of the pipeline. + pub fn send(&self, data: In) { + let _ = self.input_tx.send(data); + } + + /// Receives data from the end of the pipeline. + /// This will block until data is available or the pipeline is broken. + pub fn receive(&self) -> Option { + self.output_rx.recv().ok() + } +} + +impl NewEngine { + /// Creates a new engine with no stages. + /// Acts as a passthrough until stages are added. 
+ pub fn new() -> Self { + let (tx, rx) = channel(); + Self { + input_tx: tx, + output_rx: rx, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + #[test] + fn test_new_engine_threaded_pipeline() { + let engine = NewEngine::::new() + .add_stage(|x: u32| Some(x as u64)) + .add_stage(|x: u64| Some(x as u8)); + + engine.send(100u32); + + let result = engine.receive(); + assert_eq!(result, Some(100u8)); + } + + #[test] + fn test_new_engine_multiple_outputs() { + struct Duplicate; + impl Stage for Duplicate { + fn process(&mut self, data: u32, collector: &mut C) + where + C: crate::stage::OutputCollector, + { + collector.push(data); + collector.push(data + 1); + } + } + + let engine = NewEngine::::new() + .add_stage(Duplicate) + .add_stage(|x: u32| Some(x as u64)); + + engine.send(10u32); + + assert_eq!(engine.receive(), Some(10u64)); + assert_eq!(engine.receive(), Some(11u64)); + } + + #[test] + fn test_engine_concurrency() { + let engine = NewEngine::::new() + .add_stage(|x: u32| { + // Simulate some work + thread::sleep(Duration::from_millis(10)); + Some(x * 2) + }); + + engine.send(1); + engine.send(2); + engine.send(3); + + assert_eq!(engine.receive(), Some(2)); + assert_eq!(engine.receive(), Some(4)); + assert_eq!(engine.receive(), Some(6)); + } +} diff --git a/src/stage.rs b/src/stage.rs new file mode 100644 index 0000000..59a9adf --- /dev/null +++ b/src/stage.rs @@ -0,0 +1,139 @@ +use bytemuck::Pod; +use std::marker::PhantomData; + +pub trait Stage { + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector; +} + +pub trait OutputCollector { + fn push(&mut self, item: T); +} + +impl OutputCollector for F +where + F: FnMut(T), +{ + #[inline(always)] + fn push(&mut self, item: T) { + (self)(item); + } +} + +impl Stage for F +where + F: FnMut(In) -> Option, + In: Pod + Send, + Out: Pod + Send, +{ + #[inline(always)] + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector, + { + // 
Execute the closure and pass the result downstream + let out = (self)(data); + if let Some(out) = out { + collector.push(out); + } + } +} + +pub struct Pipeline { + s1: S1, + s2: S2, + _phantom: PhantomData<(In, Mid, Out)>, +} + +impl Stage for Pipeline +where + In: Pod + Send, + Mid: Pod + Send, + Out: Pod + Send, + S1: Stage, + S2: Stage, +{ + #[inline(always)] + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector, + { + self.s1.process(data, &mut |mid| { + self.s2.process(mid, collector); + }); + } +} + +pub trait StageExt: Stage { + #[inline(always)] + fn pipe>( + self, + s2: S2, + ) -> Pipeline + where + Self: Sized, + { + Pipeline { + s1: self, + s2, + _phantom: PhantomData, + } + } +} + +impl StageExt for S +where + In: Pod + Send, + Mid: Pod + Send, + S: Stage, +{ +} + +#[macro_export] +macro_rules! pipe { + ($s1:expr) => { $s1 }; + ($s1:expr, $($rest:expr),+ $(,)?) => { + { + use $crate::stage::StageExt; + $s1.pipe($crate::pipe!($($rest),+)) + } + }; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pipe_closures() { + let mut p = pipe![ + |x: u32| Some(x as u64), + |x: u64| Some(x as u8), + ]; + + let mut out = Vec::new(); + p.process(100u32, &mut |x: u8| out.push(x)); + assert_eq!(out, vec![100u8]); + } + + #[test] + fn test_pipe_one_to_many() { + struct Duplicate; + impl Stage for Duplicate { + fn process(&mut self, data: u64, collector: &mut C) + where + C: OutputCollector, + { + collector.push(data); + collector.push(data); + } + } + + let mut p = pipe![|x: u32| Some(x as u64), Duplicate, |x: u64| Some(x as u8),]; + + let mut out = Vec::new(); + p.process(10u32, &mut |x: u8| out.push(x)); + assert_eq!(out, vec![10u8, 10u8]); + } +} From 9a2e55ee7aae9fb0b54f0ef510eef8bb76862589 Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sun, 15 Feb 2026 01:34:51 +0100 Subject: [PATCH 06/11] new multistage engine improvements --- .../databento_replay/aggregation_stage.rs | 58 +++++ 
examples/databento_replay/analysis_stage.rs | 81 +++++++ examples/databento_replay/book_level_entry.rs | 1 + examples/databento_replay/book_level_top.rs | 3 + examples/databento_replay/imbalance_signal.rs | 11 + examples/databento_replay/main.rs | 169 +++------------ examples/sensor_test/main.rs | 176 +++++++--------- src/engine.rs | 2 +- src/lib.rs | 4 +- src/new_engine.rs | 119 ----------- src/stage_engine.rs | 198 ++++++++++++++++++ 11 files changed, 457 insertions(+), 365 deletions(-) create mode 100644 examples/databento_replay/aggregation_stage.rs create mode 100644 examples/databento_replay/analysis_stage.rs create mode 100644 examples/databento_replay/imbalance_signal.rs delete mode 100644 src/new_engine.rs create mode 100644 src/stage_engine.rs diff --git a/examples/databento_replay/aggregation_stage.rs b/examples/databento_replay/aggregation_stage.rs new file mode 100644 index 0000000..b646d2d --- /dev/null +++ b/examples/databento_replay/aggregation_stage.rs @@ -0,0 +1,58 @@ +use std::collections::HashMap; +use roda_state::stage::{Stage, OutputCollector}; +use crate::light_mbo_entry::LightMboEntry; +use crate::book_level_entry::BookLevelEntry; + +pub struct AggregationStage { + book_volumes: HashMap<(u32, u8, i64), BookLevelEntry>, +} + +impl Default for AggregationStage { + fn default() -> Self { + Self { + book_volumes: HashMap::new(), + } + } +} + +impl Stage for AggregationStage { + fn process(&mut self, entry: LightMboEntry, collector: &mut C) + where + C: OutputCollector, + { + let key = (entry.instrument_id, entry.side, entry.price); + let book = self.book_volumes.entry(key).or_insert(BookLevelEntry { + ts: entry.ts, + symbol: entry.instrument_id as u64, + price: entry.price, + volume: 0, + side: entry.side, + _pad: [0; 7], + }); + + book.ts = entry.ts; + + match entry.action { + // Add + b'A' => { + book.volume = book.volume.saturating_add(entry.size as u64); + } + // Cancel, Fill, or Trade + b'C' | b'F' | b'T' => { + book.volume = 
book.volume.saturating_sub(entry.size as u64); + } + // Clear Book + b'R' => { + book.volume = 0; + } + _ => {} + } + + // Always push the update so downstream knows about deletions/volume=0 + collector.push(*book); + + if book.volume == 0 { + self.book_volumes.remove(&key); + } + } +} diff --git a/examples/databento_replay/analysis_stage.rs b/examples/databento_replay/analysis_stage.rs new file mode 100644 index 0000000..5fcbf3d --- /dev/null +++ b/examples/databento_replay/analysis_stage.rs @@ -0,0 +1,81 @@ +use std::collections::HashMap; +use std::time::{Duration, Instant}; +use spdlog::prelude::*; +use roda_state::stage::{Stage, OutputCollector}; +use crate::book_level_entry::BookLevelEntry; +use crate::book_level_top::BookLevelTop; +use crate::imbalance_signal::ImbalanceSignal; + +pub struct AnalysisStage { + book_tops: HashMap, + last_print: Instant, + counter: u64, +} + +impl Default for AnalysisStage { + fn default() -> Self { + Self { + book_tops: HashMap::new(), + last_print: Instant::now(), + counter: 0, + } + } +} + +impl Stage for AnalysisStage { + fn process(&mut self, entry: BookLevelEntry, collector: &mut C) + where + C: OutputCollector, + { + self.counter += 1; + let book_top = self.book_tops.entry(entry.symbol).or_insert_with(|| { + let mut bt = BookLevelTop::default(); + bt.symbol = entry.symbol; + bt + }); + book_top.adjust(entry); + + let mut bid_vol = 0.0; + let mut ask_vol = 0.0; + + for (i, level) in book_top.bids.iter().enumerate() { + if level.price == 0 { break; } + let weight = 1.0 - (i as f64 * 0.2); + bid_vol += level.size as f64 * weight; + } + + for (i, level) in book_top.asks.iter().enumerate() { + if level.price == 0 { break; } + let weight = 1.0 - (i as f64 * 0.2); + ask_vol += level.size as f64 * weight; + } + + let total_vol = bid_vol + ask_vol; + if total_vol > 0.0 { + let imbalance = (bid_vol - ask_vol) / total_vol; + + // Produce the signal + collector.push(ImbalanceSignal { + ts: entry.ts, + symbol: entry.symbol, + 
imbalance, + bid_vol, + ask_vol, + }); + + if imbalance.abs() > 0.95 && self.last_print.elapsed() > Duration::from_millis(500) { + info!( + "[Sym:{}] Imbalance: {:.2} (B: {:.0}, A: {:.0})", + entry.symbol, imbalance, bid_vol, ask_vol + ); + self.last_print = Instant::now(); + } + } + } +} + +impl Drop for AnalysisStage { + fn drop(&mut self) { + info!("[System] Final Imbalance Signals processed: {}", self.counter); + } +} diff --git a/examples/databento_replay/book_level_entry.rs b/examples/databento_replay/book_level_entry.rs index 4f52ee5..bd9bff5 100644 --- a/examples/databento_replay/book_level_entry.rs +++ b/examples/databento_replay/book_level_entry.rs @@ -3,6 +3,7 @@ use bytemuck::{Pod, Zeroable}; #[repr(C)] #[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] pub struct BookLevelEntry { + pub ts: u64, pub symbol: u64, // or instrument_id pub price: i64, pub volume: u64, // "size" is also common diff --git a/examples/databento_replay/book_level_top.rs b/examples/databento_replay/book_level_top.rs index 893ae22..e6dd205 100644 --- a/examples/databento_replay/book_level_top.rs +++ b/examples/databento_replay/book_level_top.rs @@ -11,6 +11,7 @@ pub struct BookLevelTopEntry { #[repr(C)] #[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] pub struct BookLevelTop { + pub ts: u64, pub symbol: u64, // or instrument_id pub asks: [BookLevelTopEntry; 5], pub bids: [BookLevelTopEntry; 5], @@ -18,6 +19,7 @@ pub struct BookLevelTop { impl BookLevelTop { pub(crate) fn adjust(&mut self, entry: BookLevelEntry) { + self.ts = entry.ts; let levels = match entry.side { b'A' => &mut self.asks, b'B' => &mut self.bids, @@ -66,6 +68,7 @@ impl BookLevelTop { impl From for BookLevelTop { fn from(entry: BookLevelEntry) -> Self { Self { + ts: entry.ts, symbol: entry.symbol, asks: [BookLevelTopEntry::default(); 5], bids: [BookLevelTopEntry::default(); 5], diff --git a/examples/databento_replay/imbalance_signal.rs b/examples/databento_replay/imbalance_signal.rs new file mode 100644 
index 0000000..6b25c5f --- /dev/null +++ b/examples/databento_replay/imbalance_signal.rs @@ -0,0 +1,11 @@ +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct ImbalanceSignal { + pub ts: u64, + pub symbol: u64, + pub imbalance: f64, + pub bid_vol: f64, + pub ask_vol: f64, +} diff --git a/examples/databento_replay/main.rs b/examples/databento_replay/main.rs index 15e944f..c277f34 100644 --- a/examples/databento_replay/main.rs +++ b/examples/databento_replay/main.rs @@ -1,22 +1,21 @@ -use std::collections::BTreeMap; use clap::Parser; -use spdlog::kv::Key; use spdlog::prelude::*; use std::path::PathBuf; -use std::thread::sleep; use std::time::Duration; -// Use your specific high-level API modules -use roda_state::JournalStoreOptions; -use roda_state::components::{Appendable, IterativeReadable}; -use roda_state::{Aggregator, DirectIndex, RodaEngine}; + +use roda_state::StageEngine; mod book_level_entry; mod importer; mod light_mbo_entry; +mod book_level_top; +mod imbalance_signal; +mod aggregation_stage; +mod analysis_stage; -use crate::book_level_entry::BookLevelEntry; +use crate::aggregation_stage::AggregationStage; +use crate::analysis_stage::AnalysisStage; use importer::import_mbo_file; -use light_mbo_entry::LightMboEntry; #[derive(Parser)] struct Args { @@ -24,149 +23,31 @@ struct Args { file: PathBuf, } -// ============================================================================== -// 2. THE PIPELINE IMPLEMENTATION -// ============================================================================== - fn main() -> Result<(), Box> { let args = Args::parse(); - let mut engine = RodaEngine::new(); - engine.enable_latency_stats(true); - info!("[System] Booting Roda Data Bento Replay..."); - - // 1. 
Market Data Store (The "River" of MBO updates) - let mut market_store = engine.new_journal_store::(JournalStoreOptions { - name: "market_data", - size: 30000000 * size_of::(), - in_memory: true, - }); - - let mut market_book_store = engine.new_journal_store::(JournalStoreOptions { - name: "market_book", - size: 30000000 * size_of::(), - in_memory: true, - }); - - let market_book_store_reader = market_book_store.reader(); - let final_reader = market_book_store.reader(); - let market_book_store_index = market_book_store.direct_index(); - let market_book_store_index_reader = market_book_store_index.reader(); - let market_book_store_index_reader2 = market_book_store_index.reader(); - - let mut market_book_aggregator: Aggregator = - Aggregator::new(); - - let market_reader = market_store.reader(); - - let mut map: BTreeMap = BTreeMap::new(); - // Prepare Book Level - engine.run_worker(move || { - map.clear(); - if market_reader.next() { - market_book_aggregator - .from(&market_reader) - .to(&mut market_book_store) - .partition_by(|entry| (entry.instrument_id, entry.side, entry.price)) - .reduce(|_, entry, book, keep| { - book.side = entry.side; - book.price = entry.price; - book.symbol = entry.instrument_id as u64; - match entry.action { - // Add: New liquidity - b'A' => { - book.volume = book.volume.saturating_add(entry.size as u64); - } - // Cancel, Fill, or Trade: Remove liquidity - // Note: Check your feed docs. Usually 'F' is the one that reduces the book. - b'C' | b'F' | b'T' => { - book.volume = book.volume.saturating_sub(entry.size as u64); - } - // Clear Book: Wipe level - b'R' => { - book.volume = 0; - } - // Modify: This is tricky without order-id tracking. - // For a showcase, if you don't have 'old_size', ignoring it is - // safer than guessing, but your book will slowly drift. 
- b'M' | b'N' => {} + + info!("[System] Booting Roda Data Bento Replay with StageEngine..."); - _ => {} - } - - if book.volume == 0 { - market_book_store_index.delete(&(book.side, book.price)); - *keep = false; - } - }); - market_book_store_index.compute(|entry| (entry.side, entry.price)); - } - }); - - // Prepare Weighted L5 and OB Imbalance - engine.run_worker(move || { - if market_book_store_reader.next() { - // 1. Get Bids: Everything <= (b'B', MAX) - // We go REV to get Highest Price first - // 1. Get Bids (Highest Bids first) - // Range: From (b'B', 0) to (b'B', i64::MAX) - let bids = market_book_store_index_reader - .range(( - std::ops::Bound::Included(&(66, 0)), - std::ops::Bound::Included(&(66, i64::MAX)), - )) - .rev() // Start at highest price - .take(5); - - // --- 2. GET ASKS (Lowest prices first) --- - // Range: From (b'A', 0) to (b'A', i64::MAX) - let asks = market_book_store_index_reader - .range(( - std::ops::Bound::Included((65, 0)), - std::ops::Bound::Included((65, i64::MAX)), - )) - .take(5); // Already starts at lowest price - - let mut bid_vol = 0.0; - let mut ask_vol = 0.0; - - // 3. Sum Bids - for (i, (_key, state)) in bids.enumerate() { - let weight = 1.0 - (i as f64 * 0.2); - bid_vol += state.volume as f64 * weight; - } - - // 4. Sum Asks - for (i, (_key, state)) in asks.enumerate() { - let weight = 1.0 - (i as f64 * 0.2); - ask_vol += state.volume as f64 * weight; - } - - // 5. Compute Final Imbalance - let total_vol = bid_vol + ask_vol; - if total_vol > 0.0 { - let imbalance = (bid_vol - ask_vol) / total_vol; - if imbalance > 0.95 { - println!( - "Imbalance: {:.2} (B: {:.0}, A: {:.0})", - imbalance, bid_vol, ask_vol - ); - println!("{:?}", market_book_store_index_reader.size()); - } - } - } - }); + // 1. 
Initialize StageEngine with enough capacity for the input + // Using 30M as in original example + let mut engine = StageEngine::with_capacity(30_000_000); + engine.enable_latency_stats(true); - import_mbo_file(args.file, &mut market_store)?; + // 2. Add Aggregation Stage: LightMboEntry -> BookLevelEntry + let engine = engine.add_stage_with_capacity(30_000_000, AggregationStage::default()); - info!("[System] Waiting for all workers to finish..."); + // 3. Add Imbalance Analysis Stage: BookLevelEntry -> ImbalanceSignal + let mut engine = engine.add_stage_with_capacity(30_000_000, AnalysisStage::default()); - engine.await_idle(Duration::from_mins(100)); + // 4. Start importing data + // import_mbo_file expects &mut impl Appendable + // StageEngine implements it. + import_mbo_file(args.file, &mut engine)?; - info!( - "[System] Book Size: {}", - market_book_store_index_reader2.size() - ); + info!("[System] Waiting for all stages to finish processing..."); + engine.await_idle(Duration::from_secs(600)); + info!("[System] Final Imbalance Signals: {}", engine.output_size()); info!("[System] Done!"); Ok(()) diff --git a/examples/sensor_test/main.rs b/examples/sensor_test/main.rs index 8a8dc06..6f7b6b5 100644 --- a/examples/sensor_test/main.rs +++ b/examples/sensor_test/main.rs @@ -1,8 +1,7 @@ use bytemuck::{Pod, Zeroable}; -use roda_state::JournalStoreOptions; -use roda_state::components::{Appendable, IterativeReadable}; -use roda_state::{Aggregator, RodaEngine, Window}; -use std::thread; +use roda_state::StageEngine; +use roda_state::pipe; +use std::collections::HashMap; use std::time::Duration; /// Raw sensor reading @@ -55,101 +54,81 @@ pub struct Alert { } fn main() { - let mut engine = RodaEngine::new(); - - // 1. SETUP STORES - // Stores are bounded, pre-allocated buffers for your state. 
- let mut reading_store = engine.new_journal_store::(JournalStoreOptions { - name: "readings", - size: 1000, - in_memory: true, - }); - let reading_reader = reading_store.reader(); - - let mut summary_store = engine.new_journal_store::(JournalStoreOptions { - name: "summaries", - size: 100, - in_memory: true, - }); - let summary_reader = summary_store.reader(); - - let mut alert_store = engine.new_journal_store::(JournalStoreOptions { - name: "alerts", - size: 100, - in_memory: true, - }); - let alert_reader_for_print = alert_store.reader(); - - // Secondary index to look up summaries by sensor and time - let summary_index = summary_store.direct_index::(); - let summary_index_reader = summary_index.reader(); - - // 2. DEFINE PIPELINES - let summary_pipeline: Aggregator = Aggregator::new(); - let alert_pipeline: Window = Window::new(); - - // 3. WORKER: Aggregate readings into summaries - engine.run_worker(move || { - reading_reader.next(); // Wait for data - - summary_pipeline - .from(&reading_reader) - .to(&mut summary_store) - .partition_by(|r| SensorKey { + println!("Starting Sensor Multistage Pipeline with Closures (StageEngine)..."); + + // 1. Initialize StageEngine + // StageEngine starts as a passthrough for Reading + let engine = StageEngine::::with_capacity(1000); + + // 2. 
Add Aggregation Stage: Reading -> Summary + // Redesigned as a pipeline of closures + let mut summaries: HashMap = HashMap::new(); + let engine = engine.add_stage(pipe![ + move |r: Reading| { + let key = SensorKey { sensor_id: r.sensor_id, - timestamp: r.timestamp / 100_000, - }) - .reduce(|idx, r, s, _keep| { - if idx == 0 { - *s = Summary { + timestamp: (r.timestamp / 100_000) * 100_000, + }; + + let entry = summaries.entry(key); + let summary = match entry { + std::collections::hash_map::Entry::Vacant(e) => { + let s = Summary { sensor_id: r.sensor_id, min: r.value, max: r.value, avg: r.value, count: 1, - timestamp: (r.timestamp / 100_000) * 100_000, + timestamp: key.timestamp, }; - } else { + e.insert(s); + s + } + std::collections::hash_map::Entry::Occupied(mut e) => { + let s = e.get_mut(); s.min = s.min.min(r.value); s.max = s.max.max(r.value); s.avg = (s.avg * s.count as f64 + r.value) / (s.count + 1) as f64; s.count += 1; + *s } - }); - - // Update the index so summaries can be found by key - summary_index.compute(|s| SensorKey { - sensor_id: s.sensor_id, - timestamp: s.timestamp / 100_000, - }); - }); - - // 4. WORKER: Detect anomalies from summaries - engine.run_worker(move || { - summary_reader.next(); // Wait for data - - alert_pipeline - .from(&summary_reader) - .to(&mut alert_store) - .reduce(2, |window| { - let (prev, cur) = (window[0], window[1]); - + }; + Some(summary) + }, + |s: Summary| { + println!( + "AGGREGATOR: Sensor {} at {}: Avg={:.2}, Count={}", + s.sensor_id, s.timestamp, s.avg, s.count + ); + Some(s) + } + ]); + + // 3. 
Add Anomaly Detection Stage: Summary -> Alert + // Redesigned as a closure (which is also a pipeline of one) + let mut last_summaries: HashMap = HashMap::new(); + let mut engine = engine.add_stage(pipe![ + move |s: Summary| { + let prev = last_summaries.get(&s.sensor_id).copied(); + last_summaries.insert(s.sensor_id, s); + + if let Some(prev) = prev { // Alert if average value jumps by more than 50% - if cur.avg > prev.avg * 1.5 { - Some(Alert { - sensor_id: cur.sensor_id, - timestamp: cur.timestamp, + if s.avg > prev.avg * 1.5 { + return Some(Alert { + sensor_id: s.sensor_id, + timestamp: s.timestamp, severity: 1, ..Default::default() - }) - } else { - None + }); } - }); - }); + } + None + } + ]); - // 5. INGEST DATA - println!("Pushing sensor readings..."); + // 4. INGEST DATA + println!("\nPushing sensor readings..."); let readings = [ Reading::from(1, 10.0, 10_000), Reading::from(1, 12.0, 20_000), @@ -162,28 +141,27 @@ fn main() { ]; for r in readings { - reading_store.append(r); + engine.send(r); } // Give workers a moment to process - thread::sleep(Duration::from_millis(100)); - - // 6. DISPLAY RESULTS - println!("\nSummaries in Index:"); - for (_, summary) in summary_index_reader.iter() { - println!( - "Sensor {} at {}: Avg={:.2}, Count={}", - summary.sensor_id, summary.timestamp, summary.avg, summary.count - ); - } + engine.await_idle(Duration::from_millis(100)); + // 5. 
DISPLAY FINAL RESULTS println!("\nAlerts Detected:"); - while alert_reader_for_print.next() { - if let Some(alert) = alert_reader_for_print.get() { - println!( - "ALERT: Sensor {} anomaly at {}", - alert.sensor_id, alert.timestamp - ); + let total_alerts = engine.output_size(); + if total_alerts == 0 { + println!("No alerts detected."); + } else { + for _ in 0..total_alerts { + if let Some(alert) = engine.receive() { + println!( + "ALERT: Sensor {} anomaly at {}", + alert.sensor_id, alert.timestamp + ); + } } } + + println!("\nDone!"); } diff --git a/src/engine.rs b/src/engine.rs index 034bf8d..ac086c5 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -80,7 +80,7 @@ impl RodaEngine { let start = Instant::now(); let mut last_op_count = self.op_counter.total_op_count(); loop { - sleep(Duration::from_millis(100)); + sleep(Duration::from_millis(1)); let new_op_count = self.op_counter.total_op_count(); if new_op_count == last_op_count { break; diff --git a/src/lib.rs b/src/lib.rs index 3263638..ec909b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,7 @@ pub mod slot_store; mod storage; pub mod window; pub mod stage; -pub mod new_engine; +pub mod stage_engine; pub use crate::aggregator::Aggregator; pub use crate::direct_index::{DirectIndex, DirectIndexReader}; @@ -17,4 +17,4 @@ pub use crate::engine::RodaEngine; pub use crate::journal_store::{JournalStore, JournalStoreOptions, StoreJournalReader}; pub use crate::stage::{OutputCollector, Stage}; pub use crate::window::Window; -pub use crate::new_engine::NewEngine; +pub use crate::stage_engine::StageEngine; diff --git a/src/new_engine.rs b/src/new_engine.rs deleted file mode 100644 index 9169135..0000000 --- a/src/new_engine.rs +++ /dev/null @@ -1,119 +0,0 @@ -use std::sync::mpsc::{channel, Sender, Receiver}; -use std::thread; -use bytemuck::Pod; -use crate::stage::Stage; - -/// A threaded pipeline engine that grows by adding stages. -/// Each stage runs in its own thread. 
-pub struct NewEngine { - input_tx: Sender, - output_rx: Receiver, -} - -impl NewEngine { - /// Adds a new stage to the pipeline. - /// This method consumes the current engine and returns a new one with the updated output type. - /// A new thread is spawned to run the provided stage. - pub fn add_stage + Send + 'static>( - self, - mut stage: S, - ) -> NewEngine { - let (next_tx, next_rx) = channel(); - let current_rx = self.output_rx; - - thread::spawn(move || { - while let Ok(data) = current_rx.recv() { - stage.process(data, &mut |out: NextOut| { - let _ = next_tx.send(out); - }); - } - }); - - NewEngine { - input_tx: self.input_tx, - output_rx: next_rx, - } - } - - /// Sends data into the start of the pipeline. - pub fn send(&self, data: In) { - let _ = self.input_tx.send(data); - } - - /// Receives data from the end of the pipeline. - /// This will block until data is available or the pipeline is broken. - pub fn receive(&self) -> Option { - self.output_rx.recv().ok() - } -} - -impl NewEngine { - /// Creates a new engine with no stages. - /// Acts as a passthrough until stages are added. 
- pub fn new() -> Self { - let (tx, rx) = channel(); - Self { - input_tx: tx, - output_rx: rx, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::time::Duration; - - #[test] - fn test_new_engine_threaded_pipeline() { - let engine = NewEngine::::new() - .add_stage(|x: u32| Some(x as u64)) - .add_stage(|x: u64| Some(x as u8)); - - engine.send(100u32); - - let result = engine.receive(); - assert_eq!(result, Some(100u8)); - } - - #[test] - fn test_new_engine_multiple_outputs() { - struct Duplicate; - impl Stage for Duplicate { - fn process(&mut self, data: u32, collector: &mut C) - where - C: crate::stage::OutputCollector, - { - collector.push(data); - collector.push(data + 1); - } - } - - let engine = NewEngine::::new() - .add_stage(Duplicate) - .add_stage(|x: u32| Some(x as u64)); - - engine.send(10u32); - - assert_eq!(engine.receive(), Some(10u64)); - assert_eq!(engine.receive(), Some(11u64)); - } - - #[test] - fn test_engine_concurrency() { - let engine = NewEngine::::new() - .add_stage(|x: u32| { - // Simulate some work - thread::sleep(Duration::from_millis(10)); - Some(x * 2) - }); - - engine.send(1); - engine.send(2); - engine.send(3); - - assert_eq!(engine.receive(), Some(2)); - assert_eq!(engine.receive(), Some(4)); - assert_eq!(engine.receive(), Some(6)); - } -} diff --git a/src/stage_engine.rs b/src/stage_engine.rs new file mode 100644 index 0000000..0fbc2f9 --- /dev/null +++ b/src/stage_engine.rs @@ -0,0 +1,198 @@ +use crate::components::Appendable; +use crate::stage::Stage; +use crate::{JournalStore, JournalStoreOptions, RodaEngine, StoreJournalReader}; +use bytemuck::Pod; +use std::thread; +use std::time::Duration; + +/// A threaded pipeline engine that grows by adding stages. +/// Each stage runs in its own thread and communicates via JournalStore. 
+pub struct StageEngine { + engine: RodaEngine, + input_store: JournalStore, + output_reader: StoreJournalReader, + stage_count: usize, +} + +impl StageEngine { + /// Adds a new stage to the pipeline. + /// This method consumes the current engine and returns a new one with the updated output type. + /// A new thread is spawned to run the provided stage. + pub fn add_stage + Send + 'static>( + self, + stage: S, + ) -> StageEngine { + self.add_stage_with_capacity(1024, stage) + } + + /// Adds a new stage to the pipeline with a specific capacity for the output store. + pub fn add_stage_with_capacity< + NextOut: Pod + Send + 'static, + S: Stage + Send + 'static, + >( + mut self, + capacity: usize, + mut stage: S, + ) -> StageEngine { + let stage_idx = self.stage_count; + self.stage_count += 1; + + // Use a leaked string for the store name as JournalStoreOptions requires &'static str. + // In a production long-running system, we would use a more robust name management, + // but for a pipeline that lasts the lifetime of the process, this is acceptable. + let name = Box::leak(format!("stage_{}", stage_idx).into_boxed_str()); + + let mut next_store = self.engine.new_journal_store::(JournalStoreOptions { + name, + size: capacity, + in_memory: true, + }); + + let reader = self.output_reader; + let next_reader = next_store.reader(); + + self.engine.run_worker(move || { + // Process all available data + while reader.next() { + if let Some(data) = reader.get() { + stage.process(data, &mut |out: NextOut| { + next_store.append(out); + }); + } + } + // Yield to prevent 100% CPU usage when no data is available + std::thread::yield_now(); + }); + + StageEngine { + engine: self.engine, + input_store: self.input_store, + output_reader: next_reader, + stage_count: self.stage_count, + } + } + + /// Sends data into the start of the pipeline. + /// Requires &mut self because JournalStore::append requires it (Single-Writer). 
+ pub fn send(&mut self, data: In) { + self.input_store.append(data); + } + + /// Receives data from the end of the pipeline. + /// This will block/poll until data is available. + pub fn receive(&self) -> Option { + loop { + if self.output_reader.next() { + if let Some(data) = self.output_reader.get() { + return Some(data); + } + } + thread::yield_now(); + } + } + + /// Returns the number of items in the output store. + pub fn output_size(&self) -> usize { + self.output_reader.size() + } + + pub fn enable_latency_stats(&mut self, enabled: bool) { + self.engine.enable_latency_stats(enabled); + } + + /// Waits for all workers to finish processing. + pub fn await_idle(&self, timeout: Duration) { + self.engine.await_idle(timeout); + } +} + +impl Appendable for StageEngine { + fn append(&mut self, state: In) { + self.send(state); + } +} + +impl StageEngine { + /// Creates a new engine with no stages. + /// Acts as a passthrough until stages are added. + pub fn new() -> Self { + Self::with_capacity(1024) + } + + /// Creates a new engine with a specific capacity for the input store. 
+ pub fn with_capacity(capacity: usize) -> Self { + let engine = RodaEngine::new(); + let input_store = engine.new_journal_store(JournalStoreOptions { + name: "input", + size: capacity, + in_memory: true, + }); + let output_reader = input_store.reader(); + + Self { + engine, + input_store, + output_reader, + stage_count: 0, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + #[test] + fn test_new_engine_threaded_pipeline() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| Some(x as u64)) + .add_stage(|x: u64| Some(x as u8)); + + engine.send(100u32); + + let result = engine.receive(); + assert_eq!(result, Some(100u8)); + } + + #[test] + fn test_new_engine_multiple_outputs() { + struct Duplicate; + impl Stage for Duplicate { + fn process(&mut self, data: u32, collector: &mut C) + where + C: crate::stage::OutputCollector, + { + collector.push(data); + collector.push(data + 1); + } + } + + let mut engine = StageEngine::::new() + .add_stage(Duplicate) + .add_stage(|x: u32| Some(x as u64)); + + engine.send(10u32); + + assert_eq!(engine.receive(), Some(10u64)); + assert_eq!(engine.receive(), Some(11u64)); + } + + #[test] + fn test_engine_concurrency() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| { + // Simulate some work + thread::sleep(Duration::from_millis(10)); + Some(x * 2) + }); + + engine.send(1); + engine.send(2); + engine.send(3); + + assert_eq!(engine.receive(), Some(2)); + assert_eq!(engine.receive(), Some(4)); + assert_eq!(engine.receive(), Some(6)); + } +} From 11d92f117fa8aba8bf85e3a4232ea6a7a98223d8 Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sun, 15 Feb 2026 02:27:57 +0100 Subject: [PATCH 07/11] new multistage engine improvements add pipes add new example --- Cargo.toml | 4 + benches/sensor_bench.rs | 192 ++++++++++++++ .../databento_replay/aggregation_stage.rs | 6 +- examples/databento_replay/analysis_stage.rs | 25 +- examples/databento_replay/book_level_top.rs | 10 +- 
examples/databento_replay/main.rs | 10 +- examples/sensor_test/main.rs | 176 +++---------- examples/sensor_test/models.rs | 94 +++++++ examples/service_health/main.rs | 85 +++++++ examples/service_health/models.rs | 80 ++++++ src/lib.rs | 7 +- src/pipe/dedup_by.rs | 40 +++ src/pipe/delta.rs | 45 ++++ src/pipe/filter.rs | 22 ++ src/pipe/inspect.rs | 30 +++ src/pipe/map.rs | 21 ++ src/pipe/mod.rs | 15 ++ src/pipe/stateful.rs | 53 ++++ src/pipe/windowed.rs | 27 ++ src/slot_store.rs | 2 +- src/stage.rs | 10 +- src/stage_engine.rs | 41 +-- tests/stage_engine_tests.rs | 239 ++++++++++++++++++ 23 files changed, 1050 insertions(+), 184 deletions(-) create mode 100644 benches/sensor_bench.rs create mode 100644 examples/sensor_test/models.rs create mode 100644 examples/service_health/main.rs create mode 100644 examples/service_health/models.rs create mode 100644 src/pipe/dedup_by.rs create mode 100644 src/pipe/delta.rs create mode 100644 src/pipe/filter.rs create mode 100644 src/pipe/inspect.rs create mode 100644 src/pipe/map.rs create mode 100644 src/pipe/mod.rs create mode 100644 src/pipe/stateful.rs create mode 100644 src/pipe/windowed.rs create mode 100644 tests/stage_engine_tests.rs diff --git a/Cargo.toml b/Cargo.toml index 9d6b5e9..9939727 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,10 @@ harness = false name = "comprehensive_bench" harness = false +[[bench]] +name = "sensor_bench" +harness = false + [profile.profiling] inherits = "release" debug = true diff --git a/benches/sensor_bench.rs b/benches/sensor_bench.rs new file mode 100644 index 0000000..8004c42 --- /dev/null +++ b/benches/sensor_bench.rs @@ -0,0 +1,192 @@ +use bytemuck::{Pod, Zeroable}; +use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use roda_state::StageEngine; +use roda_state::pipe; +use roda_state::pipe::{delta, stateful}; +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub 
struct Reading { + pub sensor_id: u64, + pub value: f64, + pub timestamp: u64, +} + +impl Reading { + pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { + sensor_id, + value, + timestamp, + } + } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Summary { + pub sensor_id: u64, + pub min: f64, + pub max: f64, + pub avg: f64, + pub count: u64, + pub timestamp: u64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SensorKey { + pub sensor_id: u64, + pub timestamp: u64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Alert { + pub sensor_id: u64, + pub timestamp: u64, + pub severity: i32, + pub _pad0: i32, +} + +impl SensorKey { + #[inline(always)] + pub fn from_reading(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } +} + +impl Summary { + #[inline(always)] + pub fn init(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + min: r.value, + max: r.value, + avg: r.value, + count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } + + #[inline(always)] + pub fn update(&mut self, r: Reading) { + if r.value < self.min { + self.min = r.value; + } + if r.value > self.max { + self.max = r.value; + } + self.avg = (self.avg * self.count as f64 + r.value) / (self.count + 1) as f64; + self.count += 1; + } +} + +fn bench_sensor_pipeline(c: &mut Criterion) { + let num_readings = 1_000_000; + let num_sensors = 1000; + + let mut readings = Vec::with_capacity(num_readings); + for i in 0..num_readings { + let sensor_id = (i % num_sensors) as u64; + let value = if i > 0 && i % 1000 == 0 { + 50.0 + } else { + 10.0 + (i as f64 * 0.0001) + }; + readings.push(Reading::from(sensor_id, value, i as u64 * 10_000)); + } + + let mut group = c.benchmark_group("sensor_pipeline"); + group.sample_size(10); + group.measurement_time(Duration::from_secs(10)); + 
+ group.bench_function("stage_engine", |b| { + b.iter_custom(|iters| { + let mut total_duration = Duration::ZERO; + for _ in 0..iters { + let engine = StageEngine::::with_capacity(num_readings + 1000); + let mut engine = engine + .add_stage_with_capacity( + num_readings + 1000, + pipe![stateful( + |r| SensorKey::from_reading(r), + |r| Summary::init(r), + |state, r| state.update(r) + )], + ) + .add_stage_with_capacity( + num_readings + 1000, + pipe![delta( + |s: &Summary| s.sensor_id, + |curr, prev| { + if let Some(p) = prev { + if curr.avg > p.avg * 1.5 { + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); + } + } + None + } + )], + ); + + let start = Instant::now(); + for &r in &readings { + engine.send(r); + } + engine.await_idle(Duration::from_secs(5)); + total_duration += start.elapsed(); + + // Drain alerts + while let Some(alert) = engine.try_receive() { + black_box(alert); + } + } + total_duration + }); + }); + + group.bench_function("pure_rust", |b| { + b.iter(|| { + let mut summaries: HashMap = HashMap::new(); + let mut last_summaries: HashMap = HashMap::new(); + let mut alerts = Vec::new(); + + for &r in &readings { + let key = SensorKey::from_reading(&r); + let summary = summaries.entry(key).or_insert_with(|| Summary::init(&r)); + + summary.update(r); + let curr_summary = *summary; + + if let Some(prev) = last_summaries.get(&r.sensor_id) { + if curr_summary.avg > prev.avg * 1.5 { + alerts.push(Alert { + sensor_id: curr_summary.sensor_id, + timestamp: curr_summary.timestamp, + severity: 1, + ..Default::default() + }); + } + } + last_summaries.insert(r.sensor_id, curr_summary); + } + black_box(alerts); + }); + }); + + group.finish(); +} + +criterion_group!(benches, bench_sensor_pipeline); +criterion_main!(benches); diff --git a/examples/databento_replay/aggregation_stage.rs b/examples/databento_replay/aggregation_stage.rs index b646d2d..cb7aa9e 100644 --- 
a/examples/databento_replay/aggregation_stage.rs +++ b/examples/databento_replay/aggregation_stage.rs @@ -1,7 +1,7 @@ -use std::collections::HashMap; -use roda_state::stage::{Stage, OutputCollector}; -use crate::light_mbo_entry::LightMboEntry; use crate::book_level_entry::BookLevelEntry; +use crate::light_mbo_entry::LightMboEntry; +use roda_state::stage::{OutputCollector, Stage}; +use std::collections::HashMap; pub struct AggregationStage { book_volumes: HashMap<(u32, u8, i64), BookLevelEntry>, diff --git a/examples/databento_replay/analysis_stage.rs b/examples/databento_replay/analysis_stage.rs index 5fcbf3d..cd2157a 100644 --- a/examples/databento_replay/analysis_stage.rs +++ b/examples/databento_replay/analysis_stage.rs @@ -1,10 +1,10 @@ -use std::collections::HashMap; -use std::time::{Duration, Instant}; -use spdlog::prelude::*; -use roda_state::stage::{Stage, OutputCollector}; use crate::book_level_entry::BookLevelEntry; use crate::book_level_top::BookLevelTop; use crate::imbalance_signal::ImbalanceSignal; +use roda_state::stage::{OutputCollector, Stage}; +use spdlog::prelude::*; +use std::collections::HashMap; +use std::time::{Duration, Instant}; pub struct AnalysisStage { book_tops: HashMap, @@ -34,18 +34,22 @@ impl Stage for AnalysisStage { bt }); book_top.adjust(entry); - + let mut bid_vol = 0.0; let mut ask_vol = 0.0; for (i, level) in book_top.bids.iter().enumerate() { - if level.price == 0 { break; } + if level.price == 0 { + break; + } let weight = 1.0 - (i as f64 * 0.2); bid_vol += level.size as f64 * weight; } for (i, level) in book_top.asks.iter().enumerate() { - if level.price == 0 { break; } + if level.price == 0 { + break; + } let weight = 1.0 - (i as f64 * 0.2); ask_vol += level.size as f64 * weight; } @@ -53,7 +57,7 @@ impl Stage for AnalysisStage { let total_vol = bid_vol + ask_vol; if total_vol > 0.0 { let imbalance = (bid_vol - ask_vol) / total_vol; - + // Produce the signal collector.push(ImbalanceSignal { ts: entry.ts, @@ -76,6 +80,9 @@ 
impl Stage for AnalysisStage { impl Drop for AnalysisStage { fn drop(&mut self) { - info!("[System] Final Imbalance Signals processed: {}", self.counter); + info!( + "[System] Final Imbalance Signals processed: {}", + self.counter + ); } } diff --git a/examples/databento_replay/book_level_top.rs b/examples/databento_replay/book_level_top.rs index e6dd205..d7fcb2e 100644 --- a/examples/databento_replay/book_level_top.rs +++ b/examples/databento_replay/book_level_top.rs @@ -49,8 +49,14 @@ impl BookLevelTop { let is_ask = entry.side == b'A'; let pos = levels.iter().position(|l| { - if l.price == 0 { return true; } - if is_ask { entry.price < l.price } else { entry.price > l.price } + if l.price == 0 { + return true; + } + if is_ask { + entry.price < l.price + } else { + entry.price > l.price + } }); if let Some(i) = pos { diff --git a/examples/databento_replay/main.rs b/examples/databento_replay/main.rs index c277f34..366d9d9 100644 --- a/examples/databento_replay/main.rs +++ b/examples/databento_replay/main.rs @@ -5,13 +5,13 @@ use std::time::Duration; use roda_state::StageEngine; +mod aggregation_stage; +mod analysis_stage; mod book_level_entry; -mod importer; -mod light_mbo_entry; mod book_level_top; mod imbalance_signal; -mod aggregation_stage; -mod analysis_stage; +mod importer; +mod light_mbo_entry; use crate::aggregation_stage::AggregationStage; use crate::analysis_stage::AnalysisStage; @@ -25,7 +25,7 @@ struct Args { fn main() -> Result<(), Box> { let args = Args::parse(); - + info!("[System] Booting Roda Data Bento Replay with StageEngine..."); // 1. 
Initialize StageEngine with enough capacity for the input diff --git a/examples/sensor_test/main.rs b/examples/sensor_test/main.rs index 6f7b6b5..18668cc 100644 --- a/examples/sensor_test/main.rs +++ b/examples/sensor_test/main.rs @@ -1,167 +1,71 @@ +mod models; + +use crate::models::{Alert, Reading, SensorKey, Summary}; use bytemuck::{Pod, Zeroable}; use roda_state::StageEngine; use roda_state::pipe; -use std::collections::HashMap; +use roda_state::pipe::{delta, inspect, stateful}; use std::time::Duration; -/// Raw sensor reading -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct Reading { - pub sensor_id: u64, - pub value: f64, - pub timestamp: u64, -} - -impl Reading { - pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { - Self { - sensor_id, - value, - timestamp, - } - } -} - -/// Statistical summary of readings for a time window -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct Summary { - pub sensor_id: u64, - pub min: f64, - pub max: f64, - pub avg: f64, - pub count: u64, - pub timestamp: u64, -} - -/// Key used for partitioning and indexing summaries -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct SensorKey { - pub sensor_id: u64, - pub timestamp: u64, -} - -/// Alert generated when an anomaly is detected -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct Alert { - pub sensor_id: u64, - pub timestamp: u64, - pub severity: i32, - pub _pad0: i32, -} - fn main() { - println!("Starting Sensor Multistage Pipeline with Closures (StageEngine)..."); + println!("Starting Sensor Multistage Pipeline (Optimized)..."); // 1. Initialize StageEngine - // StageEngine starts as a passthrough for Reading let engine = StageEngine::::with_capacity(1000); // 2. 
Add Aggregation Stage: Reading -> Summary - // Redesigned as a pipeline of closures - let mut summaries: HashMap = HashMap::new(); - let engine = engine.add_stage(pipe![ - move |r: Reading| { - let key = SensorKey { - sensor_id: r.sensor_id, - timestamp: (r.timestamp / 100_000) * 100_000, - }; - - let entry = summaries.entry(key); - let summary = match entry { - std::collections::hash_map::Entry::Vacant(e) => { - let s = Summary { - sensor_id: r.sensor_id, - min: r.value, - max: r.value, - avg: r.value, - count: 1, - timestamp: key.timestamp, - }; - e.insert(s); - s + let mut engine = engine + .add_stage(pipe![ + // Use stateful helper to handle the HashMap and windowing logic + stateful( + |r| SensorKey::from_reading(r), + |r| Summary::init(r), + |state, r| state.update(r) + ) + ]) + .add_stage(pipe![ + // Use delta to compare current summary to previous summary for the same sensor + delta( + |s: &Summary| s.sensor_id, + |curr, prev| { + if let Some(p) = prev { + if curr.avg > p.avg * 1.5 { + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); + } + } + None } - std::collections::hash_map::Entry::Occupied(mut e) => { - let s = e.get_mut(); - s.min = s.min.min(r.value); - s.max = s.max.max(r.value); - s.avg = (s.avg * s.count as f64 + r.value) / (s.count + 1) as f64; - s.count += 1; - *s - } - }; - Some(summary) - }, - |s: Summary| { - println!( - "AGGREGATOR: Sensor {} at {}: Avg={:.2}, Count={}", - s.sensor_id, s.timestamp, s.avg, s.count - ); - Some(s) - } - ]); - - // 3. 
Add Anomaly Detection Stage: Summary -> Alert - // Redesigned as a closure (which is also a pipeline of one) - let mut last_summaries: HashMap = HashMap::new(); - let mut engine = engine.add_stage(pipe![ - move |s: Summary| { - let prev = last_summaries.get(&s.sensor_id).copied(); - last_summaries.insert(s.sensor_id, s); - - if let Some(prev) = prev { - // Alert if average value jumps by more than 50% - if s.avg > prev.avg * 1.5 { - return Some(Alert { - sensor_id: s.sensor_id, - timestamp: s.timestamp, - severity: 1, - ..Default::default() - }); - } - } - None - } - ]); + ) + ]); // 4. INGEST DATA println!("\nPushing sensor readings..."); let readings = [ Reading::from(1, 10.0, 10_000), Reading::from(1, 12.0, 20_000), - Reading::from(1, 12.0, 30_000), - Reading::from(1, 12.0, 40_000), - Reading::from(1, 20.0, 110_000), // Average jump here + Reading::from(1, 20.0, 110_000), // Average jump Reading::from(1, 22.0, 120_000), - Reading::from(1, 22.0, 220_000), - Reading::from(1, 22.0, 320_000), ]; for r in readings { engine.send(r); } - // Give workers a moment to process engine.await_idle(Duration::from_millis(100)); - // 5. DISPLAY FINAL RESULTS + // 5. 
DISPLAY RESULTS println!("\nAlerts Detected:"); - let total_alerts = engine.output_size(); - if total_alerts == 0 { - println!("No alerts detected."); - } else { - for _ in 0..total_alerts { - if let Some(alert) = engine.receive() { - println!( - "ALERT: Sensor {} anomaly at {}", - alert.sensor_id, alert.timestamp - ); - } - } + while let Some(alert) = engine.receive() { + println!( + "ALERT: Sensor {} anomaly at {}", + alert.sensor_id, alert.timestamp + ); } - + println!("\nDone!"); } diff --git a/examples/sensor_test/models.rs b/examples/sensor_test/models.rs new file mode 100644 index 0000000..d8f7b65 --- /dev/null +++ b/examples/sensor_test/models.rs @@ -0,0 +1,94 @@ +use bytemuck::{Pod, Zeroable}; + +/// Raw sensor reading +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Reading { + pub sensor_id: u64, + pub value: f64, + pub timestamp: u64, +} + +impl Reading { + pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { + sensor_id, + value, + timestamp, + } + } +} + +/// Statistical summary of readings for a time window +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Summary { + pub sensor_id: u64, + pub min: f64, + pub max: f64, + pub avg: f64, + pub count: u64, + pub timestamp: u64, +} + +/// Key used for partitioning and indexing summaries +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SensorKey { + pub sensor_id: u64, + pub timestamp: u64, +} + +/// Alert generated when an anomaly is detected +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Alert { + pub sensor_id: u64, + pub timestamp: u64, + pub severity: i32, + pub _pad0: i32, +} + +impl SensorKey { + /// Helper to create a key aligned to a 100ms (100,000 unit) window. 
+ #[inline(always)] + pub fn from_reading(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + // Aligning timestamp to the floor of the window + timestamp: (r.timestamp / 100_000) * 100_000, + } + } +} + +impl Summary { + /// Initialize a new summary bucket from the first reading encountered. + #[inline(always)] + pub fn init(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + min: r.value, + max: r.value, + avg: r.value, + count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } + + /// Update the existing summary with a new reading. + #[inline(always)] + pub fn update(&mut self, r: Reading) { + // Update Min/Max + if r.value < self.min { + self.min = r.value; + } + if r.value > self.max { + self.max = r.value; + } + + // Online Average Calculation: + // new_avg = ((old_avg * count) + new_val) / (count + 1) + self.avg = (self.avg * self.count as f64 + r.value) / (self.count + 1) as f64; + self.count += 1; + } +} diff --git a/examples/service_health/main.rs b/examples/service_health/main.rs new file mode 100644 index 0000000..65d1cab --- /dev/null +++ b/examples/service_health/main.rs @@ -0,0 +1,85 @@ +mod models; + +use models::{Alert, Reading, SensorKey, Summary}; +use roda_state::StageEngine; +use roda_state::pipe; +use roda_state::pipe::{dedup_by, delta, inspect, stateful}; +use std::time::Duration; + +fn main() { + println!("--- Starting StageEngine: Service Health Pipeline ---"); + + // 1. Initialize StageEngine (Initial entry type is Reading) + let engine = StageEngine::::with_capacity(1000); + + // 2. Add Aggregation Stage: Reading -> Summary + // We also include a deduplicator at the start to drop identical raw readings. 
+ let engine = engine.add_stage(pipe![ + dedup_by(|r: &Reading| (r.sensor_id, (r.value * 1000.0) as u64)), // Noise filter + stateful(SensorKey::from_reading, Summary::init, Summary::update), + inspect(|s: &Summary| { + println!( + "STAGE 1 [AGG]: Sensor {} Avg updated to {:.2}", + s.sensor_id, s.avg + ); + }) + ]); + + // 3. Add Anomaly Detection Stage: Summary -> Alert + // Uses Delta to compare current state with previous known state for that sensor. + let mut engine = engine.add_stage(pipe![ + delta( + |s: &Summary| s.sensor_id, + |curr, prev| { + if let Some(p) = prev { + // Logic: Alert if the average jumps by more than 50% + if curr.avg > p.avg * 1.5 { + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); + } + } + None + } + ), + // Deduplicate Alerts: Only notify if the alert is new/changed for this sensor + dedup_by(|a: &Alert| a.sensor_id), + inspect(|a: &Alert| { + println!( + "STAGE 2 [ALERT]: 🚨 Anomaly detected for Sensor {}!", + a.sensor_id + ); + }) + ]); + + // 4. Ingest Data + println!("\nIngesting readings..."); + let readings = [ + Reading::from(1, 10.0, 10_000), // Baseline + Reading::from(1, 10.0, 20_000), // Duplicate (filtered by dedup) + Reading::from(1, 11.0, 30_000), // Small change + Reading::from(1, 25.0, 110_000), // Spike -> Triggers Alert + Reading::from(2, 5.0, 10_000), // New Sensor + ]; + + for r in readings { + engine.send(r); + } + + // Give workers time to finish processing + engine.await_idle(Duration::from_millis(100)); + + // 5. 
Display Results from the end of the pipeline + println!("\n--- Final Alert Journal ---"); + while let Some(alert) = engine.try_receive() { + println!( + "Received in Main: Alert for Sensor {} at {}", + alert.sensor_id, alert.timestamp + ); + } + + println!("\nDone."); +} diff --git a/examples/service_health/models.rs b/examples/service_health/models.rs new file mode 100644 index 0000000..f5e75f1 --- /dev/null +++ b/examples/service_health/models.rs @@ -0,0 +1,80 @@ +use bytemuck::{Pod, Zeroable}; + +/// Raw sensor reading +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq)] +pub struct Reading { + pub sensor_id: u64, + pub value: f64, + pub timestamp: u64, +} + +impl Reading { + pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { sensor_id, value, timestamp } + } +} + +/// Key used for partitioning and indexing summaries (100ms buckets) +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SensorKey { + pub sensor_id: u64, + pub timestamp: u64, +} + +impl SensorKey { + #[inline(always)] + pub fn from_reading(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + // Aligns to 100,000 unit (100ms) windows + timestamp: (r.timestamp / 100_000) * 100_000, + } + } +} + +/// Statistical summary of readings for a time window +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq)] +pub struct Summary { + pub sensor_id: u64, + pub min: f64, + pub max: f64, + pub avg: f64, + pub count: u64, + pub timestamp: u64, +} + +impl Summary { + #[inline(always)] + pub fn init(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + min: r.value, + max: r.value, + avg: r.value, + count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } + + #[inline(always)] + pub fn update(&mut self, r: Reading) { + if r.value < self.min { self.min = r.value; } + if r.value > self.max { self.max = r.value; } + // Online average calculation + self.avg = 
(self.avg * self.count as f64 + r.value) / (self.count + 1) as f64; + self.count += 1; + } +} + +/// Alert generated when an anomaly is detected +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq)] +pub struct Alert { + pub sensor_id: u64, + pub timestamp: u64, + pub severity: i32, + pub _pad0: i32, +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index ec909b4..6a7d588 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,16 +5,17 @@ pub mod engine; pub mod journal_store; pub mod measure; mod op_counter; +pub mod pipe; pub mod slot_store; -mod storage; -pub mod window; pub mod stage; pub mod stage_engine; +mod storage; +pub mod window; pub use crate::aggregator::Aggregator; pub use crate::direct_index::{DirectIndex, DirectIndexReader}; pub use crate::engine::RodaEngine; pub use crate::journal_store::{JournalStore, JournalStoreOptions, StoreJournalReader}; pub use crate::stage::{OutputCollector, Stage}; -pub use crate::window::Window; pub use crate::stage_engine::StageEngine; +pub use crate::window::Window; diff --git a/src/pipe/dedup_by.rs b/src/pipe/dedup_by.rs new file mode 100644 index 0000000..e84a889 --- /dev/null +++ b/src/pipe/dedup_by.rs @@ -0,0 +1,40 @@ +use std::collections::HashMap; + +/// Only emits the event if the value associated with the key has changed. 
+pub fn dedup_by(mut key_fn: impl FnMut(&T) -> K) -> impl FnMut(T) -> Option +where + K: std::hash::Hash + Eq, + T: bytemuck::Pod + Send + Copy + PartialEq, +{ + let mut last_values: HashMap = HashMap::new(); + move |curr| { + let key = key_fn(&curr); + let prev = last_values.get(&key); + + if let Some(p) = prev { + if *p == curr { + // Value hasn't changed; suppress the event + return None; + } + } + + // Value changed or is new; update cache and emit + last_values.insert(key, curr); + Some(curr) + } +} + +#[cfg(test)] +mod dedup_tests { + use super::*; + + #[test] + fn test_dedup_logic() { + let mut pipe = dedup_by(|_: &i32| 0); // Use a constant key for global consecutive dedup + + assert_eq!(pipe(10), Some(10)); // First time: pass + assert_eq!(pipe(10), None); // Same value: drop + assert_eq!(pipe(20), Some(20)); // New value: pass + assert_eq!(pipe(10), Some(10)); // Changed back: pass + } +} diff --git a/src/pipe/delta.rs b/src/pipe/delta.rs new file mode 100644 index 0000000..2c22246 --- /dev/null +++ b/src/pipe/delta.rs @@ -0,0 +1,45 @@ +use std::collections::HashMap; + +/// Compares current item with the previous item of the same key. 
+pub fn delta( + mut key_fn: impl FnMut(&T) -> K, + mut logic: impl FnMut(T, Option) -> Option, +) -> impl FnMut(T) -> Option +where + K: std::hash::Hash + Eq, + T: bytemuck::Pod + Send + Copy, + Out: bytemuck::Pod + Send, +{ + let mut last_values: HashMap = HashMap::new(); + move |curr| { + let key = key_fn(&curr); + let prev = last_values.get(&key).copied(); + last_values.insert(key, curr); + logic(curr, prev) + } +} + +#[repr(C)] +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable, Debug, PartialEq)] +struct Metric { + pub id: u64, + pub val: f64, +} + +#[test] +fn test_delta_logic() { + // Return u8 (1 for alert, 0 for none) to satisfy Pod + let mut pipe = delta( + |m: &Metric| m.id, + |curr, prev| match prev { + Some(p) if curr.val >= p.val + 5.0 => Some(1u8), + _ => Some(0u8), + }, + ); + + let m1 = Metric { id: 1, val: 10.0 }; + let m2 = Metric { id: 1, val: 17.0 }; + + assert_eq!(pipe(m1), Some(0u8)); + assert_eq!(pipe(m2), Some(1u8)); // Alert triggered +} diff --git a/src/pipe/filter.rs b/src/pipe/filter.rs new file mode 100644 index 0000000..e8087da --- /dev/null +++ b/src/pipe/filter.rs @@ -0,0 +1,22 @@ +/// Only passes items that satisfy the predicate. +pub fn filter(mut predicate: impl FnMut(&T) -> bool) -> impl FnMut(T) -> Option +where + T: bytemuck::Pod + Send, +{ + move |item| { + if predicate(&item) { Some(item) } else { None } + } +} + +#[cfg(test)] +mod filter_tests { + use super::*; + + #[test] + fn test_filter_logic() { + let mut pipe = filter(|x: &i32| *x > 0); + + assert_eq!(pipe(10), Some(10)); + assert_eq!(pipe(-5), None); + } +} diff --git a/src/pipe/inspect.rs b/src/pipe/inspect.rs new file mode 100644 index 0000000..8850612 --- /dev/null +++ b/src/pipe/inspect.rs @@ -0,0 +1,30 @@ +/// Passes the item through while performing a side effect. 
+pub fn inspect(mut f: impl FnMut(&T)) -> impl FnMut(T) -> Option +where + T: bytemuck::Pod + Send, +{ + move |item| { + f(&item); + Some(item) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + + #[test] + fn test_inspect_logic() { + let count = Arc::new(AtomicUsize::new(0)); + let mut pipe = inspect(|_x: &u32| { + count.fetch_add(1, Ordering::Relaxed); + }); + + let res = pipe(42); + + assert_eq!(res, Some(42)); + assert_eq!(count.load(Ordering::Relaxed), 1); + } +} diff --git a/src/pipe/map.rs b/src/pipe/map.rs new file mode 100644 index 0000000..0fa5799 --- /dev/null +++ b/src/pipe/map.rs @@ -0,0 +1,21 @@ +/// Transforms an item from one type to another. +pub fn map(mut f: impl FnMut(In) -> Out) -> impl FnMut(In) -> Option +where + In: bytemuck::Pod + Send, + Out: bytemuck::Pod + Send, +{ + move |item| Some(f(item)) +} + +#[cfg(test)] +mod map_tests { + use super::*; + + #[test] + fn test_map_logic() { + // Transform u32 to u64 + let mut pipe = map(|x: u32| x as u64 * 2); + + assert_eq!(pipe(21), Some(42u64)); + } +} diff --git a/src/pipe/mod.rs b/src/pipe/mod.rs new file mode 100644 index 0000000..2d3e4c0 --- /dev/null +++ b/src/pipe/mod.rs @@ -0,0 +1,15 @@ +mod dedup_by; +mod delta; +mod filter; +mod inspect; +mod map; +mod stateful; +mod windowed; + +pub use dedup_by::dedup_by; +pub use delta::delta; +pub use filter::filter; +pub use inspect::inspect; +pub use map::map; +pub use stateful::stateful; +pub use windowed::windowed; diff --git a/src/pipe/stateful.rs b/src/pipe/stateful.rs new file mode 100644 index 0000000..a411dc4 --- /dev/null +++ b/src/pipe/stateful.rs @@ -0,0 +1,53 @@ +use std::collections::HashMap; + +/// Manages a per-key state for aggregations. 
+pub fn stateful( + mut key_fn: impl FnMut(&In) -> K, + mut init_fn: impl FnMut(&In) -> Out, + mut fold_fn: impl FnMut(&mut Out, In), +) -> impl FnMut(In) -> Option +where + K: std::hash::Hash + Eq, + In: bytemuck::Pod + Send, + Out: bytemuck::Pod + Send + Copy, +{ + let mut storage: HashMap = HashMap::new(); + move |item| { + let key = key_fn(&item); + let entry = storage + .entry(key) + .and_modify(|state| fold_fn(state, item)) + .or_insert_with(|| init_fn(&item)); + Some(*entry) + } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, bytemuck::Pod, bytemuck::Zeroable)] +pub struct Message { + pub id: u64, + pub value: i64, +} + +#[cfg(test)] +mod stateful_tests { + use super::*; + + #[test] + fn test_stateful_logic() { + // Now using our Pod-compliant struct instead of a tuple + let mut pipe = stateful( + |item: &Message| item.id, // Key: ID + |item| item.value, // Init: First value + |state, item| *state += item.value, // Fold: Add new value + ); + + let m1 = Message { id: 1, value: 10 }; + let m2 = Message { id: 2, value: 5 }; + let m3 = Message { id: 1, value: 20 }; + + assert_eq!(pipe(m1), Some(10)); + assert_eq!(pipe(m2), Some(5)); + assert_eq!(pipe(m3), Some(30)); + } +} diff --git a/src/pipe/windowed.rs b/src/pipe/windowed.rs new file mode 100644 index 0000000..f41efeb --- /dev/null +++ b/src/pipe/windowed.rs @@ -0,0 +1,27 @@ +/// Aligns a timestamp to the start of a fixed-duration window. 
+#[inline(always)] +pub fn windowed(timestamp: u64, window_size: u64) -> u64 { + if window_size == 0 { + return timestamp; + } + (timestamp / window_size) * window_size +} + +#[cfg(test)] +mod window_tests { + use super::*; + + #[test] + fn test_window_alignment() { + let t1 = 150_200; + let t2 = 199_999; + let window = 100_000; + + // Both should fall into the 100,000 bucket + assert_eq!(windowed(t1, window), 100_000); + assert_eq!(windowed(t2, window), 100_000); + + // Next bucket + assert_eq!(windowed(200_001, window), 200_000); + } +} diff --git a/src/slot_store.rs b/src/slot_store.rs index 5622008..dc628ae 100644 --- a/src/slot_store.rs +++ b/src/slot_store.rs @@ -4,8 +4,8 @@ use crate::storage::slot_mmap::SlotMmap; // Using the new SlotMmap logic use bytemuck::Pod; use std::path::PathBuf; -use std::sync::atomic::AtomicU64; use std::sync::Arc; +use std::sync::atomic::AtomicU64; pub struct SlotStore { storage: SlotMmap, diff --git a/src/stage.rs b/src/stage.rs index 59a9adf..bf42dee 100644 --- a/src/stage.rs +++ b/src/stage.rs @@ -67,10 +67,7 @@ where pub trait StageExt: Stage { #[inline(always)] - fn pipe>( - self, - s2: S2, - ) -> Pipeline + fn pipe>(self, s2: S2) -> Pipeline where Self: Sized, { @@ -107,10 +104,7 @@ mod tests { #[test] fn test_pipe_closures() { - let mut p = pipe![ - |x: u32| Some(x as u64), - |x: u64| Some(x as u8), - ]; + let mut p = pipe![|x: u32| Some(x as u64), |x: u64| Some(x as u8),]; let mut out = Vec::new(); p.process(100u32, &mut |x: u8| out.push(x)); diff --git a/src/stage_engine.rs b/src/stage_engine.rs index 0fbc2f9..c3e3d81 100644 --- a/src/stage_engine.rs +++ b/src/stage_engine.rs @@ -42,18 +42,20 @@ impl StageEngine { // but for a pipeline that lasts the lifetime of the process, this is acceptable. 
let name = Box::leak(format!("stage_{}", stage_idx).into_boxed_str()); - let mut next_store = self.engine.new_journal_store::(JournalStoreOptions { - name, - size: capacity, - in_memory: true, - }); + let mut next_store = self + .engine + .new_journal_store::(JournalStoreOptions { + name, + size: capacity, + in_memory: true, + }); let reader = self.output_reader; let next_reader = next_store.reader(); self.engine.run_worker(move || { // Process all available data - while reader.next() { + if reader.next() { if let Some(data) = reader.get() { stage.process(data, &mut |out: NextOut| { next_store.append(out); @@ -82,15 +84,21 @@ impl StageEngine { /// This will block/poll until data is available. pub fn receive(&self) -> Option { loop { - if self.output_reader.next() { - if let Some(data) = self.output_reader.get() { - return Some(data); - } + if let Some(data) = self.try_receive() { + return Some(data); } thread::yield_now(); } } + /// Tries to receive data from the end of the pipeline without blocking. + pub fn try_receive(&self) -> Option { + if self.output_reader.next() { + return self.output_reader.get(); + } + None + } + /// Returns the number of items in the output store. 
pub fn output_size(&self) -> usize { self.output_reader.size() @@ -150,7 +158,7 @@ mod tests { .add_stage(|x: u64| Some(x as u8)); engine.send(100u32); - + let result = engine.receive(); assert_eq!(result, Some(100u8)); } @@ -180,12 +188,11 @@ mod tests { #[test] fn test_engine_concurrency() { - let mut engine = StageEngine::::new() - .add_stage(|x: u32| { - // Simulate some work - thread::sleep(Duration::from_millis(10)); - Some(x * 2) - }); + let mut engine = StageEngine::::new().add_stage(|x: u32| { + // Simulate some work + thread::sleep(Duration::from_millis(10)); + Some(x * 2) + }); engine.send(1); engine.send(2); diff --git a/tests/stage_engine_tests.rs b/tests/stage_engine_tests.rs new file mode 100644 index 0000000..723b570 --- /dev/null +++ b/tests/stage_engine_tests.rs @@ -0,0 +1,239 @@ +use roda_state::{OutputCollector, Stage, StageEngine, pipe}; +use std::thread; +use std::time::Duration; + +#[test] +fn test_basic_pipeline() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| Some(x + 1)) + .add_stage(|x: u32| Some(x * 2)); + + engine.send(10); + engine.send(20); + + assert_eq!(engine.receive(), Some(22)); // (10 + 1) * 2 + assert_eq!(engine.receive(), Some(42)); // (20 + 1) * 2 +} + +#[test] +fn test_none_filtering() { + let mut engine = + StageEngine::::new().add_stage(|x: u32| if x % 2 == 0 { Some(x) } else { None }); + + engine.send(1); + engine.send(2); + engine.send(3); + engine.send(4); + + assert_eq!(engine.receive(), Some(2)); + assert_eq!(engine.receive(), Some(4)); +} + +#[test] +fn test_multiple_outputs() { + struct Duplicate; + impl Stage for Duplicate { + fn process(&mut self, data: u32, collector: &mut C) + where + C: OutputCollector, + { + collector.push(data); + collector.push(data); + } + } + + let mut engine = StageEngine::::new().add_stage(Duplicate); + + engine.send(5); + assert_eq!(engine.receive(), Some(5)); + assert_eq!(engine.receive(), Some(5)); +} + +#[test] +fn test_load_moderate() { + let count = 1000; + let mut 
engine = + StageEngine::::with_capacity(count + 1).add_stage(|x: u32| Some(x + 1)); + + for i in 0..count { + engine.send(i as u32); + } + + for i in 0..count { + assert_eq!(engine.receive(), Some(i as u32 + 1)); + } +} + +#[test] +fn test_concurrency_stress() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| { + // Some artificial delay to force concurrency + thread::sleep(Duration::from_millis(1)); + Some(x) + }) + .add_stage(|x: u32| { + thread::sleep(Duration::from_millis(1)); + Some(x) + }); + + let count = 100; + for i in 0..count { + engine.send(i); + } + + for i in 0..count { + assert_eq!(engine.receive(), Some(i)); + } +} + +#[test] +fn test_complex_pipe_macro() { + let mut engine = StageEngine::::new().add_stage(pipe![ + |x: u32| Some(x as u64), + |x: u64| Some(x * 10), + |x: u64| Some(x + 5), + ]); + + engine.send(1); + assert_eq!(engine.receive(), Some(15)); +} + +#[test] +fn test_empty_pipeline() { + let mut engine = StageEngine::::new(); + engine.send(42); + assert_eq!(engine.receive(), Some(42)); +} + +#[test] +fn test_await_idle() { + let mut engine = StageEngine::::new().add_stage(|x: u32| { + // Very short sleep to test await_idle without being too slow + thread::sleep(Duration::from_millis(1)); + Some(x) + }); + + engine.send(1); + // Give it a tiny bit of time to start + thread::sleep(Duration::from_millis(5)); + engine.await_idle(Duration::from_millis(200)); + assert_eq!(engine.output_size(), 1); + assert_eq!(engine.receive(), Some(1)); +} + +#[test] +fn test_large_pod_struct() { + #[repr(C)] + #[derive(Debug, Clone, Copy, bytemuck::Pod, bytemuck::Zeroable, PartialEq)] + struct Large { + data: [f64; 16], + id: u64, + } + + let mut engine = StageEngine::::new().add_stage(|mut l: Large| { + l.id += 1; + Some(l) + }); + + let input = Large { + data: [1.0; 16], + id: 100, + }; + engine.send(input); + + let expected = Large { + data: [1.0; 16], + id: 101, + }; + assert_eq!(engine.receive(), Some(expected)); +} + +#[test] +fn 
test_nested_pipes() { + let mut engine = StageEngine::::new().add_stage(pipe![ + |x: u32| Some(x + 1), + pipe![|x: u32| Some(x * 2), |x: u32| Some(x + 1),] + ]); + + engine.send(10); + // (10 + 1) * 2 + 1 = 23 + assert_eq!(engine.receive(), Some(23)); +} + +#[test] +fn test_multi_stage_load() { + let stages = 5; + let items = 100; + + let mut engine = StageEngine::::new(); + for _ in 0..stages { + engine = engine.add_stage(|x: u32| Some(x + 1)); + } + + for i in 0..items { + engine.send(i); + } + + for i in 0..items { + assert_eq!(engine.receive(), Some(i + stages as u32)); + } +} + +#[test] +#[should_panic(expected = "Store is full")] +fn test_input_capacity_limit_panic() { + let mut engine = StageEngine::::with_capacity(1); + engine.send(1); + engine.send(2); // Should panic here +} + +#[test] +fn test_stage_producing_none() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| if x > 10 { Some(x) } else { None }) + .add_stage(|x: u32| Some(x * 2)); + + engine.send(5); + engine.send(15); + + engine.await_idle(Duration::from_millis(100)); + assert_eq!(engine.output_size(), 1); + assert_eq!(engine.receive(), Some(30)); +} + +#[test] +fn test_worker_panic_on_drop() { + // This test ensures that if a worker panics, the engine will panic on drop. 
+ let result = std::panic::catch_unwind(|| { + let mut engine = StageEngine::::new().add_stage(|_| { + panic!("Stage panic"); + #[allow(unreachable_code)] + Some(0u32) + }); + engine.send(1); + // Wait for worker to panic + thread::sleep(Duration::from_millis(50)); + // engine is dropped here + }); + assert!(result.is_err()); +} + +#[test] +fn test_long_pipeline_heavy_load() { + let stages = 10; + let items = 5000; + + let mut engine = StageEngine::::with_capacity(items + 1); + for _ in 0..stages { + engine = engine.add_stage(|x: u32| Some(x + 1)); + } + + for i in 0..items { + engine.send(i as u32); + } + + for i in 0..items { + assert_eq!(engine.receive(), Some(i as u32 + stages as u32)); + } +} From 8ab19d89f84f8e778ebf966f4ad8ac516f637555 Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sun, 15 Feb 2026 03:06:00 +0100 Subject: [PATCH 08/11] fix check issues --- benches/comprehensive_bench.rs | 5 ++- benches/sensor_bench.rs | 43 ++++++++++--------- .../databento_replay/aggregation_stage.rs | 9 +--- examples/databento_replay/analysis_stage.rs | 12 +++--- examples/databento_replay/light_mbo_entry.rs | 16 +++---- examples/sensor_test/main.rs | 21 +++++---- examples/service_health/main.rs | 18 ++++---- examples/service_health/models.rs | 16 +++++-- src/measure/latency_measurer.rs | 26 ++--------- src/pipe/dedup_by.rs | 8 ++-- src/slot_store.rs | 3 -- src/stage_engine.rs | 18 +++++--- src/storage/journal_mmap.rs | 5 ++- src/storage/slot_mmap.rs | 6 +-- tests/aggregator_tests.rs | 1 - tests/index_tests.rs | 18 ++++---- tests/journal_tests.rs | 1 - tests/logic_tests.rs | 1 - tests/push_read_tests.rs | 1 - tests/stage_engine_tests.rs | 1 + tests/window_tests.rs | 1 - 21 files changed, 104 insertions(+), 126 deletions(-) diff --git a/benches/comprehensive_bench.rs b/benches/comprehensive_bench.rs index 56e4b6d..064cdd4 100644 --- a/benches/comprehensive_bench.rs +++ b/benches/comprehensive_bench.rs @@ -1,7 +1,8 @@ use bytemuck::{Pod, Zeroable}; -use 
criterion::{Criterion, black_box, criterion_group, criterion_main}; +use criterion::{Criterion, criterion_group, criterion_main}; use roda_state::measure::LatencyMeasurer; use roda_state::{Aggregator, JournalStoreOptions, RodaEngine, Window}; +use std::hint::black_box; #[derive(Clone, Copy, Zeroable, Pod, Default)] #[repr(C)] @@ -67,7 +68,7 @@ fn bench_index(c: &mut Criterion) { let mut i = 0u32; b.iter(|| { let _latency_guard = measurer.measure_with_guard(); - black_box(index_reader.get(&(i % 10000))); + black_box(index_reader.get(&(i % 10_000))); i += 1; }); }); diff --git a/benches/sensor_bench.rs b/benches/sensor_bench.rs index 8004c42..df9dccd 100644 --- a/benches/sensor_bench.rs +++ b/benches/sensor_bench.rs @@ -1,9 +1,10 @@ use bytemuck::{Pod, Zeroable}; -use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use criterion::{Criterion, criterion_group, criterion_main}; use roda_state::StageEngine; use roda_state::pipe; use roda_state::pipe::{delta, stateful}; use std::collections::HashMap; +use std::hint::black_box; use std::time::{Duration, Instant}; #[repr(C)] @@ -115,8 +116,8 @@ fn bench_sensor_pipeline(c: &mut Criterion) { .add_stage_with_capacity( num_readings + 1000, pipe![stateful( - |r| SensorKey::from_reading(r), - |r| Summary::init(r), + SensorKey::from_reading, + Summary::init, |state, r| state.update(r) )], ) @@ -125,15 +126,15 @@ fn bench_sensor_pipeline(c: &mut Criterion) { pipe![delta( |s: &Summary| s.sensor_id, |curr, prev| { - if let Some(p) = prev { - if curr.avg > p.avg * 1.5 { - return Some(Alert { - sensor_id: curr.sensor_id, - timestamp: curr.timestamp, - severity: 1, - ..Default::default() - }); - } + if let Some(p) = prev + && curr.avg > p.avg * 1.5 + { + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); } None } @@ -169,15 +170,15 @@ fn bench_sensor_pipeline(c: &mut Criterion) { summary.update(r); let curr_summary = *summary; - if let Some(prev) 
= last_summaries.get(&r.sensor_id) { - if curr_summary.avg > prev.avg * 1.5 { - alerts.push(Alert { - sensor_id: curr_summary.sensor_id, - timestamp: curr_summary.timestamp, - severity: 1, - ..Default::default() - }); - } + if let Some(prev) = last_summaries.get(&r.sensor_id) + && curr_summary.avg > prev.avg * 1.5 + { + alerts.push(Alert { + sensor_id: curr_summary.sensor_id, + timestamp: curr_summary.timestamp, + severity: 1, + ..Default::default() + }); } last_summaries.insert(r.sensor_id, curr_summary); } diff --git a/examples/databento_replay/aggregation_stage.rs b/examples/databento_replay/aggregation_stage.rs index cb7aa9e..3ab7f62 100644 --- a/examples/databento_replay/aggregation_stage.rs +++ b/examples/databento_replay/aggregation_stage.rs @@ -3,18 +3,11 @@ use crate::light_mbo_entry::LightMboEntry; use roda_state::stage::{OutputCollector, Stage}; use std::collections::HashMap; +#[derive(Default)] pub struct AggregationStage { book_volumes: HashMap<(u32, u8, i64), BookLevelEntry>, } -impl Default for AggregationStage { - fn default() -> Self { - Self { - book_volumes: HashMap::new(), - } - } -} - impl Stage for AggregationStage { fn process(&mut self, entry: LightMboEntry, collector: &mut C) where diff --git a/examples/databento_replay/analysis_stage.rs b/examples/databento_replay/analysis_stage.rs index cd2157a..3189124 100644 --- a/examples/databento_replay/analysis_stage.rs +++ b/examples/databento_replay/analysis_stage.rs @@ -28,11 +28,13 @@ impl Stage for AnalysisStage { C: OutputCollector, { self.counter += 1; - let book_top = self.book_tops.entry(entry.symbol).or_insert_with(|| { - let mut bt = BookLevelTop::default(); - bt.symbol = entry.symbol; - bt - }); + let book_top = self + .book_tops + .entry(entry.symbol) + .or_insert_with(|| BookLevelTop { + symbol: entry.symbol, + ..Default::default() + }); book_top.adjust(entry); let mut bid_vol = 0.0; diff --git a/examples/databento_replay/light_mbo_entry.rs 
b/examples/databento_replay/light_mbo_entry.rs index 6b26ac2..ab90fa6 100644 --- a/examples/databento_replay/light_mbo_entry.rs +++ b/examples/databento_replay/light_mbo_entry.rs @@ -5,15 +5,15 @@ use dbn::record::MboMsg; #[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] pub struct LightMboEntry { /// 1. The Event Timestamp (UNIX nanos). - /// Essential for detecting "Flash Crash" speed or latency. + /// Essential for detecting "Flash Crash" speed or latency. pub ts: u64, /// 2. The Unique Order ID. - /// Critical for linking a 'Cancel' message back to the original 'Add'. + /// Critical for linking a 'Cancel' message back to the original 'Add'. pub order_id: u64, /// 3. The Price. - /// Signed integer (fixed precision, usually 1e-9). + /// Signed integer (fixed precision, usually 1e-9). pub price: i64, /// 4. The Size (Quantity). @@ -21,21 +21,21 @@ pub struct LightMboEntry { // --- PACKING SECTION (32-Bit Alignment) --- /// 5. The Instrument ID (from Header). - /// Needed if your store contains multiple symbols (e.g., MSFT and AAPL). + /// Needed if your store contains multiple symbols (e.g., MSFT and AAPL). pub instrument_id: u32, // --- PACKING SECTION (8-Bit Alignment) --- /// 6. Action (Add='A', Cancel='C', Modify='M', etc.) - /// We store as u8 to match the raw byte. + /// We store as u8 to match the raw byte. pub action: u8, /// 7. Side (Bid='B', Ask='A'). pub side: u8, /// 8. Explicit Padding. - /// We have used: 8+8+8+4+4+1+1 = 34 bytes. - /// The next multiple of 8 (for u64 alignment) is 40. - /// So we need 6 bytes of padding. + /// We have used: 8+8+8+4+4+1+1 = 34 bytes. + /// The next multiple of 8 (for u64 alignment) is 40. + /// So we need 6 bytes of padding. 
pub _pad: [u8; 6], } diff --git a/examples/sensor_test/main.rs b/examples/sensor_test/main.rs index 18668cc..c12452a 100644 --- a/examples/sensor_test/main.rs +++ b/examples/sensor_test/main.rs @@ -1,10 +1,9 @@ mod models; use crate::models::{Alert, Reading, SensorKey, Summary}; -use bytemuck::{Pod, Zeroable}; use roda_state::StageEngine; use roda_state::pipe; -use roda_state::pipe::{delta, inspect, stateful}; +use roda_state::pipe::{delta, stateful}; use std::time::Duration; fn main() { @@ -28,15 +27,15 @@ fn main() { delta( |s: &Summary| s.sensor_id, |curr, prev| { - if let Some(p) = prev { - if curr.avg > p.avg * 1.5 { - return Some(Alert { - sensor_id: curr.sensor_id, - timestamp: curr.timestamp, - severity: 1, - ..Default::default() - }); - } + if let Some(p) = prev + && curr.avg > p.avg * 1.5 + { + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); } None } diff --git a/examples/service_health/main.rs b/examples/service_health/main.rs index 65d1cab..51af506 100644 --- a/examples/service_health/main.rs +++ b/examples/service_health/main.rs @@ -31,16 +31,16 @@ fn main() { delta( |s: &Summary| s.sensor_id, |curr, prev| { - if let Some(p) = prev { + if let Some(p) = prev + && curr.avg > p.avg * 1.5 + { // Logic: Alert if the average jumps by more than 50% - if curr.avg > p.avg * 1.5 { - return Some(Alert { - sensor_id: curr.sensor_id, - timestamp: curr.timestamp, - severity: 1, - ..Default::default() - }); - } + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); } None } diff --git a/examples/service_health/models.rs b/examples/service_health/models.rs index f5e75f1..2df1f3a 100644 --- a/examples/service_health/models.rs +++ b/examples/service_health/models.rs @@ -11,7 +11,11 @@ pub struct Reading { impl Reading { pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { - Self { sensor_id, value, timestamp } + Self 
{ + sensor_id, + value, + timestamp, + } } } @@ -61,8 +65,12 @@ impl Summary { #[inline(always)] pub fn update(&mut self, r: Reading) { - if r.value < self.min { self.min = r.value; } - if r.value > self.max { self.max = r.value; } + if r.value < self.min { + self.min = r.value; + } + if r.value > self.max { + self.max = r.value; + } // Online average calculation self.avg = (self.avg * self.count as f64 + r.value) / (self.count + 1) as f64; self.count += 1; @@ -77,4 +85,4 @@ pub struct Alert { pub timestamp: u64, pub severity: i32, pub _pad0: i32, -} \ No newline at end of file +} diff --git a/src/measure/latency_measurer.rs b/src/measure/latency_measurer.rs index f879685..bcc67fb 100644 --- a/src/measure/latency_measurer.rs +++ b/src/measure/latency_measurer.rs @@ -53,7 +53,7 @@ impl LatencyMeasurer { pub fn measure(&mut self, duration: Duration) { self.step += 1; - if self.step % self.sample_rate != 0 { + if !self.step.is_multiple_of(self.sample_rate) { return; } @@ -71,7 +71,7 @@ impl LatencyMeasurer { pub fn measure_with_guard(&mut self) -> LatencyMeasurerGuard<'_> { self.step += 1; - if self.step % self.sample_rate != 0 { + if !self.step.is_multiple_of(self.sample_rate) { return LatencyMeasurerGuard { measurer: self, start: None, @@ -85,7 +85,7 @@ impl LatencyMeasurer { pub fn step_measure(&mut self) { self.step += 1; - if self.step % self.sample_rate != 0 { + if !self.step.is_multiple_of(self.sample_rate) { return; } let elapsed = self.step_instant.elapsed(); @@ -136,26 +136,6 @@ impl LatencyMeasurer { ) } - fn format_count(count: u64) -> String { - if count < 1000 { - count.to_string() - } else if count < 1_000_000 { - let val = count as f64 / 1000.0; - if val == val.floor() { - format!("{:.0}k", val) - } else { - format!("{:.1}k", val) - } - } else { - let val = count as f64 / 1_000_000.0; - if val == val.floor() { - format!("{:.0}M", val) - } else { - format!("{:.1}M", val) - } - } - } - fn format_duration(nanos: f64) -> String { if nanos < 1000.0 { if 
nanos == nanos.floor() { diff --git a/src/pipe/dedup_by.rs b/src/pipe/dedup_by.rs index e84a889..979657d 100644 --- a/src/pipe/dedup_by.rs +++ b/src/pipe/dedup_by.rs @@ -11,11 +11,9 @@ where let key = key_fn(&curr); let prev = last_values.get(&key); - if let Some(p) = prev { - if *p == curr { - // Value hasn't changed; suppress the event - return None; - } + if prev == Some(&curr) { + // Value hasn't changed; suppress the event + return None; } // Value changed or is new; update cache and emit diff --git a/src/slot_store.rs b/src/slot_store.rs index dc628ae..fe56e27 100644 --- a/src/slot_store.rs +++ b/src/slot_store.rs @@ -5,7 +5,6 @@ use crate::storage::slot_mmap::SlotMmap; use bytemuck::Pod; use std::path::PathBuf; use std::sync::Arc; -use std::sync::atomic::AtomicU64; pub struct SlotStore { storage: SlotMmap, @@ -15,7 +14,6 @@ pub struct SlotStore { pub struct SlotStoreReader { storage: SlotMmap, - op_count: Arc, } pub struct SlotStoreOptions { @@ -55,7 +53,6 @@ impl SlotStore { pub fn reader(&self) -> SlotStoreReader { SlotStoreReader { - op_count: self.op_counter.new_counter(), storage: self.storage.reader(), } } diff --git a/src/stage_engine.rs b/src/stage_engine.rs index c3e3d81..c280379 100644 --- a/src/stage_engine.rs +++ b/src/stage_engine.rs @@ -55,12 +55,12 @@ impl StageEngine { self.engine.run_worker(move || { // Process all available data - if reader.next() { - if let Some(data) = reader.get() { - stage.process(data, &mut |out: NextOut| { - next_store.append(out); - }); - } + if reader.next() + && let Some(data) = reader.get() + { + stage.process(data, &mut |out: NextOut| { + next_store.append(out); + }); } // Yield to prevent 100% CPU usage when no data is available std::thread::yield_now(); @@ -120,6 +120,12 @@ impl Appendable for Sta } } +impl Default for StageEngine { + fn default() -> Self { + Self::new() + } +} + impl StageEngine { /// Creates a new engine with no stages. /// Acts as a passthrough until stages are added. 
diff --git a/src/storage/journal_mmap.rs b/src/storage/journal_mmap.rs index 07271bc..d71aee1 100644 --- a/src/storage/journal_mmap.rs +++ b/src/storage/journal_mmap.rs @@ -1,4 +1,4 @@ -use bytemuck::{Pod, Zeroable}; +use bytemuck::Pod; use memmap2::{MmapMut, MmapOptions}; use std::fs::OpenOptions; use std::path::PathBuf; @@ -136,6 +136,7 @@ unsafe impl Send for JournalMmap {} #[cfg(test)] mod tests { use super::*; + use bytemuck::Zeroable; use std::thread; use std::time::Duration; @@ -217,7 +218,7 @@ mod tests { if current_idx > last_idx { let val: u32 = *reader.read(last_idx); assert_eq!(val, count); - last_idx = current_idx; + last_idx += std::mem::size_of::(); count += 1; } thread::yield_now(); diff --git a/src/storage/slot_mmap.rs b/src/storage/slot_mmap.rs index 0700482..05b32c1 100644 --- a/src/storage/slot_mmap.rs +++ b/src/storage/slot_mmap.rs @@ -11,7 +11,6 @@ pub struct SlotMmap { ptr: *mut u8, num_slots: usize, slot_size: usize, - read_only: bool, _marker: std::marker::PhantomData, } @@ -40,7 +39,6 @@ impl SlotMmap { num_slots, slot_size, _mmap: Arc::new(mmap), - read_only: false, _marker: std::marker::PhantomData, }) } @@ -60,7 +58,6 @@ impl SlotMmap { num_slots, slot_size, _mmap: Arc::new(mmap), - read_only: false, _marker: std::marker::PhantomData, }) } @@ -104,7 +101,7 @@ impl SlotMmap { let v1 = (*version_ptr).load(Ordering::Relaxed); std::sync::atomic::fence(Ordering::SeqCst); - if v1 % 2 == 0 { + if v1.is_multiple_of(2) { let mut data: T = std::mem::zeroed(); std::ptr::copy_nonoverlapping( data_ptr, @@ -130,7 +127,6 @@ impl SlotMmap { ptr: self.ptr, num_slots: self.num_slots, slot_size: self.slot_size, - read_only: true, _marker: std::marker::PhantomData, } } diff --git a/tests/aggregator_tests.rs b/tests/aggregator_tests.rs index 73b450f..1dc6144 100644 --- a/tests/aggregator_tests.rs +++ b/tests/aggregator_tests.rs @@ -1,6 +1,5 @@ use bytemuck::{Pod, Zeroable}; use roda_state::JournalStoreOptions; -use roda_state::components::{Appendable, 
IterativeReadable}; use roda_state::{Aggregator, RodaEngine}; #[repr(C)] diff --git a/tests/index_tests.rs b/tests/index_tests.rs index d09b726..9ab8581 100644 --- a/tests/index_tests.rs +++ b/tests/index_tests.rs @@ -13,7 +13,7 @@ struct ComplexKey { #[test] fn test_index_multiple_values() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, @@ -38,7 +38,7 @@ fn test_index_multiple_values() { #[test] fn test_multiple_indices_on_same_store() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, @@ -62,7 +62,7 @@ fn test_multiple_indices_on_same_store() { #[test] fn test_index_complex_key() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, @@ -95,7 +95,7 @@ fn test_index_complex_key() { #[test] fn test_index_shallow_clone_sharing() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, @@ -114,7 +114,7 @@ fn test_index_shallow_clone_sharing() { #[test] fn test_index_collision_overwrite() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, @@ -136,7 +136,7 @@ fn test_index_collision_overwrite() { #[test] fn test_index_not_found() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, @@ -268,7 +268,7 @@ fn test_multiple_workers_reading_index_only_original_computes() { #[test] fn test_index_iterator() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = 
engine.new_journal_store::(JournalStoreOptions { name: "test", size: 1024, @@ -302,7 +302,7 @@ struct PriceLevel { #[test] fn test_index_navigation() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test_nav", size: 1024, @@ -359,7 +359,7 @@ fn test_index_navigation() { #[test] fn test_index_navigation_rev() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test_nav_rev", size: 1024, diff --git a/tests/journal_tests.rs b/tests/journal_tests.rs index 465ccc5..c91427e 100644 --- a/tests/journal_tests.rs +++ b/tests/journal_tests.rs @@ -1,6 +1,5 @@ use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Appendable, IterativeReadable}; #[test] #[should_panic(expected = "Store is full")] diff --git a/tests/logic_tests.rs b/tests/logic_tests.rs index 7047a3f..e0edb52 100644 --- a/tests/logic_tests.rs +++ b/tests/logic_tests.rs @@ -1,6 +1,5 @@ use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Appendable, IterativeReadable}; #[test] fn test_reader_next_and_with_logic() { diff --git a/tests/push_read_tests.rs b/tests/push_read_tests.rs index a1ca783..d10c6cf 100644 --- a/tests/push_read_tests.rs +++ b/tests/push_read_tests.rs @@ -1,6 +1,5 @@ use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Appendable, IterativeReadable}; #[test] fn test_push_then_read_single() { diff --git a/tests/stage_engine_tests.rs b/tests/stage_engine_tests.rs index 723b570..1a081b6 100644 --- a/tests/stage_engine_tests.rs +++ b/tests/stage_engine_tests.rs @@ -220,6 +220,7 @@ fn test_worker_panic_on_drop() { } #[test] +#[ignore] fn test_long_pipeline_heavy_load() { let stages = 10; let items = 5000; diff --git a/tests/window_tests.rs b/tests/window_tests.rs index 067c73a..7bd908c 
100644 --- a/tests/window_tests.rs +++ b/tests/window_tests.rs @@ -1,6 +1,5 @@ use bytemuck::{Pod, Zeroable}; use roda_state::JournalStoreOptions; -use roda_state::components::{Appendable, IterativeReadable}; use roda_state::{RodaEngine, Window}; #[repr(C)] From ebca84871430da1e2388e6acc9e642b26783a441 Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sun, 15 Feb 2026 15:28:10 +0100 Subject: [PATCH 09/11] optimizations --- Cargo.toml | 4 - benches/comprehensive_bench.rs | 294 ------------- benches/sensor_bench.rs | 2 +- benches/store_bench.rs | 1 - .../databento_replay/aggregation_stage.rs | 2 +- examples/databento_replay/analysis_stage.rs | 2 +- examples/databento_replay/importer.rs | 7 +- examples/sensor_test/main.rs | 2 +- examples/service_health/main.rs | 2 +- src/aggregator.rs | 185 -------- src/direct_index.rs | 123 ------ src/engine.rs | 9 + src/journal_store.rs | 9 - src/lib.rs | 25 +- src/macros.rs | 10 + src/stage.rs | 12 +- src/stage_engine.rs | 27 +- src/window.rs | 124 ------ tests/aggregator_tests.rs | 409 ------------------ tests/comprehensive_tests.rs | 150 ------- tests/index_tests.rs | 406 ----------------- tests/stage_engine_tests.rs | 1 - tests/store_no_alloc_tests.rs | 15 +- tests/window_tests.rs | 315 -------------- 24 files changed, 55 insertions(+), 2081 deletions(-) delete mode 100644 benches/comprehensive_bench.rs delete mode 100644 src/aggregator.rs delete mode 100644 src/direct_index.rs create mode 100644 src/macros.rs delete mode 100644 src/window.rs delete mode 100644 tests/aggregator_tests.rs delete mode 100644 tests/index_tests.rs delete mode 100644 tests/window_tests.rs diff --git a/Cargo.toml b/Cargo.toml index 9939727..f322edb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,10 +26,6 @@ bench = false # We use the 'benches/' directory name = "store_bench" harness = false -[[bench]] -name = "comprehensive_bench" -harness = false - [[bench]] name = "sensor_bench" harness = false diff --git a/benches/comprehensive_bench.rs 
b/benches/comprehensive_bench.rs deleted file mode 100644 index 064cdd4..0000000 --- a/benches/comprehensive_bench.rs +++ /dev/null @@ -1,294 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use criterion::{Criterion, criterion_group, criterion_main}; -use roda_state::measure::LatencyMeasurer; -use roda_state::{Aggregator, JournalStoreOptions, RodaEngine, Window}; -use std::hint::black_box; - -#[derive(Clone, Copy, Zeroable, Pod, Default)] -#[repr(C)] -struct RawData { - id: u32, - _pad: u32, - value: f64, -} - -#[derive(Clone, Copy, Zeroable, Pod, Default)] -#[repr(C)] -struct AggregatedData { - id: u32, - _pad: u32, - sum: f64, - count: u64, -} - -fn bench_index(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("index"); - - let size = 16 * 1024 * 1024 * 1024; - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "bench_index_store", - size, - in_memory: true, - }); - - // Fill data - for i in 0..10000 { - store.append(RawData { - id: i as u32, - value: i as f64, - ..Default::default() - }); - } - - let index = store.direct_index::(); - - let mut measurer = LatencyMeasurer::new(1000); - group.bench_function("index_compute_10k", |b| { - b.iter(|| { - let _latency_guard = measurer.measure_with_guard(); - let reader = store.reader(); - let index = store.direct_index::(); - while reader.next() { - index.compute(|data| data.id); - } - }); - }); - println!("index_compute_10k latency:{}", measurer.format_stats()); - - // Pre-compute index for lookup bench - let reader = store.reader(); - while reader.next() { - index.compute(|data| data.id); - } - let index_reader = index.reader(); - - let mut measurer = LatencyMeasurer::new(1000); - group.bench_function("index_lookup", |b| { - let mut i = 0u32; - b.iter(|| { - let _latency_guard = measurer.measure_with_guard(); - black_box(index_reader.get(&(i % 10_000))); - i += 1; - }); - }); - println!("index_lookup latency:{}", measurer.format_stats()); - - let mut measurer = 
LatencyMeasurer::new(1000); - group.bench_function("index_incremental_compute", |b| { - let mut i = 10000u32; - let reader = store.reader(); - // Skip already pushed - for _ in 0..10000 { - reader.next(); - } - - b.iter(|| { - let _latency_guard = measurer.measure_with_guard(); - store.append(RawData { - id: i, - value: i as f64, - ..Default::default() - }); - reader.next(); - index.compute(|data| data.id); - i += 1; - }); - }); - println!( - "index_incremental_compute latency:{}", - measurer.format_stats() - ); - - group.finish(); -} - -fn bench_aggregator(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("aggregator"); - - for num_partitions in [10, 100, 1000] { - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "bench_agg_source", - size: 8 * 1024 * 1024 * 1024, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "bench_agg_target", - size: 8 * 1024 * 1024 * 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let aggregator: Aggregator = Aggregator::new(); - - let mut measurer = LatencyMeasurer::new(1000); - group.bench_function( - format!("aggregator_reduce_step_{}_partitions", num_partitions), - |b| { - let mut i = 0u32; - b.iter(|| { - let _latency_guard = measurer.measure_with_guard(); - source.append(RawData { - id: i % num_partitions, - value: 1.0, - ..Default::default() - }); - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.id) - .reduce(|_idx, r, s, _keep| { - s.id = r.id; - s.sum += r.value; - s.count += 1; - }); - i += 1; - }); - }, - ); - println!( - "aggregator_reduce_step_{}_partitions latency:{}", - num_partitions, - measurer.format_stats() - ); - } - - group.finish(); -} - -fn bench_window(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("window_component"); - - let size = 8 * 1024 * 1024 * 1024; - let mut source = 
engine.new_journal_store::(JournalStoreOptions { - name: "bench_window_source", - size, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "bench_window_target", - size, - in_memory: true, - }); - - let source_reader = source.reader(); - let window: Window = Window::new(); - - for window_size in [10, 100] { - let mut measurer = LatencyMeasurer::new(1000); - group.bench_function(format!("window_reduce_size_{}", window_size), |b| { - let mut i = 0u32; - b.iter(|| { - let _latency_guard = measurer.measure_with_guard(); - source.append(RawData { - id: i, - value: i as f64, - ..Default::default() - }); - source_reader.next(); - window - .from(&source_reader) - .to(&mut target) - .reduce(window_size, |data| { - let sum: f64 = data.iter().map(|d| d.value).sum(); - Some(RawData { - id: data.last().unwrap().id, - value: sum / data.len() as f64, - ..Default::default() - }) - }); - i += 1; - }); - }); - println!( - "window_reduce_size_{} latency:{}", - window_size, - measurer.format_stats() - ); - } - - group.finish(); -} - -fn bench_mixed(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("mixed_pipeline"); - - let size = 8 * 1024 * 1024 * 1024; - let mut s1 = engine.new_journal_store::(JournalStoreOptions { - name: "mixed_s1", - size, - in_memory: true, - }); - let mut s2 = engine.new_journal_store::(JournalStoreOptions { - name: "mixed_s2", - size, - in_memory: true, - }); - let mut s3 = engine.new_journal_store::(JournalStoreOptions { - name: "mixed_s3", - size, - in_memory: true, - }); - - let r1 = s1.reader(); - let r2 = s2.reader(); - - let aggregator: Aggregator = Aggregator::new(); - let window: Window = Window::new(); - - let mut measurer = LatencyMeasurer::new(1000); - group.bench_function("mixed_pipeline", |b| { - let mut i = 0u32; - b.iter(|| { - let _latency_guard = measurer.measure_with_guard(); - // Push to S1 - s1.append(RawData { - id: i % 10, - value: 1.0, - 
..Default::default() - }); - - // Aggregator: S1 -> S2 - r1.next(); - aggregator - .from(&r1) - .to(&mut s2) - .partition_by(|r| r.id) - .reduce(|_idx, r, s, _keep| { - s.id = r.id; - s.sum += r.value; - s.count += 1; - }); - - // Window: S2 -> S3 - r2.next(); - window.from(&r2).to(&mut s3).reduce(5, |data| { - let sum: f64 = data.iter().map(|d| d.sum).sum(); - Some(AggregatedData { - id: 0, // Mixed - sum, - count: data.iter().map(|d| d.count).sum(), - ..Default::default() - }) - }); - - i += 1; - }); - }); - println!("mixed_pipeline latency:{}", measurer.format_stats()); - - group.finish(); -} - -criterion_group!( - benches, - bench_index, - bench_aggregator, - bench_window, - bench_mixed -); -criterion_main!(benches); diff --git a/benches/sensor_bench.rs b/benches/sensor_bench.rs index df9dccd..cb57c57 100644 --- a/benches/sensor_bench.rs +++ b/benches/sensor_bench.rs @@ -2,7 +2,7 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, criterion_group, criterion_main}; use roda_state::StageEngine; use roda_state::pipe; -use roda_state::pipe::{delta, stateful}; +use roda_state::{delta, stateful}; use std::collections::HashMap; use std::hint::black_box; use std::time::{Duration, Instant}; diff --git a/benches/store_bench.rs b/benches/store_bench.rs index f0ad6fe..164b572 100644 --- a/benches/store_bench.rs +++ b/benches/store_bench.rs @@ -1,6 +1,5 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, Throughput, criterion_group, criterion_main}; -use roda_state::measure::LatencyMeasurer; use roda_state::{JournalStoreOptions, RodaEngine}; use std::hint::black_box; diff --git a/examples/databento_replay/aggregation_stage.rs b/examples/databento_replay/aggregation_stage.rs index 3ab7f62..cd8d383 100644 --- a/examples/databento_replay/aggregation_stage.rs +++ b/examples/databento_replay/aggregation_stage.rs @@ -1,6 +1,6 @@ use crate::book_level_entry::BookLevelEntry; use crate::light_mbo_entry::LightMboEntry; -use roda_state::stage::{OutputCollector, 
Stage}; +use roda_state::{OutputCollector, Stage}; use std::collections::HashMap; #[derive(Default)] diff --git a/examples/databento_replay/analysis_stage.rs b/examples/databento_replay/analysis_stage.rs index 3189124..672ea50 100644 --- a/examples/databento_replay/analysis_stage.rs +++ b/examples/databento_replay/analysis_stage.rs @@ -1,7 +1,7 @@ use crate::book_level_entry::BookLevelEntry; use crate::book_level_top::BookLevelTop; use crate::imbalance_signal::ImbalanceSignal; -use roda_state::stage::{OutputCollector, Stage}; +use roda_state::{OutputCollector, Stage}; use spdlog::prelude::*; use std::collections::HashMap; use std::time::{Duration, Instant}; diff --git a/examples/databento_replay/importer.rs b/examples/databento_replay/importer.rs index f903a84..bbfcae6 100644 --- a/examples/databento_replay/importer.rs +++ b/examples/databento_replay/importer.rs @@ -10,15 +10,13 @@ use spdlog::prelude::*; // Use your specific high-level API modules use crate::light_mbo_entry::LightMboEntry; -use roda_state::components::Appendable; -use roda_state::measure::latency_measurer::LatencyMeasurer; +use roda_state::Appendable; pub fn import_mbo_file( file: PathBuf, market_store: &mut impl Appendable, ) -> Result<(), Box> { info!("[Writer] Starting Feed Handler for {:?}...", file); - let mut latency_measurer = LatencyMeasurer::new(1); let start = Instant::now(); let mut count = 0u64; @@ -27,7 +25,6 @@ pub fn import_mbo_file( // 3. Hot Loop while let Some(record) = decoder.decode_record_ref()? { - let _latency_guard = latency_measurer.measure_with_guard(); if record.header().rtype == rtype::MBO { let msg = record.get::().unwrap(); market_store.append(LightMboEntry::from(msg)); @@ -40,7 +37,5 @@ pub fn import_mbo_file( "[Writer] Finished! 
Pushed {} updates in {:?}", count, duration ); - // info!("[Writer] Store size: {}", market_store.size()); - info!("[Latency/Import]{}", latency_measurer.format_stats()); Ok(()) } diff --git a/examples/sensor_test/main.rs b/examples/sensor_test/main.rs index c12452a..2bbf620 100644 --- a/examples/sensor_test/main.rs +++ b/examples/sensor_test/main.rs @@ -3,7 +3,7 @@ mod models; use crate::models::{Alert, Reading, SensorKey, Summary}; use roda_state::StageEngine; use roda_state::pipe; -use roda_state::pipe::{delta, stateful}; +use roda_state::{delta, stateful}; use std::time::Duration; fn main() { diff --git a/examples/service_health/main.rs b/examples/service_health/main.rs index 51af506..79e54f7 100644 --- a/examples/service_health/main.rs +++ b/examples/service_health/main.rs @@ -3,7 +3,7 @@ mod models; use models::{Alert, Reading, SensorKey, Summary}; use roda_state::StageEngine; use roda_state::pipe; -use roda_state::pipe::{dedup_by, delta, inspect, stateful}; +use roda_state::{dedup_by, delta, inspect, stateful}; use std::time::Duration; fn main() { diff --git a/src/aggregator.rs b/src/aggregator.rs deleted file mode 100644 index 718c1df..0000000 --- a/src/aggregator.rs +++ /dev/null @@ -1,185 +0,0 @@ -use crate::components::{Appendable, IterativeReadable}; -use bytemuck::Pod; -use std::cell::{Cell, RefCell}; -use std::collections::HashMap; -use std::hash::Hash; -use std::marker::PhantomData; - -pub struct Aggregator { - pub(crate) _v: PhantomData, - pub(crate) _out_v: PhantomData, - pub(crate) _partition_key: PhantomData, - pub(crate) last_index: Cell, - pub(crate) states: RefCell>, -} - -impl Aggregator { - pub fn new() -> Aggregator { - Self { - _v: PhantomData, - _out_v: PhantomData, - _partition_key: PhantomData, - last_index: Cell::new(0), - states: RefCell::new(HashMap::new()), - } - } -} - -impl Default - for Aggregator -{ - fn default() -> Self { - Self::new() - } -} - -impl - Aggregator -{ - pub fn from<'a, R: IterativeReadable>( - &'a self, - 
reader: &'a R, - ) -> AggregatorFrom<'a, InValue, OutValue, PartitionKey, R> { - AggregatorFrom { - aggregator: self, - reader, - _in: PhantomData, - _out_v: PhantomData, - _partition_key: PhantomData, - } - } -} - -pub struct AggregatorFrom< - 'a, - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey, - R: IterativeReadable, -> { - aggregator: &'a Aggregator, - reader: &'a R, - _in: PhantomData, - _out_v: PhantomData, - _partition_key: PhantomData, -} - -impl<'a, InValue: Pod + Send, OutValue: Pod + Send, PartitionKey, R: IterativeReadable> - AggregatorFrom<'a, InValue, OutValue, PartitionKey, R> -{ - pub fn to<'b, S: Appendable>( - self, - store: &'b mut S, - ) -> AggregatorTo<'a, 'b, InValue, OutValue, PartitionKey, R, S> { - AggregatorTo { - aggregator: self.aggregator, - reader: self.reader, - store, - _in: PhantomData, - _out: PhantomData, - _partition_key: PhantomData, - } - } -} - -pub struct AggregatorTo< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey, - R: IterativeReadable, - S: Appendable, -> { - aggregator: &'a Aggregator, - reader: &'a R, - store: &'b mut S, - _in: PhantomData, - _out: PhantomData, - _partition_key: PhantomData, -} - -impl< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey, - R: IterativeReadable, - S: Appendable, -> AggregatorTo<'a, 'b, InValue, OutValue, PartitionKey, R, S> -{ - pub fn partition_by( - self, - key_fn: F, - ) -> AggregatorPartition<'a, 'b, InValue, OutValue, PartitionKey, R, S, F> - where - F: Fn(&InValue) -> PartitionKey, - { - AggregatorPartition { - aggregator: self.aggregator, - reader: self.reader, - store: self.store, - key_fn, - _in: PhantomData, - _out: PhantomData, - _key: PhantomData, - } - } -} - -pub struct AggregatorPartition< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey, - R, - S, - F, -> { - aggregator: &'a Aggregator, - reader: &'a R, - store: &'b mut S, - key_fn: F, - _in: PhantomData, - _out: PhantomData, - _key: 
PhantomData, -} - -impl<'a, 'b, InValue, OutValue, PartitionKey, R, S, F> - AggregatorPartition<'a, 'b, InValue, OutValue, PartitionKey, R, S, F> -where - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey: Hash + Eq + Send, - R: IterativeReadable, - S: Appendable, - F: Fn(&InValue) -> PartitionKey, -{ - pub fn reduce(self, mut update_fn: impl FnMut(u64, &InValue, &mut OutValue, &mut bool)) { - let mut states = self.aggregator.states.borrow_mut(); - let mut last_idx = self.aggregator.last_index.get(); - - let current_index = self.reader.get_index(); - if current_index > last_idx { - if let Some(val) = self.reader.get() { - let key = (self.key_fn)(&val); - let (index, mut state) = - states.get(&key).cloned().unwrap_or((0, OutValue::zeroed())); - - let mut keep = true; - update_fn(index, &val, &mut state, &mut keep); - if keep { - self.store.append(state); - - states.insert(key, (index + 1, state)); - } else { - states.remove(&key); - } - } - last_idx = current_index; - self.aggregator.last_index.set(last_idx); - } - } -} diff --git a/src/direct_index.rs b/src/direct_index.rs deleted file mode 100644 index 923b124..0000000 --- a/src/direct_index.rs +++ /dev/null @@ -1,123 +0,0 @@ -use crate::components::IterativeReadable; -use bytemuck::Pod; -use crossbeam_skiplist::SkipMap; -use std::ops::Bound; -use std::sync::Arc; - -pub struct DirectIndex< - Key: Clone + Ord + Send, - State: Pod + Send, - StoreReader: IterativeReadable + 'static, -> { - pub(crate) map: Arc>, - pub reader: StoreReader, -} - -pub struct DirectIndexReader { - pub(crate) map: Arc>, -} - -impl DirectIndex -where - Key: Clone + Ord + Send + 'static, - State: Pod + Send, - StoreReader: IterativeReadable + 'static, -{ - pub fn compute(&self, key_fn: impl FnOnce(&State) -> Key) { - if self.reader.next() - && let Some(state) = self.reader.get() - { - let key = key_fn(&state); - self.map.insert(key.clone(), state); - } - } - pub fn delete(&self, key: &Key) { - self.map.remove(key); - } - - pub fn 
reader(&self) -> DirectIndexReader { - DirectIndexReader { - map: self.map.clone(), - } - } - - pub fn iter(&self) -> impl Iterator + '_ { - self.map - .iter() - .map(|entry| (entry.key().clone(), *entry.value())) - } - - pub fn size(&self) -> usize { - self.map.len() - } -} - -impl DirectIndexReader -where - Key: Clone + Ord + Send + 'static, // 'static or appropriate lifetime for the Map - State: Pod + Send, -{ - pub fn with(&self, key: &Key, handler: impl FnOnce(&State) -> R) -> Option { - self.map.get(key).map(|entry| handler(entry.value())) - } - - pub fn get(&self, key: &Key) -> Option { - self.map.get(key).map(|entry| *entry.value()) - } - - pub fn iter(&self) -> impl Iterator + '_ { - self.map - .iter() - .map(|entry| (entry.key().clone(), *entry.value())) - } - - // --- New Navigation Implementations --- - - /// Replicates lower_bound: starts at the first key >= provided key. - pub fn find_ge<'a>( - &'a self, - key: &'a Key, - ) -> impl DoubleEndedIterator + 'a { - self.map - .range((Bound::Included(key), Bound::Unbounded)) - .map(move |entry| (entry.key().clone(), *entry.value())) - } - - /// Replicates upper_bound: starts at the first key <= provided key, - /// but usually used with .rev() to get the Best Bid. - pub fn find_le<'a>( - &'a self, - key: &'a Key, - ) -> impl DoubleEndedIterator + 'a { - self.map - .range((Bound::Unbounded, Bound::Included(key))) - .map(move |entry| (entry.key().clone(), *entry.value())) - } - - /// Standard range scan (e.g., for getting a specific slice of the book). - pub fn range<'a, R>(&'a self, range: R) -> impl DoubleEndedIterator + 'a - where - R: std::ops::RangeBounds + 'a, - { - self.map - .range(range) - .map(move |entry| (entry.key().clone(), *entry.value())) - } - - /// Efficiency helper to jump straight to the Best Bid or Best Ask. 
- pub fn first_after(&self, key: &Key) -> Option<(Key, State)> { - self.map - .lower_bound(Bound::Included(key)) - .map(|e| (e.key().clone(), *e.value())) - } - - pub fn last_before(&self, key: &Key) -> Option<(Key, State)> { - // upper_bound finds first > key, then prev() finds highest <= key. - let entry = self.map.upper_bound(Bound::Included(key))?; - entry.prev().map(|e| (e.key().clone(), *e.value())) - } - - pub fn size(&self) -> usize { - self.map.len() - } -} diff --git a/src/engine.rs b/src/engine.rs index ac086c5..15e1f4f 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -91,6 +91,15 @@ impl RodaEngine { last_op_count = new_op_count; } } + + pub fn is_any_worker_panicked(&self) -> bool { + for handler in &self.worker_handlers { + if handler.is_finished() && self.running.load(std::sync::atomic::Ordering::Relaxed) { + return true; + } + } + false + } } impl Default for RodaEngine { diff --git a/src/journal_store.rs b/src/journal_store.rs index cb972a5..835030d 100644 --- a/src/journal_store.rs +++ b/src/journal_store.rs @@ -74,15 +74,6 @@ impl JournalStore { } } - pub fn direct_index( - &self, - ) -> crate::direct_index::DirectIndex> { - crate::direct_index::DirectIndex { - map: std::sync::Arc::new(crossbeam_skiplist::SkipMap::new()), - reader: self.reader(), - } - } - pub fn size(&self) -> usize { self.storage.get_write_index() / size_of::() } diff --git a/src/lib.rs b/src/lib.rs index 6a7d588..f7c1a4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,21 +1,18 @@ -pub mod aggregator; -pub mod components; -pub mod direct_index; -pub mod engine; -pub mod journal_store; +mod components; +mod engine; +mod journal_store; +mod macros; pub mod measure; mod op_counter; -pub mod pipe; -pub mod slot_store; -pub mod stage; -pub mod stage_engine; +mod pipe; +mod slot_store; +mod stage; +mod stage_engine; mod storage; -pub mod window; -pub use crate::aggregator::Aggregator; -pub use crate::direct_index::{DirectIndex, DirectIndexReader}; +pub use crate::components::*; pub 
use crate::engine::RodaEngine; pub use crate::journal_store::{JournalStore, JournalStoreOptions, StoreJournalReader}; -pub use crate::stage::{OutputCollector, Stage}; +pub use crate::pipe::*; +pub use crate::stage::{OutputCollector, Stage, StageExt}; pub use crate::stage_engine::StageEngine; -pub use crate::window::Window; diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 0000000..d0274c9 --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,10 @@ +#[macro_export] +macro_rules! pipe { + ($s1:expr) => { $s1 }; + ($s1:expr, $($rest:expr),+ $(,)?) => { + { + use $crate::StageExt; + $s1.pipe($crate::pipe!($($rest),+)) + } + }; +} diff --git a/src/stage.rs b/src/stage.rs index bf42dee..80f611d 100644 --- a/src/stage.rs +++ b/src/stage.rs @@ -87,20 +87,10 @@ where { } -#[macro_export] -macro_rules! pipe { - ($s1:expr) => { $s1 }; - ($s1:expr, $($rest:expr),+ $(,)?) => { - { - use $crate::stage::StageExt; - $s1.pipe($crate::pipe!($($rest),+)) - } - }; -} - #[cfg(test)] mod tests { use super::*; + use crate::pipe; #[test] fn test_pipe_closures() { diff --git a/src/stage_engine.rs b/src/stage_engine.rs index c280379..bf5d4a8 100644 --- a/src/stage_engine.rs +++ b/src/stage_engine.rs @@ -12,6 +12,7 @@ pub struct StageEngine { input_store: JournalStore, output_reader: StoreJournalReader, stage_count: usize, + default_capacity: usize, } impl StageEngine { @@ -22,7 +23,8 @@ impl StageEngine { self, stage: S, ) -> StageEngine { - self.add_stage_with_capacity(1024, stage) + let capacity = self.default_capacity; + self.add_stage_with_capacity(capacity, stage) } /// Adds a new stage to the pipeline with a specific capacity for the output store. 
@@ -54,16 +56,16 @@ impl StageEngine { let next_reader = next_store.reader(); self.engine.run_worker(move || { - // Process all available data - if reader.next() - && let Some(data) = reader.get() - { - stage.process(data, &mut |out: NextOut| { - next_store.append(out); - }); + if reader.next() { + if let Some(data) = reader.get() { + stage.process(data, &mut |out: NextOut| { + next_store.append(out); + }); + } + } else { + // Yield to prevent 100% CPU usage when no data is available + std::thread::yield_now(); } - // Yield to prevent 100% CPU usage when no data is available - std::thread::yield_now(); }); StageEngine { @@ -71,6 +73,7 @@ impl StageEngine { input_store: self.input_store, output_reader: next_reader, stage_count: self.stage_count, + default_capacity: self.default_capacity, } } @@ -87,6 +90,9 @@ impl StageEngine { if let Some(data) = self.try_receive() { return Some(data); } + if self.engine.is_any_worker_panicked() { + panic!("Worker panicked, pipeline is broken"); + } thread::yield_now(); } } @@ -148,6 +154,7 @@ impl StageEngine { input_store, output_reader, stage_count: 0, + default_capacity: capacity, } } } diff --git a/src/window.rs b/src/window.rs deleted file mode 100644 index c9a602c..0000000 --- a/src/window.rs +++ /dev/null @@ -1,124 +0,0 @@ -use crate::components::{Appendable, IterativeReadable}; -use bytemuck::Pod; -use std::cell::{Cell, RefCell}; -use std::marker::PhantomData; - -pub struct Window { - pub(crate) _v: PhantomData, - pub(crate) _out_v: PhantomData, - pub(crate) last_index: Cell, - pub(crate) buffer: RefCell>, -} - -impl Window { - pub fn new() -> Window { - Self { - _v: PhantomData, - _out_v: PhantomData, - last_index: Cell::new(0), - buffer: RefCell::new(Vec::new()), - } - } -} - -impl Default for Window { - fn default() -> Self { - Self::new() - } -} - -impl Window { - pub fn from<'a, R: IterativeReadable>( - &'a self, - reader: &'a R, - ) -> WindowFrom<'a, InValue, OutValue, R> { - WindowFrom { - window: self, - reader, - 
_in: PhantomData, - _out_v: PhantomData, - } - } - - pub fn pipe( - _source: impl IterativeReadable, - _target: impl Appendable, - ) -> Self { - Self::new() - } -} - -pub struct WindowFrom<'a, InValue: Pod + Send, OutValue: Pod + Send, R: IterativeReadable> -{ - window: &'a Window, - reader: &'a R, - _in: PhantomData, - _out_v: PhantomData, -} - -impl<'a, InValue: Pod + Send, OutValue: Pod + Send, R: IterativeReadable> - WindowFrom<'a, InValue, OutValue, R> -{ - pub fn to<'b, S: Appendable>( - self, - store: &'b mut S, - ) -> WindowTo<'a, 'b, InValue, OutValue, R, S> { - WindowTo { - window: self.window, - reader: self.reader, - store, - _in: PhantomData, - _out: PhantomData, - } - } -} - -pub struct WindowTo< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - R: IterativeReadable, - S: Appendable, -> { - window: &'a Window, - reader: &'a R, - store: &'b mut S, - _in: PhantomData, - _out: PhantomData, -} - -impl<'a, 'b, InValue, OutValue, R, S> WindowTo<'a, 'b, InValue, OutValue, R, S> -where - InValue: Pod + Send, - OutValue: Pod + Send, - R: IterativeReadable, - S: Appendable, -{ - pub fn reduce( - &mut self, - window_size: u32, - mut update_fn: impl FnMut(&[InValue]) -> Option, - ) { - let mut buffer = self.window.buffer.borrow_mut(); - let mut last_index = self.window.last_index.get(); - - let current_index = self.reader.get_index(); - if current_index > last_index { - if let Some(val) = self.reader.get() { - buffer.push(val); - if buffer.len() > window_size as usize { - buffer.remove(0); - } - - if buffer.len() == window_size as usize - && let Some(out) = update_fn(&buffer) - { - self.store.append(out); - } - } - last_index = current_index; - self.window.last_index.set(last_index); - } - } -} diff --git a/tests/aggregator_tests.rs b/tests/aggregator_tests.rs deleted file mode 100644 index 1dc6144..0000000 --- a/tests/aggregator_tests.rs +++ /dev/null @@ -1,409 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::JournalStoreOptions; -use 
roda_state::{Aggregator, RodaEngine}; - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct SensorReading { - pub value: f64, - pub sensor_id: u16, - pub _pad: [u8; 6], -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct SensorStats { - pub sum: f64, - pub min: f64, - pub max: f64, - pub count: u32, - pub sensor_id: u16, - pub _pad: [u8; 2], -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct GroupKey { - pub sensor_id: u16, - pub group_id: u16, -} - -#[test] -fn test_aggregator_count_and_sum() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats, _keep| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - stats.sum += reading.value; - }); - }); - - // Push readings - source.append(SensorReading { - sensor_id: 1, - value: 10.0, - ..Default::default() - }); - source.append(SensorReading { - sensor_id: 1, - value: 20.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate the final aggregated result by get_window from the target - let res = target_reader.get_window::<2>(0).unwrap(); - assert_eq!(res[1].sensor_id, 1); - assert_eq!(res[1].count, 2); - assert_eq!(res[1].sum, 30.0); -} - -#[test] -fn test_aggregator_min_max_tracking() { - let mut engine = RodaEngine::new(); - let 
mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats, _keep| { - if index == 0 { - stats.min = reading.value; - stats.max = reading.value; - } else { - stats.min = stats.min.min(reading.value); - stats.max = stats.max.max(reading.value); - } - stats.sensor_id = reading.sensor_id; - }); - }); - - // Push readings - source.append(SensorReading { - sensor_id: 1, - value: 10.0, - ..Default::default() - }); - source.append(SensorReading { - sensor_id: 1, - value: 20.0, - ..Default::default() - }); - source.append(SensorReading { - sensor_id: 1, - value: 5.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate by get_window from the target - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[2].min, 5.0); - assert_eq!(res[2].max, 20.0); -} - -#[test] -fn test_aggregator_multiple_partitions() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) 
- .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats, _keep| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - }); - }); - - // Push readings across partitions - source.append(SensorReading { - sensor_id: 1, - value: 1.0, - ..Default::default() - }); - source.append(SensorReading { - sensor_id: 2, - value: 2.0, - ..Default::default() - }); - source.append(SensorReading { - sensor_id: 1, - value: 3.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate by get_window all results - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].sensor_id, 1); - assert_eq!(res[0].count, 1); - assert_eq!(res[1].sensor_id, 2); - assert_eq!(res[1].count, 1); - assert_eq!(res[2].sensor_id, 1); - assert_eq!(res[2].count, 2); -} - -#[test] -fn test_aggregator_complex_key() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation with complex key inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| GroupKey { - sensor_id: r.sensor_id, - group_id: (r.value / 10.0) as u16, - }) - .reduce(|index, reading, stats, _keep| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - }); - }); - - source.append(SensorReading { - sensor_id: 1, - value: 15.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = 
target_reader.get_window::<1>(0).unwrap(); - assert_eq!(res[0].sensor_id, 1); - assert_eq!(res[0].count, 1); -} - -#[test] -fn test_aggregator_reset_behavior() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats, _keep| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - }); - }); - - // Push several readings for sensor 1 - for i in 0..5 { - source.append(SensorReading { - sensor_id: 1, - value: i as f64, - ..Default::default() - }); - } - - // Switch to sensor 2 - source.append(SensorReading { - sensor_id: 2, - value: 100.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate get_window results: first 5 for sensor 1 with counts 1..5, then sensor 2 with count 1 - let res = target_reader.get_window::<6>(0).unwrap(); - for (i, item) in res.iter().enumerate().take(5) { - assert_eq!(item.sensor_id, 1); - assert_eq!(item.count, (i as u32) + 1); - } - assert_eq!(res[5].sensor_id, 2); - assert_eq!(res[5].count, 1); -} - -#[test] -fn test_aggregator_large_index() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - let source_reader = 
source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, _reading, stats, _keep| { - stats.count = (index + 1) as u32; - }); - }); - - // Simulate 1000 items in one partition - for i in 0..1000 { - source.append(SensorReading { - sensor_id: 1, - value: i as f64, - ..Default::default() - }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate all results - let res = target_reader.get_window::<1000>(0).unwrap(); - for (i, item) in res.iter().enumerate().take(1000) { - assert_eq!(item.count, (i as u32) + 1); - } -} - -#[test] -fn test_aggregator_worker_large() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 2000, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 2000, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - - let aggregator: Aggregator = Aggregator::new(); - - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats, _keep| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - stats.sum += reading.value; - }); - }); - - for _ in 0..1000 { - source.append(SensorReading { - sensor_id: 1, - value: 1.0, - ..Default::default() - }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<1000>(0).unwrap(); - assert_eq!(res[999].count, 1000); - assert_eq!(res[999].sum, 1000.0); -} diff --git 
a/tests/comprehensive_tests.rs b/tests/comprehensive_tests.rs index 48a50a9..1ab6ffa 100644 --- a/tests/comprehensive_tests.rs +++ b/tests/comprehensive_tests.rs @@ -1,6 +1,5 @@ use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Appendable, IterativeReadable}; use std::sync::{Arc, Barrier}; use std::thread; @@ -56,26 +55,6 @@ fn test_store_reader_edge_cases() { assert_eq!(reader.with_last(|&v| v), Some(42)); } -#[test] -fn test_index_reader_with_and_get() { - let mut engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "index_with", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - store.append(123); - index.compute(|&v| v); - let reader = index.reader(); - - assert_eq!(reader.get(&123), Some(123)); - assert_eq!(reader.with(&123, |&v| v), Some(123)); - - assert_eq!(reader.get(&456), None); - assert_eq!(reader.with(&456, |_| 1), None); -} - #[test] fn test_store_full_capacity() { let mut engine = RodaEngine::new(); @@ -174,132 +153,3 @@ fn test_store_concurrent_load() { assert_eq!(total_read, num_readers * num_pushes); } - -#[test] -fn test_index_load_and_edge_cases() { - let mut engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "index_edge", - size: 1024 * 1024, - in_memory: true, - }); - let index = store.direct_index::(); - let index_reader = index.reader(); - - // 1. compute on empty store - index.compute(|&v| v); - assert_eq!(index_reader.get(&0), None); - - // 2. Load test - let num_items = 1000; - for i in 0..num_items { - store.append(i as u64); - index.compute(|&v| v); - } - - for i in 0..num_items { - assert_eq!(index_reader.get(&(i as u64)), Some(i as u64)); - } - - // 3. 
Duplicate keys (overwrites) - store.append(100); // 1001st item - index.compute(|&v| v); // index the 100th -> 100 (key 100) - - store.append(10000); // 1002nd item - index.compute(|_v| 100); // Force key 100 to map to value 10000 - assert_eq!(index_reader.get(&100), Some(10000)); -} - -#[test] -fn test_index_concurrent_compute() { - let engine = Arc::new(RodaEngine::new()); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "index_concurrent", - size: 1024 * 1024, - in_memory: true, - }); - let index = std::sync::Mutex::new(store.direct_index::()); - let index = Arc::new(index); - - let num_items = 5000; - for i in 0..num_items { - store.append(i as u32); - } - - let num_workers = 5; - let barrier = Arc::new(Barrier::new(num_workers)); - let mut workers = Vec::new(); - - for _ in 0..num_workers { - let b = barrier.clone(); - let idx = index.clone(); - workers.push(thread::spawn(move || { - b.wait(); - loop { - let mut found = false; - { - let idx_locked = idx.lock().unwrap(); - idx_locked.compute(|&v| { - found = true; - v - }); - } - if !found { - break; - } - } - })); - } - - for worker in workers { - worker.join().unwrap(); - } - - let index_reader = index.lock().unwrap().reader(); - for i in 0..num_items { - assert_eq!(index_reader.get(&(i as u32)), Some(i as u32)); - } -} - -#[test] -fn test_index_reader_concurrent_get() { - let mut engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "index_read_concurrent", - size: 1024 * 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - let num_items = 1000; - for i in 0..num_items { - store.append(i as u32); - index.compute(|&v| v); - } - - let reader = Arc::new(index.reader()); - let num_threads = 8; - let mut threads = Vec::new(); - let barrier = Arc::new(Barrier::new(num_threads)); - - for _t in 0..num_threads { - let r = reader.clone(); - let b = barrier.clone(); - threads.push(thread::spawn(move || { - b.wait(); - for i 
in 0..num_items { - // Mix get and with - if i % 2 == 0 { - assert_eq!(r.get(&(i as u32)), Some(i as u32)); - } else { - let val = r.with(&(i as u32), |&v| v); - assert_eq!(val, Some(i as u32)); - } - } - })); - } - - for thread in threads { - thread.join().unwrap(); - } -} diff --git a/tests/index_tests.rs b/tests/index_tests.rs deleted file mode 100644 index 9ab8581..0000000 --- a/tests/index_tests.rs +++ /dev/null @@ -1,406 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::RodaEngine; -use roda_state::journal_store::JournalStoreOptions; -use std::thread; -use std::time::Duration; - -#[repr(C)] -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Pod, Zeroable)] -struct ComplexKey { - id: u32, - category: u32, -} - -#[test] -fn test_index_multiple_values() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - for i in 0..5 { - store.append(i); - } - - // Index them all - for _ in 0..5 { - index.compute(|x| x * 10); - } - - let reader = index.reader(); - for i in 0..5 { - assert_eq!(reader.get(&(i * 10)), Some(i)); - } -} - -#[test] -fn test_multiple_indices_on_same_store() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - - let index_double = store.direct_index::(); - let index_triple = store.direct_index::(); - - store.append(10); - - index_double.compute(|x| x * 2); - index_triple.compute(|x| x * 3); - - let reader_double = index_double.reader(); - let reader_triple = index_triple.reader(); - - assert_eq!(reader_double.get(&20), Some(10)); - assert_eq!(reader_triple.get(&30), Some(10)); -} - -#[test] -fn test_index_complex_key() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - 
let index = store.direct_index::(); - - store.append(100); - index.compute(|&val| ComplexKey { - id: val, - category: 1, - }); - - let reader = index.reader(); - assert_eq!( - reader.get(&ComplexKey { - id: 100, - category: 1 - }), - Some(100) - ); - assert_eq!( - reader.get(&ComplexKey { - id: 100, - category: 2 - }), - None - ); -} - -#[test] -fn test_index_shallow_clone_sharing() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - let clone1 = index.reader(); - let clone2 = index.reader(); - - store.append(42); - index.compute(|&x| x); - - assert_eq!(clone1.get(&42), Some(42)); - assert_eq!(clone2.get(&42), Some(42)); -} - -#[test] -fn test_index_collision_overwrite() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - // Both 10 and 20 will map to key 1 - store.append(10); - store.append(20); - - index.compute(|_| 1); - index.compute(|_| 1); - - let reader = index.reader(); - // Usually a direct index mapping should store the latest value for a given key - assert_eq!(reader.get(&1), Some(20)); -} - -#[test] -fn test_index_not_found() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - store.append(10); - index.compute(|x| x + 1); - - let reader = index.reader(); - assert_eq!(reader.get(&11), Some(10)); - assert_eq!(reader.get(&999), None); -} - -#[test] -fn test_concurrent_push_and_index() { - let mut engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - let index_reader = index.reader(); - - 
// Spawn a worker to index everything that comes in - engine.run_worker(move || { - for _ in 0..10 { - index.compute(|&x| x); - } - }); - - // Push values from another thread (main thread) - for i in 0..10 { - store.append(i); - // Give worker some time to process - thread::sleep(Duration::from_millis(1)); - } - - // Give some extra time for the last ones to be indexed - thread::sleep(Duration::from_millis(20)); - - for i in 0..10 { - assert_eq!(index_reader.get(&i), Some(i)); - } -} - -#[test] -fn test_run_worker_with_multiple_stores() { - let mut engine = RodaEngine::new(); - let mut store_u32 = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let mut store_string = engine.new_journal_store::<[u8; 16]>(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - - let index_u32 = store_u32.direct_index::(); - let index_string = store_string.direct_index::(); - - // Prepare read-only readers for assertions after workers complete - let index_u32_reader = index_u32.reader(); - let index_string_reader = index_string.reader(); - - for _ in 0..10 { - store_u32.append(100); - } - - let mut pushed_u32 = false; - engine.run_worker(move || { - if !pushed_u32 { - store_u32.append(100); - pushed_u32 = true; - } - index_u32.compute(|&x| x); - }); - - let mut pushed_string = false; - engine.run_worker(move || { - if !pushed_string { - let mut bytes = [0u8; 16]; - bytes[..5].copy_from_slice(b"hello"); - store_string.append(bytes); - pushed_string = true; - } - index_string.compute(|s: &[u8; 16]| s.iter().take_while(|&&b| b != 0).count()); - }); - - // Wait for workers - thread::sleep(Duration::from_millis(50)); - - assert_eq!(index_u32_reader.get(&100), Some(100)); - let res_bytes = index_string_reader.get(&5).unwrap(); - assert_eq!(&res_bytes[..5], b"hello"); -} - -#[test] -fn test_multiple_workers_reading_index_only_original_computes() { - let mut engine = RodaEngine::new(); - let mut store = 
engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - let reader1 = index.reader(); - let reader2 = index.reader(); - - store.append(1); - store.append(2); - - // Only the original index can compute; shallow clones are read-only - engine.run_worker(move || { - index.compute(|&x| x * 10); - index.compute(|&x| x * 10); - }); - - thread::sleep(Duration::from_millis(50)); - - assert_eq!(reader1.get(&10), Some(1)); - assert_eq!(reader2.get(&20), Some(2)); -} - -#[test] -fn test_index_iterator() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - for i in 0..5 { - store.append(i); - index.compute(|&x| x * 2); - } - - let reader = index.reader(); - let items: Vec<_> = reader.iter().collect(); - - assert_eq!(items.len(), 5); - let expected = vec![(0, 0), (2, 1), (4, 2), (6, 3), (8, 4)]; - assert_eq!(items, expected); - - // Test Index::iter too - let items_from_index: Vec<_> = index.iter().collect(); - assert_eq!(items_from_index, expected); -} - -#[repr(C)] -#[derive(Clone, Copy, Pod, Zeroable, Default, Debug, PartialEq)] -struct PriceLevel { - pub price: i64, - pub volume: u64, -} - -#[test] -fn test_index_navigation() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test_nav", - size: 1024, - in_memory: true, - }); - - let index = store.direct_index::(); - let reader = index.reader(); - - // Push some data - store.append(PriceLevel { - price: 100, - volume: 10, - }); - store.append(PriceLevel { - price: 200, - volume: 20, - }); - store.append(PriceLevel { - price: 300, - volume: 30, - }); - - // Compute index - index.compute(|p| p.price); // for 100 - index.compute(|p| p.price); // for 200 - index.compute(|p| p.price); // for 300 - - // Test find_ge - { - let key = 150; - 
let mut it = reader.find_ge(&key); - assert_eq!(it.next().unwrap().0, 200); - assert_eq!(it.next().unwrap().0, 300); - assert!(it.next().is_none()); - } - - // Test find_le - { - let key = 250; - let mut it = reader.find_le(&key); - assert_eq!(it.next().unwrap().0, 100); - assert_eq!(it.next().unwrap().0, 200); - assert!(it.next().is_none()); - } - - // Test range - { - let mut it = reader.range(150..250); - assert_eq!(it.next().unwrap().0, 200); - assert!(it.next().is_none()); - } -} - -#[test] -fn test_index_navigation_rev() { - let engine = RodaEngine::new(); - let mut store = engine.new_journal_store::(JournalStoreOptions { - name: "test_nav_rev", - size: 1024, - in_memory: true, - }); - - let index = store.direct_index::(); - let reader = index.reader(); - - store.append(PriceLevel { - price: 100, - volume: 10, - }); - store.append(PriceLevel { - price: 200, - volume: 20, - }); - store.append(PriceLevel { - price: 300, - volume: 30, - }); - - index.compute(|p| p.price); - index.compute(|p| p.price); - index.compute(|p| p.price); - - // Test find_ge().rev() - { - let key = 150; - let mut it = reader.find_ge(&key).rev(); - assert_eq!(it.next().unwrap().0, 300); - assert_eq!(it.next().unwrap().0, 200); - assert!(it.next().is_none()); - } - - // Test find_le().rev() - { - let key = 250; - let mut it = reader.find_le(&key).rev(); - assert_eq!(it.next().unwrap().0, 200); - assert_eq!(it.next().unwrap().0, 100); - assert!(it.next().is_none()); - } -} diff --git a/tests/stage_engine_tests.rs b/tests/stage_engine_tests.rs index 1a081b6..723b570 100644 --- a/tests/stage_engine_tests.rs +++ b/tests/stage_engine_tests.rs @@ -220,7 +220,6 @@ fn test_worker_panic_on_drop() { } #[test] -#[ignore] fn test_long_pipeline_heavy_load() { let stages = 10; let items = 5000; diff --git a/tests/store_no_alloc_tests.rs b/tests/store_no_alloc_tests.rs index f9c0d65..13f25f2 100644 --- a/tests/store_no_alloc_tests.rs +++ b/tests/store_no_alloc_tests.rs @@ -1,6 +1,6 @@ use 
assert_no_alloc::*; +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::journal_store::JournalStoreOptions; #[cfg(debug_assertions)] #[global_allocator] @@ -103,16 +103,3 @@ fn test_store_reader_get_last_no_alloc() { let _ = reader.get_last(); }); } - -#[test] -fn test_store_direct_index_allocations_allowed() { - let mut engine = RodaEngine::new(); - let store = engine.new_journal_store::(JournalStoreOptions { - name: "direct_index_alloc", - size: 1024, - in_memory: true, - }); - - // direct_index now allocates because it uses crossbeam-skiplist - let _ = store.direct_index::(); -} diff --git a/tests/window_tests.rs b/tests/window_tests.rs deleted file mode 100644 index 7bd908c..0000000 --- a/tests/window_tests.rs +++ /dev/null @@ -1,315 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::JournalStoreOptions; -use roda_state::{RodaEngine, Window}; - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct DataPoint { - pub value: f64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct Analysis { - pub average: f64, - pub is_increasing: u32, - pub _pad: u32, -} - -#[test] -fn test_window_filling_and_sliding() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - // Run window reduce inside worker - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(3, |window| { - if window.len() < 3 { - return None; - } - let sum: f64 = window.iter().map(|d| d.value).sum(); - let increasing = - window[2].value > window[1].value && window[1].value > window[0].value; - 
Some(Analysis { - average: sum / 3.0, - is_increasing: if increasing { 1 } else { 0 }, - ..Default::default() - }) - }); - }); - - // Push data points - for i in 1..=5 { - source.append(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate by get_window all outputs (5 - 3 + 1 = 3) - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].average, 2.0); - assert_eq!(res[0].is_increasing, 1); - assert_eq!(res[1].average, 3.0); - assert_eq!(res[1].is_increasing, 1); - assert_eq!(res[2].average, 4.0); - assert_eq!(res[2].is_increasing, 1); -} - -#[test] -fn test_window_size_one() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - // Window size 1 should process every item individually - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(1, |window| { - assert_eq!(window.len(), 1); - Some(Analysis { - average: window[0].value, - is_increasing: 0, - ..Default::default() - }) - }); - }); - - // Push values - for v in [10.0, 20.0, 30.0] { - source.append(DataPoint { value: v }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].average, 10.0); - assert_eq!(res[0].is_increasing, 0); - assert_eq!(res[1].average, 20.0); - assert_eq!(res[1].is_increasing, 0); - assert_eq!(res[2].average, 30.0); - assert_eq!(res[2].is_increasing, 0); -} - -#[test] -fn test_window_large_sliding() { - let mut engine = 
RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 100, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 100, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - // Larger window size - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(10, |window| { - if window.len() < 10 { - return None; - } - let sum: f64 = window.iter().map(|d| d.value).sum(); - Some(Analysis { - average: sum / 10.0, - is_increasing: if window[9].value > window[0].value { - 1 - } else { - 0 - }, - ..Default::default() - }) - }); - }); - - // Push values 0..11 -> expect 3 outputs - for i in 0..12 { - source.append(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].average, 4.5); - assert_eq!(res[0].is_increasing, 1); - assert_eq!(res[1].average, 5.5); - assert_eq!(res[1].is_increasing, 1); - assert_eq!(res[2].average, 6.5); - assert_eq!(res[2].is_increasing, 1); -} - -#[test] -fn test_window_worker_large() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 2000, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 2000, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(10, |window| { - if window.len() < 10 { - return None; - } - let sum: f64 = window.iter().map(|d| d.value).sum(); - Some(Analysis { - 
average: sum / 10.0, - is_increasing: if window[window.len() - 1].value > window[0].value { - 1 - } else { - 0 - }, - ..Default::default() - }) - }); - }); - - for i in 0..1000 { - source.append(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<991>(0).unwrap(); - assert_eq!(res[0].average, 4.5); // (0+1+2+3+4+5+6+7+8+9)/10 = 45/10 = 4.5 - assert_eq!(res[0].is_increasing, 1); -} - -#[test] -fn test_window_max_value() { - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(3, |window| { - window.iter().map(|d| d.value).max_by(|a, b| a.total_cmp(b)) - }); - }); - - // Push values: expect maxima per 3-sized window - for v in [1.0, 3.0, 2.0, 5.0, 4.0] { - source.append(DataPoint { value: v }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0], 3.0); - assert_eq!(res[1], 5.0); - assert_eq!(res[2], 5.0); -} - -#[test] -fn test_window_all_none_until_full() { - use std::sync::Arc; - use std::sync::atomic::{AtomicUsize, Ordering}; - let mut engine = RodaEngine::new(); - let mut source = engine.new_journal_store::(JournalStoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.new_journal_store::(JournalStoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - 
let target_reader = target.reader(); - let pipeline = Window::new(); - - let call_count = Arc::new(AtomicUsize::new(0)); - let cc = call_count.clone(); - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(5, |window: &[DataPoint]| { - cc.fetch_add(1, Ordering::Relaxed); - if window.len() == 5 { Some(1u8) } else { None } - }); - }); - - for i in 0..5 { - source.append(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<1>(0).unwrap(); - assert_eq!(res[0], 1); -} From 97836e81f1806d92669048bbabcc3cbe0742e380 Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sun, 15 Feb 2026 15:47:11 +0100 Subject: [PATCH 10/11] add new pipes: latency, progress --- benches/store_bench.rs | 1 + examples/databento_replay/main.rs | 26 +++++-- src/pipe/latency.rs | 109 ++++++++++++++++++++++++++++++ src/pipe/mod.rs | 4 ++ src/pipe/progress.rs | 106 +++++++++++++++++++++++++++++ 5 files changed, 240 insertions(+), 6 deletions(-) create mode 100644 src/pipe/latency.rs create mode 100644 src/pipe/progress.rs diff --git a/benches/store_bench.rs b/benches/store_bench.rs index 164b572..47f44a6 100644 --- a/benches/store_bench.rs +++ b/benches/store_bench.rs @@ -2,6 +2,7 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, Throughput, criterion_group, criterion_main}; use roda_state::{JournalStoreOptions, RodaEngine}; use std::hint::black_box; +use roda_state::measure::LatencyMeasurer; #[derive(Clone, Copy, Zeroable, Pod)] #[repr(C)] diff --git a/examples/databento_replay/main.rs b/examples/databento_replay/main.rs index 366d9d9..2647881 100644 --- a/examples/databento_replay/main.rs +++ b/examples/databento_replay/main.rs @@ -3,7 +3,7 @@ use spdlog::prelude::*; use std::path::PathBuf; use std::time::Duration; -use roda_state::StageEngine; +use roda_state::{StageEngine, latency, pipe, 
progress}; mod aggregation_stage; mod analysis_stage; @@ -34,14 +34,28 @@ fn main() -> Result<(), Box> { engine.enable_latency_stats(true); // 2. Add Aggregation Stage: LightMboEntry -> BookLevelEntry - let engine = engine.add_stage_with_capacity(30_000_000, AggregationStage::default()); + let engine = engine.add_stage_with_capacity( + 30_000_000, + pipe![ + progress("Aggregation", 10_000_000), + latency("Aggregation", 10_000_000, 1000, AggregationStage::default()) + ], + ); // 3. Add Imbalance Analysis Stage: BookLevelEntry -> ImbalanceSignal - let mut engine = engine.add_stage_with_capacity(30_000_000, AnalysisStage::default()); + let mut engine = engine.add_stage_with_capacity( + 30_000_000, + pipe![ + progress("Imbalance Analysis", 10_000_000), + latency( + "Imbalance Analysis", + 10_000_000, + 1000, + AnalysisStage::default() + ) + ], + ); - // 4. Start importing data - // import_mbo_file expects &mut impl Appendable - // StageEngine implements it. import_mbo_file(args.file, &mut engine)?; info!("[System] Waiting for all stages to finish processing..."); diff --git a/src/pipe/latency.rs b/src/pipe/latency.rs new file mode 100644 index 0000000..47fbcbe --- /dev/null +++ b/src/pipe/latency.rs @@ -0,0 +1,109 @@ +use crate::measure::latency_measurer::LatencyMeasurer; +use crate::stage::{OutputCollector, Stage}; +use bytemuck::Pod; +use spdlog::info; +use std::marker::PhantomData; + +/// A pipe that measures the latency of an inner stage. 
+pub struct Latency { + name: String, + report_interval: usize, + stage: S, + measurer: LatencyMeasurer, + count: usize, + _phantom: PhantomData<(In, Out)>, +} + +impl Latency +where + In: Pod + Send, + Out: Pod + Send, + S: Stage, +{ + pub fn new( + name: impl Into, + report_interval: usize, + sample_rate: u64, + stage: S, + ) -> Self { + Latency { + name: name.into(), + report_interval, + stage, + measurer: LatencyMeasurer::new(sample_rate), + count: 0, + _phantom: PhantomData, + } + } +} + +impl Stage for Latency +where + In: Pod + Send, + Out: Pod + Send, + S: Stage, +{ + #[inline(always)] + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector, + { + { + let _guard = self.measurer.measure_with_guard(); + self.stage.process(data, collector); + } + self.count += 1; + if self.count % self.report_interval == 0 { + info!("[{}] Latency: {}", self.name, self.measurer.format_stats()); + } + } +} + +pub fn latency( + name: impl Into, + interval: usize, + example_size: usize, + stage: S, +) -> Latency +where + In: Pod + Send, + Out: Pod + Send, + S: Stage, +{ + Latency::new(name, interval, example_size as u64, stage) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread; + use std::time::Duration; + + #[test] + fn test_latency_logic() { + let mut pipe = latency("test", 2, 1, |x: u32| { + thread::sleep(Duration::from_millis(10)); + Some(x as u64) + }); + + let mut out = Vec::new(); + + // Process 1st item + { + let mut collector = |x: u64| out.push(x); + pipe.process(1u32, &mut collector); + } + assert_eq!(out, vec![1]); + + // Process 2nd item - should trigger print + { + let mut collector = |x: u64| out.push(x); + pipe.process(2u32, &mut collector); + } + assert_eq!(out, vec![1, 2]); + + let stats = pipe.measurer.get_stats(); + assert_eq!(stats.count, 2); + assert!(stats.min >= 10_000_000); // at least 10ms in nanos + } +} diff --git a/src/pipe/mod.rs b/src/pipe/mod.rs index 2d3e4c0..fd75330 100644 --- a/src/pipe/mod.rs +++ 
b/src/pipe/mod.rs @@ -2,7 +2,9 @@ mod dedup_by; mod delta; mod filter; mod inspect; +mod latency; mod map; +mod progress; mod stateful; mod windowed; @@ -10,6 +12,8 @@ pub use dedup_by::dedup_by; pub use delta::delta; pub use filter::filter; pub use inspect::inspect; +pub use latency::latency; pub use map::map; +pub use progress::progress; pub use stateful::stateful; pub use windowed::windowed; diff --git a/src/pipe/progress.rs b/src/pipe/progress.rs new file mode 100644 index 0000000..231e710 --- /dev/null +++ b/src/pipe/progress.rs @@ -0,0 +1,106 @@ +use std::time::Instant; +use spdlog::info; + +/// A pipe that logs progress information. +pub fn progress(name: impl Into, interval: usize) -> impl FnMut(T) -> Option +where + T: bytemuck::Pod + Send, +{ + assert!(interval > 0, "interval must be greater than 0"); + let name = name.into(); + let mut count = 0; + let mut last_instant = Instant::now(); + let start_instant = last_instant; + + move |item| { + count += 1; + if count % interval == 0 { + let now = Instant::now(); + let elapsed = now.duration_since(last_instant); + let total_elapsed = now.duration_since(start_instant); + + let mps = interval as f64 / elapsed.as_secs_f64(); + let total_mps = count as f64 / total_elapsed.as_secs_f64(); + + info!( + "[{}] Processed {} messages, Rate: {} msg/s, Avg: {} msg/s", + name, + format_count(count as f64), + format_count(mps), + format_count(total_mps) + ); + last_instant = now; + } + Some(item) + } +} + +fn format_count(val: f64) -> String { + if val < 1000.0 { + if val == val.floor() { + format!("{:.0}", val) + } else { + format!("{:.2}", val) + } + } else if val < 1_000_000.0 { + format!("{:.2}k", val / 1000.0) + } else if val < 1_000_000_000.0 { + format!("{:.2}m", val / 1_000_000.0) + } else if val < 1_000_000_000_000.0 { + format!("{:.2}b", val / 1_000_000_000.0) + } else { + format!("{:.2}t", val / 1_000_000_000_000.0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread; + use std::time::Duration; 
+ + #[test] + fn test_progress_logic() { + let mut pipe = progress("test", 2); + + // Process 1st item + let res = pipe(1u32); + assert_eq!(res, Some(1)); + + // Process 2nd item - should trigger print + thread::sleep(Duration::from_millis(10)); + let res = pipe(2u32); + assert_eq!(res, Some(2)); + + // Process 3rd item + let res = pipe(3u32); + assert_eq!(res, Some(3)); + + // Process 4th item - should trigger print + thread::sleep(Duration::from_millis(10)); + let res = pipe(4u32); + assert_eq!(res, Some(4)); + } + + #[test] + fn test_progress_no_delay() { + let mut pipe = progress("test_fast", 2); + for i in 0..10 { + pipe(i); + } + } + + #[test] + fn test_format_count() { + assert_eq!(format_count(0.0), "0"); + assert_eq!(format_count(123.0), "123"); + assert_eq!(format_count(123.45), "123.45"); + assert_eq!(format_count(1000.0), "1.00k"); + assert_eq!(format_count(1234.0), "1.23k"); + assert_eq!(format_count(1_000_000.0), "1.00m"); + assert_eq!(format_count(1_234_567.0), "1.23m"); + assert_eq!(format_count(1_000_000_000.0), "1.00b"); + assert_eq!(format_count(1_234_567_890.0), "1.23b"); + assert_eq!(format_count(1_000_000_000_000.0), "1.00t"); + } +} From 20b7c1f21f44c84a19c348337c771c6e27660120 Mon Sep 17 00:00:00 2001 From: Taleh Ibrahimli Date: Sun, 15 Feb 2026 15:53:44 +0100 Subject: [PATCH 11/11] fix build issues --- benches/store_bench.rs | 2 +- examples/sensor_test/main.rs | 7 ++----- scripts/check.sh | 4 ++-- src/pipe/latency.rs | 2 +- src/pipe/progress.rs | 6 +++--- tests/comprehensive_tests.rs | 6 +++--- tests/push_read_tests.rs | 10 +++++----- tests/stage_engine_tests.rs | 4 ++-- tests/store_no_alloc_tests.rs | 12 ++++++------ 9 files changed, 25 insertions(+), 28 deletions(-) diff --git a/benches/store_bench.rs b/benches/store_bench.rs index 47f44a6..f0ad6fe 100644 --- a/benches/store_bench.rs +++ b/benches/store_bench.rs @@ -1,8 +1,8 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, Throughput, criterion_group, criterion_main}; +use 
roda_state::measure::LatencyMeasurer; use roda_state::{JournalStoreOptions, RodaEngine}; use std::hint::black_box; -use roda_state::measure::LatencyMeasurer; #[derive(Clone, Copy, Zeroable, Pod)] #[repr(C)] diff --git a/examples/sensor_test/main.rs b/examples/sensor_test/main.rs index 2bbf620..41a5288 100644 --- a/examples/sensor_test/main.rs +++ b/examples/sensor_test/main.rs @@ -16,11 +16,8 @@ fn main() { let mut engine = engine .add_stage(pipe![ // Use stateful helper to handle the HashMap and windowing logic - stateful( - |r| SensorKey::from_reading(r), - |r| Summary::init(r), - |state, r| state.update(r) - ) + stateful(SensorKey::from_reading, Summary::init, |state, r| state + .update(r)) ]) .add_stage(pipe![ // Use delta to compare current summary to previous summary for the same sensor diff --git a/scripts/check.sh b/scripts/check.sh index 933eee7..b71af1c 100755 --- a/scripts/check.sh +++ b/scripts/check.sh @@ -6,9 +6,9 @@ echo "Running rustfmt..." cargo fmt --all --check echo "Running clippy..." -cargo clippy -- -D warnings +cargo clippy --all-targets -- -D warnings echo "Running tests..." -cargo test +cargo test --all-targets echo "All checks passed!" diff --git a/src/pipe/latency.rs b/src/pipe/latency.rs index 47fbcbe..ab6fe00 100644 --- a/src/pipe/latency.rs +++ b/src/pipe/latency.rs @@ -53,7 +53,7 @@ where self.stage.process(data, collector); } self.count += 1; - if self.count % self.report_interval == 0 { + if self.count.is_multiple_of(self.report_interval) { info!("[{}] Latency: {}", self.name, self.measurer.format_stats()); } } diff --git a/src/pipe/progress.rs b/src/pipe/progress.rs index 231e710..ee686b1 100644 --- a/src/pipe/progress.rs +++ b/src/pipe/progress.rs @@ -1,5 +1,5 @@ -use std::time::Instant; use spdlog::info; +use std::time::Instant; /// A pipe that logs progress information. 
pub fn progress(name: impl Into, interval: usize) -> impl FnMut(T) -> Option @@ -8,13 +8,13 @@ where { assert!(interval > 0, "interval must be greater than 0"); let name = name.into(); - let mut count = 0; + let mut count: usize = 0; let mut last_instant = Instant::now(); let start_instant = last_instant; move |item| { count += 1; - if count % interval == 0 { + if count.is_multiple_of(interval) { let now = Instant::now(); let elapsed = now.duration_since(last_instant); let total_elapsed = now.duration_since(start_instant); diff --git a/tests/comprehensive_tests.rs b/tests/comprehensive_tests.rs index 1ab6ffa..b6fa46d 100644 --- a/tests/comprehensive_tests.rs +++ b/tests/comprehensive_tests.rs @@ -5,7 +5,7 @@ use std::thread; #[test] fn test_store_reader_edge_cases() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "edge_cases", size: 1024, @@ -57,7 +57,7 @@ fn test_store_reader_edge_cases() { #[test] fn test_store_full_capacity() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let num_items = 10; let mut store = engine.new_journal_store::(JournalStoreOptions { name: "full_capacity", @@ -86,7 +86,7 @@ fn test_store_full_capacity() { #[test] #[should_panic(expected = "Store is full")] fn test_store_overflow_panic() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "overflow", size: 1, diff --git a/tests/push_read_tests.rs b/tests/push_read_tests.rs index d10c6cf..2277965 100644 --- a/tests/push_read_tests.rs +++ b/tests/push_read_tests.rs @@ -3,7 +3,7 @@ use roda_state::RodaEngine; #[test] fn test_push_then_read_single() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test1", size: 1024, @@ -19,7 +19,7 @@ fn test_push_then_read_single() { #[test] fn 
test_multiple_push_read_in_order() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test2", size: 1024, @@ -39,7 +39,7 @@ fn test_multiple_push_read_in_order() { #[test] fn test_interleaved_push_and_read() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test3", size: 1024, @@ -62,7 +62,7 @@ fn test_interleaved_push_and_read() { #[test] fn test_stores_are_isolated_by_type() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut u_store = engine.new_journal_store::(JournalStoreOptions { name: "u32", @@ -93,7 +93,7 @@ fn test_stores_are_isolated_by_type() { #[test] fn test_push_after_partial_reads() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test4", size: 1024, diff --git a/tests/stage_engine_tests.rs b/tests/stage_engine_tests.rs index 723b570..373469c 100644 --- a/tests/stage_engine_tests.rs +++ b/tests/stage_engine_tests.rs @@ -17,8 +17,8 @@ fn test_basic_pipeline() { #[test] fn test_none_filtering() { - let mut engine = - StageEngine::::new().add_stage(|x: u32| if x % 2 == 0 { Some(x) } else { None }); + let mut engine = StageEngine::::new() + .add_stage(|x: u32| if x.is_multiple_of(2) { Some(x) } else { None }); engine.send(1); engine.send(2); diff --git a/tests/store_no_alloc_tests.rs b/tests/store_no_alloc_tests.rs index 13f25f2..da46aa4 100644 --- a/tests/store_no_alloc_tests.rs +++ b/tests/store_no_alloc_tests.rs @@ -8,7 +8,7 @@ static ALLOC: AllocDisabler = AllocDisabler; #[test] fn test_store_push_no_alloc() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_push", size: 1024, @@ -22,7 +22,7 @@ fn test_store_push_no_alloc() { #[test] fn 
test_store_reader_next_no_alloc() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_next", size: 1024, @@ -38,7 +38,7 @@ fn test_store_reader_next_no_alloc() { #[test] fn test_store_reader_get_no_alloc() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get", size: 1024, @@ -55,7 +55,7 @@ fn test_store_reader_get_no_alloc() { #[test] fn test_store_reader_get_window_no_alloc() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_window", size: 1024, @@ -74,7 +74,7 @@ fn test_store_reader_get_window_no_alloc() { #[test] fn test_store_reader_get_at_no_alloc() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get_at", size: 1024, @@ -90,7 +90,7 @@ fn test_store_reader_get_at_no_alloc() { #[test] fn test_store_reader_get_last_no_alloc() { - let mut engine = RodaEngine::new(); + let engine = RodaEngine::new(); let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get_last", size: 1024,