diff --git a/Cargo.lock b/Cargo.lock index 362f79a..3ab1b68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aho-corasick" version = "1.1.4" @@ -11,6 +17,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -26,6 +41,21 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.13" @@ -33,10 +63,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] -name = "anyhow" -version = "1.0.101" +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = 
"40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] [[package]] name = "arc-swap" @@ -68,6 +121,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "bitflags" version = "2.10.0" @@ -100,6 +159,12 @@ dependencies = [ "syn", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cast" version = "0.3.0" @@ -113,6 +178,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -122,17 +189,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "chacha20" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" -dependencies = [ - "cfg-if", - "cpufeatures", - "rand_core", -] - [[package]] name = "chrono" version = "0.4.43" @@ -180,6 +236,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -188,8 +245,22 @@ version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -198,6 +269,12 @@ version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -205,35 +282,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] -name = "cpufeatures" -version = "0.3.0" +name = "crc32fast" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "libc", + "cfg-if", ] [[package]] name = "criterion" -version = "0.5.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ + "alloca", "anes", "cast", "ciborium", "clap", 
"criterion-plot", - "is-terminal", "itertools", "num-traits", - "once_cell", "oorandom", + "page_size", "plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "walkdir", @@ -241,14 +317,23 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.5.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", "itertools", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -290,6 +375,66 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "dbn" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e5aa5489abf84f8dde3b736c13dffa9f99ea0fc95d06c7719007d40339e63fb" +dependencies = [ + "csv", + "dbn-macros", + "fallible-streaming-iterator", + "itoa", + "json-writer", + "num_enum", + "oval", + "thiserror", + "time", + "zstd", +] + +[[package]] +name = "dbn-macros" +version = "0.48.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94fa2649cf276e4ae91232d1d1d318367d1b6df1b15d1175493c213575a6ddc" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "deranged" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" +dependencies = [ + "powerfmt", +] + [[package]] name = "dyn-clone" version = "1.0.20" @@ -319,6 +464,12 @@ dependencies = [ "typeid", ] +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -326,23 +477,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] -name = "foldhash" -version = "0.1.5" +name = "flate2" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "rand_core", "wasip2", - "wasip3", ] [[package]] @@ -358,18 +511,23 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.5" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 
-dependencies = [ - "foldhash", -] +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] -name = "hashbrown" -version = "0.16.1" +name = "hdrhistogram" +version = "7.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64", + "byteorder", + "crossbeam-channel", + "flate2", + "nom 7.1.3", + "num-traits", +] [[package]] name = "heck" @@ -407,12 +565,6 @@ dependencies = [ "cc", ] -[[package]] -name = "id-arena" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - [[package]] name = "indexmap" version = "2.13.0" @@ -420,9 +572,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown 0.16.1", - "serde", - "serde_core", + "hashbrown", ] [[package]] @@ -436,11 +586,17 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" -version = "0.10.5" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -451,6 +607,16 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "jobserver" +version = "0.1.34" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + [[package]] name = "js-sys" version = "0.3.85" @@ -462,10 +628,14 @@ dependencies = [ ] [[package]] -name = "leb128fmt" -version = "0.1.0" +name = "json-writer" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +checksum = "279046e6427c19c86f93df06fe9dc90c32b43f4a2a85bb3083d579e4a1e7ef03" +dependencies = [ + "itoa", + "ryu", +] [[package]] name = "libc" @@ -503,6 +673,32 @@ dependencies = [ "libc", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nom" version = "8.0.0" @@ -512,6 +708,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + [[package]] name = "num-traits" version = "0.2.19" @@ -521,18 +723,62 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" 
+dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "oorandom" version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "oval" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135cef32720c6746450d910890b0b69bcba2bbf6f85c9f4583df13fe415de828" + +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.12.5" @@ -556,6 +802,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "plotters" version = "0.3.7" @@ -585,13 +837,18 @@ dependencies = [ ] [[package]] -name = "prettyplease" -version = "0.2.37" +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "proc-macro2", - "syn", + "toml_edit", ] [[package]] @@ -618,23 +875,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "rand" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" -dependencies = [ - "chacha20", - "getrandom", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" - [[package]] name = "rayon" version = "1.11.0" @@ -699,14 +939,13 @@ version = "0.1.0" dependencies = [ "assert_no_alloc", "bytemuck", + "clap", "criterion", "crossbeam-skiplist", - "crossbeam-utils", - "libc", + "dbn", + "hdrhistogram", "memmap2", - "rand", "spdlog-rs", - "thiserror", ] [[package]] @@ -818,6 +1057,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "smallvec" version = "1.15.1" @@ -830,7 +1075,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7c4ffbdbc9f2d819ffb53ef00a253f524ba7bfd7a3aa8dcd50789b9b27be550" dependencies = [ 
- "nom", + "nom 8.0.0", "strum", "strum_macros", "thiserror", @@ -870,6 +1115,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "strum" version = "0.27.2" @@ -1000,6 +1251,37 @@ dependencies = [ "syn", ] +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -1010,6 +1292,36 @@ dependencies = [ "serde_json", ] +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.8+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0742ff5ff03ea7e67c8ae6c93cac239e0d9784833362da3f9a9c1da8dfefcbdc" +dependencies = [ + "winnow", +] + [[package]] name = "typeid" version = "1.0.3" @@ -1023,10 +1335,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" [[package]] -name = "unicode-xid" -version = "0.2.6" +name = "utf8parse" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "value-bag" @@ -1084,15 +1396,6 @@ dependencies = [ "wit-bindgen", ] -[[package]] -name = "wasip3" -version = "0.4.0+wasi-0.3.0-rc-2026-01-06" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" -dependencies = [ - "wit-bindgen", -] - [[package]] name = "wasm-bindgen" version = "0.2.108" @@ -1138,40 +1441,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap", - "wasm-encoder", - "wasmparser", -] - -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap", - "semver", -] - [[package]] name = "web-sys" version = "0.3.85" @@ -1282,115 +1551,70 @@ 
dependencies = [ ] [[package]] -name = "wit-bindgen" -version = "0.51.0" +name = "winnow" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ - "wit-bindgen-rust-macro", + "memchr", ] [[package]] -name = "wit-bindgen-core" +name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" -dependencies = [ - "anyhow", - "heck", - "wit-parser", -] +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] -name = "wit-bindgen-rust" -version = "0.51.0" +name = "zerocopy" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ - "anyhow", - "heck", - "indexmap", - "prettyplease", - "syn", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", + "zerocopy-derive", ] [[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" +name = "zerocopy-derive" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ - "anyhow", - "prettyplease", "proc-macro2", "quote", "syn", - "wit-bindgen-core", - "wit-bindgen-rust", ] [[package]] -name = "wit-component" -version = "0.244.0" +name = "zmij" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags", - "indexmap", - "log", 
- "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] +checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" [[package]] -name = "wit-parser" -version = "0.244.0" +name = "zstd" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ - "anyhow", - "id-arena", - "indexmap", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", + "zstd-safe", ] [[package]] -name = "zerocopy" -version = "0.8.39" +name = "zstd-safe" +version = "7.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" dependencies = [ - "zerocopy-derive", + "zstd-sys", ] [[package]] -name = "zerocopy-derive" -version = "0.8.39" +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ - "proc-macro2", - "quote", - "syn", + "cc", + "pkg-config", ] - -[[package]] -name = "zmij" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" diff --git a/Cargo.toml b/Cargo.toml index feb95fc..f322edb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,18 +6,18 @@ description = "A wait-free, cache-friendly state synchronization engine for HFT. 
authors = ["Your Name"] [dependencies] -spdlog-rs = "0.5.2" -rand = "0.10.0-rc.6" -bytemuck = {version = "1.25.0", features = ["derive"]} +bytemuck = { version = "1.25.0", features = ["derive"] } memmap2 = "0.9.9" -thiserror = "2.0.18" -crossbeam-utils = "0.8.21" crossbeam-skiplist = "0.1" -libc = "0.2" # Needed for mlock (memory pinning) and sched_setaffinity +clap = { version = "4.5.57", features = ["derive"] } +hdrhistogram = "7.5" +spdlog-rs = "0.5.2" [dev-dependencies] assert_no_alloc = { version = "1.1.2" } -criterion = { version = "0.5", features = ["html_reports"] } +criterion = { version = "0.8.2", features = ["html_reports"] } +dbn = { version = "0.48.0" } +clap = { version = "4.0", features = ["derive"] } [lib] bench = false # We use the 'benches/' directory @@ -27,7 +27,7 @@ name = "store_bench" harness = false [[bench]] -name = "comprehensive_bench" +name = "sensor_bench" harness = false [profile.profiling] diff --git a/README.md b/README.md index fcecf5a..8b3520e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Roda -Ultra-high-performance, low-latency state computer for real-time analytics and trading systems. Roda lets you build +Ultra-high-performance, low-latency state computer for real-time analytics and event-driven systems. Roda lets you build deterministic streaming pipelines with cache-friendly dataflows, wait-free reads, and explicit memory bounds—ideal for -HFT, market microstructure research, telemetry, and any workload where microseconds matter. +IoT, telemetry, industrial automation, and any workload where microseconds matter. > Status: Early design and API preview. Examples and tests illustrate the intended DX. Expect rapid iteration and > breaking changes. @@ -85,134 +85,159 @@ roda-state = { path = "." 
} Run the example: ```bash -cargo run --example hello_world +cargo run --example sensor_test ``` -## Example: From Ticks to OHLC to Trading Signals +## Example: From Sensor Readings to Summaries to Alerts -Below is a trimmed version of `examples/hello_world.rs` that demonstrates a two-stage pipeline: aggregate ticks into OHLC candles, then derive a simple momentum signal via a sliding window. +Below is a trimmed version of `examples/sensor_test.rs` that demonstrates a two-stage pipeline: aggregate raw sensor readings into statistical summaries, then derive alerts when anomalies are detected via a sliding window. ```rust use bytemuck::{Pod, Zeroable}; use roda_state::components::{Engine, Index, Store, StoreOptions, StoreReader}; use roda_state::{Aggregator, RodaEngine, Window}; +use std::thread; +use std::time::Duration; #[repr(C)] #[derive(Clone, Copy, Default, Pod, Zeroable)] -struct Tick { - symbol: u64, - price: f64, +struct Reading { + sensor_id: u64, + value: f64, timestamp: u64, } +impl Reading { + fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { sensor_id, value, timestamp } + } +} + + #[repr(C)] #[derive(Clone, Copy, Default, Pod, Zeroable)] -struct OHLC { - symbol: u64, - open: f64, - high: f64, - low: f64, - close: f64, +struct Summary { + sensor_id: u64, + min: f64, + max: f64, + avg: f64, + count: u64, timestamp: u64, } #[repr(C)] #[derive(Clone, Copy, Default, Pod, Zeroable)] -struct Signal { - symbol: u64, +struct Alert { + sensor_id: u64, timestamp: u64, - direction: i32, - size: u32, + severity: i32, + _pad0: i32, } #[derive(Clone, Copy, PartialEq, Eq, Hash, Pod, Zeroable)] #[repr(C)] -struct TimeKey { - symbol: u64, +struct SensorKey { + sensor_id: u64, timestamp: u64, } fn main() { let engine = RodaEngine::new(); - // Allocate bounded stores (explicit memory profile) - let tick_store = engine.store::(StoreOptions { - name: "ticks", + // 1. 
Allocate bounded stores + let mut reading_store = engine.store::(StoreOptions { + name: "readings", size: 1_000_000, in_memory: true, }); - let tick_reader = tick_store.reader(); - let mut ohlc_store = engine.store::(StoreOptions { - name: "ohlc", + let reading_reader = reading_store.reader(); + + let mut summary_store = engine.store::(StoreOptions { + name: "summaries", size: 10_000, in_memory: true, }); - let ohlc_reader = ohlc_store.reader(); - let mut signal_store = engine.store::(StoreOptions { - name: "signals", + let summary_reader = summary_store.reader(); + + let mut alert_store = engine.store::(StoreOptions { + name: "alerts", size: 10_000, in_memory: true, }); + let alert_reader_for_print = alert_store.reader(); - // Index to locate candles by (symbol, time) - let ohlc_index = ohlc_store.direct_index::(); + let summary_index = summary_store.direct_index::(); - // Declare pipelines - let mut ohlc_pipeline: Aggregator = Aggregator::new(); - let mut strategy_pipeline: Window = Window::new(); + // 2. Declare pipelines + let summary_pipeline: Aggregator = Aggregator::new(); + let alert_pipeline: Window = Window::new(); - // Worker 1: aggregate ticks -> OHLC and maintain index + // 3. 
Worker 1: aggregate readings -> summaries and maintain index engine.run_worker(move || { - tick_reader.next(); - ohlc_pipeline - .from(&tick_reader) - .to(&mut ohlc_store) - .partition_by(|t| TimeKey { - symbol: t.symbol, - timestamp: t.timestamp / 100_000 + reading_reader.next(); + summary_pipeline + .from(&reading_reader) + .to(&mut summary_store) + .partition_by(|r| SensorKey { + sensor_id: r.sensor_id, + timestamp: r.timestamp / 100_000 }) - .reduce(|i, t, c| { + .reduce(|i, r, s| { if i == 0 { - c.open = t.price; - c.high = t.price; - c.low = t.price; - c.close = t.price; - c.symbol = t.symbol; - c.timestamp = (t.timestamp / 100_000) * 100_000; + *s = Summary { + sensor_id: r.sensor_id, + min: r.value, max: r.value, avg: r.value, count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + }; } else { - c.high = c.high.max(t.price); - c.low = c.low.min(t.price); - c.close = t.price; + s.min = s.min.min(r.value); + s.max = s.max.max(r.value); + s.avg = (s.avg * s.count as f64 + r.value) / (s.count + 1) as f64; + s.count += 1; } }); - ohlc_index.compute(|c| TimeKey { - symbol: c.symbol, - timestamp: c.timestamp / 100_000 + summary_index.compute(|s| SensorKey { + sensor_id: s.sensor_id, + timestamp: s.timestamp / 100_000 }); }); - // Worker 2: 2-bar momentum signal + // 4. Worker 2: alert on average jumps engine.run_worker(move || { - ohlc_reader.next(); - strategy_pipeline - .from(&ohlc_reader) - .to(&mut signal_store) + summary_reader.next(); + alert_pipeline + .from(&summary_reader) + .to(&mut alert_store) .reduce(2, |w| { - let prev = w[0]; - let cur = w[1]; - (cur.close > prev.close).then(|| Signal { - symbol: cur.symbol, + let (prev, cur) = (w[0], w[1]); + (cur.avg > prev.avg * 1.5).then(|| Alert { + sensor_id: cur.sensor_id, timestamp: cur.timestamp, - direction: 1, - size: ((cur.close - prev.close) as u32).min(100) + severity: 1, + ..Default::default() }) }); }); + + // 5. 
Data Ingestion + reading_store.push(Reading::from(1, 10.0, 10_000)); + reading_store.push(Reading::from(1, 12.0, 20_000)); + reading_store.push(Reading::from(1, 20.0, 110_000)); + reading_store.push(Reading::from(1, 22.0, 120_000)); + + thread::sleep(Duration::from_millis(100)); + + // 6. Print Results + while alert_reader_for_print.next() { + if let Some(a) = alert_reader_for_print.get() { + println!("{:?}", a); + } + } } ``` -Explore the full example in `examples/hello_world.rs` for more context. +Explore the full example in `examples/sensor_test/main.rs` for more context. ## Contributing diff --git a/benches/comprehensive_bench.rs b/benches/comprehensive_bench.rs deleted file mode 100644 index a976741..0000000 --- a/benches/comprehensive_bench.rs +++ /dev/null @@ -1,264 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use criterion::{Criterion, black_box, criterion_group, criterion_main}; -use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions, StoreReader}; -use roda_state::{Aggregator, RodaEngine, Window}; - -#[derive(Clone, Copy, Zeroable, Pod, Default)] -#[repr(C)] -struct RawData { - id: u32, - _pad: u32, - value: f64, -} - -#[derive(Clone, Copy, Zeroable, Pod, Default)] -#[repr(C)] -struct AggregatedData { - id: u32, - _pad: u32, - sum: f64, - count: u64, -} - -fn bench_index(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("index"); - - let size = 16 * 1024 * 1024 * 1024; - let mut store = engine.store::(StoreOptions { - name: "bench_index_store", - size, - in_memory: true, - }); - - // Fill data - for i in 0..10000 { - store.push(RawData { - id: i as u32, - value: i as f64, - ..Default::default() - }); - } - - let index = store.direct_index::(); - - group.bench_function("index_compute_10k", |b| { - b.iter(|| { - let reader = store.reader(); - let index = store.direct_index::(); - while reader.next() { - index.compute(|data| data.id); - } - }); - }); - - // Pre-compute index for lookup bench - let reader 
= store.reader(); - while reader.next() { - index.compute(|data| data.id); - } - let index_reader = index.reader(); - - group.bench_function("index_lookup", |b| { - let mut i = 0u32; - b.iter(|| { - black_box(index_reader.get(&(i % 10000))); - i += 1; - }); - }); - - group.bench_function("index_incremental_compute", |b| { - let mut i = 10000u32; - let reader = store.reader(); - // Skip already pushed - for _ in 0..10000 { - reader.next(); - } - - b.iter(|| { - store.push(RawData { - id: i, - value: i as f64, - ..Default::default() - }); - reader.next(); - index.compute(|data| data.id); - i += 1; - }); - }); - - group.finish(); -} - -fn bench_aggregator(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("aggregator"); - - for num_partitions in [10, 100, 1000] { - let mut source = engine.store::(StoreOptions { - name: "bench_agg_source", - size: 8 * 1024 * 1024 * 1024, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "bench_agg_target", - size: 8 * 1024 * 1024 * 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let aggregator: Aggregator = Aggregator::new(); - - group.bench_function( - format!("aggregator_reduce_step_{}_partitions", num_partitions), - |b| { - let mut i = 0u32; - b.iter(|| { - source.push(RawData { - id: i % num_partitions, - value: 1.0, - ..Default::default() - }); - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.id) - .reduce(|_idx, r, s| { - s.id = r.id; - s.sum += r.value; - s.count += 1; - }); - i += 1; - }); - }, - ); - } - - group.finish(); -} - -fn bench_window(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("window_component"); - - let size = 8 * 1024 * 1024 * 1024; - let mut source = engine.store::(StoreOptions { - name: "bench_window_source", - size, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: 
"bench_window_target", - size, - in_memory: true, - }); - - let source_reader = source.reader(); - let window: Window = Window::new(); - - for window_size in [10, 100] { - group.bench_function(format!("window_reduce_size_{}", window_size), |b| { - let mut i = 0u32; - b.iter(|| { - source.push(RawData { - id: i, - value: i as f64, - ..Default::default() - }); - source_reader.next(); - window - .from(&source_reader) - .to(&mut target) - .reduce(window_size, |data| { - let sum: f64 = data.iter().map(|d| d.value).sum(); - Some(RawData { - id: data.last().unwrap().id, - value: sum / data.len() as f64, - ..Default::default() - }) - }); - i += 1; - }); - }); - } - - group.finish(); -} - -fn bench_mixed(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("mixed_pipeline"); - - let size = 8 * 1024 * 1024 * 1024; - let mut s1 = engine.store::(StoreOptions { - name: "mixed_s1", - size, - in_memory: true, - }); - let mut s2 = engine.store::(StoreOptions { - name: "mixed_s2", - size, - in_memory: true, - }); - let mut s3 = engine.store::(StoreOptions { - name: "mixed_s3", - size, - in_memory: true, - }); - - let r1 = s1.reader(); - let r2 = s2.reader(); - - let aggregator: Aggregator = Aggregator::new(); - let window: Window = Window::new(); - - group.bench_function("mixed_push_agg_window", |b| { - let mut i = 0u32; - b.iter(|| { - // Push to S1 - s1.push(RawData { - id: i % 10, - value: 1.0, - ..Default::default() - }); - - // Aggregator: S1 -> S2 - r1.next(); - aggregator - .from(&r1) - .to(&mut s2) - .partition_by(|r| r.id) - .reduce(|_idx, r, s| { - s.id = r.id; - s.sum += r.value; - s.count += 1; - }); - - // Window: S2 -> S3 - r2.next(); - window.from(&r2).to(&mut s3).reduce(5, |data| { - let sum: f64 = data.iter().map(|d| d.sum).sum(); - Some(AggregatedData { - id: 0, // Mixed - sum, - count: data.iter().map(|d| d.count).sum(), - ..Default::default() - }) - }); - - i += 1; - }); - }); - - group.finish(); -} - -criterion_group!( - 
benches, - bench_index, - bench_aggregator, - bench_window, - bench_mixed -); -criterion_main!(benches); diff --git a/benches/sensor_bench.rs b/benches/sensor_bench.rs new file mode 100644 index 0000000..cb57c57 --- /dev/null +++ b/benches/sensor_bench.rs @@ -0,0 +1,193 @@ +use bytemuck::{Pod, Zeroable}; +use criterion::{Criterion, criterion_group, criterion_main}; +use roda_state::StageEngine; +use roda_state::pipe; +use roda_state::{delta, stateful}; +use std::collections::HashMap; +use std::hint::black_box; +use std::time::{Duration, Instant}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Reading { + pub sensor_id: u64, + pub value: f64, + pub timestamp: u64, +} + +impl Reading { + pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { + sensor_id, + value, + timestamp, + } + } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Summary { + pub sensor_id: u64, + pub min: f64, + pub max: f64, + pub avg: f64, + pub count: u64, + pub timestamp: u64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SensorKey { + pub sensor_id: u64, + pub timestamp: u64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Alert { + pub sensor_id: u64, + pub timestamp: u64, + pub severity: i32, + pub _pad0: i32, +} + +impl SensorKey { + #[inline(always)] + pub fn from_reading(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } +} + +impl Summary { + #[inline(always)] + pub fn init(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + min: r.value, + max: r.value, + avg: r.value, + count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } + + #[inline(always)] + pub fn update(&mut self, r: Reading) { + if r.value < self.min { + self.min = r.value; + } + if r.value > self.max { + self.max = r.value; + } + self.avg = 
(self.avg * self.count as f64 + r.value) / (self.count + 1) as f64; + self.count += 1; + } +} + +fn bench_sensor_pipeline(c: &mut Criterion) { + let num_readings = 1_000_000; + let num_sensors = 1000; + + let mut readings = Vec::with_capacity(num_readings); + for i in 0..num_readings { + let sensor_id = (i % num_sensors) as u64; + let value = if i > 0 && i % 1000 == 0 { + 50.0 + } else { + 10.0 + (i as f64 * 0.0001) + }; + readings.push(Reading::from(sensor_id, value, i as u64 * 10_000)); + } + + let mut group = c.benchmark_group("sensor_pipeline"); + group.sample_size(10); + group.measurement_time(Duration::from_secs(10)); + + group.bench_function("stage_engine", |b| { + b.iter_custom(|iters| { + let mut total_duration = Duration::ZERO; + for _ in 0..iters { + let engine = StageEngine::::with_capacity(num_readings + 1000); + let mut engine = engine + .add_stage_with_capacity( + num_readings + 1000, + pipe![stateful( + SensorKey::from_reading, + Summary::init, + |state, r| state.update(r) + )], + ) + .add_stage_with_capacity( + num_readings + 1000, + pipe![delta( + |s: &Summary| s.sensor_id, + |curr, prev| { + if let Some(p) = prev + && curr.avg > p.avg * 1.5 + { + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); + } + None + } + )], + ); + + let start = Instant::now(); + for &r in &readings { + engine.send(r); + } + engine.await_idle(Duration::from_secs(5)); + total_duration += start.elapsed(); + + // Drain alerts + while let Some(alert) = engine.try_receive() { + black_box(alert); + } + } + total_duration + }); + }); + + group.bench_function("pure_rust", |b| { + b.iter(|| { + let mut summaries: HashMap = HashMap::new(); + let mut last_summaries: HashMap = HashMap::new(); + let mut alerts = Vec::new(); + + for &r in &readings { + let key = SensorKey::from_reading(&r); + let summary = summaries.entry(key).or_insert_with(|| Summary::init(&r)); + + summary.update(r); + let curr_summary = 
*summary; + + if let Some(prev) = last_summaries.get(&r.sensor_id) + && curr_summary.avg > prev.avg * 1.5 + { + alerts.push(Alert { + sensor_id: curr_summary.sensor_id, + timestamp: curr_summary.timestamp, + severity: 1, + ..Default::default() + }); + } + last_summaries.insert(r.sensor_id, curr_summary); + } + black_box(alerts); + }); + }); + + group.finish(); +} + +criterion_group!(benches, bench_sensor_pipeline); +criterion_main!(benches); diff --git a/benches/store_bench.rs b/benches/store_bench.rs index 48ad295..f0ad6fe 100644 --- a/benches/store_bench.rs +++ b/benches/store_bench.rs @@ -1,7 +1,7 @@ use bytemuck::{Pod, Zeroable}; use criterion::{Criterion, Throughput, criterion_group, criterion_main}; -use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; +use roda_state::measure::LatencyMeasurer; +use roda_state::{JournalStoreOptions, RodaEngine}; use std::hint::black_box; #[derive(Clone, Copy, Zeroable, Pod)] @@ -11,48 +11,56 @@ struct LargeState { } fn bench_push(c: &mut Criterion) { - let engine = RodaEngine::new(); - let mut group = c.benchmark_group("push"); + let mut engine = RodaEngine::new(); + engine.enable_latency_stats(true); + let mut group = c.benchmark_group("append"); // 1GB buffer to ensure we don't overflow during benchmarking let size = 16 * 1024 * 1024 * 1024; - let mut store_u64 = engine.store::(StoreOptions { + let mut store_u64 = engine.new_journal_store::(JournalStoreOptions { name: "bench_push_u64", size, in_memory: true, }); group.throughput(Throughput::Elements(1)); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("push_u64", |b| { let mut val = 0u64; b.iter(|| { - store_u64.push(black_box(val)); + let _latency_guard = measurer.measure_with_guard(); + store_u64.append(black_box(val)); val += 1; }); }); + println!("push_u64 latency:{}", measurer.format_stats()); - let mut store_large = engine.store::(StoreOptions { + let mut store_large = 
engine.new_journal_store::(JournalStoreOptions { name: "bench_push_large", size, in_memory: true, }); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("push_128b", |b| { let val = LargeState { data: [42; 16] }; b.iter(|| { - store_large.push(black_box(val)); + let _latency_guard = measurer.measure_with_guard(); + store_large.append(black_box(val)); }); }); + println!("push_128b latency:{}", measurer.format_stats()); group.finish(); } fn bench_fetch(c: &mut Criterion) { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); + engine.enable_latency_stats(true); let mut group = c.benchmark_group("fetch"); let size = 1024 * 1024 * 100; // 100MB - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "bench_fetch", size, in_memory: true, @@ -60,56 +68,69 @@ fn bench_fetch(c: &mut Criterion) { // Pre-fill some data for i in 0..10000 { - store.push(i as u64); + store.append(i as u64); } let reader = store.reader(); group.throughput(Throughput::Elements(1)); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_at_u64", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_at(black_box(5000))); }); }); + println!("get_at_u64 latency:{}", measurer.format_stats()); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_last_u64", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_last()); }); }); + println!("get_last_u64 latency:{}", measurer.format_stats()); - let mut store_large = engine.store::(StoreOptions { + let mut store_large = engine.new_journal_store::(JournalStoreOptions { name: "bench_fetch_large", size, in_memory: true, }); for _ in 0..10000 { - store_large.push(LargeState { data: [42; 16] }); + store_large.append(LargeState { data: [42; 16] }); } let reader_large = store_large.reader(); + let mut measurer = LatencyMeasurer::new(1000); 
group.bench_function("get_at_128b", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader_large.get_at(black_box(5000))); }); }); + println!("get_at_128b latency:{}", measurer.format_stats()); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("next_get_u64", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); if reader.next() { black_box(reader.get()); } }); }); + println!("next_get_u64 latency:{}", measurer.format_stats()); group.finish(); } fn bench_window(c: &mut Criterion) { - let engine = RodaEngine::new(); + let mut engine = RodaEngine::new(); + engine.enable_latency_stats(true); let mut group = c.benchmark_group("window"); let size = 1024 * 1024 * 100; // 100MB - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "bench_window", size, in_memory: true, @@ -117,22 +138,28 @@ fn bench_window(c: &mut Criterion) { // Pre-fill some data for i in 0..10000 { - store.push(i as u64); + store.append(i as u64); } let reader = store.reader(); group.throughput(Throughput::Elements(1)); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_window_10", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_window::<10>(black_box(5000))); }); }); + println!("get_window_10 latency:{}", measurer.format_stats()); + let mut measurer = LatencyMeasurer::new(1000); group.bench_function("get_window_100", |b| { b.iter(|| { + let _latency_guard = measurer.measure_with_guard(); black_box(reader.get_window::<100>(black_box(5000))); }); }); + println!("get_window_100 latency:{}", measurer.format_stats()); group.finish(); } diff --git a/examples/databento_replay/README.md b/examples/databento_replay/README.md new file mode 100644 index 0000000..b130ecc --- /dev/null +++ b/examples/databento_replay/README.md @@ -0,0 +1,38 @@ +# Liquidity Monitor + +This example demonstrates a market data 
replay system using the Roda engine. It processes raw Market-By-Order (MBO) data to perform real-time liquidity analysis. + +## Overview + +The "Liquidity Monitor" goes beyond simple price tracking. It focuses on three main objectives: + +### 1. Reconstruct the Aggregate Book (Level 2) +Convert the raw stream of individual orders (MBO) into a consolidated map of **Price → Total Volume**. +* **Why useful?** This is what exchanges actually sell as "Level 2 Data." You are building it from scratch from the most granular data available. + +### 2. Calculate "Order Book Imbalance" +Measure the ratio of buy vs. sell pressure in the book. + +**Formula:** +$$Imbalance = \frac{Bid\ Vol - Ask\ Vol}{Bid\ Vol + Ask\ Vol}$$ + +* **Why useful?** This is a primary signal for predicting short-term price movement. A positive value indicates buy pressure. + +### 3. Detect "Liquidity Voids" +Monitor the book for sudden drops in available volume. +* **Condition:** If the total volume at the Top 5 levels drops by 50% in < 1ms, trigger an alert. +* **Why useful?** This predicts "Flash Crashes" and high-volatility events where price might slip significantly. + +## Usage + +To run the replay, provide the path to a Zstandard-compressed Databento MBO file (the importer opens it with `from_zstd_file`): + +```bash +cargo run --example databento_replay -- --file path/to/your/data.dbn.zst +``` + +## Architecture + +- `main.rs`: Sets up the Roda engine, market data store, and the processing pipeline. +- `importer.rs`: Handles reading and decoding the Databento MBO file. +- `light_mbo_entry.rs`: Defines the compact data structure for storing MBO records in the Roda store. 
diff --git a/examples/databento_replay/aggregation_stage.rs b/examples/databento_replay/aggregation_stage.rs new file mode 100644 index 0000000..cd8d383 --- /dev/null +++ b/examples/databento_replay/aggregation_stage.rs @@ -0,0 +1,51 @@ +use crate::book_level_entry::BookLevelEntry; +use crate::light_mbo_entry::LightMboEntry; +use roda_state::{OutputCollector, Stage}; +use std::collections::HashMap; + +#[derive(Default)] +pub struct AggregationStage { + book_volumes: HashMap<(u32, u8, i64), BookLevelEntry>, +} + +impl Stage for AggregationStage { + fn process(&mut self, entry: LightMboEntry, collector: &mut C) + where + C: OutputCollector, + { + let key = (entry.instrument_id, entry.side, entry.price); + let book = self.book_volumes.entry(key).or_insert(BookLevelEntry { + ts: entry.ts, + symbol: entry.instrument_id as u64, + price: entry.price, + volume: 0, + side: entry.side, + _pad: [0; 7], + }); + + book.ts = entry.ts; + + match entry.action { + // Add + b'A' => { + book.volume = book.volume.saturating_add(entry.size as u64); + } + // Cancel, Fill, or Trade + b'C' | b'F' | b'T' => { + book.volume = book.volume.saturating_sub(entry.size as u64); + } + // Clear Book + b'R' => { + book.volume = 0; + } + _ => {} + } + + // Always push the update so downstream knows about deletions/volume=0 + collector.push(*book); + + if book.volume == 0 { + self.book_volumes.remove(&key); + } + } +} diff --git a/examples/databento_replay/analysis_stage.rs b/examples/databento_replay/analysis_stage.rs new file mode 100644 index 0000000..672ea50 --- /dev/null +++ b/examples/databento_replay/analysis_stage.rs @@ -0,0 +1,90 @@ +use crate::book_level_entry::BookLevelEntry; +use crate::book_level_top::BookLevelTop; +use crate::imbalance_signal::ImbalanceSignal; +use roda_state::{OutputCollector, Stage}; +use spdlog::prelude::*; +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +pub struct AnalysisStage { + book_tops: HashMap, + last_print: Instant, + counter: u64, +} 
+ +impl Default for AnalysisStage { + fn default() -> Self { + Self { + book_tops: HashMap::new(), + last_print: Instant::now(), + counter: 0, + } + } +} + +impl Stage for AnalysisStage { + fn process(&mut self, entry: BookLevelEntry, collector: &mut C) + where + C: OutputCollector, + { + self.counter += 1; + let book_top = self + .book_tops + .entry(entry.symbol) + .or_insert_with(|| BookLevelTop { + symbol: entry.symbol, + ..Default::default() + }); + book_top.adjust(entry); + + let mut bid_vol = 0.0; + let mut ask_vol = 0.0; + + for (i, level) in book_top.bids.iter().enumerate() { + if level.price == 0 { + break; + } + let weight = 1.0 - (i as f64 * 0.2); + bid_vol += level.size as f64 * weight; + } + + for (i, level) in book_top.asks.iter().enumerate() { + if level.price == 0 { + break; + } + let weight = 1.0 - (i as f64 * 0.2); + ask_vol += level.size as f64 * weight; + } + + let total_vol = bid_vol + ask_vol; + if total_vol > 0.0 { + let imbalance = (bid_vol - ask_vol) / total_vol; + + // Produce the signal + collector.push(ImbalanceSignal { + ts: entry.ts, + symbol: entry.symbol, + imbalance, + bid_vol, + ask_vol, + }); + + if imbalance.abs() > 0.95 && self.last_print.elapsed() > Duration::from_millis(500) { + info!( + "[Sym:{}] Imbalance: {:.2} (B: {:.0}, A: {:.0})", + entry.symbol, imbalance, bid_vol, ask_vol + ); + self.last_print = Instant::now(); + } + } + } +} + +impl Drop for AnalysisStage { + fn drop(&mut self) { + info!( + "[System] Final Imbalance Signals processed: {}", + self.counter + ); + } +} diff --git a/examples/databento_replay/book_level_entry.rs b/examples/databento_replay/book_level_entry.rs new file mode 100644 index 0000000..bd9bff5 --- /dev/null +++ b/examples/databento_replay/book_level_entry.rs @@ -0,0 +1,12 @@ +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct BookLevelEntry { + pub ts: u64, + pub symbol: u64, // or instrument_id + pub price: i64, + pub volume: u64, // 
"size" is also common + pub side: u8, // b'B'=Bid, b'A'=Ask (raw side byte copied from the MBO entry) + pub _pad: [u8; 7], +} diff --git a/examples/databento_replay/book_level_top.rs b/examples/databento_replay/book_level_top.rs new file mode 100644 index 0000000..d7fcb2e --- /dev/null +++ b/examples/databento_replay/book_level_top.rs @@ -0,0 +1,83 @@ +use crate::book_level_entry::BookLevelEntry; +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct BookLevelTopEntry { + pub size: u64, + pub price: i64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct BookLevelTop { + pub ts: u64, + pub symbol: u64, // or instrument_id + pub asks: [BookLevelTopEntry; 5], + pub bids: [BookLevelTopEntry; 5], +} + +impl BookLevelTop { + pub(crate) fn adjust(&mut self, entry: BookLevelEntry) { + self.ts = entry.ts; + let levels = match entry.side { + b'A' => &mut self.asks, + b'B' => &mut self.bids, + _ => return, + }; + + if let Some(existing_idx) = levels.iter().position(|l| l.price == entry.price) { + if entry.volume == 0 { + for i in existing_idx..4 { + levels[i] = levels[i + 1]; + } + levels[4] = BookLevelTopEntry::default(); + } else { + levels[existing_idx].size = entry.volume; + } + return; + } + + if entry.volume > 0 { + // PASS ONLY THE SLICE: This avoids borrowing 'self' again + Self::insert_if_better(entry, levels); + } + } + + // Removed '&mut self' and changed to a static helper + fn insert_if_better(entry: BookLevelEntry, levels: &mut [BookLevelTopEntry; 5]) { + let is_ask = entry.side == b'A'; + + let pos = levels.iter().position(|l| { + if l.price == 0 { + return true; + } + if is_ask { + entry.price < l.price + } else { + entry.price > l.price + } + }); + + if let Some(i) = pos { + for j in (i + 1..5).rev() { + levels[j] = levels[j - 1]; + } + levels[i] = BookLevelTopEntry { + price: entry.price, + size: entry.volume, + }; + } + } +} + +impl From for BookLevelTop { + fn from(entry: BookLevelEntry) -> Self { + Self { + ts: 
entry.ts, + symbol: entry.symbol, + asks: [BookLevelTopEntry::default(); 5], + bids: [BookLevelTopEntry::default(); 5], + } + } +} diff --git a/examples/databento_replay/imbalance_signal.rs b/examples/databento_replay/imbalance_signal.rs new file mode 100644 index 0000000..6b25c5f --- /dev/null +++ b/examples/databento_replay/imbalance_signal.rs @@ -0,0 +1,11 @@ +use bytemuck::{Pod, Zeroable}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct ImbalanceSignal { + pub ts: u64, + pub symbol: u64, + pub imbalance: f64, + pub bid_vol: f64, + pub ask_vol: f64, +} diff --git a/examples/databento_replay/importer.rs b/examples/databento_replay/importer.rs new file mode 100644 index 0000000..bbfcae6 --- /dev/null +++ b/examples/databento_replay/importer.rs @@ -0,0 +1,41 @@ +use std::error::Error; +use std::path::PathBuf; +use std::time::Instant; + +use dbn::Record; +use dbn::decode::{DbnDecoder as Decoder, DecodeRecordRef}; +use dbn::enums::rtype; +use dbn::record::MboMsg; +use spdlog::prelude::*; + +// Use your specific high-level API modules +use crate::light_mbo_entry::LightMboEntry; +use roda_state::Appendable; + +pub fn import_mbo_file( + file: PathBuf, + market_store: &mut impl Appendable, +) -> Result<(), Box> { + info!("[Writer] Starting Feed Handler for {:?}...", file); + let start = Instant::now(); + let mut count = 0u64; + + // 1. Setup Decoder + let mut decoder = Decoder::from_zstd_file(&file)?; + + // 3. Hot Loop + while let Some(record) = decoder.decode_record_ref()? { + if record.header().rtype == rtype::MBO { + let msg = record.get::().unwrap(); + market_store.append(LightMboEntry::from(msg)); + count += 1; + } + } + + let duration = start.elapsed(); + info!( + "[Writer] Finished! 
Pushed {} updates in {:?}", + count, duration + ); + Ok(()) +} diff --git a/examples/databento_replay/light_mbo_entry.rs b/examples/databento_replay/light_mbo_entry.rs new file mode 100644 index 0000000..ab90fa6 --- /dev/null +++ b/examples/databento_replay/light_mbo_entry.rs @@ -0,0 +1,57 @@ +use bytemuck::{Pod, Zeroable}; +use dbn::record::MboMsg; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct LightMboEntry { + /// 1. The Event Timestamp (UNIX nanos). + /// Essential for detecting "Flash Crash" speed or latency. + pub ts: u64, + + /// 2. The Unique Order ID. + /// Critical for linking a 'Cancel' message back to the original 'Add'. + pub order_id: u64, + + /// 3. The Price. + /// Signed integer (fixed precision, usually 1e-9). + pub price: i64, + + /// 4. The Size (Quantity). + pub size: u32, + + // --- PACKING SECTION (32-Bit Alignment) --- + /// 5. The Instrument ID (from Header). + /// Needed if your store contains multiple symbols (e.g., MSFT and AAPL). + pub instrument_id: u32, + + // --- PACKING SECTION (8-Bit Alignment) --- + /// 6. Action (Add='A', Cancel='C', Modify='M', etc.) + /// We store as u8 to match the raw byte. + pub action: u8, + + /// 7. Side (Bid='B', Ask='A'). + pub side: u8, + + /// 8. Explicit Padding. + /// We have used: 8+8+8+4+4+1+1 = 34 bytes. + /// The next multiple of 8 (for u64 alignment) is 40. + /// So we need 6 bytes of padding. + pub _pad: [u8; 6], +} + +impl From<&MboMsg> for LightMboEntry { + fn from(msg: &MboMsg) -> Self { + Self { + ts: msg.hd.ts_event, + order_id: msg.order_id, + price: msg.price, + size: msg.size, + instrument_id: msg.hd.instrument_id, + // Cast char (i8) to u8 directly. + // 'A' is 65, 'B' is 66, etc. 
+ action: msg.action as u8, + side: msg.side as u8, + _pad: [0; 6], + } + } +} diff --git a/examples/databento_replay/main.rs b/examples/databento_replay/main.rs new file mode 100644 index 0000000..2647881 --- /dev/null +++ b/examples/databento_replay/main.rs @@ -0,0 +1,68 @@ +use clap::Parser; +use spdlog::prelude::*; +use std::path::PathBuf; +use std::time::Duration; + +use roda_state::{StageEngine, latency, pipe, progress}; + +mod aggregation_stage; +mod analysis_stage; +mod book_level_entry; +mod book_level_top; +mod imbalance_signal; +mod importer; +mod light_mbo_entry; + +use crate::aggregation_stage::AggregationStage; +use crate::analysis_stage::AnalysisStage; +use importer::import_mbo_file; + +#[derive(Parser)] +struct Args { + #[arg(long)] + file: PathBuf, +} + +fn main() -> Result<(), Box> { + let args = Args::parse(); + + info!("[System] Booting Roda Data Bento Replay with StageEngine..."); + + // 1. Initialize StageEngine with enough capacity for the input + // Using 30M as in original example + let mut engine = StageEngine::with_capacity(30_000_000); + engine.enable_latency_stats(true); + + // 2. Add Aggregation Stage: LightMboEntry -> BookLevelEntry + let engine = engine.add_stage_with_capacity( + 30_000_000, + pipe![ + progress("Aggregation", 10_000_000), + latency("Aggregation", 10_000_000, 1000, AggregationStage::default()) + ], + ); + + // 3. 
Add Imbalance Analysis Stage: BookLevelEntry -> ImbalanceSignal + let mut engine = engine.add_stage_with_capacity( + 30_000_000, + pipe![ + progress("Imbalance Analysis", 10_000_000), + latency( + "Imbalance Analysis", + 10_000_000, + 1000, + AnalysisStage::default() + ) + ], + ); + + import_mbo_file(args.file, &mut engine)?; + + info!("[System] Waiting for all stages to finish processing..."); + engine.await_idle(Duration::from_secs(600)); + + info!("[System] Final Imbalance Signals: {}", engine.output_size()); + info!("[System] Done!"); + + Ok(()) +} diff --git a/examples/hello_world.rs b/examples/hello_world.rs deleted file mode 100644 index 0868940..0000000 --- a/examples/hello_world.rs +++ /dev/null @@ -1,138 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::components::{Engine, Index, Store, StoreOptions, StoreReader}; -use roda_state::{Aggregator, RodaEngine, Window}; -use std::cmp::min; -// ============================================================================== -// 1. DATA CONTRACT -// ============================================================================== - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct Tick { - pub symbol: u64, - pub price: f64, - pub timestamp: u64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct OHLC { - pub symbol: u64, - pub open: f64, - pub high: f64, - pub low: f64, - pub close: f64, - pub timestamp: u64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct TimeKey { - pub symbol: u64, - pub timestamp: u64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] -pub struct Signal { - pub symbol: u64, - pub timestamp: u64, - pub direction: i32, - pub size: u32, -} - -// ============================================================================== -// 2. 
DECLARATIVE PIPELINE EXAMPLE -// ============================================================================== - -fn main() { - let engine = RodaEngine::new(); - - // A. RESOURCES - let tick_store = engine.store::(StoreOptions { - name: "ticks", - size: 1_000_000, - in_memory: true, - }); - let tick_reader = tick_store.reader(); - let mut ohlc_store = engine.store::(StoreOptions { - name: "ohlc", - size: 10_000, - in_memory: true, - }); - let ohlc_reader = ohlc_store.reader(); - let mut simple_strategy = engine.store::(StoreOptions { - name: "simple_strategy", - size: 10_000, - in_memory: true, - }); - - // The Index tracks where specific candles live in the ring buffer - let ohlc_index = ohlc_store.direct_index::(); - - // B. PIPELINE - let ohlc_pipeline: Aggregator = Aggregator::new(); - let simple_strategy_pipeline: Window = Window::new(); - - // C. WORKER - engine.run_worker(move || { - tick_reader.next(); - - // 1. PARTITION: Map the Tick to a Candle ID (Construct the Key) - ohlc_pipeline - .from(&tick_reader) - .to(&mut ohlc_store) - .partition_by(|tick| TimeKey { - symbol: tick.symbol, - timestamp: tick.timestamp / 100_000, - }) - .reduce(|index, tick, candle| { - if index == 0 { - // Init (First tick in bucket) - candle.open = tick.price; - candle.high = tick.price; - candle.low = tick.price; - candle.close = tick.price; - - // Set Identity - candle.symbol = tick.symbol; - candle.timestamp = (tick.timestamp / 100_000) * 100_000; - } else { - // Update - candle.high = tick.price.max(candle.high); - candle.low = tick.price.min(candle.low); - candle.close = tick.price; - } - }); - - // 3. 
INDEX: Ensure the new candle is discoverable - // Note: Input is 'candle' (OHLC), not 'tick' - ohlc_index.compute(|candle| TimeKey { - symbol: candle.symbol, - timestamp: candle.timestamp / 100_000, - }); - }); - - engine.run_worker(move || { - ohlc_reader.next(); - - simple_strategy_pipeline - .from(&ohlc_reader) - .to(&mut simple_strategy) - .reduce(2, |candle| { - let cur = candle[1]; - let prev = candle[0]; - - if cur.close > prev.close { - return Some(Signal { - symbol: cur.symbol, - timestamp: cur.timestamp, - direction: 1, - size: min(100, (cur.close - prev.close) as u32), - }); - } - - None - }) - }); -} diff --git a/examples/sensor_test/main.rs b/examples/sensor_test/main.rs new file mode 100644 index 0000000..41a5288 --- /dev/null +++ b/examples/sensor_test/main.rs @@ -0,0 +1,67 @@ +mod models; + +use crate::models::{Alert, Reading, SensorKey, Summary}; +use roda_state::StageEngine; +use roda_state::pipe; +use roda_state::{delta, stateful}; +use std::time::Duration; + +fn main() { + println!("Starting Sensor Multistage Pipeline (Optimized)..."); + + // 1. Initialize StageEngine + let engine = StageEngine::::with_capacity(1000); + + // 2. Add Aggregation Stage: Reading -> Summary + let mut engine = engine + .add_stage(pipe![ + // Use stateful helper to handle the HashMap and windowing logic + stateful(SensorKey::from_reading, Summary::init, |state, r| state + .update(r)) + ]) + .add_stage(pipe![ + // Use delta to compare current summary to previous summary for the same sensor + delta( + |s: &Summary| s.sensor_id, + |curr, prev| { + if let Some(p) = prev + && curr.avg > p.avg * 1.5 + { + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); + } + None + } + ) + ]); + + // 4. 
INGEST DATA + println!("\nPushing sensor readings..."); + let readings = [ + Reading::from(1, 10.0, 10_000), + Reading::from(1, 12.0, 20_000), + Reading::from(1, 20.0, 110_000), // Average jump + Reading::from(1, 22.0, 120_000), + ]; + + for r in readings { + engine.send(r); + } + + engine.await_idle(Duration::from_millis(100)); + + // 5. DISPLAY RESULTS + println!("\nAlerts Detected:"); + while let Some(alert) = engine.receive() { + println!( + "ALERT: Sensor {} anomaly at {}", + alert.sensor_id, alert.timestamp + ); + } + + println!("\nDone!"); +} diff --git a/examples/sensor_test/models.rs b/examples/sensor_test/models.rs new file mode 100644 index 0000000..d8f7b65 --- /dev/null +++ b/examples/sensor_test/models.rs @@ -0,0 +1,94 @@ +use bytemuck::{Pod, Zeroable}; + +/// Raw sensor reading +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Reading { + pub sensor_id: u64, + pub value: f64, + pub timestamp: u64, +} + +impl Reading { + pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { + sensor_id, + value, + timestamp, + } + } +} + +/// Statistical summary of readings for a time window +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Summary { + pub sensor_id: u64, + pub min: f64, + pub max: f64, + pub avg: f64, + pub count: u64, + pub timestamp: u64, +} + +/// Key used for partitioning and indexing summaries +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SensorKey { + pub sensor_id: u64, + pub timestamp: u64, +} + +/// Alert generated when an anomaly is detected +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable)] +pub struct Alert { + pub sensor_id: u64, + pub timestamp: u64, + pub severity: i32, + pub _pad0: i32, +} + +impl SensorKey { + /// Helper to create a key aligned to a 100ms (100,000 unit) window. 
+ #[inline(always)] + pub fn from_reading(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + // Aligning timestamp to the floor of the window + timestamp: (r.timestamp / 100_000) * 100_000, + } + } +} + +impl Summary { + /// Initialize a new summary bucket from the first reading encountered. + #[inline(always)] + pub fn init(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + min: r.value, + max: r.value, + avg: r.value, + count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } + + /// Update the existing summary with a new reading. + #[inline(always)] + pub fn update(&mut self, r: Reading) { + // Update Min/Max + if r.value < self.min { + self.min = r.value; + } + if r.value > self.max { + self.max = r.value; + } + + // Online Average Calculation: + // new_avg = ((old_avg * count) + new_val) / (count + 1) + self.avg = (self.avg * self.count as f64 + r.value) / (self.count + 1) as f64; + self.count += 1; + } +} diff --git a/examples/service_health/main.rs b/examples/service_health/main.rs new file mode 100644 index 0000000..79e54f7 --- /dev/null +++ b/examples/service_health/main.rs @@ -0,0 +1,85 @@ +mod models; + +use models::{Alert, Reading, SensorKey, Summary}; +use roda_state::StageEngine; +use roda_state::pipe; +use roda_state::{dedup_by, delta, inspect, stateful}; +use std::time::Duration; + +fn main() { + println!("--- Starting StageEngine: Service Health Pipeline ---"); + + // 1. Initialize StageEngine (Initial entry type is Reading) + let engine = StageEngine::::with_capacity(1000); + + // 2. Add Aggregation Stage: Reading -> Summary + // We also include a deduplicator at the start to drop identical raw readings. 
+ let engine = engine.add_stage(pipe![ + dedup_by(|r: &Reading| (r.sensor_id, (r.value * 1000.0) as u64)), // Noise filter + stateful(SensorKey::from_reading, Summary::init, Summary::update), + inspect(|s: &Summary| { + println!( + "STAGE 1 [AGG]: Sensor {} Avg updated to {:.2}", + s.sensor_id, s.avg + ); + }) + ]); + + // 3. Add Anomaly Detection Stage: Summary -> Alert + // Uses Delta to compare current state with previous known state for that sensor. + let mut engine = engine.add_stage(pipe![ + delta( + |s: &Summary| s.sensor_id, + |curr, prev| { + if let Some(p) = prev + && curr.avg > p.avg * 1.5 + { + // Logic: Alert if the average jumps by more than 50% + return Some(Alert { + sensor_id: curr.sensor_id, + timestamp: curr.timestamp, + severity: 1, + ..Default::default() + }); + } + None + } + ), + // Deduplicate Alerts: Only notify if the alert is new/changed for this sensor + dedup_by(|a: &Alert| a.sensor_id), + inspect(|a: &Alert| { + println!( + "STAGE 2 [ALERT]: 🚨 Anomaly detected for Sensor {}!", + a.sensor_id + ); + }) + ]); + + // 4. Ingest Data + println!("\nIngesting readings..."); + let readings = [ + Reading::from(1, 10.0, 10_000), // Baseline + Reading::from(1, 10.0, 20_000), // Duplicate (filtered by dedup) + Reading::from(1, 11.0, 30_000), // Small change + Reading::from(1, 25.0, 110_000), // Spike -> Triggers Alert + Reading::from(2, 5.0, 10_000), // New Sensor + ]; + + for r in readings { + engine.send(r); + } + + // Give workers time to finish processing + engine.await_idle(Duration::from_millis(100)); + + // 5. 
Display Results from the end of the pipeline + println!("\n--- Final Alert Journal ---"); + while let Some(alert) = engine.try_receive() { + println!( + "Received in Main: Alert for Sensor {} at {}", + alert.sensor_id, alert.timestamp + ); + } + + println!("\nDone."); +} diff --git a/examples/service_health/models.rs b/examples/service_health/models.rs new file mode 100644 index 0000000..2df1f3a --- /dev/null +++ b/examples/service_health/models.rs @@ -0,0 +1,88 @@ +use bytemuck::{Pod, Zeroable}; + +/// Raw sensor reading +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq)] +pub struct Reading { + pub sensor_id: u64, + pub value: f64, + pub timestamp: u64, +} + +impl Reading { + pub fn from(sensor_id: u64, value: f64, timestamp: u64) -> Self { + Self { + sensor_id, + value, + timestamp, + } + } +} + +/// Key used for partitioning and indexing summaries (100ms buckets) +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SensorKey { + pub sensor_id: u64, + pub timestamp: u64, +} + +impl SensorKey { + #[inline(always)] + pub fn from_reading(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + // Aligns to 100,000 unit (100ms) windows + timestamp: (r.timestamp / 100_000) * 100_000, + } + } +} + +/// Statistical summary of readings for a time window +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq)] +pub struct Summary { + pub sensor_id: u64, + pub min: f64, + pub max: f64, + pub avg: f64, + pub count: u64, + pub timestamp: u64, +} + +impl Summary { + #[inline(always)] + pub fn init(r: &Reading) -> Self { + Self { + sensor_id: r.sensor_id, + min: r.value, + max: r.value, + avg: r.value, + count: 1, + timestamp: (r.timestamp / 100_000) * 100_000, + } + } + + #[inline(always)] + pub fn update(&mut self, r: Reading) { + if r.value < self.min { + self.min = r.value; + } + if r.value > self.max { + self.max = r.value; + } + // Online average calculation 
+ self.avg = (self.avg * self.count as f64 + r.value) / (self.count + 1) as f64; + self.count += 1; + } +} + +/// Alert generated when an anomaly is detected +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, Pod, Zeroable, PartialEq)] +pub struct Alert { + pub sensor_id: u64, + pub timestamp: u64, + pub severity: i32, + pub _pad0: i32, +} diff --git a/scripts/check.sh b/scripts/check.sh index 933eee7..b71af1c 100755 --- a/scripts/check.sh +++ b/scripts/check.sh @@ -6,9 +6,9 @@ echo "Running rustfmt..." cargo fmt --all --check echo "Running clippy..." -cargo clippy -- -D warnings +cargo clippy --all-targets -- -D warnings echo "Running tests..." -cargo test +cargo test --all-targets echo "All checks passed!" diff --git a/src/aggregator.rs b/src/aggregator.rs deleted file mode 100644 index 262c0c3..0000000 --- a/src/aggregator.rs +++ /dev/null @@ -1,184 +0,0 @@ -use crate::components::{Store, StoreReader}; -use bytemuck::Pod; -use std::cell::{Cell, RefCell}; -use std::collections::HashMap; -use std::hash::Hash; -use std::marker::PhantomData; - -pub struct Aggregator { - pub(crate) _v: PhantomData, - pub(crate) _out_v: PhantomData, - pub(crate) _partition_key: PhantomData, - pub(crate) last_index: Cell, - pub(crate) states: RefCell>, -} - -impl Aggregator { - pub fn new() -> Aggregator { - Self { - _v: PhantomData, - _out_v: PhantomData, - _partition_key: PhantomData, - last_index: Cell::new(0), - states: RefCell::new(HashMap::new()), - } - } -} - -impl Default - for Aggregator -{ - fn default() -> Self { - Self::new() - } -} - -impl - Aggregator -{ - pub fn from<'a, R: StoreReader>( - &'a self, - reader: &'a R, - ) -> AggregatorFrom<'a, InValue, OutValue, PartitionKey, R> { - AggregatorFrom { - aggregator: self, - reader, - _in: PhantomData, - _out_v: PhantomData, - _partition_key: PhantomData, - } - } - - pub fn pipe(_source: impl Store, _target: impl Store) -> Self { - Self::new() - } -} - -pub struct AggregatorFrom< - 'a, - InValue: Pod + Send, - OutValue: Pod + 
Send, - PartitionKey, - R: StoreReader, -> { - aggregator: &'a Aggregator, - reader: &'a R, - _in: PhantomData, - _out_v: PhantomData, - _partition_key: PhantomData, -} - -impl<'a, InValue: Pod + Send, OutValue: Pod + Send, PartitionKey, R: StoreReader> - AggregatorFrom<'a, InValue, OutValue, PartitionKey, R> -{ - pub fn to<'b, S: Store>( - self, - store: &'b mut S, - ) -> AggregatorTo<'a, 'b, InValue, OutValue, PartitionKey, R, S> { - AggregatorTo { - aggregator: self.aggregator, - reader: self.reader, - store, - _in: PhantomData, - _out: PhantomData, - _partition_key: PhantomData, - } - } -} - -pub struct AggregatorTo< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey, - R: StoreReader, - S: Store, -> { - aggregator: &'a Aggregator, - reader: &'a R, - store: &'b mut S, - _in: PhantomData, - _out: PhantomData, - _partition_key: PhantomData, -} - -impl< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey, - R: StoreReader, - S: Store, -> AggregatorTo<'a, 'b, InValue, OutValue, PartitionKey, R, S> -{ - pub fn partition_by( - self, - key_fn: F, - ) -> AggregatorPartition<'a, 'b, InValue, OutValue, PartitionKey, R, S, F> - where - F: Fn(&InValue) -> PartitionKey, - { - AggregatorPartition { - aggregator: self.aggregator, - reader: self.reader, - store: self.store, - key_fn, - _in: PhantomData, - _out: PhantomData, - _key: PhantomData, - } - } -} - -pub struct AggregatorPartition< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey, - R, - S, - F, -> { - aggregator: &'a Aggregator, - reader: &'a R, - store: &'b mut S, - key_fn: F, - _in: PhantomData, - _out: PhantomData, - _key: PhantomData, -} - -impl<'a, 'b, InValue, OutValue, PartitionKey, R, S, F> - AggregatorPartition<'a, 'b, InValue, OutValue, PartitionKey, R, S, F> -where - InValue: Pod + Send, - OutValue: Pod + Send, - PartitionKey: Hash + Eq + Send, - R: StoreReader, - S: Store, - F: Fn(&InValue) -> PartitionKey, -{ - pub fn reduce(self, mut 
update_fn: impl FnMut(u64, &InValue, &mut OutValue)) { - let mut states = self.aggregator.states.borrow_mut(); - let mut last_index = self.aggregator.last_index.get(); - - let current_index = self.reader.get_index(); - if current_index > last_index { - if let Some(val) = self.reader.get() { - let key = (self.key_fn)(&val); - let (index, mut state) = - states.get(&key).cloned().unwrap_or((0, OutValue::zeroed())); - - update_fn(index, &val, &mut state); - self.store.push(state); - - states.insert(key, (index + 1, state)); - } - last_index = current_index; - self.aggregator.last_index.set(last_index); - } - } -} diff --git a/src/components.rs b/src/components.rs index d99deb3..7055323 100644 --- a/src/components.rs +++ b/src/components.rs @@ -1,53 +1,18 @@ -use crate::index::DirectIndex; use bytemuck::Pod; -pub struct StoreOptions { - pub name: &'static str, - pub size: usize, - pub in_memory: bool, +/// For structures where we append data to the end (Journals, Logs). +pub trait Appendable { + fn append(&mut self, state: State); } -pub trait Engine { - fn run_worker(&self, runnable: impl FnMut() + Send + 'static); - fn store(&self, options: StoreOptions) -> impl Store + 'static; +/// For structures where we update a specific "address" or "slot" (State Maps, Arrays). +pub trait Settable { + fn set(&mut self, at: usize, state: State); } -pub trait Store: Send { - type Reader: StoreReader; - fn push(&mut self, state: State); - fn reader(&self) -> Self::Reader; - fn direct_index(&self) -> DirectIndex; -} - -pub trait StoreReader: Send { +/// The base for anything that can be read. 
+pub trait IterativeReadable { fn next(&self) -> bool; - fn get_index(&self) -> usize; - - fn with(&self, handler: impl FnOnce(&State) -> R) -> Option - where - Self: Sized; - fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option - where - Self: Sized; - fn with_last(&self, handler: impl FnOnce(&State) -> R) -> Option - where - Self: Sized; - fn get(&self) -> Option; - fn get_at(&self, at: usize) -> Option; - fn get_last(&self) -> Option; - fn get_window(&self, at: usize) -> Option<&[State]> - where - Self: Sized; -} - -pub trait Index { - type Reader: IndexReader; - fn compute(&self, key_fn: impl FnOnce(&State) -> Key); - fn reader(&self) -> Self::Reader; -} - -pub trait IndexReader { - fn with(&self, key: &Key, handler: impl FnOnce(&State) -> R) -> Option; - fn get(&self, key: &Key) -> Option; + fn get_index(&self) -> usize; } diff --git a/src/engine.rs b/src/engine.rs index fe57bf1..15e1f4f 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,28 +1,21 @@ -use crate::components::{Engine, Store, StoreOptions}; -use crate::store::StoreJournal; +use crate::journal_store::{JournalStore, JournalStoreOptions}; +use crate::measure::latency_measurer::LatencyMeasurer; +use crate::op_counter::OpCounter; +use crate::slot_store::{SlotStore, SlotStoreOptions}; use bytemuck::Pod; +use spdlog::info; use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::thread; +use std::thread::sleep; +use std::time::{Duration, Instant}; pub struct RodaEngine { root_path: &'static str, running: Arc, -} - -impl Engine for RodaEngine { - fn run_worker(&self, mut runnable: impl FnMut() + Send + 'static) { - let running = self.running.clone(); - thread::spawn(move || { - while running.load(std::sync::atomic::Ordering::Relaxed) { - runnable(); - } - }); - } - - fn store(&self, options: StoreOptions) -> impl Store + 'static { - StoreJournal::new(self.root_path, options, size_of::()) - } + enable_latency_stats: bool, + worker_handlers: Vec>, + op_counter: Arc, } impl 
RodaEngine { @@ -30,6 +23,9 @@ impl RodaEngine { Self { root_path: "data", running: Arc::new(AtomicBool::new(true)), + enable_latency_stats: false, + worker_handlers: vec![], + op_counter: OpCounter::new(), } } @@ -37,8 +33,73 @@ impl RodaEngine { Self { root_path, running: Arc::new(AtomicBool::new(true)), + enable_latency_stats: false, + worker_handlers: vec![], + op_counter: OpCounter::new(), } } + + pub fn enable_latency_stats(&mut self, enable: bool) { + self.enable_latency_stats = enable; + } + + pub fn run_worker(&mut self, mut runnable: impl FnMut() + Send + 'static) { + let worker_id = self.worker_handlers.len(); + let running = self.running.clone(); + let enable_latency_stats = self.enable_latency_stats; + let handler = thread::spawn(move || { + if enable_latency_stats { + let mut measurer = LatencyMeasurer::new(1000); + while running.load(std::sync::atomic::Ordering::Relaxed) { + let instant = Instant::now(); + runnable(); + measurer.measure(instant.elapsed()); + } + info!("[Latency/Worker:{}]{}", worker_id, measurer.format_stats()); + } else { + while running.load(std::sync::atomic::Ordering::Relaxed) { + runnable(); + } + } + }); + self.worker_handlers.push(handler); + } + + pub fn new_journal_store( + &self, + options: JournalStoreOptions, + ) -> JournalStore { + JournalStore::new(self.root_path, self.op_counter.clone(), options) + } + + pub fn new_slot_store(&self, options: SlotStoreOptions) -> SlotStore { + SlotStore::new(self.root_path, self.op_counter.clone(), options) + } + + pub fn await_idle(&self, timeout: Duration) { + let start = Instant::now(); + let mut last_op_count = self.op_counter.total_op_count(); + loop { + sleep(Duration::from_millis(1)); + let new_op_count = self.op_counter.total_op_count(); + if new_op_count == last_op_count { + break; + } + if start.elapsed() > timeout { + break; + } + last_op_count = new_op_count; + } + } + + pub fn is_any_worker_panicked(&self) -> bool { + for handler in &self.worker_handlers { + if 
handler.is_finished() && self.running.load(std::sync::atomic::Ordering::Relaxed) { + return true; + } + } + false + } } impl Default for RodaEngine { @@ -51,5 +112,8 @@ impl Drop for RodaEngine { fn drop(&mut self) { self.running .store(false, std::sync::atomic::Ordering::Relaxed); + for handler in self.worker_handlers.drain(..) { + handler.join().unwrap(); + } } } diff --git a/src/index.rs b/src/index.rs deleted file mode 100644 index 808b99a..0000000 --- a/src/index.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::components::{Index, IndexReader, StoreReader}; -use bytemuck::Pod; -use crossbeam_skiplist::SkipMap; -use std::sync::Arc; - -pub struct DirectIndex> { - pub(crate) map: Arc>, - pub reader: Reader, -} - -pub struct DirectIndexReader { - pub(crate) map: Arc>, -} - -impl> Index for DirectIndex -where - Key: Pod + Ord + Send, - Value: Pod + Send, -{ - type Reader = DirectIndexReader; - fn compute(&self, key_fn: impl FnOnce(&Value) -> Key) { - if self.reader.next() - && let Some(value) = self.reader.get() - { - let key = key_fn(&value); - self.map.insert(key, value); - } - } - - fn reader(&self) -> DirectIndexReader { - DirectIndexReader { - map: self.map.clone(), - } - } -} - -impl IndexReader for DirectIndexReader -where - Key: Pod + Ord + Send, - Value: Pod + Send, -{ - fn with(&self, key: &Key, handler: impl FnOnce(&Value) -> R) -> Option { - self.map.get(key).map(|entry| handler(entry.value())) - } - - fn get(&self, key: &Key) -> Option { - self.map.get(key).map(|entry| *entry.value()) - } -} diff --git a/src/journal_store.rs b/src/journal_store.rs new file mode 100644 index 0000000..835030d --- /dev/null +++ b/src/journal_store.rs @@ -0,0 +1,175 @@ +use crate::components::{Appendable, IterativeReadable}; +use crate::op_counter::OpCounter; +use crate::storage::journal_mmap::JournalMmap; +use bytemuck::Pod; +use std::cell::Cell; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering::Relaxed; + +pub 
struct JournalStoreOptions { + pub name: &'static str, + pub size: usize, + pub in_memory: bool, +} + +pub struct JournalStore { + storage: JournalMmap, + op_counter: Arc, + _marker: std::marker::PhantomData, +} + +pub struct StoreJournalReader { + next_index: Cell, + storage: JournalMmap, + op_count: Arc, + _marker: std::marker::PhantomData, +} + +impl JournalStore { + pub fn new( + root_path: &'static str, + op_counter: Arc, + option: JournalStoreOptions, + ) -> Self { + let total_size = option.size * size_of::(); + let storage = if option.in_memory { + JournalMmap::new(None, total_size).unwrap() + } else { + let path: PathBuf = format!("{}/{}.store", root_path, option.name).into(); + if path.exists() { + JournalMmap::load(path).unwrap() + } else { + JournalMmap::new(Some(path), total_size).unwrap() + } + }; + + Self { + op_counter, + storage, + _marker: Default::default(), + } + } + + pub fn append(&mut self, state: State) { + let size = size_of::(); + let current_pos = self.storage.get_write_index(); + assert!( + current_pos + size <= self.storage.len(), + "Store is full. 
Capacity: {}, Current position: {}, State size: {}", + self.storage.len(), + current_pos, + size + ); + self.storage.append(&state); + } + + pub fn reader(&self) -> StoreJournalReader { + StoreJournalReader { + op_count: self.op_counter.new_counter(), + next_index: Cell::new(0), + storage: self.storage.reader(), + _marker: Default::default(), + } + } + + pub fn size(&self) -> usize { + self.storage.get_write_index() / size_of::() + } +} + +impl Appendable for JournalStore { + fn append(&mut self, state: State) { + self.append(state); + } +} + +impl StoreJournalReader { + pub fn next(&self) -> bool { + let index_to_read = self.next_index.get(); + let offset = index_to_read * size_of::(); + let write_index = self.storage.get_write_index(); + + if offset + size_of::() > write_index { + return false; + } + + self.next_index.set(index_to_read + 1); + self.op_count.fetch_add(1, Relaxed); + + true + } + + pub fn get_index(&self) -> usize { + self.next_index.get() + } + + pub fn with(&self, handler: impl FnOnce(&State) -> R) -> Option { + let next_index = self.next_index.get(); + if next_index == 0 { + return None; + } + let current_index = next_index - 1; + let offset = current_index * size_of::(); + Some(handler(self.storage.read(offset))) + } + + pub fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option { + let offset = at * size_of::(); + let write_index = self.storage.get_write_index(); + if offset + size_of::() > write_index { + return None; + } + Some(handler(self.storage.read(offset))) + } + + pub fn with_last(&self, handler: impl FnOnce(&State) -> R) -> Option { + let write_index = self.storage.get_write_index(); + if write_index < size_of::() { + return None; + } + let offset = write_index - size_of::(); + Some(handler(self.storage.read(offset))) + } + + pub fn get(&self) -> Option { + self.with(|s| *s) + } + + pub fn get_at(&self, at: usize) -> Option { + self.with_at(at, |s| *s) + } + + pub fn get_last(&self) -> Option { + self.with_last(|s| 
*s) + } + + pub fn get_window(&self, at: usize) -> Option<&[State]> { + let offset = at * size_of::(); + let write_index = self.storage.get_write_index(); + if offset + size_of::() * N > write_index { + return None; + } + + Some(self.storage.read_window::(offset)) + } + + pub fn size(&self) -> usize { + self.storage.get_write_index() / size_of::() + } +} + +impl IterativeReadable for StoreJournalReader { + fn next(&self) -> bool { + self.next() + } + + fn get(&self) -> Option { + self.get() + } + + fn get_index(&self) -> usize { + self.get_index() + } +} diff --git a/src/lib.rs b/src/lib.rs index df35bc3..f7c1a4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,18 @@ -pub mod aggregator; -pub mod components; -pub mod engine; -pub mod index; +mod components; +mod engine; +mod journal_store; +mod macros; +pub mod measure; +mod op_counter; +mod pipe; +mod slot_store; +mod stage; +mod stage_engine; mod storage; -pub mod store; -pub mod window; -pub use crate::aggregator::Aggregator; +pub use crate::components::*; pub use crate::engine::RodaEngine; -pub use crate::index::{DirectIndex, DirectIndexReader}; -pub use crate::store::{StoreJournal, StoreJournalReader}; -pub use crate::window::Window; +pub use crate::journal_store::{JournalStore, JournalStoreOptions, StoreJournalReader}; +pub use crate::pipe::*; +pub use crate::stage::{OutputCollector, Stage, StageExt}; +pub use crate::stage_engine::StageEngine; diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 0000000..d0274c9 --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,10 @@ +#[macro_export] +macro_rules! pipe { + ($s1:expr) => { $s1 }; + ($s1:expr, $($rest:expr),+ $(,)?) 
=> { + { + use $crate::StageExt; + $s1.pipe($crate::pipe!($($rest),+)) + } + }; +} diff --git a/src/measure/latency_measurer.rs b/src/measure/latency_measurer.rs new file mode 100644 index 0000000..bcc67fb --- /dev/null +++ b/src/measure/latency_measurer.rs @@ -0,0 +1,187 @@ +use hdrhistogram::Histogram; +use std::time::{Duration, Instant}; + +#[derive(Debug, Clone, Default)] +pub struct LatencyStats { + pub count: u64, + pub min: u64, + pub max: u64, + pub mean: f64, + pub p50: u64, + pub p90: u64, + pub p99: u64, + pub p999: u64, + pub p9999: u64, +} + +pub struct LatencyMeasurerGuard<'a> { + measurer: &'a mut LatencyMeasurer, + start: Option, +} + +impl Drop for LatencyMeasurerGuard<'_> { + fn drop(&mut self) { + if let Some(start) = self.start { + self.measurer.measure_local(start.elapsed()); + } + } +} + +/// A latency measurer that uses hdrhistogram. +pub struct LatencyMeasurer { + histogram: Histogram, + sum: u64, + step_instant: Instant, + sample_rate: u64, + step: u64, +} + +impl LatencyMeasurer { + pub fn new(sample_rate: u64) -> Self { + assert!(sample_rate > 0, "sample_rate must be positive"); + // Range: 1ns to 1,000s (1,000,000,000,000 ns) + // 3 significant figures + let histogram = Histogram::::new_with_bounds(1, 1_000_000_000_000, 3).unwrap(); + Self { + histogram, + sum: 0, + sample_rate, + step_instant: Instant::now(), + step: 0, + } + } + + pub fn measure(&mut self, duration: Duration) { + self.step += 1; + if !self.step.is_multiple_of(self.sample_rate) { + return; + } + + self.measure_local(duration); + } + + fn measure_local(&mut self, duration: Duration) { + let count = self.sample_rate; + let nanos = duration.as_nanos() as u64; + let nanos = nanos.clamp(1, 1_000_000_000_000); + + self.histogram.record_n(nanos, count).unwrap(); + self.sum += nanos; + } + + pub fn measure_with_guard(&mut self) -> LatencyMeasurerGuard<'_> { + self.step += 1; + if !self.step.is_multiple_of(self.sample_rate) { + return LatencyMeasurerGuard { + measurer: self, + 
start: None, + }; + } + LatencyMeasurerGuard { + measurer: self, + start: Some(Instant::now()), + } + } + + pub fn step_measure(&mut self) { + self.step += 1; + if !self.step.is_multiple_of(self.sample_rate) { + return; + } + let elapsed = self.step_instant.elapsed(); + self.measure(elapsed); + self.step_instant = Instant::now(); + } + + pub fn reset(&mut self) { + self.histogram.reset(); + self.sum = 0; + } + + pub fn get_stats(&self) -> LatencyStats { + let count = self.histogram.len(); + if count == 0 { + return LatencyStats::default(); + } + + LatencyStats { + count, + min: self.histogram.min(), + max: self.histogram.max(), + mean: self.histogram.mean(), + p50: self.histogram.value_at_quantile(0.5), + p90: self.histogram.value_at_quantile(0.9), + p99: self.histogram.value_at_quantile(0.99), + p999: self.histogram.value_at_quantile(0.999), + p9999: self.histogram.value_at_quantile(0.9999), + } + } + + pub fn format_stats(&self) -> String { + let stats = self.get_stats(); + if stats.count == 0 { + return "No stats collected yet".into(); + } + + format!( + "\tmin={},\tmax={},\tmean={},\tp50={},\tp90={},\tp99={},\tp999={},\tp9999={}", + Self::format_duration(stats.min as f64), + Self::format_duration(stats.max as f64), + Self::format_duration(stats.mean), + Self::format_duration(stats.p50 as f64), + Self::format_duration(stats.p90 as f64), + Self::format_duration(stats.p99 as f64), + Self::format_duration(stats.p999 as f64), + Self::format_duration(stats.p9999 as f64), + ) + } + + fn format_duration(nanos: f64) -> String { + if nanos < 1000.0 { + if nanos == nanos.floor() { + format!("{:.0}ns", nanos) + } else { + format!("{:.1}ns", nanos) + } + } else if nanos < 1_000_000.0 { + let val = nanos / 1000.0; + if val == val.floor() { + format!("{:.0}us", val) + } else { + format!("{:.1}us", val) + } + } else if nanos < 1_000_000_000.0 { + let val = nanos / 1_000_000.0; + if val == val.floor() { + format!("{:.0}ms", val) + } else { + let s = format!("{:.2}ms", val); + 
if s.ends_with("0ms") { + format!("{:.1}ms", val) + } else { + s + } + } + } else { + let val = nanos / 1_000_000_000.0; + if val == val.floor() { + format!("{:.0}s", val) + } else { + let s = format!("{:.2}s", val); + if s.ends_with("0s") { + format!("{:.1}s", val) + } else { + s + } + } + } + } + + pub fn is_outlier(&self, duration: Duration) -> bool { + let stats = self.get_stats(); + if stats.count < 100 { + return false; + } + duration.as_nanos() as u64 > stats.p999 + } +} diff --git a/src/measure/mod.rs b/src/measure/mod.rs new file mode 100644 index 0000000..7dfaab5 --- /dev/null +++ b/src/measure/mod.rs @@ -0,0 +1,2 @@ +pub mod latency_measurer; +pub use latency_measurer::{LatencyMeasurer, LatencyStats}; diff --git a/src/op_counter.rs b/src/op_counter.rs new file mode 100644 index 0000000..e2ecb72 --- /dev/null +++ b/src/op_counter.rs @@ -0,0 +1,31 @@ +use std::sync::atomic::AtomicU64; +use std::sync::{Arc, Mutex}; + +pub struct OpCounter { + counters: Mutex>>, +} + +impl OpCounter { + pub fn new() -> Arc { + Arc::new(Self { + counters: Mutex::new(vec![]), + }) + } + + pub fn total_op_count(&self) -> u64 { + self.counters + .lock() + .unwrap() + .iter() + .map(|c| c.load(std::sync::atomic::Ordering::Relaxed)) + .sum() + } + + pub fn new_counter(&self) -> Arc { + let counter = Arc::new(AtomicU64::new(0)); + + self.counters.lock().unwrap().push(counter.clone()); + + counter + } +} diff --git a/src/pipe/dedup_by.rs b/src/pipe/dedup_by.rs new file mode 100644 index 0000000..979657d --- /dev/null +++ b/src/pipe/dedup_by.rs @@ -0,0 +1,38 @@ +use std::collections::HashMap; + +/// Only emits the event if the value associated with the key has changed. 
+pub fn dedup_by(mut key_fn: impl FnMut(&T) -> K) -> impl FnMut(T) -> Option +where + K: std::hash::Hash + Eq, + T: bytemuck::Pod + Send + Copy + PartialEq, +{ + let mut last_values: HashMap = HashMap::new(); + move |curr| { + let key = key_fn(&curr); + let prev = last_values.get(&key); + + if prev == Some(&curr) { + // Value hasn't changed; suppress the event + return None; + } + + // Value changed or is new; update cache and emit + last_values.insert(key, curr); + Some(curr) + } +} + +#[cfg(test)] +mod dedup_tests { + use super::*; + + #[test] + fn test_dedup_logic() { + let mut pipe = dedup_by(|_: &i32| 0); // Use a constant key for global consecutive dedup + + assert_eq!(pipe(10), Some(10)); // First time: pass + assert_eq!(pipe(10), None); // Same value: drop + assert_eq!(pipe(20), Some(20)); // New value: pass + assert_eq!(pipe(10), Some(10)); // Changed back: pass + } +} diff --git a/src/pipe/delta.rs b/src/pipe/delta.rs new file mode 100644 index 0000000..2c22246 --- /dev/null +++ b/src/pipe/delta.rs @@ -0,0 +1,45 @@ +use std::collections::HashMap; + +/// Compares current item with the previous item of the same key. 
+pub fn delta( + mut key_fn: impl FnMut(&T) -> K, + mut logic: impl FnMut(T, Option) -> Option, +) -> impl FnMut(T) -> Option +where + K: std::hash::Hash + Eq, + T: bytemuck::Pod + Send + Copy, + Out: bytemuck::Pod + Send, +{ + let mut last_values: HashMap = HashMap::new(); + move |curr| { + let key = key_fn(&curr); + let prev = last_values.get(&key).copied(); + last_values.insert(key, curr); + logic(curr, prev) + } +} + +#[repr(C)] +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable, Debug, PartialEq)] +struct Metric { + pub id: u64, + pub val: f64, +} + +#[test] +fn test_delta_logic() { + // Return u8 (1 for alert, 0 for none) to satisfy Pod + let mut pipe = delta( + |m: &Metric| m.id, + |curr, prev| match prev { + Some(p) if curr.val >= p.val + 5.0 => Some(1u8), + _ => Some(0u8), + }, + ); + + let m1 = Metric { id: 1, val: 10.0 }; + let m2 = Metric { id: 1, val: 17.0 }; + + assert_eq!(pipe(m1), Some(0u8)); + assert_eq!(pipe(m2), Some(1u8)); // Alert triggered +} diff --git a/src/pipe/filter.rs b/src/pipe/filter.rs new file mode 100644 index 0000000..e8087da --- /dev/null +++ b/src/pipe/filter.rs @@ -0,0 +1,22 @@ +/// Only passes items that satisfy the predicate. +pub fn filter(mut predicate: impl FnMut(&T) -> bool) -> impl FnMut(T) -> Option +where + T: bytemuck::Pod + Send, +{ + move |item| { + if predicate(&item) { Some(item) } else { None } + } +} + +#[cfg(test)] +mod filter_tests { + use super::*; + + #[test] + fn test_filter_logic() { + let mut pipe = filter(|x: &i32| *x > 0); + + assert_eq!(pipe(10), Some(10)); + assert_eq!(pipe(-5), None); + } +} diff --git a/src/pipe/inspect.rs b/src/pipe/inspect.rs new file mode 100644 index 0000000..8850612 --- /dev/null +++ b/src/pipe/inspect.rs @@ -0,0 +1,30 @@ +/// Passes the item through while performing a side effect. 
+pub fn inspect(mut f: impl FnMut(&T)) -> impl FnMut(T) -> Option +where + T: bytemuck::Pod + Send, +{ + move |item| { + f(&item); + Some(item) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + + #[test] + fn test_inspect_logic() { + let count = Arc::new(AtomicUsize::new(0)); + let mut pipe = inspect(|_x: &u32| { + count.fetch_add(1, Ordering::Relaxed); + }); + + let res = pipe(42); + + assert_eq!(res, Some(42)); + assert_eq!(count.load(Ordering::Relaxed), 1); + } +} diff --git a/src/pipe/latency.rs b/src/pipe/latency.rs new file mode 100644 index 0000000..ab6fe00 --- /dev/null +++ b/src/pipe/latency.rs @@ -0,0 +1,109 @@ +use crate::measure::latency_measurer::LatencyMeasurer; +use crate::stage::{OutputCollector, Stage}; +use bytemuck::Pod; +use spdlog::info; +use std::marker::PhantomData; + +/// A pipe that measures the latency of an inner stage. +pub struct Latency { + name: String, + report_interval: usize, + stage: S, + measurer: LatencyMeasurer, + count: usize, + _phantom: PhantomData<(In, Out)>, +} + +impl Latency +where + In: Pod + Send, + Out: Pod + Send, + S: Stage, +{ + pub fn new( + name: impl Into, + report_interval: usize, + sample_rate: u64, + stage: S, + ) -> Self { + Latency { + name: name.into(), + report_interval, + stage, + measurer: LatencyMeasurer::new(sample_rate), + count: 0, + _phantom: PhantomData, + } + } +} + +impl Stage for Latency +where + In: Pod + Send, + Out: Pod + Send, + S: Stage, +{ + #[inline(always)] + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector, + { + { + let _guard = self.measurer.measure_with_guard(); + self.stage.process(data, collector); + } + self.count += 1; + if self.count.is_multiple_of(self.report_interval) { + info!("[{}] Latency: {}", self.name, self.measurer.format_stats()); + } + } +} + +pub fn latency( + name: impl Into, + interval: usize, + example_size: usize, + stage: S, +) -> Latency +where + In: Pod 
+ Send, + Out: Pod + Send, + S: Stage, +{ + Latency::new(name, interval, example_size as u64, stage) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread; + use std::time::Duration; + + #[test] + fn test_latency_logic() { + let mut pipe = latency("test", 2, 1, |x: u32| { + thread::sleep(Duration::from_millis(10)); + Some(x as u64) + }); + + let mut out = Vec::new(); + + // Process 1st item + { + let mut collector = |x: u64| out.push(x); + pipe.process(1u32, &mut collector); + } + assert_eq!(out, vec![1]); + + // Process 2nd item - should trigger print + { + let mut collector = |x: u64| out.push(x); + pipe.process(2u32, &mut collector); + } + assert_eq!(out, vec![1, 2]); + + let stats = pipe.measurer.get_stats(); + assert_eq!(stats.count, 2); + assert!(stats.min >= 10_000_000); // at least 10ms in nanos + } +} diff --git a/src/pipe/map.rs b/src/pipe/map.rs new file mode 100644 index 0000000..0fa5799 --- /dev/null +++ b/src/pipe/map.rs @@ -0,0 +1,21 @@ +/// Transforms an item from one type to another. 
+pub fn map(mut f: impl FnMut(In) -> Out) -> impl FnMut(In) -> Option +where + In: bytemuck::Pod + Send, + Out: bytemuck::Pod + Send, +{ + move |item| Some(f(item)) +} + +#[cfg(test)] +mod map_tests { + use super::*; + + #[test] + fn test_map_logic() { + // Transform u32 to u64 + let mut pipe = map(|x: u32| x as u64 * 2); + + assert_eq!(pipe(21), Some(42u64)); + } +} diff --git a/src/pipe/mod.rs b/src/pipe/mod.rs new file mode 100644 index 0000000..fd75330 --- /dev/null +++ b/src/pipe/mod.rs @@ -0,0 +1,19 @@ +mod dedup_by; +mod delta; +mod filter; +mod inspect; +mod latency; +mod map; +mod progress; +mod stateful; +mod windowed; + +pub use dedup_by::dedup_by; +pub use delta::delta; +pub use filter::filter; +pub use inspect::inspect; +pub use latency::latency; +pub use map::map; +pub use progress::progress; +pub use stateful::stateful; +pub use windowed::windowed; diff --git a/src/pipe/progress.rs b/src/pipe/progress.rs new file mode 100644 index 0000000..ee686b1 --- /dev/null +++ b/src/pipe/progress.rs @@ -0,0 +1,106 @@ +use spdlog::info; +use std::time::Instant; + +/// A pipe that logs progress information. 
+pub fn progress(name: impl Into, interval: usize) -> impl FnMut(T) -> Option +where + T: bytemuck::Pod + Send, +{ + assert!(interval > 0, "interval must be greater than 0"); + let name = name.into(); + let mut count: usize = 0; + let mut last_instant = Instant::now(); + let start_instant = last_instant; + + move |item| { + count += 1; + if count.is_multiple_of(interval) { + let now = Instant::now(); + let elapsed = now.duration_since(last_instant); + let total_elapsed = now.duration_since(start_instant); + + let mps = interval as f64 / elapsed.as_secs_f64(); + let total_mps = count as f64 / total_elapsed.as_secs_f64(); + + info!( + "[{}] Processed {} messages, Rate: {} msg/s, Avg: {} msg/s", + name, + format_count(count as f64), + format_count(mps), + format_count(total_mps) + ); + last_instant = now; + } + Some(item) + } +} + +fn format_count(val: f64) -> String { + if val < 1000.0 { + if val == val.floor() { + format!("{:.0}", val) + } else { + format!("{:.2}", val) + } + } else if val < 1_000_000.0 { + format!("{:.2}k", val / 1000.0) + } else if val < 1_000_000_000.0 { + format!("{:.2}m", val / 1_000_000.0) + } else if val < 1_000_000_000_000.0 { + format!("{:.2}b", val / 1_000_000_000.0) + } else { + format!("{:.2}t", val / 1_000_000_000_000.0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread; + use std::time::Duration; + + #[test] + fn test_progress_logic() { + let mut pipe = progress("test", 2); + + // Process 1st item + let res = pipe(1u32); + assert_eq!(res, Some(1)); + + // Process 2nd item - should trigger print + thread::sleep(Duration::from_millis(10)); + let res = pipe(2u32); + assert_eq!(res, Some(2)); + + // Process 3rd item + let res = pipe(3u32); + assert_eq!(res, Some(3)); + + // Process 4th item - should trigger print + thread::sleep(Duration::from_millis(10)); + let res = pipe(4u32); + assert_eq!(res, Some(4)); + } + + #[test] + fn test_progress_no_delay() { + let mut pipe = progress("test_fast", 2); + for i in 0..10 { + 
pipe(i); + } + } + + #[test] + fn test_format_count() { + assert_eq!(format_count(0.0), "0"); + assert_eq!(format_count(123.0), "123"); + assert_eq!(format_count(123.45), "123.45"); + assert_eq!(format_count(1000.0), "1.00k"); + assert_eq!(format_count(1234.0), "1.23k"); + assert_eq!(format_count(1_000_000.0), "1.00m"); + assert_eq!(format_count(1_234_567.0), "1.23m"); + assert_eq!(format_count(1_000_000_000.0), "1.00b"); + assert_eq!(format_count(1_234_567_890.0), "1.23b"); + assert_eq!(format_count(1_000_000_000_000.0), "1.00t"); + } +} diff --git a/src/pipe/stateful.rs b/src/pipe/stateful.rs new file mode 100644 index 0000000..a411dc4 --- /dev/null +++ b/src/pipe/stateful.rs @@ -0,0 +1,53 @@ +use std::collections::HashMap; + +/// Manages a per-key state for aggregations. +pub fn stateful( + mut key_fn: impl FnMut(&In) -> K, + mut init_fn: impl FnMut(&In) -> Out, + mut fold_fn: impl FnMut(&mut Out, In), +) -> impl FnMut(In) -> Option +where + K: std::hash::Hash + Eq, + In: bytemuck::Pod + Send, + Out: bytemuck::Pod + Send + Copy, +{ + let mut storage: HashMap = HashMap::new(); + move |item| { + let key = key_fn(&item); + let entry = storage + .entry(key) + .and_modify(|state| fold_fn(state, item)) + .or_insert_with(|| init_fn(&item)); + Some(*entry) + } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Default, bytemuck::Pod, bytemuck::Zeroable)] +pub struct Message { + pub id: u64, + pub value: i64, +} + +#[cfg(test)] +mod stateful_tests { + use super::*; + + #[test] + fn test_stateful_logic() { + // Now using our Pod-compliant struct instead of a tuple + let mut pipe = stateful( + |item: &Message| item.id, // Key: ID + |item| item.value, // Init: First value + |state, item| *state += item.value, // Fold: Add new value + ); + + let m1 = Message { id: 1, value: 10 }; + let m2 = Message { id: 2, value: 5 }; + let m3 = Message { id: 1, value: 20 }; + + assert_eq!(pipe(m1), Some(10)); + assert_eq!(pipe(m2), Some(5)); + assert_eq!(pipe(m3), Some(30)); + } +} diff --git 
a/src/pipe/windowed.rs b/src/pipe/windowed.rs new file mode 100644 index 0000000..f41efeb --- /dev/null +++ b/src/pipe/windowed.rs @@ -0,0 +1,27 @@ +/// Aligns a timestamp to the start of a fixed-duration window. +#[inline(always)] +pub fn windowed(timestamp: u64, window_size: u64) -> u64 { + if window_size == 0 { + return timestamp; + } + (timestamp / window_size) * window_size +} + +#[cfg(test)] +mod window_tests { + use super::*; + + #[test] + fn test_window_alignment() { + let t1 = 150_200; + let t2 = 199_999; + let window = 100_000; + + // Both should fall into the 100,000 bucket + assert_eq!(windowed(t1, window), 100_000); + assert_eq!(windowed(t2, window), 100_000); + + // Next bucket + assert_eq!(windowed(200_001, window), 200_000); + } +} diff --git a/src/slot_store.rs b/src/slot_store.rs new file mode 100644 index 0000000..fe56e27 --- /dev/null +++ b/src/slot_store.rs @@ -0,0 +1,87 @@ +use crate::components::Settable; +use crate::op_counter::OpCounter; +use crate::storage::slot_mmap::SlotMmap; +// Using the new SlotMmap logic +use bytemuck::Pod; +use std::path::PathBuf; +use std::sync::Arc; + +pub struct SlotStore { + storage: SlotMmap, + pub op_counter: Arc, + num_slots: usize, +} + +pub struct SlotStoreReader { + storage: SlotMmap, +} + +pub struct SlotStoreOptions { + pub name: &'static str, + pub size: usize, + pub in_memory: bool, +} + +impl SlotStore { + pub fn new( + root_path: &'static str, + op_counter: Arc, + option: SlotStoreOptions, + ) -> Self { + let storage = if option.in_memory { + SlotMmap::new(None, option.size).unwrap() + } else { + let path: PathBuf = format!("{}/{}.store", root_path, option.name).into(); + if path.exists() { + SlotMmap::load(path).unwrap() + } else { + SlotMmap::new(Some(path), option.size).unwrap() + } + }; + + Self { + num_slots: option.size, + op_counter, + storage, + } + } + + /// Unique to SlotStore: Write to a specific slot instead of appending + pub fn update_at(&mut self, index: usize, state: State) { + 
self.storage.write(index, &state); + } + + pub fn reader(&self) -> SlotStoreReader { + SlotStoreReader { + storage: self.storage.reader(), + } + } + + pub fn size(&self) -> usize { + self.num_slots + } +} + +impl Settable for SlotStore { + fn set(&mut self, at: usize, state: State) { + self.update_at(at, state); + } +} + +impl SlotStoreReader { + /// Performs a consistent snapshot read with retry logic + pub fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option { + // Using 100 retries to ensure we get a consistent L5 snapshot + self.storage + .read_snapshot_with_retry(at, 100) + .map(|state| handler(&state)) + } + + pub fn get_at(&self, at: usize) -> Option { + self.with_at(at, |s| *s) + } + + pub fn size(&self) -> usize { + self.storage.num_slots() + } +} diff --git a/src/stage.rs b/src/stage.rs new file mode 100644 index 0000000..80f611d --- /dev/null +++ b/src/stage.rs @@ -0,0 +1,123 @@ +use bytemuck::Pod; +use std::marker::PhantomData; + +pub trait Stage { + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector; +} + +pub trait OutputCollector { + fn push(&mut self, item: T); +} + +impl OutputCollector for F +where + F: FnMut(T), +{ + #[inline(always)] + fn push(&mut self, item: T) { + (self)(item); + } +} + +impl Stage for F +where + F: FnMut(In) -> Option, + In: Pod + Send, + Out: Pod + Send, +{ + #[inline(always)] + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector, + { + // Execute the closure and pass the result downstream + let out = (self)(data); + if let Some(out) = out { + collector.push(out); + } + } +} + +pub struct Pipeline { + s1: S1, + s2: S2, + _phantom: PhantomData<(In, Mid, Out)>, +} + +impl Stage for Pipeline +where + In: Pod + Send, + Mid: Pod + Send, + Out: Pod + Send, + S1: Stage, + S2: Stage, +{ + #[inline(always)] + fn process(&mut self, data: In, collector: &mut C) + where + C: OutputCollector, + { + self.s1.process(data, &mut |mid| { + self.s2.process(mid, 
collector); + }); + } +} + +pub trait StageExt: Stage { + #[inline(always)] + fn pipe>(self, s2: S2) -> Pipeline + where + Self: Sized, + { + Pipeline { + s1: self, + s2, + _phantom: PhantomData, + } + } +} + +impl StageExt for S +where + In: Pod + Send, + Mid: Pod + Send, + S: Stage, +{ +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pipe; + + #[test] + fn test_pipe_closures() { + let mut p = pipe![|x: u32| Some(x as u64), |x: u64| Some(x as u8),]; + + let mut out = Vec::new(); + p.process(100u32, &mut |x: u8| out.push(x)); + assert_eq!(out, vec![100u8]); + } + + #[test] + fn test_pipe_one_to_many() { + struct Duplicate; + impl Stage for Duplicate { + fn process(&mut self, data: u64, collector: &mut C) + where + C: OutputCollector, + { + collector.push(data); + collector.push(data); + } + } + + let mut p = pipe![|x: u32| Some(x as u64), Duplicate, |x: u64| Some(x as u8),]; + + let mut out = Vec::new(); + p.process(10u32, &mut |x: u8| out.push(x)); + assert_eq!(out, vec![10u8, 10u8]); + } +} diff --git a/src/stage_engine.rs b/src/stage_engine.rs new file mode 100644 index 0000000..bf5d4a8 --- /dev/null +++ b/src/stage_engine.rs @@ -0,0 +1,218 @@ +use crate::components::Appendable; +use crate::stage::Stage; +use crate::{JournalStore, JournalStoreOptions, RodaEngine, StoreJournalReader}; +use bytemuck::Pod; +use std::thread; +use std::time::Duration; + +/// A threaded pipeline engine that grows by adding stages. +/// Each stage runs in its own thread and communicates via JournalStore. +pub struct StageEngine { + engine: RodaEngine, + input_store: JournalStore, + output_reader: StoreJournalReader, + stage_count: usize, + default_capacity: usize, +} + +impl StageEngine { + /// Adds a new stage to the pipeline. + /// This method consumes the current engine and returns a new one with the updated output type. + /// A new thread is spawned to run the provided stage. 
+ pub fn add_stage + Send + 'static>( + self, + stage: S, + ) -> StageEngine { + let capacity = self.default_capacity; + self.add_stage_with_capacity(capacity, stage) + } + + /// Adds a new stage to the pipeline with a specific capacity for the output store. + pub fn add_stage_with_capacity< + NextOut: Pod + Send + 'static, + S: Stage + Send + 'static, + >( + mut self, + capacity: usize, + mut stage: S, + ) -> StageEngine { + let stage_idx = self.stage_count; + self.stage_count += 1; + + // Use a leaked string for the store name as JournalStoreOptions requires &'static str. + // In a production long-running system, we would use a more robust name management, + // but for a pipeline that lasts the lifetime of the process, this is acceptable. + let name = Box::leak(format!("stage_{}", stage_idx).into_boxed_str()); + + let mut next_store = self + .engine + .new_journal_store::(JournalStoreOptions { + name, + size: capacity, + in_memory: true, + }); + + let reader = self.output_reader; + let next_reader = next_store.reader(); + + self.engine.run_worker(move || { + if reader.next() { + if let Some(data) = reader.get() { + stage.process(data, &mut |out: NextOut| { + next_store.append(out); + }); + } + } else { + // Yield to prevent 100% CPU usage when no data is available + std::thread::yield_now(); + } + }); + + StageEngine { + engine: self.engine, + input_store: self.input_store, + output_reader: next_reader, + stage_count: self.stage_count, + default_capacity: self.default_capacity, + } + } + + /// Sends data into the start of the pipeline. + /// Requires &mut self because JournalStore::append requires it (Single-Writer). + pub fn send(&mut self, data: In) { + self.input_store.append(data); + } + + /// Receives data from the end of the pipeline. + /// This will block/poll until data is available. 
+ pub fn receive(&self) -> Option { + loop { + if let Some(data) = self.try_receive() { + return Some(data); + } + if self.engine.is_any_worker_panicked() { + panic!("Worker panicked, pipeline is broken"); + } + thread::yield_now(); + } + } + + /// Tries to receive data from the end of the pipeline without blocking. + pub fn try_receive(&self) -> Option { + if self.output_reader.next() { + return self.output_reader.get(); + } + None + } + + /// Returns the number of items in the output store. + pub fn output_size(&self) -> usize { + self.output_reader.size() + } + + pub fn enable_latency_stats(&mut self, enabled: bool) { + self.engine.enable_latency_stats(enabled); + } + + /// Waits for all workers to finish processing. + pub fn await_idle(&self, timeout: Duration) { + self.engine.await_idle(timeout); + } +} + +impl Appendable for StageEngine { + fn append(&mut self, state: In) { + self.send(state); + } +} + +impl Default for StageEngine { + fn default() -> Self { + Self::new() + } +} + +impl StageEngine { + /// Creates a new engine with no stages. + /// Acts as a passthrough until stages are added. + pub fn new() -> Self { + Self::with_capacity(1024) + } + + /// Creates a new engine with a specific capacity for the input store. 
+ pub fn with_capacity(capacity: usize) -> Self { + let engine = RodaEngine::new(); + let input_store = engine.new_journal_store(JournalStoreOptions { + name: "input", + size: capacity, + in_memory: true, + }); + let output_reader = input_store.reader(); + + Self { + engine, + input_store, + output_reader, + stage_count: 0, + default_capacity: capacity, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + #[test] + fn test_new_engine_threaded_pipeline() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| Some(x as u64)) + .add_stage(|x: u64| Some(x as u8)); + + engine.send(100u32); + + let result = engine.receive(); + assert_eq!(result, Some(100u8)); + } + + #[test] + fn test_new_engine_multiple_outputs() { + struct Duplicate; + impl Stage for Duplicate { + fn process(&mut self, data: u32, collector: &mut C) + where + C: crate::stage::OutputCollector, + { + collector.push(data); + collector.push(data + 1); + } + } + + let mut engine = StageEngine::::new() + .add_stage(Duplicate) + .add_stage(|x: u32| Some(x as u64)); + + engine.send(10u32); + + assert_eq!(engine.receive(), Some(10u64)); + assert_eq!(engine.receive(), Some(11u64)); + } + + #[test] + fn test_engine_concurrency() { + let mut engine = StageEngine::::new().add_stage(|x: u32| { + // Simulate some work + thread::sleep(Duration::from_millis(10)); + Some(x * 2) + }); + + engine.send(1); + engine.send(2); + engine.send(3); + + assert_eq!(engine.receive(), Some(2)); + assert_eq!(engine.receive(), Some(4)); + assert_eq!(engine.receive(), Some(6)); + } +} diff --git a/src/storage/journal_mmap.rs b/src/storage/journal_mmap.rs new file mode 100644 index 0000000..d71aee1 --- /dev/null +++ b/src/storage/journal_mmap.rs @@ -0,0 +1,336 @@ +use bytemuck::Pod; +use memmap2::{MmapMut, MmapOptions}; +use std::fs::OpenOptions; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::AtomicUsize; + +pub(crate) struct JournalMmap { + _mmap: Arc, + ptr: *mut u8, + 
len: usize, + write_index: Arc, + read_only: bool, +} + +impl JournalMmap { + /// CREATE: Creates a brand new file, truncating any existing data. + pub(crate) fn new(path: Option, total_size: usize) -> Result { + let mut mmap = if let Some(p) = &path { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(p)?; + + file.set_len(total_size as u64)?; + unsafe { MmapOptions::new().map_mut(&file)? } + } else { + MmapOptions::new().len(total_size).map_anon()? + }; + + let ptr = mmap.as_mut_ptr(); + let len = mmap.len(); + Ok(Self { + _mmap: Arc::new(mmap), + ptr, + len, + write_index: Arc::new(Default::default()), + read_only: false, + }) + } + + /// OPEN: Loads an existing file and maps its current size. + pub(crate) fn load(path: PathBuf) -> Result { + let file = OpenOptions::new().read(true).write(true).open(&path)?; + + let mut mmap = unsafe { MmapOptions::new().map_mut(&file)? }; + + let ptr = mmap.as_mut_ptr(); + let len = mmap.len(); + Ok(Self { + _mmap: Arc::new(mmap), + ptr, + len, + write_index: Arc::new(Default::default()), + read_only: false, + }) + } + + // --- Bytemuck Methods --- + + /// 1. Read (Immutable) + /// + /// Casts bytes at offset to a reference of T. + pub(crate) fn read(&self, offset: usize) -> &T { + let end = offset + size_of::(); + assert!( + end <= self.len, + "Read crosses buffer boundary - alignment issue?" + ); + bytemuck::from_bytes(&self.slice()[offset..end]) + } + + pub(crate) fn read_window(&self, offset: usize) -> &[T] { + let end = offset + size_of::() * N; + assert!( + end <= self.len, + "Read crosses buffer boundary - alignment issue?" 
+ ); + let bytes = &self.slice()[offset..end]; + + bytemuck::cast_slice(bytes) + } + + pub(crate) fn append(&mut self, state: &T) { + let current_pos = self.write_index.load(std::sync::atomic::Ordering::Relaxed); + let size = size_of::(); + let end = current_pos + size; + + let dest_slice = self.slice_mut(); + + // Check for boundary crossing + assert!( + end <= dest_slice.len(), + "Journal is full. Cannot append more data." + ); + + // Perform the write + dest_slice[current_pos..end].copy_from_slice(bytemuck::bytes_of(state)); + + self.write_index + .store(end, std::sync::atomic::Ordering::Release); + } + + fn slice(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.ptr, self.len) } + } + + fn slice_mut(&mut self) -> &mut [u8] { + assert!(!self.read_only, "Cannot mutate read-only buffer"); + unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } + } + + pub(crate) fn get_write_index(&self) -> usize { + self.write_index.load(std::sync::atomic::Ordering::Acquire) + } + + pub(crate) fn len(&self) -> usize { + self.len + } + + pub(crate) fn reader(&self) -> JournalMmap { + JournalMmap { + _mmap: self._mmap.clone(), + ptr: self.ptr, + len: self.len, + write_index: self.write_index.clone(), + read_only: true, + } + } +} + +unsafe impl Send for JournalMmap {} + +#[cfg(test)] +mod tests { + use super::*; + use bytemuck::Zeroable; + use std::thread; + use std::time::Duration; + + #[test] + fn test_new_anonymous() { + let size = 1024; + let journal = JournalMmap::new(None, size).unwrap(); + assert_eq!(journal.len(), size); + assert_eq!(journal.get_write_index(), 0); + } + + #[test] + fn test_append_and_read() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + let val: u32 = 0x12345678; + journal.append(&val); + assert_eq!(journal.get_write_index(), 4); + + let read_val: u32 = *journal.read(0); + assert_eq!(read_val, val); + } + + #[test] + fn test_append_multiple() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + 
journal.append(&10u64); + journal.append(&20u64); + assert_eq!(journal.get_write_index(), 16); + + assert_eq!(*journal.read::(0), 10); + assert_eq!(*journal.read::(8), 20); + } + + #[test] + fn test_read_window() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + journal.append(&1u32); + journal.append(&2u32); + journal.append(&3u32); + + let window: &[u32] = journal.read_window::(0); + assert_eq!(window, &[1, 2, 3]); + } + + #[test] + #[should_panic(expected = "Journal is full. Cannot append more data.")] + fn test_boundary_append() { + let mut journal = JournalMmap::new(None, 4).unwrap(); + journal.append(&1u32); + journal.append(&1u8); // This should panic + } + + #[test] + #[should_panic(expected = "Read crosses buffer boundary")] + fn test_boundary_read() { + let journal = JournalMmap::new(None, 4).unwrap(); + let _: &u64 = journal.read(0); // Should panic + } + + #[test] + #[should_panic(expected = "Read crosses buffer boundary")] + fn test_boundary_read_window() { + let mut journal = JournalMmap::new(None, 8).unwrap(); + journal.append(&1u32); + journal.append(&2u32); + let _: &[u32] = journal.read_window::(0); // Should panic + } + + #[test] + fn test_reader_concurrency() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + let reader = journal.reader(); + + let handle = thread::spawn(move || { + let mut last_idx = 0; + let mut count = 0; + while count < 10 { + let current_idx = reader.get_write_index(); + if current_idx > last_idx { + let val: u32 = *reader.read(last_idx); + assert_eq!(val, count); + last_idx += std::mem::size_of::(); + count += 1; + } + thread::yield_now(); + } + }); + + for i in 0..10u32 { + journal.append(&i); + thread::sleep(Duration::from_millis(1)); + } + + handle.join().unwrap(); + } + + #[test] + #[should_panic(expected = "Cannot mutate read-only buffer")] + fn test_reader_cannot_append() { + let journal = JournalMmap::new(None, 1024).unwrap(); + let mut reader = journal.reader(); + reader.append(&1u32); + } + 
+ #[test] + fn test_file_backed() { + let path = std::env::temp_dir().join(format!("test_journal_{}.mmap", std::process::id())); + if path.exists() { + let _ = std::fs::remove_file(&path); + } + + { + let mut journal = JournalMmap::new(Some(path.clone()), 1024).unwrap(); + journal.append(&123u64); + } + + { + let journal = JournalMmap::load(path.clone()).unwrap(); + assert_eq!(journal.len(), 1024); + // write_index is not persisted + assert_eq!(journal.get_write_index(), 0); + assert_eq!(*journal.read::(0), 123u64); + } + + let _ = std::fs::remove_file(&path); + } + + #[repr(C)] + #[derive(Copy, Clone, Debug, Pod, Zeroable, PartialEq)] + struct LargeData { + a: u64, + b: u64, + c: u64, + d: u64, + } + + #[test] + fn test_reader_no_corruption() { + let mut journal = JournalMmap::new(None, 1024 * 1024).unwrap(); + let reader = journal.reader(); + + let handle = thread::spawn(move || { + let mut last_idx = 0; + while last_idx < 1000 * size_of::() { + let current_idx = reader.get_write_index(); + while last_idx < current_idx { + let data: LargeData = *reader.read(last_idx); + // Check if data is corrupted (a, b, c, d should all be equal to the same value) + assert_eq!( + data.a, data.b, + "Data corruption detected at index {}", + last_idx + ); + assert_eq!( + data.a, data.c, + "Data corruption detected at index {}", + last_idx + ); + assert_eq!( + data.a, data.d, + "Data corruption detected at index {}", + last_idx + ); + last_idx += size_of::(); + } + thread::yield_now(); + } + }); + + for i in 0..1000u64 { + let data = LargeData { + a: i, + b: i, + c: i, + d: i, + }; + journal.append(&data); + } + + handle.join().unwrap(); + } + + #[test] + fn test_immediate_read() { + let mut journal = JournalMmap::new(None, 1024).unwrap(); + let val: u64 = 0xDEADBEEFCAFEBABE; + journal.append(&val); + + // Data should be immediately available at the expected offset + let read_val: u64 = *journal.read(0); + assert_eq!(read_val, val); + + let val2: u64 = 0x1122334455667788; + 
journal.append(&val2); + assert_eq!(*journal.read::(8), val2); + } +} diff --git a/src/storage/mmap_journal.rs b/src/storage/mmap_journal.rs deleted file mode 100644 index 2d17cc0..0000000 --- a/src/storage/mmap_journal.rs +++ /dev/null @@ -1,134 +0,0 @@ -use bytemuck::Pod; -use memmap2::{MmapMut, MmapOptions}; -use std::fs::OpenOptions; -use std::path::PathBuf; -use std::sync::Arc; -use std::sync::atomic::AtomicUsize; - -pub(crate) struct MmapJournal { - _mmap: Arc, - ptr: *mut u8, - len: usize, - write_index: Arc, - read_only: bool, -} - -impl MmapJournal { - /// CREATE: Creates a brand new file, truncating any existing data. - pub fn new(path: Option, total_size: usize) -> Result { - let mut mmap = if let Some(p) = &path { - let file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open(p)?; - - file.set_len(total_size as u64)?; - unsafe { MmapOptions::new().map_mut(&file)? } - } else { - MmapOptions::new().len(total_size).map_anon()? - }; - - let ptr = mmap.as_mut_ptr(); - let len = mmap.len(); - Ok(Self { - _mmap: Arc::new(mmap), - ptr, - len, - write_index: Arc::new(Default::default()), - read_only: false, - }) - } - - /// OPEN: Loads an existing file and maps its current size. - pub fn load(path: PathBuf) -> Result { - let file = OpenOptions::new().read(true).write(true).open(&path)?; - - let mut mmap = unsafe { MmapOptions::new().map_mut(&file)? }; - - let ptr = mmap.as_mut_ptr(); - let len = mmap.len(); - Ok(Self { - _mmap: Arc::new(mmap), - ptr, - len, - write_index: Arc::new(Default::default()), - read_only: false, - }) - } - - // --- Bytemuck Methods --- - - /// 1. Read (Immutable) - /// - /// Casts bytes at offset to a reference of T. - pub fn read(&self, offset: usize) -> &T { - let end = offset + size_of::(); - assert!( - end <= self.len, - "Read crosses buffer boundary - alignment issue?" 
- ); - bytemuck::from_bytes(&self.slice()[offset..end]) - } - - pub(crate) fn read_window(&self, offset: usize) -> &[T] { - let end = offset + size_of::() * N; - assert!( - end <= self.len, - "Read crosses buffer boundary - alignment issue?" - ); - let bytes = &self.slice()[offset..end]; - - bytemuck::cast_slice(bytes) - } - - pub fn append(&mut self, state: &T) { - let current_pos = self.write_index.load(std::sync::atomic::Ordering::Relaxed); - let size = size_of::(); - let end = current_pos + size; - - let dest_slice = self.slice_mut(); - - // Check for boundary crossing - assert!( - end <= dest_slice.len(), - "Journal is full. Cannot append more data." - ); - - // Perform the write - dest_slice[current_pos..end].copy_from_slice(bytemuck::bytes_of(state)); - - self.write_index - .store(end, std::sync::atomic::Ordering::Release); - } - - fn slice(&self) -> &[u8] { - unsafe { std::slice::from_raw_parts(self.ptr, self.len) } - } - - fn slice_mut(&mut self) -> &mut [u8] { - assert!(!self.read_only, "Cannot mutate read-only buffer"); - unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } - } - - pub(crate) fn get_write_index(&self) -> usize { - self.write_index.load(std::sync::atomic::Ordering::Acquire) - } - - pub(crate) fn len(&self) -> usize { - self.len - } - - pub(crate) fn reader(&self) -> MmapJournal { - MmapJournal { - _mmap: self._mmap.clone(), - ptr: self.ptr, - len: self.len, - write_index: self.write_index.clone(), - read_only: true, - } - } -} - -unsafe impl Send for MmapJournal {} diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c83b146..075644a 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1 +1,2 @@ -pub mod mmap_journal; +pub mod journal_mmap; +pub mod slot_mmap; diff --git a/src/storage/slot_mmap.rs b/src/storage/slot_mmap.rs new file mode 100644 index 0000000..05b32c1 --- /dev/null +++ b/src/storage/slot_mmap.rs @@ -0,0 +1,344 @@ +use bytemuck::Pod; +use memmap2::{MmapMut, MmapOptions}; +use std::fs::OpenOptions; 
+use std::hint::spin_loop; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; + +pub struct SlotMmap { + _mmap: Arc, + ptr: *mut u8, + num_slots: usize, + slot_size: usize, + _marker: std::marker::PhantomData, +} + +impl SlotMmap { + pub fn new(path: Option, num_slots: usize) -> Result { + // We manually calculate the slot size. + // 8 bytes for version + T + padding to reach 64-byte alignment (cache line). + let slot_size = 8 + size_of::(); + + let mut mmap = if let Some(p) = path { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(p)?; + + file.set_len((num_slots * slot_size) as u64)?; + unsafe { MmapOptions::new().map_mut(&file)? } + } else { + MmapOptions::new().len(num_slots * slot_size).map_anon()? + }; + + Ok(Self { + ptr: mmap.as_mut_ptr(), + num_slots, + slot_size, + _mmap: Arc::new(mmap), + _marker: std::marker::PhantomData, + }) + } + + /// OPEN: Loads an existing file and maps its current size. + pub fn load(path: PathBuf) -> Result { + let file = OpenOptions::new().read(true).write(true).open(&path)?; + + let mmap = unsafe { MmapOptions::new().map_mut(&file)? }; + + let len = mmap.len(); + + let slot_size = 8 + size_of::(); + let num_slots = len / slot_size; + Ok(Self { + ptr: mmap.as_ptr() as *mut u8, + num_slots, + slot_size, + _mmap: Arc::new(mmap), + _marker: std::marker::PhantomData, + }) + } + + /// WRITER: Updates the specific slot by index. + pub fn write(&mut self, index: usize, state: &T) { + assert!(index < self.num_slots); + let offset = index * self.slot_size; + + unsafe { + let version_ptr = self.ptr.add(offset) as *const AtomicU64; + + // 1. Increment to ODD + (*version_ptr).fetch_add(1, Ordering::Relaxed); + std::sync::atomic::fence(Ordering::SeqCst); + + // 2. Copy data + let data_ptr = self.ptr.add(offset + 8); + std::ptr::copy_nonoverlapping( + bytemuck::bytes_of(state).as_ptr(), + data_ptr, + std::mem::size_of::(), + ); + + // 3. 
Increment to EVEN + std::sync::atomic::fence(Ordering::SeqCst); + (*version_ptr).fetch_add(1, Ordering::Relaxed); + } + } + + /// READER: Snapshot with spin-retry logic. + pub fn read_snapshot_with_retry(&self, index: usize, max_retries: usize) -> Option { + assert!(index < self.num_slots); + let offset = index * self.slot_size; + + unsafe { + let version_ptr = self.ptr.add(offset) as *const AtomicU64; + let data_ptr = self.ptr.add(offset + 8); + + for _ in 0..max_retries { + let v1 = (*version_ptr).load(Ordering::Relaxed); + std::sync::atomic::fence(Ordering::SeqCst); + + if v1.is_multiple_of(2) { + let mut data: T = std::mem::zeroed(); + std::ptr::copy_nonoverlapping( + data_ptr, + &mut data as *mut T as *mut u8, + std::mem::size_of::(), + ); + + std::sync::atomic::fence(Ordering::SeqCst); + let v2 = (*version_ptr).load(Ordering::Relaxed); + if v1 == v2 { + return Some(data); + } + } + spin_loop(); + } + } + None + } + + pub fn reader(&self) -> Self { + Self { + _mmap: self._mmap.clone(), + ptr: self.ptr, + num_slots: self.num_slots, + slot_size: self.slot_size, + _marker: std::marker::PhantomData, + } + } + + pub fn num_slots(&self) -> usize { + self.num_slots + } +} + +unsafe impl Send for SlotMmap {} +unsafe impl Sync for SlotMmap {} + +#[cfg(test)] +mod tests { + use super::*; + use bytemuck::Zeroable; + use std::thread; + + #[repr(C)] + #[derive(Copy, Clone, Debug, Pod, Zeroable, PartialEq)] + struct TestData { + a: u64, + b: u64, + c: u64, + d: u64, + } + + #[test] + fn test_new_anonymous() { + let mut slot_mmap = SlotMmap::::new(None, 10).unwrap(); + assert_eq!(slot_mmap.num_slots(), 10); + + let data = TestData { + a: 1, + b: 2, + c: 3, + d: 4, + }; + slot_mmap.write(0, &data); + + let read_data = slot_mmap.read_snapshot_with_retry(0, 10).unwrap(); + assert_eq!(data, read_data); + } + + #[test] + fn test_file_backed() { + let path = std::env::temp_dir().join(format!("test_slots_{}.mmap", std::process::id())); + if path.exists() { + let _ = 
std::fs::remove_file(&path); + } + + { + let mut slot_mmap = SlotMmap::::new(Some(path.clone()), 5).unwrap(); + slot_mmap.write( + 2, + &TestData { + a: 10, + b: 20, + c: 30, + d: 40, + }, + ); + } + + { + let slot_mmap = SlotMmap::::load(path.clone()).unwrap(); + assert_eq!(slot_mmap.num_slots(), 5); + let data = slot_mmap.read_snapshot_with_retry(2, 10).unwrap(); + assert_eq!( + TestData { + a: 10, + b: 20, + c: 30, + d: 40 + }, + data + ); + } + + let _ = std::fs::remove_file(&path); + } + + #[test] + #[should_panic] + fn test_boundary_write() { + let mut slot_mmap = SlotMmap::::new(None, 5).unwrap(); + slot_mmap.write( + 5, + &TestData { + a: 1, + b: 2, + c: 3, + d: 4, + }, + ); + } + + #[test] + #[should_panic] + fn test_boundary_read() { + let slot_mmap = SlotMmap::::new(None, 5).unwrap(); + slot_mmap.read_snapshot_with_retry(5, 10); + } + + #[test] + fn test_multithreaded_consistency() { + let mut slot_mmap = SlotMmap::::new(None, 1).unwrap(); + let reader = slot_mmap.reader(); + + let writer_thread = thread::spawn(move || { + for i in 0..1_000_000 { + slot_mmap.write( + 0, + &TestData { + a: i, + b: i, + c: i, + d: i, + }, + ); + } + }); + + let reader_thread = thread::spawn(move || { + let mut success_count = 0; + for _ in 0..1_000_000 { + if let Some(data) = reader.read_snapshot_with_retry(0, 100) { + success_count += 1; + assert_eq!( + data.a, data.b, + "Data corruption detected! a: {}, b: {}", + data.a, data.b + ); + assert_eq!( + data.a, data.c, + "Data corruption detected! a: {}, b: {}", + data.a, data.c + ); + assert_eq!( + data.a, data.d, + "Data corruption detected! 
a: {}, b: {}", + data.a, data.d + ); + } + } + assert!(success_count > 0, "Reader thread made no successful reads"); + }); + + writer_thread.join().unwrap(); + reader_thread.join().unwrap(); + } + + #[test] + fn test_multiple_readers_consistency() { + let mut slot_mmap = SlotMmap::::new(None, 1).unwrap(); + + let mut readers = vec![]; + for _ in 0..4 { + readers.push(slot_mmap.reader()); + } + + let writer_thread = thread::spawn(move || { + for i in 0..1_000_000 { + slot_mmap.write( + 0, + &TestData { + a: i, + b: i, + c: i, + d: i, + }, + ); + } + }); + + let mut reader_threads = vec![]; + for reader in readers { + reader_threads.push(thread::spawn(move || { + let mut success_count = 0; + for _ in 0..1_000_000 { + if let Some(data) = reader.read_snapshot_with_retry(0, 100) { + success_count += 1; + assert_eq!(data.a, data.b); + assert_eq!(data.a, data.c); + assert_eq!(data.a, data.d); + } + } + assert!(success_count > 0, "Reader thread made no successful reads"); + })); + } + + writer_thread.join().unwrap(); + for t in reader_threads { + t.join().unwrap(); + } + } + + #[test] + fn test_reader_cloning() { + let mut slot_mmap = SlotMmap::::new(None, 10).unwrap(); + let reader1 = slot_mmap.reader(); + let reader2 = reader1.reader(); + + let data = TestData { + a: 1, + b: 2, + c: 3, + d: 4, + }; + slot_mmap.write(5, &data); + + assert_eq!(reader1.read_snapshot_with_retry(5, 10), Some(data)); + assert_eq!(reader2.read_snapshot_with_retry(5, 10), Some(data)); + } +} diff --git a/src/store.rs b/src/store.rs deleted file mode 100644 index 9a7e162..0000000 --- a/src/store.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::components::{Store, StoreOptions, StoreReader}; -use crate::index::DirectIndex; -use crate::storage::mmap_journal::MmapJournal; -use bytemuck::Pod; -use std::cell::Cell; -use std::path::PathBuf; - -pub struct StoreJournal { - storage: MmapJournal, -} - -pub struct StoreJournalReader { - next_index: Cell, - storage: MmapJournal, -} - -impl StoreJournal { - pub 
fn new(root_path: &'static str, option: StoreOptions, state_size: usize) -> Self { - let total_size = option.size * state_size; - let storage = if option.in_memory { - MmapJournal::new(None, total_size).unwrap() - } else { - let path: PathBuf = format!("{}/{}.store", root_path, option.name).into(); - if path.exists() { - MmapJournal::load(path).unwrap() - } else { - MmapJournal::new(Some(path), total_size).unwrap() - } - }; - - Self { storage } - } -} - -impl Store for StoreJournal { - type Reader = StoreJournalReader; - - fn push(&mut self, state: State) { - let size = size_of::(); - let current_pos = self.storage.get_write_index(); - assert!( - current_pos + size <= self.storage.len(), - "Store is full. Capacity: {}, Current position: {}, State size: {}", - self.storage.len(), - current_pos, - size - ); - self.storage.append(&state); - } - - fn reader(&self) -> StoreJournalReader { - StoreJournalReader { - next_index: Cell::new(0), - storage: self.storage.reader(), - } - } - - fn direct_index(&self) -> DirectIndex { - DirectIndex { - map: std::sync::Arc::new(crossbeam_skiplist::SkipMap::new()), - reader: StoreJournalReader { - next_index: Cell::new(0), - storage: self.storage.reader(), - }, - } - } -} - -impl StoreReader for StoreJournalReader { - fn next(&self) -> bool { - let index_to_read = self.next_index.get(); - let offset = index_to_read * size_of::(); - let write_index = self.storage.get_write_index(); - - if offset + size_of::() > write_index { - return false; - } - - self.next_index.set(index_to_read + 1); - - true - } - - fn get_index(&self) -> usize { - self.next_index.get() - } - - fn with(&self, handler: impl FnOnce(&State) -> R) -> Option { - let next_index = self.next_index.get(); - if next_index == 0 { - return None; - } - let current_index = next_index - 1; - let offset = current_index * size_of::(); - Some(handler(self.storage.read(offset))) - } - - fn with_at(&self, at: usize, handler: impl FnOnce(&State) -> R) -> Option { - let offset = at * 
size_of::(); - let write_index = self.storage.get_write_index(); - if offset + size_of::() > write_index { - return None; - } - Some(handler(self.storage.read(offset))) - } - - fn with_last(&self, handler: impl FnOnce(&State) -> R) -> Option { - let write_index = self.storage.get_write_index(); - if write_index < size_of::() { - return None; - } - let offset = write_index - size_of::(); - Some(handler(self.storage.read(offset))) - } - - fn get(&self) -> Option { - self.with(|s| *s) - } - - fn get_at(&self, at: usize) -> Option { - self.with_at(at, |s| *s) - } - - fn get_last(&self) -> Option { - self.with_last(|s| *s) - } - - fn get_window(&self, at: usize) -> Option<&[State]> { - let offset = at * size_of::(); - let write_index = self.storage.get_write_index(); - if offset + size_of::() * N > write_index { - return None; - } - - Some(self.storage.read_window::(offset)) - } -} diff --git a/src/window.rs b/src/window.rs deleted file mode 100644 index 165aeea..0000000 --- a/src/window.rs +++ /dev/null @@ -1,120 +0,0 @@ -use crate::components::{Store, StoreReader}; -use bytemuck::Pod; -use std::cell::{Cell, RefCell}; -use std::marker::PhantomData; - -pub struct Window { - pub(crate) _v: PhantomData, - pub(crate) _out_v: PhantomData, - pub(crate) last_index: Cell, - pub(crate) buffer: RefCell>, -} - -impl Window { - pub fn new() -> Window { - Self { - _v: PhantomData, - _out_v: PhantomData, - last_index: Cell::new(0), - buffer: RefCell::new(Vec::new()), - } - } -} - -impl Default for Window { - fn default() -> Self { - Self::new() - } -} - -impl Window { - pub fn from<'a, R: StoreReader>( - &'a self, - reader: &'a R, - ) -> WindowFrom<'a, InValue, OutValue, R> { - WindowFrom { - window: self, - reader, - _in: PhantomData, - _out_v: PhantomData, - } - } - - pub fn pipe(_source: impl StoreReader, _target: impl Store) -> Self { - Self::new() - } -} - -pub struct WindowFrom<'a, InValue: Pod + Send, OutValue: Pod + Send, R: StoreReader> { - window: &'a Window, - reader: &'a 
R, - _in: PhantomData, - _out_v: PhantomData, -} - -impl<'a, InValue: Pod + Send, OutValue: Pod + Send, R: StoreReader> - WindowFrom<'a, InValue, OutValue, R> -{ - pub fn to<'b, S: Store>( - self, - store: &'b mut S, - ) -> WindowTo<'a, 'b, InValue, OutValue, R, S> { - WindowTo { - window: self.window, - reader: self.reader, - store, - _in: PhantomData, - _out: PhantomData, - } - } -} - -pub struct WindowTo< - 'a, - 'b, - InValue: Pod + Send, - OutValue: Pod + Send, - R: StoreReader, - S: Store, -> { - window: &'a Window, - reader: &'a R, - store: &'b mut S, - _in: PhantomData, - _out: PhantomData, -} - -impl<'a, 'b, InValue, OutValue, R, S> WindowTo<'a, 'b, InValue, OutValue, R, S> -where - InValue: Pod + Send, - OutValue: Pod + Send, - R: StoreReader, - S: Store, -{ - pub fn reduce( - &mut self, - window_size: u32, - mut update_fn: impl FnMut(&[InValue]) -> Option, - ) { - let mut buffer = self.window.buffer.borrow_mut(); - let mut last_index = self.window.last_index.get(); - - let current_index = self.reader.get_index(); - if current_index > last_index { - if let Some(val) = self.reader.get() { - buffer.push(val); - if buffer.len() > window_size as usize { - buffer.remove(0); - } - - if buffer.len() == window_size as usize - && let Some(out) = update_fn(&buffer) - { - self.store.push(out); - } - } - last_index = current_index; - self.window.last_index.set(last_index); - } - } -} diff --git a/tests/aggregator_tests.rs b/tests/aggregator_tests.rs deleted file mode 100644 index c0337cb..0000000 --- a/tests/aggregator_tests.rs +++ /dev/null @@ -1,409 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; -use roda_state::{Aggregator, RodaEngine}; - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct SensorReading { - pub value: f64, - pub sensor_id: u16, - pub _pad: [u8; 6], -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct 
SensorStats { - pub sum: f64, - pub min: f64, - pub max: f64, - pub count: u32, - pub sensor_id: u16, - pub _pad: [u8; 2], -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct GroupKey { - pub sensor_id: u16, - pub group_id: u16, -} - -#[test] -fn test_aggregator_count_and_sum() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - stats.sum += reading.value; - }); - }); - - // Push readings - source.push(SensorReading { - sensor_id: 1, - value: 10.0, - ..Default::default() - }); - source.push(SensorReading { - sensor_id: 1, - value: 20.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate the final aggregated result by get_window from the target - let res = target_reader.get_window::<2>(0).unwrap(); - assert_eq!(res[1].sensor_id, 1); - assert_eq!(res[1].count, 2); - assert_eq!(res[1].sum, 30.0); -} - -#[test] -fn test_aggregator_min_max_tracking() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation 
inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { - if index == 0 { - stats.min = reading.value; - stats.max = reading.value; - } else { - stats.min = stats.min.min(reading.value); - stats.max = stats.max.max(reading.value); - } - stats.sensor_id = reading.sensor_id; - }); - }); - - // Push readings - source.push(SensorReading { - sensor_id: 1, - value: 10.0, - ..Default::default() - }); - source.push(SensorReading { - sensor_id: 1, - value: 20.0, - ..Default::default() - }); - source.push(SensorReading { - sensor_id: 1, - value: 5.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate by get_window from the target - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[2].min, 5.0); - assert_eq!(res[2].max, 20.0); -} - -#[test] -fn test_aggregator_multiple_partitions() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - }); - }); - - // Push readings across partitions - source.push(SensorReading { - sensor_id: 1, - value: 1.0, - ..Default::default() - }); - source.push(SensorReading { - sensor_id: 2, - value: 2.0, - ..Default::default() - }); - source.push(SensorReading { - sensor_id: 1, - value: 3.0, 
- ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate by get_window all results - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].sensor_id, 1); - assert_eq!(res[0].count, 1); - assert_eq!(res[1].sensor_id, 2); - assert_eq!(res[1].count, 1); - assert_eq!(res[2].sensor_id, 1); - assert_eq!(res[2].count, 2); -} - -#[test] -fn test_aggregator_complex_key() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation with complex key inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| GroupKey { - sensor_id: r.sensor_id, - group_id: (r.value / 10.0) as u16, - }) - .reduce(|index, reading, stats| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - }); - }); - - source.push(SensorReading { - sensor_id: 1, - value: 15.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<1>(0).unwrap(); - assert_eq!(res[0].sensor_id, 1); - assert_eq!(res[0].count, 1); -} - -#[test] -fn test_aggregator_reset_behavior() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - 
// Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - }); - }); - - // Push several readings for sensor 1 - for i in 0..5 { - source.push(SensorReading { - sensor_id: 1, - value: i as f64, - ..Default::default() - }); - } - - // Switch to sensor 2 - source.push(SensorReading { - sensor_id: 2, - value: 100.0, - ..Default::default() - }); - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate get_window results: first 5 for sensor 1 with counts 1..5, then sensor 2 with count 1 - let res = target_reader.get_window::<6>(0).unwrap(); - for (i, item) in res.iter().enumerate().take(5) { - assert_eq!(item.sensor_id, 1); - assert_eq!(item.count, (i as u32) + 1); - } - assert_eq!(res[5].sensor_id, 2); - assert_eq!(res[5].count, 1); -} - -#[test] -fn test_aggregator_large_index() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 1024, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 1024, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let aggregator: Aggregator = Aggregator::new(); - - // Run aggregation inside worker - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, _reading, stats| { - stats.count = (index + 1) as u32; - }); - }); - - // Simulate 1000 items in one partition - for i in 0..1000 { - source.push(SensorReading { - sensor_id: 1, - value: i as f64, - ..Default::default() - }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - 
- // Validate all results - let res = target_reader.get_window::<1000>(0).unwrap(); - for (i, item) in res.iter().enumerate().take(1000) { - assert_eq!(item.count, (i as u32) + 1); - } -} - -#[test] -fn test_aggregator_worker_large() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 2000, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 2000, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - - let aggregator: Aggregator = Aggregator::new(); - - engine.run_worker(move || { - source_reader.next(); - aggregator - .from(&source_reader) - .to(&mut target) - .partition_by(|r| r.sensor_id) - .reduce(|index, reading, stats| { - stats.sensor_id = reading.sensor_id; - stats.count = (index + 1) as u32; - stats.sum += reading.value; - }); - }); - - for _ in 0..1000 { - source.push(SensorReading { - sensor_id: 1, - value: 1.0, - ..Default::default() - }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<1000>(0).unwrap(); - assert_eq!(res[999].count, 1000); - assert_eq!(res[999].sum, 1000.0); -} diff --git a/tests/comprehensive_tests.rs b/tests/comprehensive_tests.rs index 772a79c..b6fa46d 100644 --- a/tests/comprehensive_tests.rs +++ b/tests/comprehensive_tests.rs @@ -1,12 +1,12 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions, StoreReader}; use std::sync::{Arc, Barrier}; use std::thread; #[test] fn test_store_reader_edge_cases() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "edge_cases", size: 1024, in_memory: true, @@ -26,7 +26,7 @@ fn test_store_reader_edge_cases() { // 4. 
get before next() assert_eq!(reader.get(), None); - store.push(42); + store.append(42); // 5. get before next() but after push assert_eq!(reader.get(), None); @@ -55,38 +55,18 @@ fn test_store_reader_edge_cases() { assert_eq!(reader.with_last(|&v| v), Some(42)); } -#[test] -fn test_index_reader_with_and_get() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "index_with", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - store.push(123); - index.compute(|&v| v); - let reader = index.reader(); - - assert_eq!(reader.get(&123), Some(123)); - assert_eq!(reader.with(&123, |&v| v), Some(123)); - - assert_eq!(reader.get(&456), None); - assert_eq!(reader.with(&456, |_| 1), None); -} - #[test] fn test_store_full_capacity() { let engine = RodaEngine::new(); let num_items = 10; - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "full_capacity", size: num_items, in_memory: true, }); for i in 0..num_items { - store.push(i as u64); + store.append(i as u64); } let reader = store.reader(); @@ -97,7 +77,7 @@ fn test_store_full_capacity() { assert!(!reader.next()); // This should panic if it exceeds capacity - // However, looking at store.rs: + // However, looking at journal_store: // self.storage.append(&state); // and MmapJournal::append // Let's see what happens if we push one more. 
@@ -107,25 +87,25 @@ fn test_store_full_capacity() { #[should_panic(expected = "Store is full")] fn test_store_overflow_panic() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "overflow", size: 1, in_memory: true, }); - store.push(1); - store.push(2); // Should panic here + store.append(1); + store.append(2); // Should panic here } #[test] fn test_store_concurrent_load() { let engine = Arc::new(RodaEngine::new()); - let store_options = StoreOptions { + let store_options = JournalStoreOptions { name: "concurrent_load", size: 1024 * 1024, in_memory: true, }; - let mut store = engine.store::(store_options); + let mut store = engine.new_journal_store::(store_options); let num_readers = 4; let num_pushes = 1000; @@ -163,7 +143,7 @@ fn test_store_concurrent_load() { barrier.wait(); for i in 1..=num_pushes { - store.push(i as u32); + store.append(i as u32); } let mut total_read = 0; @@ -173,132 +153,3 @@ fn test_store_concurrent_load() { assert_eq!(total_read, num_readers * num_pushes); } - -#[test] -fn test_index_load_and_edge_cases() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "index_edge", - size: 1024 * 1024, - in_memory: true, - }); - let index = store.direct_index::(); - let index_reader = index.reader(); - - // 1. compute on empty store - index.compute(|&v| v); - assert_eq!(index_reader.get(&0), None); - - // 2. Load test - let num_items = 1000; - for i in 0..num_items { - store.push(i as u64); - index.compute(|&v| v); - } - - for i in 0..num_items { - assert_eq!(index_reader.get(&(i as u64)), Some(i as u64)); - } - - // 3. 
Duplicate keys (overwrites) - store.push(100); // 1001st item - index.compute(|&v| v); // index the 100th -> 100 (key 100) - - store.push(10000); // 1002nd item - index.compute(|_v| 100); // Force key 100 to map to value 10000 - assert_eq!(index_reader.get(&100), Some(10000)); -} - -#[test] -fn test_index_concurrent_compute() { - let engine = Arc::new(RodaEngine::new()); - let mut store = engine.store::(StoreOptions { - name: "index_concurrent", - size: 1024 * 1024, - in_memory: true, - }); - let index = std::sync::Mutex::new(store.direct_index::()); - let index = Arc::new(index); - - let num_items = 5000; - for i in 0..num_items { - store.push(i as u32); - } - - let num_workers = 5; - let barrier = Arc::new(Barrier::new(num_workers)); - let mut workers = Vec::new(); - - for _ in 0..num_workers { - let b = barrier.clone(); - let idx = index.clone(); - workers.push(thread::spawn(move || { - b.wait(); - loop { - let mut found = false; - { - let idx_locked = idx.lock().unwrap(); - idx_locked.compute(|&v| { - found = true; - v - }); - } - if !found { - break; - } - } - })); - } - - for worker in workers { - worker.join().unwrap(); - } - - let index_reader = index.lock().unwrap().reader(); - for i in 0..num_items { - assert_eq!(index_reader.get(&(i as u32)), Some(i as u32)); - } -} - -#[test] -fn test_index_reader_concurrent_get() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "index_read_concurrent", - size: 1024 * 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - let num_items = 1000; - for i in 0..num_items { - store.push(i as u32); - index.compute(|&v| v); - } - - let reader = Arc::new(index.reader()); - let num_threads = 8; - let mut threads = Vec::new(); - let barrier = Arc::new(Barrier::new(num_threads)); - - for _t in 0..num_threads { - let r = reader.clone(); - let b = barrier.clone(); - threads.push(thread::spawn(move || { - b.wait(); - for i in 0..num_items { - // Mix get and with - if i % 2 
== 0 { - assert_eq!(r.get(&(i as u32)), Some(i as u32)); - } else { - let val = r.with(&(i as u32), |&v| v); - assert_eq!(val, Some(i as u32)); - } - } - })); - } - - for thread in threads { - thread.join().unwrap(); - } -} diff --git a/tests/index_tests.rs b/tests/index_tests.rs deleted file mode 100644 index d9660ee..0000000 --- a/tests/index_tests.rs +++ /dev/null @@ -1,267 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::RodaEngine; -use roda_state::components::{Engine, Index, IndexReader, Store, StoreOptions}; -use std::thread; -use std::time::Duration; - -#[repr(C)] -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Pod, Zeroable)] -struct ComplexKey { - id: u32, - category: u32, -} - -#[test] -fn test_index_multiple_values() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - for i in 0..5 { - store.push(i); - } - - // Index them all - for _ in 0..5 { - index.compute(|x| x * 10); - } - - let reader = index.reader(); - for i in 0..5 { - assert_eq!(reader.get(&(i * 10)), Some(i)); - } -} - -#[test] -fn test_multiple_indices_on_same_store() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - - let index_double = store.direct_index::(); - let index_triple = store.direct_index::(); - - store.push(10); - - index_double.compute(|x| x * 2); - index_triple.compute(|x| x * 3); - - let reader_double = index_double.reader(); - let reader_triple = index_triple.reader(); - - assert_eq!(reader_double.get(&20), Some(10)); - assert_eq!(reader_triple.get(&30), Some(10)); -} - -#[test] -fn test_index_complex_key() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - store.push(100); - index.compute(|&val| 
ComplexKey { - id: val, - category: 1, - }); - - let reader = index.reader(); - assert_eq!( - reader.get(&ComplexKey { - id: 100, - category: 1 - }), - Some(100) - ); - assert_eq!( - reader.get(&ComplexKey { - id: 100, - category: 2 - }), - None - ); -} - -#[test] -fn test_index_shallow_clone_sharing() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - let clone1 = index.reader(); - let clone2 = index.reader(); - - store.push(42); - index.compute(|&x| x); - - assert_eq!(clone1.get(&42), Some(42)); - assert_eq!(clone2.get(&42), Some(42)); -} - -#[test] -fn test_index_collision_overwrite() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - // Both 10 and 20 will map to key 1 - store.push(10); - store.push(20); - - index.compute(|_| 1); - index.compute(|_| 1); - - let reader = index.reader(); - // Usually a direct index mapping should store the latest value for a given key - assert_eq!(reader.get(&1), Some(20)); -} - -#[test] -fn test_index_not_found() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - store.push(10); - index.compute(|x| x + 1); - - let reader = index.reader(); - assert_eq!(reader.get(&11), Some(10)); - assert_eq!(reader.get(&999), None); -} - -#[test] -fn test_concurrent_push_and_index() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - let index_reader = index.reader(); - - // Spawn a worker to index everything that comes in - engine.run_worker(move || { - for _ in 0..10 { - index.compute(|&x| x); - } - }); - - // Push values from another 
thread (main thread) - for i in 0..10 { - store.push(i); - // Give worker some time to process - thread::sleep(Duration::from_millis(1)); - } - - // Give some extra time for the last ones to be indexed - thread::sleep(Duration::from_millis(20)); - - for i in 0..10 { - assert_eq!(index_reader.get(&i), Some(i)); - } -} - -#[test] -fn test_run_worker_with_multiple_stores() { - let engine = RodaEngine::new(); - let mut store_u32 = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let mut store_string = engine.store::<[u8; 16]>(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - - let index_u32 = store_u32.direct_index::(); - let index_string = store_string.direct_index::(); - - // Prepare read-only readers for assertions after workers complete - let index_u32_reader = index_u32.reader(); - let index_string_reader = index_string.reader(); - - for _ in 0..10 { - store_u32.push(100); - } - - let mut pushed_u32 = false; - engine.run_worker(move || { - if !pushed_u32 { - store_u32.push(100); - pushed_u32 = true; - } - index_u32.compute(|&x| x); - }); - - let mut pushed_string = false; - engine.run_worker(move || { - if !pushed_string { - let mut bytes = [0u8; 16]; - bytes[..5].copy_from_slice(b"hello"); - store_string.push(bytes); - pushed_string = true; - } - index_string.compute(|s: &[u8; 16]| s.iter().take_while(|&&b| b != 0).count()); - }); - - // Wait for workers - thread::sleep(Duration::from_millis(50)); - - assert_eq!(index_u32_reader.get(&100), Some(100)); - let res_bytes = index_string_reader.get(&5).unwrap(); - assert_eq!(&res_bytes[..5], b"hello"); -} - -#[test] -fn test_multiple_workers_reading_index_only_original_computes() { - let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { - name: "test", - size: 1024, - in_memory: true, - }); - let index = store.direct_index::(); - - let reader1 = index.reader(); - let reader2 = index.reader(); - - store.push(1); - store.push(2); - - // 
Only the original index can compute; shallow clones are read-only - engine.run_worker(move || { - index.compute(|&x| x * 10); - index.compute(|&x| x * 10); - }); - - thread::sleep(Duration::from_millis(50)); - - assert_eq!(reader1.get(&10), Some(1)); - assert_eq!(reader2.get(&20), Some(2)); -} diff --git a/tests/journal_tests.rs b/tests/journal_tests.rs index c81b68b..c91427e 100644 --- a/tests/journal_tests.rs +++ b/tests/journal_tests.rs @@ -1,33 +1,33 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; #[test] #[should_panic(expected = "Store is full")] fn test_journal_panic_when_full() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "full_test", size: 2, // Can hold only 2 u64 in_memory: true, }); - store.push(1); - store.push(2); - store.push(3); // This should panic + store.append(1); + store.append(2); + store.append(3); // This should panic } #[test] fn test_journal_no_circularity() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_circular_test", size: 2, in_memory: true, }); let reader = store.reader(); - store.push(1); - store.push(2); + store.append(1); + store.append(2); assert_eq!(reader.get_at(0), Some(1)); assert_eq!(reader.get_at(1), Some(2)); diff --git a/tests/logic_tests.rs b/tests/logic_tests.rs index 2bdeb39..e0edb52 100644 --- a/tests/logic_tests.rs +++ b/tests/logic_tests.rs @@ -1,10 +1,10 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; #[test] fn test_reader_next_and_with_logic() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "logic_test", 
size: 1024, in_memory: true, @@ -16,7 +16,7 @@ fn test_reader_next_and_with_logic() { assert!(reader.with(|&x| x).is_none()); // Push one value - store.push(100); + store.append(100); // next() should now be true assert!(reader.next()); @@ -29,7 +29,7 @@ fn test_reader_next_and_with_logic() { assert_eq!(reader.with(|&x| x), Some(100)); // Push another value - store.push(200); + store.append(200); // next() should be true assert!(reader.next()); @@ -40,16 +40,16 @@ fn test_reader_next_and_with_logic() { #[test] fn test_reader_get_at_and_last() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "logic_test_2", size: 1024, in_memory: true, }); let reader = store.reader(); - store.push(10); - store.push(20); - store.push(30); + store.append(10); + store.append(20); + store.append(30); assert_eq!(reader.get_at(0), Some(10)); assert_eq!(reader.get_at(1), Some(20)); diff --git a/tests/push_read_tests.rs b/tests/push_read_tests.rs index 06d5521..2277965 100644 --- a/tests/push_read_tests.rs +++ b/tests/push_read_tests.rs @@ -1,17 +1,17 @@ +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; #[test] fn test_push_then_read_single() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test1", size: 1024, in_memory: true, }); let reader = store.reader(); - store.push(42); + store.append(42); let res = reader.get_window::<1>(0).unwrap(); assert_eq!(res[0], 42); @@ -20,7 +20,7 @@ fn test_push_then_read_single() { #[test] fn test_multiple_push_read_in_order() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test2", size: 1024, in_memory: true, @@ -28,7 +28,7 @@ fn 
test_multiple_push_read_in_order() { let reader = store.reader(); for v in [1u32, 2, 3, 4, 5] { - store.push(v); + store.append(v); } let res = reader.get_window::<5>(0).unwrap(); @@ -40,7 +40,7 @@ fn test_multiple_push_read_in_order() { #[test] fn test_interleaved_push_and_read() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test3", size: 1024, in_memory: true, @@ -48,10 +48,10 @@ fn test_interleaved_push_and_read() { let reader = store.reader(); // Push values; verify FIFO order via get_window - store.push(10); - store.push(20); - store.push(30); - store.push(40); + store.append(10); + store.append(20); + store.append(30); + store.append(40); let res = reader.get_window::<4>(0).unwrap(); assert_eq!(res[0], 10); @@ -64,12 +64,12 @@ fn test_interleaved_push_and_read() { fn test_stores_are_isolated_by_type() { let engine = RodaEngine::new(); - let mut u_store = engine.store::(StoreOptions { + let mut u_store = engine.new_journal_store::(JournalStoreOptions { name: "u32", size: 1024, in_memory: true, }); - let mut i_store = engine.store::(StoreOptions { + let mut i_store = engine.new_journal_store::(JournalStoreOptions { name: "i64", size: 1024, in_memory: true, @@ -77,10 +77,10 @@ fn test_stores_are_isolated_by_type() { let u_reader = u_store.reader(); let i_reader = i_store.reader(); - u_store.push(1); - i_store.push(-1); - u_store.push(2); - i_store.push(-2); + u_store.append(1); + i_store.append(-1); + u_store.append(2); + i_store.append(-2); let u_res = u_reader.get_window::<2>(0).unwrap(); let i_res = i_reader.get_window::<2>(0).unwrap(); @@ -94,17 +94,17 @@ fn test_stores_are_isolated_by_type() { #[test] fn test_push_after_partial_reads() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "test4", size: 1024, in_memory: true, }); let reader = 
store.reader(); - store.push(100); - store.push(200); - store.push(300); - store.push(400); + store.append(100); + store.append(200); + store.append(300); + store.append(400); let res = reader.get_window::<4>(0).unwrap(); assert_eq!(res[0], 100); diff --git a/tests/stage_engine_tests.rs b/tests/stage_engine_tests.rs new file mode 100644 index 0000000..373469c --- /dev/null +++ b/tests/stage_engine_tests.rs @@ -0,0 +1,239 @@ +use roda_state::{OutputCollector, Stage, StageEngine, pipe}; +use std::thread; +use std::time::Duration; + +#[test] +fn test_basic_pipeline() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| Some(x + 1)) + .add_stage(|x: u32| Some(x * 2)); + + engine.send(10); + engine.send(20); + + assert_eq!(engine.receive(), Some(22)); // (10 + 1) * 2 + assert_eq!(engine.receive(), Some(42)); // (20 + 1) * 2 +} + +#[test] +fn test_none_filtering() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| if x.is_multiple_of(2) { Some(x) } else { None }); + + engine.send(1); + engine.send(2); + engine.send(3); + engine.send(4); + + assert_eq!(engine.receive(), Some(2)); + assert_eq!(engine.receive(), Some(4)); +} + +#[test] +fn test_multiple_outputs() { + struct Duplicate; + impl Stage for Duplicate { + fn process(&mut self, data: u32, collector: &mut C) + where + C: OutputCollector, + { + collector.push(data); + collector.push(data); + } + } + + let mut engine = StageEngine::::new().add_stage(Duplicate); + + engine.send(5); + assert_eq!(engine.receive(), Some(5)); + assert_eq!(engine.receive(), Some(5)); +} + +#[test] +fn test_load_moderate() { + let count = 1000; + let mut engine = + StageEngine::::with_capacity(count + 1).add_stage(|x: u32| Some(x + 1)); + + for i in 0..count { + engine.send(i as u32); + } + + for i in 0..count { + assert_eq!(engine.receive(), Some(i as u32 + 1)); + } +} + +#[test] +fn test_concurrency_stress() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| { + // Some artificial delay to force 
concurrency + thread::sleep(Duration::from_millis(1)); + Some(x) + }) + .add_stage(|x: u32| { + thread::sleep(Duration::from_millis(1)); + Some(x) + }); + + let count = 100; + for i in 0..count { + engine.send(i); + } + + for i in 0..count { + assert_eq!(engine.receive(), Some(i)); + } +} + +#[test] +fn test_complex_pipe_macro() { + let mut engine = StageEngine::::new().add_stage(pipe![ + |x: u32| Some(x as u64), + |x: u64| Some(x * 10), + |x: u64| Some(x + 5), + ]); + + engine.send(1); + assert_eq!(engine.receive(), Some(15)); +} + +#[test] +fn test_empty_pipeline() { + let mut engine = StageEngine::::new(); + engine.send(42); + assert_eq!(engine.receive(), Some(42)); +} + +#[test] +fn test_await_idle() { + let mut engine = StageEngine::::new().add_stage(|x: u32| { + // Very short sleep to test await_idle without being too slow + thread::sleep(Duration::from_millis(1)); + Some(x) + }); + + engine.send(1); + // Give it a tiny bit of time to start + thread::sleep(Duration::from_millis(5)); + engine.await_idle(Duration::from_millis(200)); + assert_eq!(engine.output_size(), 1); + assert_eq!(engine.receive(), Some(1)); +} + +#[test] +fn test_large_pod_struct() { + #[repr(C)] + #[derive(Debug, Clone, Copy, bytemuck::Pod, bytemuck::Zeroable, PartialEq)] + struct Large { + data: [f64; 16], + id: u64, + } + + let mut engine = StageEngine::::new().add_stage(|mut l: Large| { + l.id += 1; + Some(l) + }); + + let input = Large { + data: [1.0; 16], + id: 100, + }; + engine.send(input); + + let expected = Large { + data: [1.0; 16], + id: 101, + }; + assert_eq!(engine.receive(), Some(expected)); +} + +#[test] +fn test_nested_pipes() { + let mut engine = StageEngine::::new().add_stage(pipe![ + |x: u32| Some(x + 1), + pipe![|x: u32| Some(x * 2), |x: u32| Some(x + 1),] + ]); + + engine.send(10); + // (10 + 1) * 2 + 1 = 23 + assert_eq!(engine.receive(), Some(23)); +} + +#[test] +fn test_multi_stage_load() { + let stages = 5; + let items = 100; + + let mut engine = 
StageEngine::::new(); + for _ in 0..stages { + engine = engine.add_stage(|x: u32| Some(x + 1)); + } + + for i in 0..items { + engine.send(i); + } + + for i in 0..items { + assert_eq!(engine.receive(), Some(i + stages as u32)); + } +} + +#[test] +#[should_panic(expected = "Store is full")] +fn test_input_capacity_limit_panic() { + let mut engine = StageEngine::::with_capacity(1); + engine.send(1); + engine.send(2); // Should panic here +} + +#[test] +fn test_stage_producing_none() { + let mut engine = StageEngine::::new() + .add_stage(|x: u32| if x > 10 { Some(x) } else { None }) + .add_stage(|x: u32| Some(x * 2)); + + engine.send(5); + engine.send(15); + + engine.await_idle(Duration::from_millis(100)); + assert_eq!(engine.output_size(), 1); + assert_eq!(engine.receive(), Some(30)); +} + +#[test] +fn test_worker_panic_on_drop() { + // This test ensures that if a worker panics, the engine will panic on drop. + let result = std::panic::catch_unwind(|| { + let mut engine = StageEngine::::new().add_stage(|_| { + panic!("Stage panic"); + #[allow(unreachable_code)] + Some(0u32) + }); + engine.send(1); + // Wait for worker to panic + thread::sleep(Duration::from_millis(50)); + // engine is dropped here + }); + assert!(result.is_err()); +} + +#[test] +fn test_long_pipeline_heavy_load() { + let stages = 10; + let items = 5000; + + let mut engine = StageEngine::::with_capacity(items + 1); + for _ in 0..stages { + engine = engine.add_stage(|x: u32| Some(x + 1)); + } + + for i in 0..items { + engine.send(i as u32); + } + + for i in 0..items { + assert_eq!(engine.receive(), Some(i as u32 + stages as u32)); + } +} diff --git a/tests/store_no_alloc_tests.rs b/tests/store_no_alloc_tests.rs index ed2aad1..da46aa4 100644 --- a/tests/store_no_alloc_tests.rs +++ b/tests/store_no_alloc_tests.rs @@ -1,6 +1,6 @@ use assert_no_alloc::*; +use roda_state::JournalStoreOptions; use roda_state::RodaEngine; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; 
#[cfg(debug_assertions)] #[global_allocator] @@ -9,26 +9,26 @@ static ALLOC: AllocDisabler = AllocDisabler; #[test] fn test_store_push_no_alloc() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_push", size: 1024, in_memory: true, }); assert_no_alloc(|| { - store.push(42); + store.append(42); }); } #[test] fn test_store_reader_next_no_alloc() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_next", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); assert_no_alloc(|| { @@ -39,12 +39,12 @@ fn test_store_reader_next_no_alloc() { #[test] fn test_store_reader_get_no_alloc() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); reader.next(); @@ -56,13 +56,13 @@ fn test_store_reader_get_no_alloc() { #[test] fn test_store_reader_get_window_no_alloc() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_window", size: 1024, in_memory: true, }); - store.push(42); - store.push(43); + store.append(42); + store.append(43); let reader = store.reader(); assert_no_alloc(|| { @@ -75,12 +75,12 @@ fn test_store_reader_get_window_no_alloc() { #[test] fn test_store_reader_get_at_no_alloc() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get_at", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); assert_no_alloc(|| { @@ 
-91,28 +91,15 @@ fn test_store_reader_get_at_no_alloc() { #[test] fn test_store_reader_get_last_no_alloc() { let engine = RodaEngine::new(); - let mut store = engine.store::(StoreOptions { + let mut store = engine.new_journal_store::(JournalStoreOptions { name: "no_alloc_get_last", size: 1024, in_memory: true, }); - store.push(42); + store.append(42); let reader = store.reader(); assert_no_alloc(|| { let _ = reader.get_last(); }); } - -#[test] -fn test_store_direct_index_allocations_allowed() { - let engine = RodaEngine::new(); - let store = engine.store::(StoreOptions { - name: "direct_index_alloc", - size: 1024, - in_memory: true, - }); - - // direct_index now allocates because it uses crossbeam-skiplist - let _ = store.direct_index::(); -} diff --git a/tests/window_tests.rs b/tests/window_tests.rs deleted file mode 100644 index 16f173e..0000000 --- a/tests/window_tests.rs +++ /dev/null @@ -1,315 +0,0 @@ -use bytemuck::{Pod, Zeroable}; -use roda_state::components::{Engine, Store, StoreOptions, StoreReader}; -use roda_state::{RodaEngine, Window}; - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct DataPoint { - pub value: f64, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy, Default, PartialEq, Pod, Zeroable)] -pub struct Analysis { - pub average: f64, - pub is_increasing: u32, - pub _pad: u32, -} - -#[test] -fn test_window_filling_and_sliding() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - // Run window reduce inside worker - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(3, |window| { - if window.len() < 3 { - return None; - } - let sum: f64 = 
window.iter().map(|d| d.value).sum(); - let increasing = - window[2].value > window[1].value && window[1].value > window[0].value; - Some(Analysis { - average: sum / 3.0, - is_increasing: if increasing { 1 } else { 0 }, - ..Default::default() - }) - }); - }); - - // Push data points - for i in 1..=5 { - source.push(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - // Validate by get_window all outputs (5 - 3 + 1 = 3) - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].average, 2.0); - assert_eq!(res[0].is_increasing, 1); - assert_eq!(res[1].average, 3.0); - assert_eq!(res[1].is_increasing, 1); - assert_eq!(res[2].average, 4.0); - assert_eq!(res[2].is_increasing, 1); -} - -#[test] -fn test_window_size_one() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - // Window size 1 should process every item individually - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(1, |window| { - assert_eq!(window.len(), 1); - Some(Analysis { - average: window[0].value, - is_increasing: 0, - ..Default::default() - }) - }); - }); - - // Push values - for v in [10.0, 20.0, 30.0] { - source.push(DataPoint { value: v }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].average, 10.0); - assert_eq!(res[0].is_increasing, 0); - assert_eq!(res[1].average, 20.0); - assert_eq!(res[1].is_increasing, 0); - assert_eq!(res[2].average, 30.0); - 
assert_eq!(res[2].is_increasing, 0); -} - -#[test] -fn test_window_large_sliding() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 100, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 100, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - // Larger window size - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(10, |window| { - if window.len() < 10 { - return None; - } - let sum: f64 = window.iter().map(|d| d.value).sum(); - Some(Analysis { - average: sum / 10.0, - is_increasing: if window[9].value > window[0].value { - 1 - } else { - 0 - }, - ..Default::default() - }) - }); - }); - - // Push values 0..11 -> expect 3 outputs - for i in 0..12 { - source.push(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0].average, 4.5); - assert_eq!(res[0].is_increasing, 1); - assert_eq!(res[1].average, 5.5); - assert_eq!(res[1].is_increasing, 1); - assert_eq!(res[2].average, 6.5); - assert_eq!(res[2].is_increasing, 1); -} - -#[test] -fn test_window_worker_large() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 2000, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 2000, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(10, |window| { - if window.len() < 10 { - return None; - } - let sum: f64 = window.iter().map(|d| d.value).sum(); - 
Some(Analysis { - average: sum / 10.0, - is_increasing: if window[window.len() - 1].value > window[0].value { - 1 - } else { - 0 - }, - ..Default::default() - }) - }); - }); - - for i in 0..1000 { - source.push(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<991>(0).unwrap(); - assert_eq!(res[0].average, 4.5); // (0+1+2+3+4+5+6+7+8+9)/10 = 45/10 = 4.5 - assert_eq!(res[0].is_increasing, 1); -} - -#[test] -fn test_window_max_value() { - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(3, |window| { - window.iter().map(|d| d.value).max_by(|a, b| a.total_cmp(b)) - }); - }); - - // Push values: expect maxima per 3-sized window - for v in [1.0, 3.0, 2.0, 5.0, 4.0] { - source.push(DataPoint { value: v }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<3>(0).unwrap(); - assert_eq!(res[0], 3.0); - assert_eq!(res[1], 5.0); - assert_eq!(res[2], 5.0); -} - -#[test] -fn test_window_all_none_until_full() { - use std::sync::Arc; - use std::sync::atomic::{AtomicUsize, Ordering}; - let engine = RodaEngine::new(); - let mut source = engine.store::(StoreOptions { - name: "source", - size: 10, - in_memory: true, - }); - let mut target = engine.store::(StoreOptions { - name: "target", - size: 10, - in_memory: true, - }); - let source_reader = source.reader(); - let target_reader = target.reader(); - let pipeline = Window::new(); - - 
let call_count = Arc::new(AtomicUsize::new(0)); - let cc = call_count.clone(); - engine.run_worker(move || { - source_reader.next(); - pipeline - .from(&source_reader) - .to(&mut target) - .reduce(5, |window: &[DataPoint]| { - cc.fetch_add(1, Ordering::Relaxed); - if window.len() == 5 { Some(1u8) } else { None } - }); - }); - - for i in 0..5 { - source.push(DataPoint { value: i as f64 }); - } - - // Give some time for the worker to process - std::thread::sleep(std::time::Duration::from_millis(100)); - - let res = target_reader.get_window::<1>(0).unwrap(); - assert_eq!(res[0], 1); -}