diff --git a/Cargo.lock b/Cargo.lock index 4c8b3767ad..78595b5303 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -132,35 +132,6 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" -[[package]] -name = "apache-avro" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" -dependencies = [ - "bigdecimal", - "bon", - "bzip2", - "crc32fast", - "digest 0.10.7", - "liblzma", - "log", - "miniz_oxide", - "num-bigint", - "quad-rand", - "rand 0.9.4", - "regex-lite", - "serde", - "serde_bytes", - "serde_json", - "snap", - "strum", - "strum_macros", - "thiserror 2.0.18", - "uuid", - "zstd", -] - [[package]] name = "ar_archive_writer" version = "0.5.1" @@ -190,9 +161,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -211,9 +182,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", @@ -225,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" +checksum = 
"cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" dependencies = [ "ahash", "arrow-buffer", @@ -236,17 +207,41 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "num-complex", "num-integer", "num-traits", ] +[[package]] +name = "arrow-avro" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "049230728cd6e093088c8d231b4beede184e35cad7777c1505c0d5a8571f4376" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "bytes", + "bzip2", + "crc", + "flate2", + "indexmap 2.14.0", + "liblzma", + "rand 0.9.4", + "serde", + "serde_json", + "snap", + "strum_macros 0.28.0", + "uuid", + "zstd", +] + [[package]] name = "arrow-buffer" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" dependencies = [ "bytes", "half", @@ -256,9 +251,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", @@ -278,9 +273,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" +checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de" dependencies = [ "arrow-array", "arrow-cast", @@ -293,9 +288,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -306,9 +301,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302b2e036335f3f04d65dad3f74ff1f2aae6dc671d6aa04dc6b61193761e16fb" +checksum = "28abfe8bf9f124e5fc83b334af4fa58f8d0323ad25312ccb2d1da50178415704" dependencies = [ "arrow-arith", "arrow-array", @@ -334,9 +329,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" +checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" dependencies = [ "arrow-array", "arrow-buffer", @@ -350,15 +345,16 @@ dependencies = [ [[package]] name = "arrow-json" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" +checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", - "arrow-data", + "arrow-ord", "arrow-schema", + "arrow-select", "chrono", "half", "indexmap 2.14.0", @@ -374,9 +370,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -387,9 +383,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "58.1.0" +version = "58.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -400,9 +396,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" dependencies = [ "serde_core", "serde_json", @@ -410,9 +406,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash", "arrow-array", @@ -424,9 +420,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -564,7 +560,7 @@ dependencies = [ "fastrand", "hex", "http 1.4.0", - "sha1", + "sha1 0.10.6", "time", "tokio", "tracing", @@ -1212,7 +1208,6 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "serde", ] [[package]] @@ -1357,31 +1352,6 @@ dependencies = [ "time", ] -[[package]] -name = "bon" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" -dependencies = [ - "bon-macros", - "rustversion", -] - -[[package]] -name = "bon-macros" -version = "3.9.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" -dependencies = [ - "darling 0.23.0", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.117", -] - [[package]] name = "brotli" version = "8.0.2" @@ -1801,6 +1771,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" + [[package]] name = "crc32fast" version = "1.5.0" @@ -2067,14 +2052,12 @@ dependencies = [ [[package]] name = "datafusion" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "arrow-schema", "async-trait", - "bytes", "bzip2", "chrono", "datafusion-catalog", @@ -2105,14 +2088,13 @@ dependencies = [ "datafusion-sql", "flate2", "futures", + "indexmap 2.14.0", "itertools 0.14.0", "liblzma", "log", "object_store", "parking_lot", "parquet", - "rand 0.9.4", - "regex", "sqlparser", "tempfile", "tokio", @@ -2123,9 +2105,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" +version = "54.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-trait", @@ -2148,9 +2129,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-trait", @@ -2171,9 +2151,8 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22c001ad1ac11cda09dab69b151eef5b1a992e23bc524ab0d1e63e5dea327" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-trait", @@ -2199,17 +2178,16 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ - "ahash", - "apache-avro", "arrow", "arrow-ipc", + "arrow-schema", "chrono", + "foldhash 0.2.0", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "hex", "indexmap 2.14.0", "itertools 0.14.0", @@ -2217,18 +2195,17 @@ dependencies = [ "log", "object_store", "parquet", - "paste", "recursive", "sqlparser", "tokio", + "uuid", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "futures", "log", @@ -2237,9 +2214,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-compression", @@ -2263,6 +2239,7 @@ dependencies = [ "liblzma", "log", "object_store", + "parking_lot", "rand 0.9.4", "tokio", "tokio-util", @@ -2272,9 +2249,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "arrow-ipc", @@ -2296,29 +2272,26 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a579c3bd290c66ea4b269493e75e8a3ed42c9c895a651f10210a29538aee50c4" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ - "apache-avro", "arrow", + "arrow-avro", "async-trait", "bytes", "datafusion-common", "datafusion-datasource", - "datafusion-physical-expr-common", + "datafusion-physical-expr-adapter", "datafusion-physical-plan", "datafusion-session", "futures", - 
"num-traits", "object_store", ] [[package]] name = "datafusion-datasource-csv" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-trait", @@ -2339,9 +2312,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-trait", @@ -2356,16 +2328,14 @@ dependencies = [ "datafusion-session", "futures", "object_store", - "serde_json", "tokio", "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a8e0365e0e08e8ff94d912f0ababcf9065a1a304018ba90b1fc83c855b4997" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-trait", @@ -2375,6 +2345,7 @@ dependencies = [ "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", @@ -2393,20 +2364,17 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" +version = "54.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" [[package]] name = "datafusion-execution" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "arrow-buffer", "async-trait", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -2423,11 +2391,11 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", + "arrow-schema", "async-trait", "chrono", "datafusion-common", @@ -2438,7 +2406,6 @@ dependencies = [ "datafusion-physical-expr-common", "indexmap 2.14.0", "itertools 0.14.0", - "paste", "recursive", "serde_json", "sqlparser", @@ -2446,22 +2413,19 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "datafusion-common", "indexmap 2.14.0", "itertools 0.14.0", - "paste", ] [[package]] name = "datafusion-functions" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "arrow-buffer", @@ -2476,26 +2440,24 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", + "datafusion-physical-expr-common", "hex", "itertools 0.14.0", "log", - "md-5 0.10.6", + "md-5 0.11.0", "memchr", "num-traits", "rand 0.9.4", "regex", - "sha2 0.10.9", - "unicode-segmentation", + "sha2 0.11.0", "uuid", ] [[package]] name = "datafusion-functions-aggregate" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-doc", @@ -2505,19 +2467,17 @@ dependencies = [ "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", + "foldhash 0.2.0", "half", "log", "num-traits", - "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-expr-common", @@ -2526,9 +2486,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" +version = "54.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "arrow-ord", @@ -2542,34 +2501,32 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "itertools 0.14.0", "itoa", "log", - "paste", + "memchr", ] [[package]] name = "datafusion-functions-table" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "async-trait", "datafusion-catalog", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", "parking_lot", - "paste", ] [[package]] name = "datafusion-functions-window" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "datafusion-common", @@ -2580,14 +2537,12 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "log", - "paste", ] [[package]] name = "datafusion-functions-window-common" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2595,9 
+2550,8 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "datafusion-doc", "quote", @@ -2606,9 +2560,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "chrono", @@ -2626,11 +2579,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ - "ahash", "arrow", "datafusion-common", "datafusion-expr", @@ -2638,11 +2589,10 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap 2.14.0", "itertools 0.14.0", "parking_lot", - "paste", "petgraph", "recursive", "tokio", @@ -2650,9 +2600,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" +version = "54.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "datafusion-common", @@ -2665,26 +2614,24 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ - "ahash", "arrow", "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap 2.14.0", "itertools 0.14.0", "parking_lot", + "pin-project", ] [[package]] name = "datafusion-physical-optimizer" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "datafusion-common", @@ -2701,12 +2648,12 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ - "ahash", "arrow", + "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "async-trait", @@ -2721,7 +2668,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "indexmap 2.14.0", "itertools 0.14.0", "log", @@ -2733,9 +2680,8 @@ dependencies = [ [[package]] 
name = "datafusion-proto" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a387aaef949dc16bb6abc81bd1af850ec7449183aef011214f9724957495738" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "chrono", @@ -2756,14 +2702,12 @@ dependencies = [ "datafusion-proto-common", "object_store", "prost", - "rand 0.9.4", ] [[package]] name = "datafusion-proto-common" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16e614c7c53a9c304c6a850b821010bb492e57300311835f1180613f9d2c63d9" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "datafusion-common", @@ -2772,9 +2716,8 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "datafusion-common", @@ -2783,15 +2726,13 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools 0.14.0", "log", ] [[package]] name = "datafusion-session" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "async-trait", "datafusion-common", @@ -2803,9 +2744,8 @@ dependencies = [ 
[[package]] name = "datafusion-spark" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e059dcf8544da0d6598d0235be3cc29c209094a5976b2e4822e4a2cf91c2b5c5" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "bigdecimal", @@ -2818,21 +2758,23 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-functions-nested", "log", + "num-traits", "percent-encoding", "rand 0.9.4", "serde_json", - "sha1", - "sha2 0.10.9", + "sha1 0.11.0", + "sha2 0.11.0", + "twox-hash", "url", ] [[package]] name = "datafusion-sql" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "arrow", "bigdecimal", @@ -2849,9 +2791,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "53.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98494539a5468979cc42d86c7bc5f0f8cb71ee5c742694c26fc34efdd29dd2e5" +version = "54.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1321d60cc37ee487d1e7ce7f501357c3236b2542#1321d60cc37ee487d1e7ce7f501357c3236b2542" dependencies = [ "async-recursion", "async-trait", @@ -3072,9 +3013,9 @@ dependencies = [ [[package]] name = "endian-type" -version = "0.1.2" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" +checksum = "869b0adbda23651a9c5c0c3d270aac9fcb52e8622a8f2b17e57802d7791962f2" [[package]] name = "env_filter" @@ -3156,17 
+3097,6 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" -[[package]] -name = "fd-lock" -version = "4.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" -dependencies = [ - "cfg-if", - "rustix 1.1.4", - "windows-sys 0.59.0", -] - [[package]] name = "ferroid" version = "2.0.0" @@ -3516,9 +3446,14 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "hashlink" @@ -3913,7 +3848,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.17.0", + "hashbrown 0.17.1", "serde", "serde_core", ] @@ -4503,9 +4438,9 @@ dependencies = [ [[package]] name = "nix" -version = "0.30.1" +version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" dependencies = [ "bitflags 2.11.1", "cfg-if", @@ -4563,7 +4498,6 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", - "serde", ] [[package]] @@ -4798,9 +4732,9 @@ dependencies = [ [[package]] name = "parquet" -version = "58.1.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" 
+checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908" dependencies = [ "ahash", "arrow-array", @@ -4816,7 +4750,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "lz4_flex", "num-bigint", "num-integer", @@ -5355,12 +5289,6 @@ dependencies = [ "pulldown-cmark", ] -[[package]] -name = "quad-rand" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" - [[package]] name = "quick-xml" version = "0.39.2" @@ -5450,9 +5378,9 @@ checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "radix_trie" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +checksum = "3b4431027dcd37fc2a73ef740b5f233aa805897935b8bce0195e41bbf9a3289a" dependencies = [ "endian-type", "nibble_vec", @@ -5995,24 +5923,23 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rustyline" -version = "17.0.2" +version = "18.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" +checksum = "4a990b25f351b25139ddc7f21ee3f6f56f86d6846b74ac8fad3a719a287cd4a0" dependencies = [ "bitflags 2.11.1", "cfg-if", "clipboard-win", - "fd-lock", "home", "libc", "log", "memchr", - "nix 0.30.1", + "nix 0.31.3", "radix_trie", "unicode-segmentation", "unicode-width 0.2.2", "utf8parse", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6142,16 +6069,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde_bytes" -version = "0.11.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" -dependencies = [ - "serde", - "serde_core", 
-] - [[package]] name = "serde_core" version = "1.0.228" @@ -6189,6 +6106,7 @@ version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ + "indexmap 2.14.0", "itoa", "memchr", "serde", @@ -6297,6 +6215,17 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.2", +] + [[package]] name = "sha2" version = "0.10.9" @@ -6419,9 +6348,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.61.0" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" +checksum = "13c6d1b651dc4edf07eead2a0c6c78016ce971bc2c10da5266861b13f25e7cec" dependencies = [ "log", "recursive", @@ -6523,7 +6452,7 @@ version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros", + "strum_macros 0.27.2", ] [[package]] @@ -6538,13 +6467,26 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "strum_macros" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "substrait" -version = "0.62.2" +version = "0.63.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +checksum = "e620ff4d5c02fd6f7752931aa74b16a26af66a63022cc1ad412c77edbe0bab47" dependencies = [ "heck 0.5.0", + "indexmap 2.14.0", 
"pbjson", "pbjson-build", "pbjson-types", @@ -7224,6 +7166,9 @@ name = "twox-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +dependencies = [ + "rand 0.9.4", +] [[package]] name = "typenum" @@ -7416,7 +7361,6 @@ dependencies = [ "atomic", "getrandom 0.4.2", "js-sys", - "serde_core", "wasm-bindgen", ] diff --git a/Cargo.toml b/Cargo.toml index 99d2b85932..c778a192b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,19 +26,22 @@ edition = "2024" rust-version = "1.88.0" [workspace.dependencies] -arrow = { version = "58", features = ["ipc_compression"] } -arrow-flight = { version = "58", features = ["flight-sql-experimental"] } +arrow = { version = "58.3", features = ["ipc_compression"] } +arrow-flight = { version = "58.3", features = ["flight-sql-experimental"] } clap = { version = "4.5", features = ["derive", "cargo"] } -datafusion = "53" -datafusion-cli = "53" -datafusion-proto = "53" -datafusion-proto-common = "53" -datafusion-spark = "53" -datafusion-substrait = "53" +# DataFusion 54 has not been published to crates.io yet, so we pin to a commit +# on apache/datafusion `branch-54`. Switch back to the published `"54"` version +# once it is released. 
+datafusion = { git = "https://github.com/apache/datafusion.git", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" } +datafusion-cli = { git = "https://github.com/apache/datafusion.git", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" } +datafusion-proto = { git = "https://github.com/apache/datafusion.git", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" } +datafusion-proto-common = { git = "https://github.com/apache/datafusion.git", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" } +datafusion-spark = { git = "https://github.com/apache/datafusion.git", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" } +datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "1321d60cc37ee487d1e7ce7f501357c3236b2542" } insta = "1.47" -object_store = "0.13" +object_store = "0.13.2" prost = "0.14" prost-types = "0.14" rstest = { version = "0.26" } diff --git a/ballista-cli/Cargo.toml b/ballista-cli/Cargo.toml index cdfe436b98..385ab9ac68 100644 --- a/ballista-cli/Cargo.toml +++ b/ballista-cli/Cargo.toml @@ -44,7 +44,7 @@ percent-encoding = { version = "2.3.2", optional = true } prometheus-parse = { version = "0.2", optional = true } ratatui = { version = "0.30.0", optional = true } reqwest = { version = "0.13.3", features = ["json"], optional = true } -rustyline = "17.0.1" +rustyline = "18.0.0" serde = { version = "1", features = ["derive"], optional = true } serde_json = { version = "1", optional = true } tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "time", "parking_lot"] } diff --git a/ballista/client/tests/multi_file_scan.rs b/ballista/client/tests/multi_file_scan.rs new file mode 100644 index 0000000000..8ed65c498c --- /dev/null +++ b/ballista/client/tests/multi_file_scan.rs @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod common; + +// Regression coverage for the DataFusion 54 upgrade tracked in +// https://github.com/apache/datafusion-ballista/issues/1776 (see also the +// linked datafusion-distributed issue #460 and PR #467 about FileScanConfig +// work stealing). +// +// DataFusion 54's `FileScanConfig::create_sibling_state` returns a +// `SharedWorkSource` populated with every file in the scan, and each +// partition's stream drains files from that queue. In a single-process +// DataFusion run that's fine because all partitions of the same +// DataSourceExec instance cooperatively drain one queue, but Ballista +// deserialises a fresh DataSourceExec for every task and runs a single +// partition against it. Without intervention the partition that does run +// drains the whole queue and reads every file, so a 6-file table executed +// by 6 tasks returns 6x the data. +// +// `restrict_file_scan_to_partition` in ballista-core sets +// `preserve_order = true` on every FileScanConfig before execution, which +// short-circuits `FileScanConfig::create_sibling_state` to `None`. Each +// partition then falls back to `WorkSource::Local(file_groups[partition])` +// and scans exactly the files the planner assigned to it, so a 6-file scan +// dispatched as 6 tasks reads 6 files instead of 36. These tests would fail +// without that helper. 
+#[cfg(test)] +#[cfg(feature = "standalone")] +mod work_stealing { + use ballista::prelude::SessionContextExt; + use datafusion::arrow::array::{Int64Array, RecordBatch}; + use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::common::Result; + use datafusion::config::TableParquetOptions; + use datafusion::dataframe::DataFrameWriteOptions; + use datafusion::prelude::{ParquetReadOptions, SessionContext}; + use std::sync::Arc; + use tempfile::TempDir; + + /// Writes `num_files` parquet files into `dir`, each holding the rows + /// `[file_idx * rows_per_file .. (file_idx + 1) * rows_per_file)`. + /// Returns the total number of rows written and the expected sum across + /// the `value` column, which the tests use to detect duplicated or missing + /// reads. + async fn write_parquet_dataset( + dir: &std::path::Path, + num_files: usize, + rows_per_file: usize, + ) -> Result<(usize, i64)> { + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + DataType::Int64, + false, + )])); + + // DataFusion-only context for writing the fixture so we don't depend + // on the cluster being healthy for setup. 
+ let writer_ctx = SessionContext::new(); + for file_idx in 0..num_files { + let start = (file_idx * rows_per_file) as i64; + let values: Vec = (start..start + rows_per_file as i64).collect(); + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int64Array::from(values))], + )?; + let df = writer_ctx.read_batch(batch)?; + let path = dir.join(format!("part-{file_idx:04}.parquet")); + df.write_parquet( + path.to_str().unwrap(), + DataFrameWriteOptions::default(), + Some(TableParquetOptions::default()), + ) + .await?; + } + + let total_rows = num_files * rows_per_file; + let total_sum = (0..total_rows as i64).sum(); + Ok((total_rows, total_sum)) + } + + #[tokio::test] + async fn multi_file_parquet_scan_counts_every_row_exactly_once() -> Result<()> { + let tmp_dir = TempDir::new().unwrap(); + let (expected_rows, expected_sum) = + write_parquet_dataset(tmp_dir.path(), 6, 7).await?; + + let ctx = SessionContext::standalone().await?; + ctx.register_parquet( + "t", + tmp_dir.path().to_str().unwrap(), + ParquetReadOptions::default(), + ) + .await?; + + let batches = ctx + .sql("SELECT COUNT(*) AS row_count, SUM(value) AS value_sum FROM t") + .await? + .collect() + .await?; + + assert_eq!(batches.len(), 1); + let batch = &batches[0]; + let row_count = batch + .column_by_name("row_count") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(0); + let value_sum = batch + .column_by_name("value_sum") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(0); + + assert_eq!( + row_count, expected_rows as i64, + "Ballista returned the wrong row count; work stealing causes \ + duplicated rows here" + ); + assert_eq!( + value_sum, expected_sum, + "Ballista returned the wrong column sum; duplicated reads inflate \ + this" + ); + + Ok(()) + } + + // Regression for an earlier version of the work-stealing fix that emptied + // out file_groups for all partition slots except the running task's. 
That + // broke TPC-H Q11: in a broadcast hash join the build-side + // DataSourceExec is read with execute(0..K) by the join itself, so + // emptying the other slots starved the hash table and the join hung. + // This test joins two multi-file parquet tables under a configuration + // that strongly biases the planner toward broadcast hash join, and + // checks the join still returns every matched row. + #[tokio::test] + async fn multi_file_parquet_broadcast_hash_join_returns_full_result() -> Result<()> { + let left_dir = TempDir::new().unwrap(); + let right_dir = TempDir::new().unwrap(); + // Left side is intentionally larger so the planner picks the small + // right side as the broadcast build input. + let (left_rows, _) = write_parquet_dataset(left_dir.path(), 5, 8).await?; + let (right_rows, _) = write_parquet_dataset(right_dir.path(), 4, 4).await?; + + let ctx = SessionContext::standalone().await?; + ctx.register_parquet( + "l", + left_dir.path().to_str().unwrap(), + ParquetReadOptions::default(), + ) + .await?; + ctx.register_parquet( + "r", + right_dir.path().to_str().unwrap(), + ParquetReadOptions::default(), + ) + .await?; + + let batches = ctx + .sql("SELECT COUNT(*) AS matched FROM l JOIN r ON l.value = r.value") + .await? + .collect() + .await?; + + let matched = batches[0] + .column_by_name("matched") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(0); + // The ranges overlap (left = 0..40 contains right = 0..16), so + // the join must match exactly `right_rows` rows. Anything less means + // the build-side scan lost data; anything more would mean the probe + // side double-read. 
+ assert_eq!( + matched, right_rows as i64, + "broadcast hash join over multi-file scans must see every \ + build-side row exactly once; left had {left_rows} rows, right \ + had {right_rows}" + ); + + Ok(()) + } + + #[tokio::test] + async fn multi_file_parquet_group_by_returns_each_value_once() -> Result<()> { + let tmp_dir = TempDir::new().unwrap(); + let (expected_rows, _) = write_parquet_dataset(tmp_dir.path(), 4, 5).await?; + + let ctx = SessionContext::standalone().await?; + ctx.register_parquet( + "t", + tmp_dir.path().to_str().unwrap(), + ParquetReadOptions::default(), + ) + .await?; + + // GROUP BY across the whole dataset exercises a shuffle on top of the + // multi-file scan. If the scan double-counts, the per-key counts + // become 2 or higher. + let batches = ctx + .sql("SELECT value, COUNT(*) AS c FROM t GROUP BY value") + .await? + .collect() + .await?; + + let mut total_keys = 0usize; + for batch in &batches { + let counts = batch + .column_by_name("c") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..counts.len() { + assert_eq!( + counts.value(i), + 1, + "value at row {i} of batch was read {} times instead of \ + once; work stealing surfaces as a count > 1 here", + counts.value(i) + ); + total_keys += 1; + } + } + assert_eq!( + total_keys, expected_rows, + "expected every distinct value to be present exactly once" + ); + + Ok(()) + } +} diff --git a/ballista/core/src/diagram.rs b/ballista/core/src/diagram.rs index 5498c1161c..cbd148c85b 100644 --- a/ballista/core/src/diagram.rs +++ b/ballista/core/src/diagram.rs @@ -84,43 +84,27 @@ fn build_exec_plan_diagram( id: &mut AtomicUsize, draw_entity: bool, ) -> Result { - let operator_str = if plan.as_any().downcast_ref::().is_some() { + let operator_str = if plan.is::() { "AggregateExec" - } else if plan.as_any().downcast_ref::().is_some() { + } else if plan.is::() { "SortExec" - } else if plan.as_any().downcast_ref::().is_some() { + } else if plan.is::() { "ProjectionExec" - } else 
if plan.as_any().downcast_ref::().is_some() { + } else if plan.is::() { "HashJoinExec" - } else if plan.as_any().downcast_ref::().is_some() { + } else if plan.is::() { "DataSourceExec" - } else if plan.as_any().downcast_ref::().is_some() { + } else if plan.is::() { "FilterExec" - } else if plan.as_any().downcast_ref::().is_some() { + } else if plan.is::() { "ShuffleWriterExec" - } else if plan - .as_any() - .downcast_ref::() - .is_some() - { + } else if plan.is::() { "SortShuffleWriterExec" - } else if plan - .as_any() - .downcast_ref::() - .is_some() - { + } else if plan.is::() { "UnresolvedShuffleExec" - } else if plan - .as_any() - .downcast_ref::() - .is_some() - { + } else if plan.is::() { "CoalesceBatchesExec" - } else if plan - .as_any() - .downcast_ref::() - .is_some() - { + } else if plan.is::() { "CoalescePartitionsExec" } else { warn!("Unknown: {plan:?}"); @@ -137,7 +121,7 @@ fn build_exec_plan_diagram( )?; } for child in plan.children() { - if let Some(shuffle) = child.as_any().downcast_ref::() { + if let Some(shuffle) = child.downcast_ref::() { if !draw_entity { writeln!( w, diff --git a/ballista/core/src/execution_plans/distributed_explain_analyze.rs b/ballista/core/src/execution_plans/distributed_explain_analyze.rs index 0a7ddabb36..7508847a15 100644 --- a/ballista/core/src/execution_plans/distributed_explain_analyze.rs +++ b/ballista/core/src/execution_plans/distributed_explain_analyze.rs @@ -36,7 +36,6 @@ use datafusion::physical_plan::{ }; use datafusion_proto::logical_plan::AsLogicalPlan; use futures::StreamExt; -use std::any::Any; use std::convert::TryInto; use std::marker::PhantomData; use std::sync::Arc; @@ -107,10 +106,6 @@ impl ExecutionPlan for DistributedExplainAnalyzeExec "DistributedExplainAnalyzeExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn properties(&self) -> &Arc { &self.properties } @@ -132,7 +127,6 @@ impl ExecutionPlan for DistributedExplainAnalyzeExec let query_exec = children.pop().unwrap(); if query_exec - .as_any() 
.downcast_ref::>() .is_some() { @@ -172,7 +166,6 @@ impl ExecutionPlan for DistributedExplainAnalyzeExec } let job_id = query_exec - .as_any() .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal( diff --git a/ballista/core/src/execution_plans/distributed_query.rs b/ballista/core/src/execution_plans/distributed_query.rs index 2ef4521f67..223e6cfa00 100644 --- a/ballista/core/src/execution_plans/distributed_query.rs +++ b/ballista/core/src/execution_plans/distributed_query.rs @@ -48,7 +48,6 @@ use datafusion_proto::logical_plan::{ use futures::{Stream, StreamExt, TryFutureExt, TryStreamExt}; use log::{debug, error, info}; use parking_lot::Mutex; -use std::any::Any; use std::fmt::Debug; use std::marker::PhantomData; use std::sync::Arc; @@ -176,10 +175,6 @@ impl ExecutionPlan for DistributedQueryExec { "DistributedQueryExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { self.plan.schema().as_arrow().clone().into() } @@ -839,7 +834,6 @@ mod test { let new_exec = exec.clone().with_new_children(vec![]).unwrap(); let new_exec = new_exec - .as_any() .downcast_ref::>() .unwrap(); diff --git a/ballista/core/src/execution_plans/mod.rs b/ballista/core/src/execution_plans/mod.rs index ae46fad687..d97964ef03 100644 --- a/ballista/core/src/execution_plans/mod.rs +++ b/ballista/core/src/execution_plans/mod.rs @@ -20,6 +20,7 @@ mod distributed_explain_analyze; mod distributed_query; +mod restrict_file_scan; mod shuffle_reader; mod shuffle_writer; mod shuffle_writer_trait; @@ -31,6 +32,7 @@ use std::path::{Path, PathBuf}; use datafusion::common::exec_err; pub use distributed_explain_analyze::DistributedExplainAnalyzeExec; pub use distributed_query::DistributedQueryExec; +pub use restrict_file_scan::restrict_file_scan_to_partition; pub use shuffle_reader::{CoalescePlan, PartitionGroup, ShuffleReaderExec}; pub use shuffle_reader::{stats_for_partition, stats_for_partitions}; pub use shuffle_writer::DEFAULT_SHUFFLE_CHANNEL_CAPACITY; diff --git 
a/ballista/core/src/execution_plans/restrict_file_scan.rs b/ballista/core/src/execution_plans/restrict_file_scan.rs new file mode 100644 index 0000000000..8d79ded090 --- /dev/null +++ b/ballista/core/src/execution_plans/restrict_file_scan.rs @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Disable DataFusion 54's cross-partition file work stealing on every +//! `FileScanConfig` in a plan tree. +//! +//! DataFusion 54 added a `SharedWorkSource` to `FileScanConfig`: when any +//! partition opens its stream, it pulls files from a queue populated with +//! every file in the scan. That model assumes every partition of the same +//! `DataSourceExec` instance runs together and cooperatively drains the +//! queue exactly once. Ballista breaks the assumption — each task +//! deserialises its own copy of the plan and runs a single partition — so +//! the partition that does run drains the whole queue and ends up scanning +//! every file. A 6-file scan executed by 6 tasks reads 36 files and returns +//! six copies of the data. +//! +//! The fix is to pin every `FileScanConfig` to `preserve_order = true` +//! before execution. DataFusion's `FileScanConfig::create_sibling_state` +//! 
short-circuits to `None` when that flag is set, so no shared queue is +//! ever installed. Each partition then falls back to its own +//! `WorkSource::Local(file_groups[partition])` and scans exactly the files +//! the planner assigned to it. +//! +//! Notes: +//! * We can't just narrow `file_groups` per task, because broadcast hash +//! joins call `execute(0..K)` on the build-side `DataSourceExec` from +//! inside the join, so every partition slot must keep its files. TPC-H +//! Q11 hangs if you empty out the build-side slots — see +//! `ballista/client/tests/multi_file_scan.rs` for the simpler regression. +//! * `preserve_order = true` only disables file reordering at scan time; +//! it's already implicitly true whenever the config has an output +//! ordering, so the runtime path is well-exercised upstream. + +use std::sync::Arc; + +use datafusion::common::Result; +use datafusion::common::tree_node::{Transformed, TreeNode}; +use datafusion::datasource::physical_plan::{FileScanConfig, FileScanConfigBuilder}; +use datafusion::datasource::source::DataSourceExec; +use datafusion::physical_plan::ExecutionPlan; + +/// Rewrite every `FileScanConfig` in `plan` so its sibling work source is +/// suppressed, forcing each partition to scan only its own file group. +/// +/// The `partition` argument is the index of the partition this task will +/// execute. It is currently unused — pinning `preserve_order = true` is +/// enough to disable work stealing for any partition — but kept in the +/// signature so callers can stay symmetric across writer types and so a +/// future per-task narrowing scheme can drop in without touching them. +/// +/// If the leaf is something other than a `FileScanConfig`-backed +/// `DataSourceExec`, the config is single-partition (and so has nothing to +/// share), or `preserve_order` is already set, the node is returned unchanged. 
+pub fn restrict_file_scan_to_partition( + plan: Arc, + _partition: usize, +) -> Result> { + plan.transform_down(|node| { + let Some(data_source_exec) = node.downcast_ref::() else { + return Ok(Transformed::no(node)); + }; + let Some(file_scan) = data_source_exec + .data_source() + .downcast_ref::() + else { + return Ok(Transformed::no(node)); + }; + + // Single-partition scans don't trigger the work-stealing bug + // (there's nothing to steal from), and the flag is already set if + // the user opted into ordering preservation. + if file_scan.file_groups.len() <= 1 || file_scan.preserve_order { + return Ok(Transformed::no(node)); + } + + let new_config = FileScanConfigBuilder::from(file_scan.clone()) + .with_preserve_order(true) + .build(); + let new_exec = + DataSourceExec::from_data_source(new_config) as Arc; + Ok(Transformed::yes(new_exec)) + }) + .map(|t| t.data) +} + +#[cfg(test)] +mod tests { + use super::*; + use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::datasource::listing::PartitionedFile; + use datafusion::datasource::physical_plan::{FileGroup, ParquetSource}; + use datafusion::execution::object_store::ObjectStoreUrl; + use std::sync::Arc; + + fn dummy_file(name: &str) -> PartitionedFile { + PartitionedFile::new(name.to_string(), 0) + } + + fn build_plan(num_groups: usize) -> Arc { + let schema = Arc::new(Schema::new(vec![Field::new( + "value", + DataType::Int64, + false, + )])); + let groups: Vec = (0..num_groups) + .map(|i| FileGroup::new(vec![dummy_file(&format!("f{i}.parquet"))])) + .collect(); + let file_source = Arc::new(ParquetSource::new(schema.clone())); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) + .with_file_groups(groups) + .build(); + DataSourceExec::from_data_source(config) as Arc + } + + fn file_scan(plan: &Arc) -> FileScanConfig { + let exec = plan + .downcast_ref::() + .expect("DataSourceExec"); + exec.data_source() + .downcast_ref::() + 
.expect("FileScanConfig") + .clone() + } + + #[test] + fn sets_preserve_order_to_disable_work_stealing() { + let plan = build_plan(4); + assert!( + !file_scan(&plan).preserve_order, + "test fixture should start with default preserve_order=false" + ); + let restricted = restrict_file_scan_to_partition(plan, 2).unwrap(); + let scan = file_scan(&restricted); + assert!( + scan.preserve_order, + "preserve_order must be set so create_sibling_state returns None and \ + the SharedWorkSource is never installed" + ); + } + + #[test] + fn keeps_all_files_in_their_original_groups() { + let plan = build_plan(3); + let restricted = restrict_file_scan_to_partition(plan, 1).unwrap(); + let scan = file_scan(&restricted); + let groups: Vec> = scan + .file_groups + .iter() + .map(|g| g.iter().map(|f| f.path().to_string()).collect()) + .collect(); + assert_eq!( + groups, + vec![ + vec!["f0.parquet".to_string()], + vec!["f1.parquet".to_string()], + vec!["f2.parquet".to_string()], + ], + "every file_groups slot must keep its files so broadcast hash joins \ + can still iterate the full set on the build side" + ); + } + + #[test] + fn single_partition_scan_is_left_alone() { + let plan = build_plan(1); + let restricted = restrict_file_scan_to_partition(Arc::clone(&plan), 0).unwrap(); + // Single-partition scans have nothing to steal; the transform skips + // them and returns the original Arc untouched. 
+ assert!(Arc::ptr_eq(&plan, &restricted)); + } + + #[test] + fn preserves_partition_count() { + let plan = build_plan(3); + let restricted = restrict_file_scan_to_partition(plan, 1).unwrap(); + let scan = file_scan(&restricted); + assert_eq!( + scan.file_groups.len(), + 3, + "file_groups length must be preserved so DataSourceExec keeps its \ + advertised partition count" + ); + } +} diff --git a/ballista/core/src/execution_plans/shuffle_reader.rs b/ballista/core/src/execution_plans/shuffle_reader.rs index 8311c2a6a0..5b80c186dd 100644 --- a/ballista/core/src/execution_plans/shuffle_reader.rs +++ b/ballista/core/src/execution_plans/shuffle_reader.rs @@ -47,7 +47,6 @@ use itertools::Itertools; use log::{debug, error, trace}; use rand::prelude::SliceRandom; use rand::rng; -use std::any::Any; use std::collections::HashMap; use std::fmt::Debug; use std::fs::File; @@ -311,10 +310,6 @@ impl ExecutionPlan for ShuffleReaderExec { "ShuffleReaderExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -426,7 +421,7 @@ impl ExecutionPlan for ShuffleReaderExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result { + fn partition_statistics(&self, partition: Option) -> Result> { if self.broadcast { if let Some(idx) = partition && idx != 0 @@ -445,7 +440,7 @@ impl ExecutionPlan for ShuffleReaderExec { "broadcast shuffle reader at stage {} returned aggregated statistics: {:?}", self.stage_id, stats ); - return Ok(stats); + return Ok(Arc::new(stats)); } if let Some(idx) = partition { let partition_count = self.properties().partitioning.partition_count(); @@ -474,7 +469,7 @@ impl ExecutionPlan for ShuffleReaderExec { "shuffle reader at stage: {} and partition {} returned statistics: {:?}", self.stage_id, idx, stat_for_partition ); - stat_for_partition + stat_for_partition.map(Arc::new) } else { let stats_for_partitions = stats_for_partitions( self.schema.fields().len(), @@ -487,7 
+482,7 @@ impl ExecutionPlan for ShuffleReaderExec { "shuffle reader at stage: {} returned statistics for all partitions: {:?}", self.stage_id, stats_for_partitions ); - Ok(stats_for_partitions) + Ok(Arc::new(stats_for_partitions)) } } } diff --git a/ballista/core/src/execution_plans/shuffle_writer.rs b/ballista/core/src/execution_plans/shuffle_writer.rs index a852b91fc5..c52c9a0c4c 100644 --- a/ballista/core/src/execution_plans/shuffle_writer.rs +++ b/ballista/core/src/execution_plans/shuffle_writer.rs @@ -24,7 +24,6 @@ use datafusion::arrow::ipc::CompressionType; use datafusion::arrow::ipc::writer::IpcWriteOptions; use datafusion::arrow::ipc::writer::StreamWriter; -use std::any::Any; use std::fmt::Debug; use std::fs; use std::fs::File; @@ -35,7 +34,7 @@ use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; -use crate::execution_plans::create_shuffle_path; +use crate::execution_plans::{create_shuffle_path, restrict_file_scan_to_partition}; use crate::extension::SessionConfigExt; use crate::utils; @@ -207,9 +206,13 @@ impl ShuffleWriterExec { ) -> impl Future>> { let write_metrics = ShuffleWriteMetrics::new(input_partition, &self.metrics); let output_partitioning = self.shuffle_output_partitioning.clone(); - let plan = self.plan.clone(); + // Restrict file scans to this task's partition so DataFusion 54's + // shared work queue can't pull files from sibling partitions; see + // [`restrict_file_scan_to_partition`] for the full story. 
+ let plan = restrict_file_scan_to_partition(self.plan.clone(), input_partition); async move { + let plan = plan?; let now = Instant::now(); let channel_capacity = context .session_config() @@ -285,7 +288,7 @@ impl ShuffleWriterExec { exprs, num_output_partitions, repart_time, - ); + )?; while let Some(input_batch) = rx.blocking_recv() { partitioner.partition( @@ -437,10 +440,6 @@ impl ExecutionPlan for ShuffleWriterExec { "ShuffleWriterExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { self.plan.schema() } @@ -560,7 +559,7 @@ impl ExecutionPlan for ShuffleWriterExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result { + fn partition_statistics(&self, partition: Option) -> Result> { self.plan.partition_statistics(partition) } } @@ -787,7 +786,10 @@ mod tests { let batch = RecordBatch::try_new( schema.clone(), vec![ - Arc::new(UInt32Array::from(vec![Some(1), Some(3)])), + // 0 and 2 deliberately hash to different partitions under + // DataFusion 54's repartition hash seed; bumping the seed + // again may require picking new values here. + Arc::new(UInt32Array::from(vec![Some(0), Some(2)])), Arc::new(StringArray::from(vec![Some("hello"), Some("world")])), ], )?; diff --git a/ballista/core/src/execution_plans/sort_shuffle/writer.rs b/ballista/core/src/execution_plans/sort_shuffle/writer.rs index 8d88b7562d..7e0029ca67 100644 --- a/ballista/core/src/execution_plans/sort_shuffle/writer.rs +++ b/ballista/core/src/execution_plans/sort_shuffle/writer.rs @@ -21,7 +21,6 @@ //! per input partition, along with an index file mapping partition IDs to //! byte offsets. 
-use std::any::Any; use std::fs::File; use std::future::Future; use std::io::{BufWriter, Seek, Write}; @@ -35,7 +34,7 @@ use super::config::SortShuffleConfig; use super::index::ShuffleIndex; use super::partitioned_batch_iterator::PartitionedBatchIterator; use super::spill::SpillManager; -use crate::execution_plans::create_shuffle_path; +use crate::execution_plans::{create_shuffle_path, restrict_file_scan_to_partition}; use crate::serde::protobuf::ShuffleWritePartition; use datafusion::arrow::array::{ @@ -203,13 +202,17 @@ impl SortShuffleWriterExec { ) -> impl Future>> { let metrics = SortShuffleWriteMetrics::new(input_partition, &self.metrics); let config = self.config.clone(); - let plan = self.plan.clone(); + // Restrict file scans to this task's partition so DataFusion 54's + // shared work queue can't pull files from sibling partitions; see + // [`restrict_file_scan_to_partition`] for the full story. + let plan = restrict_file_scan_to_partition(self.plan.clone(), input_partition); let work_dir = self.work_dir.clone(); let job_id = self.job_id.clone(); let stage_id = self.stage_id; let partitioning = self.shuffle_output_partitioning.clone(); async move { + let plan = plan?; let now = Instant::now(); let mut stream = plan.execute(input_partition, context.clone())?; let schema = stream.schema(); @@ -538,10 +541,6 @@ impl ExecutionPlan for SortShuffleWriterExec { "SortShuffleWriterExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { self.plan.schema() } @@ -662,7 +661,7 @@ impl ExecutionPlan for SortShuffleWriterExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result { + fn partition_statistics(&self, partition: Option) -> Result> { self.plan.partition_statistics(partition) } } @@ -1163,7 +1162,8 @@ mod tests { // Reference: DataFusion's BatchPartitioner::new_hash_partitioner let mut ref_partitioner = - BatchPartitioner::new_hash_partitioner(exprs.clone(), 4, Time::default()); + 
BatchPartitioner::new_hash_partitioner(exprs.clone(), 4, Time::default()) + .unwrap(); let mut ref_assignments = [usize::MAX; 10]; ref_partitioner .partition(batch.clone(), |partition, sub_batch| { diff --git a/ballista/core/src/execution_plans/unresolved_shuffle.rs b/ballista/core/src/execution_plans/unresolved_shuffle.rs index 3f3567b6a9..64bfd8fec9 100644 --- a/ballista/core/src/execution_plans/unresolved_shuffle.rs +++ b/ballista/core/src/execution_plans/unresolved_shuffle.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::sync::Arc; use datafusion::arrow::datatypes::SchemaRef; @@ -192,10 +191,6 @@ impl ExecutionPlan for UnresolvedShuffleExec { "UnresolvedShuffleExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { self.schema.clone() } diff --git a/ballista/core/src/planner.rs b/ballista/core/src/planner.rs index 54e3f605f0..5ceb66f17b 100644 --- a/ballista/core/src/planner.rs +++ b/ballista/core/src/planner.rs @@ -308,12 +308,10 @@ mod test { assert!(matches!(analyze_df.logical_plan(), LogicalPlan::Analyze(_))); let explain = plan - .as_any() .downcast_ref::>() .unwrap(); assert!( explain.children()[0] - .as_any() .downcast_ref::>() .is_some() ); diff --git a/ballista/core/src/registry.rs b/ballista/core/src/registry.rs index 1d45e7b58a..0d291ed00e 100644 --- a/ballista/core/src/registry.rs +++ b/ballista/core/src/registry.rs @@ -21,7 +21,7 @@ use datafusion::functions::all_default_functions; use datafusion::functions_aggregate::all_default_aggregate_functions; use datafusion::functions_window::all_default_window_functions; use datafusion::logical_expr::planner::ExprPlanner; -use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; +use datafusion::logical_expr::{AggregateUDF, HigherOrderUDF, ScalarUDF, WindowUDF}; use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -135,6 +135,19 @@ impl FunctionRegistry for 
BallistaFunctionRegistry { )) }) } + + fn higher_order_function_names(&self) -> HashSet { + HashSet::new() + } + + fn higher_order_function( + &self, + name: &str, + ) -> datafusion::common::Result> { + Err(DataFusionError::Internal(format!( + "There is no higher order function named \"{name}\" in the TaskContext" + ))) + } } impl From<&SessionState> for BallistaFunctionRegistry { diff --git a/ballista/core/src/serde/mod.rs b/ballista/core/src/serde/mod.rs index b908674337..6c2a6e8c4d 100644 --- a/ballista/core/src/serde/mod.rs +++ b/ballista/core/src/serde/mod.rs @@ -36,6 +36,7 @@ use datafusion_proto::physical_plan::from_proto::parse_protobuf_partitioning; use datafusion_proto::physical_plan::to_proto::serialize_partitioning; use datafusion_proto::physical_plan::{ DefaultPhysicalExtensionCodec, DefaultPhysicalProtoConverter, + PhysicalPlanDecodeContext, }; use datafusion_proto::protobuf::proto_error; use datafusion_proto::protobuf::{LogicalPlanNode, PhysicalPlanNode}; @@ -381,15 +382,15 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { ) })?; let converter = DefaultPhysicalProtoConverter {}; + let decode_ctx = PhysicalPlanDecodeContext::new(ctx, self.default_codec.as_ref()); match ballista_plan { PhysicalPlanType::ShuffleWriter(shuffle_writer) => { let input = inputs[0].clone(); let shuffle_output_partitioning = parse_protobuf_hash_partitioning( shuffle_writer.output_partitioning.as_ref(), - ctx, + &decode_ctx, input.schema().as_ref(), - self.default_codec.as_ref(), &converter, )?; @@ -406,9 +407,8 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { let shuffle_output_partitioning = parse_protobuf_hash_partitioning( sort_shuffle_writer.output_partitioning.as_ref(), - ctx, + &decode_ctx, input.schema().as_ref(), - self.default_codec.as_ref(), &converter, )?; @@ -460,9 +460,8 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { .collect::, DataFusionError>>()?; let partitioning = parse_protobuf_partitioning( 
shuffle_reader.partitioning.as_ref(), - ctx, + &decode_ctx, schema.as_ref(), - self.default_codec.as_ref(), &converter, )?; let partitioning = partitioning @@ -503,9 +502,8 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { Arc::new(convert_required!(unresolved_shuffle.schema)?); let partitioning = parse_protobuf_partitioning( unresolved_shuffle.partitioning.as_ref(), - ctx, + &decode_ctx, schema.as_ref(), - self.default_codec.as_ref(), &converter, )?; let partitioning = partitioning @@ -540,7 +538,7 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { node: Arc, buf: &mut Vec, ) -> Result<(), DataFusionError> { - if let Some(exec) = node.as_any().downcast_ref::() { + if let Some(exec) = node.downcast_ref::() { // note that we use shuffle_output_partitioning() rather than output_partitioning() // to get the true output partitioning let output_partitioning = match exec.shuffle_output_partitioning() { @@ -579,7 +577,7 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { })?; Ok(()) - } else if let Some(exec) = node.as_any().downcast_ref::() { + } else if let Some(exec) = node.downcast_ref::() { let output_partitioning = match exec.shuffle_output_partitioning() { Partitioning::Hash(exprs, partition_count) => { Some(datafusion_proto::protobuf::PhysicalHashRepartition { @@ -622,7 +620,7 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { })?; Ok(()) - } else if let Some(exec) = node.as_any().downcast_ref::() { + } else if let Some(exec) = node.downcast_ref::() { let stage_id = exec.stage_id as u32; let mut partition = vec![]; for location in &exec.partition { @@ -665,7 +663,7 @@ impl PhysicalExtensionCodec for BallistaPhysicalExtensionCodec { })?; Ok(()) - } else if let Some(exec) = node.as_any().downcast_ref::() { + } else if let Some(exec) = node.downcast_ref::() { let converter = DefaultPhysicalProtoConverter {}; let partitioning = serialize_partitioning( &exec.properties().partitioning, @@ -800,7 +798,6 
@@ mod test { let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected UnresolvedShuffleExec"); @@ -837,7 +834,6 @@ mod test { let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected ShuffleReaderExec"); @@ -880,7 +876,6 @@ mod test { let ctx = SessionContext::new().task_ctx(); let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected ShuffleReaderExec"); @@ -934,7 +929,6 @@ mod test { let ctx = SessionContext::new().task_ctx(); let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected ShuffleReaderExec"); @@ -980,7 +974,6 @@ mod test { let ctx = SessionContext::new().task_ctx(); let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected UnresolvedShuffleExec"); @@ -1031,7 +1024,6 @@ mod test { let ctx = SessionContext::new().task_ctx(); let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected ShuffleReaderExec"); @@ -1129,7 +1121,6 @@ mod test { let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected UnresolvedShuffleExec"); @@ -1156,7 +1147,6 @@ mod test { let decoded_plan = codec.try_decode(&buf, &[], &ctx).unwrap(); let decoded_exec = decoded_plan - .as_any() .downcast_ref::() .expect("Expected ShuffleReaderExec"); diff --git a/ballista/core/src/serde/scheduler/from_proto.rs b/ballista/core/src/serde/scheduler/from_proto.rs index b99cb274fb..4b2601cbb5 100644 --- a/ballista/core/src/serde/scheduler/from_proto.rs +++ b/ballista/core/src/serde/scheduler/from_proto.rs @@ 
-361,6 +361,7 @@ pub fn get_task_definition &dyn Any { - self - } - fn schema(&self) -> SchemaRef { self.plan.schema() } @@ -125,7 +121,7 @@ impl ExecutionPlan for CollectExec { })) } - fn partition_statistics(&self, partition: Option) -> Result { + fn partition_statistics(&self, partition: Option) -> Result> { self.plan.partition_statistics(partition) } } diff --git a/ballista/executor/src/execution_engine.rs b/ballista/executor/src/execution_engine.rs index c57d104227..a54971b5e7 100644 --- a/ballista/executor/src/execution_engine.rs +++ b/ballista/executor/src/execution_engine.rs @@ -114,7 +114,7 @@ impl ExecutionEngine for DefaultExecutionEngine { ) -> Result> { let plan = plan .transform(|p| { - if let Some(reader) = p.as_any().downcast_ref::() { + if let Some(reader) = p.downcast_ref::() { match &self.client_pool { Some(client_pool) => Ok(Transformed::yes(Arc::new( reader @@ -133,7 +133,7 @@ impl ExecutionEngine for DefaultExecutionEngine { // the query plan created by the scheduler always starts with a shuffle writer // (either ShuffleWriterExec or SortShuffleWriterExec) - if let Some(shuffle_writer) = plan.as_any().downcast_ref::() { + if let Some(shuffle_writer) = plan.downcast_ref::() { // recreate the shuffle writer with the correct working directory let exec = ShuffleWriterExec::try_new( job_id, @@ -146,7 +146,7 @@ impl ExecutionEngine for DefaultExecutionEngine { ShuffleWriterVariant::Hash(exec), ))) } else if let Some(sort_shuffle_writer) = - plan.as_any().downcast_ref::() + plan.downcast_ref::() { // recreate the sort shuffle writer with the correct working directory let exec = SortShuffleWriterExec::try_new( diff --git a/ballista/executor/src/execution_loop.rs b/ballista/executor/src/execution_loop.rs index 6cc8c1192f..1d22b3e5ae 100644 --- a/ballista/executor/src/execution_loop.rs +++ b/ballista/executor/src/execution_loop.rs @@ -40,6 +40,7 @@ use datafusion_proto::physical_plan::AsExecutionPlan; use futures::FutureExt; use log::{debug, error, 
info, trace, warn}; use std::any::Any; +use std::collections::HashMap; use std::convert::TryInto; use std::error::Error; use std::sync::mpsc::{Receiver, Sender, TryRecvError}; @@ -264,6 +265,7 @@ async fn run_received_task &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::new(Schema::empty()) } diff --git a/ballista/executor/src/executor_server.rs b/ballista/executor/src/executor_server.rs index c87c1322b5..ef257bba55 100644 --- a/ballista/executor/src/executor_server.rs +++ b/ballista/executor/src/executor_server.rs @@ -398,6 +398,7 @@ impl ExecutorServer Result>> { - let transformed = - if let Some(hash_join) = plan.as_any().downcast_ref::() { - match hash_join.partition_mode() { - PartitionMode::Auto => try_collect_left( - hash_join, - false, - collect_threshold_byte_size, - collect_threshold_num_rows, - )? + let transformed = if let Some(hash_join) = plan.downcast_ref::() { + match hash_join.partition_mode() { + PartitionMode::Auto => try_collect_left( + hash_join, + false, + collect_threshold_byte_size, + collect_threshold_num_rows, + )? + .map_or_else( + || partitioned_hash_join(hash_join).map(Some), + |v| Ok(Some(v)), + )?, + PartitionMode::CollectLeft => try_collect_left(hash_join, true, 0, 0)? .map_or_else( || partitioned_hash_join(hash_join).map(Some), |v| Ok(Some(v)), )?, - PartitionMode::CollectLeft => try_collect_left(hash_join, true, 0, 0)? - .map_or_else( - || partitioned_hash_join(hash_join).map(Some), - |v| Ok(Some(v)), - )?, - PartitionMode::Partitioned => { - let left = hash_join.left(); - let right = hash_join.right(); - if hash_join.join_type().supports_swap() - && should_swap_join_order(&**left, &**right)? - { - hash_join - .swap_inputs(PartitionMode::Partitioned) - .map(Some)? - } else { - None - } + PartitionMode::Partitioned => { + let left = hash_join.left(); + let right = hash_join.right(); + if hash_join.join_type().supports_swap() + && should_swap_join_order(&**left, &**right)? 
+ { + hash_join + .swap_inputs(PartitionMode::Partitioned) + .map(Some)? + } else { + None } } - } else if let Some(cross_join) = plan.as_any().downcast_ref::() { - let left = cross_join.left(); - let right = cross_join.right(); - if right.properties().output_partitioning().partition_count() > 1 { - None - } else if should_swap_join_order(&**left, &**right)? { - cross_join.swap_inputs().map(Some)? - } else { - None - } - } else if let Some(nl_join) = plan.as_any().downcast_ref::() { - let left = nl_join.left(); - let right = nl_join.right(); - // next few lines are different from original datafusion rule - // partition count of right side has to be equal one to be - // able to swap inputs - if right.properties().output_partitioning().partition_count() > 1 { - None - } else if nl_join.join_type().supports_swap() - && should_swap_join_order(&**left, &**right)? - { - nl_join.swap_inputs().map(Some)? - } else { - None - } + } + } else if let Some(cross_join) = plan.downcast_ref::() { + let left = cross_join.left(); + let right = cross_join.right(); + if right.properties().output_partitioning().partition_count() > 1 { + None + } else if should_swap_join_order(&**left, &**right)? { + cross_join.swap_inputs().map(Some)? } else { None - }; + } + } else if let Some(nl_join) = plan.downcast_ref::() { + let left = nl_join.left(); + let right = nl_join.right(); + // next few lines are different from original datafusion rule + // partition count of right side has to be equal one to be + // able to swap inputs + if right.properties().output_partitioning().partition_count() > 1 { + None + } else if nl_join.join_type().supports_swap() + && should_swap_join_order(&**left, &**right)? + { + nl_join.swap_inputs().map(Some)? 
+ } else { + None + } + } else { + None + }; Ok(if let Some(transformed) = transformed { Transformed::yes(transformed) @@ -416,7 +415,7 @@ fn hash_join_convert_symmetric_subrule( config_options: &ConfigOptions, ) -> Result> { // Check if the current plan node is a HashJoinExec. - if let Some(hash_join) = input.as_any().downcast_ref::() { + if let Some(hash_join) = input.downcast_ref::() { let left_unbounded = hash_join.left.boundedness().is_unbounded(); let left_incremental = matches!( hash_join.left.pipeline_behavior(), @@ -556,7 +555,7 @@ pub fn hash_join_swap_subrule( mut input: Arc, _config_options: &ConfigOptions, ) -> Result> { - if let Some(hash_join) = input.as_any().downcast_ref::() + if let Some(hash_join) = input.downcast_ref::() && hash_join.left.boundedness().is_unbounded() && !hash_join.right.boundedness().is_unbounded() && matches!( @@ -830,7 +829,7 @@ mod test { // `swap_inputs` for Inner wraps the join in a ProjectionExec to // restore the output column order. Walk the tree to find the join. 
fn find_hash_join(plan: &Arc) -> Option<&HashJoinExec> { - if let Some(hj) = plan.as_any().downcast_ref::() { + if let Some(hj) = plan.downcast_ref::() { return Some(hj); } for child in plan.children() { diff --git a/ballista/scheduler/src/planner.rs b/ballista/scheduler/src/planner.rs index 04405d9fd2..182c90c60f 100644 --- a/ballista/scheduler/src/planner.rs +++ b/ballista/scheduler/src/planner.rs @@ -38,6 +38,7 @@ use datafusion::physical_optimizer::enforce_sorting::EnforceSorting; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::physical_plan::joins::{HashJoinExec, PartitionMode}; use datafusion::physical_plan::repartition::RepartitionExec; +use datafusion::physical_plan::scalar_subquery::ScalarSubqueryExec; use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion::physical_plan::{ ExecutionPlan, Partitioning, with_new_children_if_necessary, @@ -132,6 +133,23 @@ impl DefaultDistributedPlanner { // Apply broadcast-join promotion before recursing. let execution_plan = Self::maybe_promote_to_broadcast(execution_plan, config)?; + // ScalarSubqueryExec must travel with its embedded ScalarSubqueryExpr + // nodes in the same serialized plan, otherwise the executor cannot + // deserialise them: the proto codec only installs the + // ScalarSubqueryResults context while it decodes the input under a + // surrounding ScalarSubqueryExec, and a bare ScalarSubqueryExpr + // returns "ScalarSubqueryExpr can only be deserialized as part of a + // surrounding ScalarSubqueryExec". Treat the whole subtree (main + // input + subqueries) as opaque so it stays inside one Ballista + // stage; any internal RepartitionExec / SortPreservingMergeExec runs + // in-process under the wrapping ScalarSubqueryExec instead of being + // hoisted into separate distributed stages. TPC-H Q11 hits this and + // would otherwise hang forever as the executor rejects the bad plan + // and the scheduler keeps retrying. 
+ if execution_plan.is::() { + return Ok((execution_plan, vec![])); + } + // recurse down and replace children if execution_plan.children().is_empty() { return Ok((execution_plan, vec![])); @@ -139,15 +157,13 @@ impl DefaultDistributedPlanner { // Broadcast-join lowering: HashJoinExec(CollectLeft) gets its own // controlled recursion so the build side is written as a broadcast stage. - if let Some(hash_join) = execution_plan.as_any().downcast_ref::() + if let Some(hash_join) = execution_plan.downcast_ref::() && *hash_join.partition_mode() == PartitionMode::CollectLeft { // Build subtree: peel CoalescePartitionsExec if present, then // recurse to lower its internal stages. let mut build = hash_join.left().clone(); - if let Some(coalesce) = - build.as_any().downcast_ref::() - { + if let Some(coalesce) = build.downcast_ref::() { build = coalesce.children()[0].clone(); } let (build, mut stages) = @@ -191,10 +207,7 @@ impl DefaultDistributedPlanner { stages.append(&mut child_stages); } - if let Some(_coalesce) = execution_plan - .as_any() - .downcast_ref::() - { + if let Some(_coalesce) = execution_plan.downcast_ref::() { let input = children[0].clone(); let input = self.optimizer_enforce_sorting.optimize(input, config)?; let shuffle_writer = create_shuffle_writer_with_config( @@ -211,10 +224,9 @@ impl DefaultDistributedPlanner { with_new_children_if_necessary(execution_plan, vec![unresolved_shuffle])?, stages, )) - } else if let Some(_sort_preserving_merge) = execution_plan - .as_any() - .downcast_ref::( - ) { + } else if let Some(_sort_preserving_merge) = + execution_plan.downcast_ref::() + { let shuffle_writer = create_shuffle_writer_with_config( job_id, self.next_stage_id(), @@ -228,9 +240,7 @@ impl DefaultDistributedPlanner { with_new_children_if_necessary(execution_plan, vec![unresolved_shuffle])?, stages, )) - } else if let Some(repart) = - execution_plan.as_any().downcast_ref::() - { + } else if let Some(repart) = execution_plan.downcast_ref::() { match 
repart.properties().output_partitioning() { Partitioning::Hash(_, _) => { let input = children[0].clone(); @@ -288,7 +298,7 @@ impl DefaultDistributedPlanner { debug!("broadcast check: threshold is 0, broadcast disabled"); return Ok(plan); } - let Some(hash_join) = plan.as_any().downcast_ref::() else { + let Some(hash_join) = plan.downcast_ref::() else { return Ok(plan); }; debug!( @@ -391,7 +401,6 @@ impl DefaultDistributedPlanner { }; let promoted_join = promoted - .as_any() .downcast_ref::() .expect("promoted plan must still be a HashJoinExec"); let new_left: Arc = if promoted_join @@ -429,9 +438,7 @@ fn create_unresolved_shuffle( pub fn find_unresolved_shuffles( plan: &Arc, ) -> Result> { - if let Some(unresolved_shuffle) = - plan.as_any().downcast_ref::() - { + if let Some(unresolved_shuffle) = plan.downcast_ref::() { Ok(vec![unresolved_shuffle.clone()]) } else { Ok(plan @@ -454,9 +461,7 @@ pub fn remove_unresolved_shuffles( ) -> Result> { let mut new_children: Vec> = vec![]; for child in stage.children() { - if let Some(unresolved_shuffle) = - child.as_any().downcast_ref::() - { + if let Some(unresolved_shuffle) = child.downcast_ref::() { let p = partition_locations .get(&unresolved_shuffle.stage_id) .ok_or_else(|| { @@ -517,7 +522,7 @@ pub fn rollback_resolved_shuffles( ) -> Result> { let mut new_children: Vec> = vec![]; for child in stage.children() { - if let Some(shuffle_reader) = child.as_any().downcast_ref::() { + if let Some(shuffle_reader) = child.downcast_ref::() { let stage_id = shuffle_reader.stage_id; let unresolved = if shuffle_reader.broadcast { Arc::new(UnresolvedShuffleExec::new_broadcast( @@ -614,7 +619,7 @@ mod test { macro_rules! downcast_exec { ($exec: expr, $ty: ty) => { - $exec.as_any().downcast_ref::<$ty>().expect(&format!( + $exec.downcast_ref::<$ty>().expect(&format!( "Downcast to {} failed. 
Got {:?}", stringify!($ty), $exec @@ -915,11 +920,10 @@ order by let mut walker: Vec> = vec![stage.clone() as Arc]; while let Some(node) = walker.pop() { - if let Some(hj) = node.as_any().downcast_ref::() { + if let Some(hj) = node.downcast_ref::() { assert_eq!(*hj.partition_mode(), PartitionMode::CollectLeft); let left = hj.children()[0].clone(); let unresolved = left - .as_any() .downcast_ref::() .expect("left input should be UnresolvedShuffleExec"); assert!(unresolved.broadcast, "left input should be broadcast"); @@ -959,15 +963,13 @@ order by let mut walker: Vec> = vec![stage.clone() as Arc]; while let Some(node) = walker.pop() { - if let Some(unresolved) = - node.as_any().downcast_ref::() - { + if let Some(unresolved) = node.downcast_ref::() { assert!( !unresolved.broadcast, "no broadcast reader expected with threshold=0" ); } - if let Some(hj) = node.as_any().downcast_ref::() { + if let Some(hj) = node.downcast_ref::() { assert_ne!( *hj.partition_mode(), PartitionMode::CollectLeft, @@ -1009,8 +1011,7 @@ order by let mut walker: Vec> = vec![stage.clone() as Arc]; while let Some(node) = walker.pop() { - if let Some(unresolved) = - node.as_any().downcast_ref::() + if let Some(unresolved) = node.downcast_ref::() && unresolved.broadcast { max_upstream = max_upstream.max(unresolved.upstream_partition_count); @@ -1080,7 +1081,6 @@ order by let resolved_child = resolved.children()[0].clone(); let reader = resolved_child - .as_any() .downcast_ref::() .expect("expected resolved ShuffleReaderExec"); assert!(reader.broadcast); @@ -1110,7 +1110,6 @@ order by let rolled_back = crate::planner::rollback_resolved_shuffles(parent)?; let child = rolled_back.children()[0].clone(); let unresolved = child - .as_any() .downcast_ref::() .expect("expected rolled-back UnresolvedShuffleExec"); assert!(unresolved.broadcast); @@ -1180,13 +1179,13 @@ order by assert_eq!(3, stages.len()); // stage0 - let stage0 = stages[0].clone(); + let stage0 = stages[0].as_ref() as &dyn ExecutionPlan; 
let shuffle_write = downcast_exec!(stage0, SortShuffleWriterExec); let partitioning = shuffle_write.shuffle_output_partitioning(); assert_eq!(2, partitioning.partition_count()); let partition_col = match partitioning { Partitioning::Hash(exprs, 2) => match exprs.as_slice() { - [col] => col.as_any().downcast_ref::(), + [col] => col.downcast_ref::(), _ => None, }, _ => None, @@ -1200,7 +1199,7 @@ order by let window = downcast_exec!(filter.children()[0], BoundedWindowAggExec); let partition_by = window.partition_keys(); let partition_by = match partition_by[..] { - [ref col] => col.as_any().downcast_ref::(), + [ref col] => col.downcast_ref::(), _ => None, }; assert_eq!(Some(&Column::new("l_shipmode", 1)), partition_by); @@ -1217,7 +1216,7 @@ order by ); assert_eq!( Some(&Column::new("l_shipmode", 1)), - expr1.expr.as_any().downcast_ref() + expr1.expr.downcast_ref() ); assert_eq!( SortOptions { @@ -1228,7 +1227,7 @@ order by ); assert_eq!( Some(&Column::new("l_shipdate", 0)), - expr2.expr.as_any().downcast_ref() + expr2.expr.downcast_ref() ); } _ => panic!("invalid sort {sort:?}"), diff --git a/ballista/scheduler/src/state/aqe/adapter.rs b/ballista/scheduler/src/state/aqe/adapter.rs index dae2a2944e..02edef0079 100644 --- a/ballista/scheduler/src/state/aqe/adapter.rs +++ b/ballista/scheduler/src/state/aqe/adapter.rs @@ -44,7 +44,7 @@ impl BallistaAdapter { &mut self, plan: Arc, ) -> datafusion::error::Result>> { - if let Some(exchange) = plan.as_any().downcast_ref::() { + if let Some(exchange) = plan.downcast_ref::() { let schema = exchange.schema().clone(); let partitions = exchange.shuffle_partitions().ok_or_else(|| { DataFusionError::Execution( @@ -111,7 +111,7 @@ impl BallistaAdapter { job_id: &str, config: &ConfigOptions, ) -> datafusion::error::Result { - if let Some(root) = plan.as_any().downcast_ref::() { + if let Some(root) = plan.downcast_ref::() { let mut adapter = BallistaAdapter::default(); let plan = root .input() @@ -138,8 +138,7 @@ impl BallistaAdapter 
{ plan: writer, inputs: adapter.inputs, }) - } else if let Some(root) = plan.as_any().downcast_ref::() - { + } else if let Some(root) = plan.downcast_ref::() { let mut adapter = BallistaAdapter::default(); let plan = root .input() diff --git a/ballista/scheduler/src/state/aqe/execution_plan/adaptive.rs b/ballista/scheduler/src/state/aqe/execution_plan/adaptive.rs index 1a1c3962c4..9bc7451ae7 100644 --- a/ballista/scheduler/src/state/aqe/execution_plan/adaptive.rs +++ b/ballista/scheduler/src/state/aqe/execution_plan/adaptive.rs @@ -22,7 +22,6 @@ use datafusion::{ physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}, }; use parking_lot::Mutex; -use std::any::Any; use std::fmt::Formatter; use std::sync::atomic::AtomicBool; use std::sync::{Arc, atomic::AtomicI64}; @@ -126,10 +125,6 @@ impl ExecutionPlan for AdaptiveDatafusionExec { "AdaptiveDatafusionExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn properties(&self) -> &Arc { self.input.properties() } diff --git a/ballista/scheduler/src/state/aqe/execution_plan/exchange.rs b/ballista/scheduler/src/state/aqe/execution_plan/exchange.rs index 7577878cd9..41c25b25f4 100644 --- a/ballista/scheduler/src/state/aqe/execution_plan/exchange.rs +++ b/ballista/scheduler/src/state/aqe/execution_plan/exchange.rs @@ -29,7 +29,6 @@ use datafusion::{ }; use log::trace; use parking_lot::Mutex; -use std::any::Any; use std::ops::Deref; use std::sync::{Arc, atomic::AtomicI64}; @@ -267,10 +266,6 @@ impl ExecutionPlan for ExchangeExec { "ExchangeExec" } - fn as_any(&self) -> &dyn Any { - self - } - fn properties(&self) -> &Arc { &self.properties } @@ -321,7 +316,7 @@ impl ExecutionPlan for ExchangeExec { )) } - fn partition_statistics(&self, partition: Option) -> Result { + fn partition_statistics(&self, partition: Option) -> Result> { let schema = self.input.schema(); match self.shuffle_partitions.lock().deref() { // @@ -346,7 +341,7 @@ impl ExecutionPlan for ExchangeExec { "shuffle reader at stage: {:?} and 
partition {} returned statistics: {:?}", self.stage_id, idx, stat_for_partition ); - stat_for_partition + stat_for_partition.map(Arc::new) } else { let stats_for_partitions = stats_for_partitions( schema.fields().len(), @@ -359,10 +354,10 @@ impl ExecutionPlan for ExchangeExec { "shuffle reader at stage: {:?} returned statistics for all partitions: {:?}", self.stage_id, stats_for_partitions ); - Ok(stats_for_partitions) + Ok(Arc::new(stats_for_partitions)) } } - None => Ok(Statistics::new_unknown(&schema)), + None => Ok(Arc::new(Statistics::new_unknown(&schema))), } } } diff --git a/ballista/scheduler/src/state/aqe/optimizer_rule/coalesce_partitions.rs b/ballista/scheduler/src/state/aqe/optimizer_rule/coalesce_partitions.rs index 6efd6b332a..b4b25346bb 100644 --- a/ballista/scheduler/src/state/aqe/optimizer_rule/coalesce_partitions.rs +++ b/ballista/scheduler/src/state/aqe/optimizer_rule/coalesce_partitions.rs @@ -155,7 +155,7 @@ impl PhysicalOptimizerRule for CoalescePartitionsRule { ); // Get the subtree below the root. Two root kinds, same outcome. - let input = if let Some(ex) = plan.as_any().downcast_ref::() { + let input = if let Some(ex) = plan.downcast_ref::() { debug!( "[coalesce-rule] root=ExchangeExec plan_id={} stage_id={:?} stage_resolved={}", ex.plan_id, @@ -163,7 +163,7 @@ impl PhysicalOptimizerRule for CoalescePartitionsRule { ex.shuffle_partitions().is_some(), ); ex.input().clone() - } else if let Some(adp) = plan.as_any().downcast_ref::() { + } else if let Some(adp) = plan.downcast_ref::() { debug!( "[coalesce-rule] root=AdaptiveDatafusionExec stage_id={:?}", adp.stage_id(), @@ -182,7 +182,7 @@ impl PhysicalOptimizerRule for CoalescePartitionsRule { // *this* stage's group, they belong to whatever stage wrote them. 
let mut leaves: Vec> = Vec::new(); input.apply(|node| { - if node.as_any().is::() { + if node.is::() { leaves.push(node.clone()); Ok(TreeNodeRecursion::Jump) } else { @@ -192,8 +192,7 @@ impl PhysicalOptimizerRule for CoalescePartitionsRule { // Helper: downcast each Arc back to &ExchangeExec. fn as_exchange(arc: &Arc) -> &ExchangeExec { - arc.as_any() - .downcast_ref::() + arc.downcast_ref::() .expect("filtered to ExchangeExec above") } diff --git a/ballista/scheduler/src/state/aqe/optimizer_rule/distributed_exchange.rs b/ballista/scheduler/src/state/aqe/optimizer_rule/distributed_exchange.rs index 739cf460d8..f7e6a6812a 100644 --- a/ballista/scheduler/src/state/aqe/optimizer_rule/distributed_exchange.rs +++ b/ballista/scheduler/src/state/aqe/optimizer_rule/distributed_exchange.rs @@ -41,12 +41,9 @@ impl DistributedExchangeRule { &self, execution_plan: Arc, ) -> datafusion::error::Result>> { - if let Some(coalesce) = execution_plan - .as_any() - .downcast_ref::() - { + if let Some(coalesce) = execution_plan.downcast_ref::() { let input = coalesce.input(); - if input.as_any().downcast_ref::().is_none() + if input.downcast_ref::().is_none() && !matches!(nearest_exchange_status(input), ExchangeStatus::Unresolved) { let exchange_exec = ExchangeExec::new( @@ -59,12 +56,11 @@ impl DistributedExchangeRule { execution_plan.with_new_children(vec![Arc::new(exchange_exec)])?, )); } - } else if let Some(sort_preserving_merge) = execution_plan - .as_any() - .downcast_ref::( - ) { + } else if let Some(sort_preserving_merge) = + execution_plan.downcast_ref::() + { let input = sort_preserving_merge.input(); - if input.as_any().downcast_ref::().is_none() + if input.downcast_ref::().is_none() && !matches!(nearest_exchange_status(input), ExchangeStatus::Unresolved) { let exchange_exec = ExchangeExec::new( @@ -77,8 +73,7 @@ impl DistributedExchangeRule { execution_plan.with_new_children(vec![Arc::new(exchange_exec)])?, )); } - } else if let Some(repartition) = - 
execution_plan.as_any().downcast_ref::() + } else if let Some(repartition) = execution_plan.downcast_ref::() && let execution_plan::Partitioning::Hash(_, _) = repartition.partitioning() { let input = repartition.input(); @@ -108,7 +103,6 @@ impl PhysicalOptimizerRule for DistributedExchangeRule { if result .data - .as_any() .downcast_ref::() .is_some() { @@ -139,7 +133,7 @@ impl PhysicalOptimizerRule for DistributedExchangeRule { /// (short-circuits), `Resolved` if every branch that has an exchange has a resolved /// one, and `None` if no exchange is found anywhere. fn nearest_exchange_status(plan: &Arc) -> ExchangeStatus { - if let Some(exchange) = plan.as_any().downcast_ref::() { + if let Some(exchange) = plan.downcast_ref::() { if exchange.shuffle_created() && !exchange.inactive_stage { ExchangeStatus::Resolved } else { @@ -237,25 +231,18 @@ mod tests { let result = rule.optimize(coalesce, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); let coalesce_out = adaptive .input() - .as_any() .downcast_ref::() .unwrap(); let child = coalesce_out.children()[0]; assert!( - child.as_any().downcast_ref::().is_some(), + child.downcast_ref::().is_some(), "direct child should remain ExchangeExec" ); assert!( - child.children()[0] - .as_any() - .downcast_ref::() - .is_none(), + child.children()[0].downcast_ref::().is_none(), "ExchangeExec should not wrap another ExchangeExec" ); } @@ -273,18 +260,13 @@ mod tests { let result = rule.optimize(outer, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); let outer_coalesce = adaptive .input() - .as_any() .downcast_ref::() .unwrap(); assert!( outer_coalesce.children()[0] - .as_any() .downcast_ref::() .is_none(), "should not inject ExchangeExec when unresolved exchange is in subtree" @@ -303,18 +285,13 @@ mod tests { let result = rule.optimize(outer, 
&config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); let outer_coalesce = adaptive .input() - .as_any() .downcast_ref::() .unwrap(); assert!( outer_coalesce.children()[0] - .as_any() .downcast_ref::() .is_some(), "should inject ExchangeExec when subtree only has resolved exchanges" @@ -330,20 +307,13 @@ mod tests { let result = rule.optimize(input, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); let spm = adaptive .input() - .as_any() .downcast_ref::() .expect("child should be SortPreservingMergeExec"); assert!( - spm.children()[0] - .as_any() - .downcast_ref::() - .is_some(), + spm.children()[0].downcast_ref::().is_some(), "SortPreservingMergeExec should have ExchangeExec injected as its child" ); } @@ -355,23 +325,14 @@ mod tests { let result = rule.optimize(input, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); let spm = adaptive .input() - .as_any() .downcast_ref::() .unwrap(); let child = spm.children()[0]; - assert!(child.as_any().downcast_ref::().is_some()); - assert!( - child.children()[0] - .as_any() - .downcast_ref::() - .is_none() - ); + assert!(child.downcast_ref::().is_some()); + assert!(child.children()[0].downcast_ref::().is_none()); } #[test] @@ -384,20 +345,13 @@ mod tests { let result = rule.optimize(input, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); let spm = adaptive .input() - .as_any() .downcast_ref::() .unwrap(); assert!( - spm.children()[0] - .as_any() - .downcast_ref::() - .is_none(), + spm.children()[0].downcast_ref::().is_none(), "should not inject ExchangeExec when unresolved exchange is in subtree" ); } @@ -415,13 +369,9 @@ mod tests { let result = 
rule.optimize(repartition, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); let exchange = adaptive .input() - .as_any() .downcast_ref::() .expect("Hash RepartitionExec should be replaced with ExchangeExec"); assert!( @@ -440,16 +390,9 @@ mod tests { let result = rule.optimize(repartition, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); assert!( - adaptive - .input() - .as_any() - .downcast_ref::() - .is_some(), + adaptive.input().downcast_ref::().is_some(), "RoundRobin repartition should be kept as-is (not replaced)" ); } @@ -468,16 +411,9 @@ mod tests { let result = rule.optimize(repartition, &config()).unwrap(); - let adaptive = result - .as_any() - .downcast_ref::() - .unwrap(); + let adaptive = result.downcast_ref::().unwrap(); assert!( - adaptive - .input() - .as_any() - .downcast_ref::() - .is_some(), + adaptive.input().downcast_ref::().is_some(), "Hash repartition should be kept when input has an unresolved exchange" ); } @@ -489,10 +425,7 @@ mod tests { let rule = DistributedExchangeRule::default(); let result = rule.optimize(leaf_exec(), &config()).unwrap(); assert!( - result - .as_any() - .downcast_ref::() - .is_some(), + result.downcast_ref::().is_some(), "optimize should always wrap the result in AdaptiveDatafusionExec" ); } @@ -506,13 +439,11 @@ mod tests { let result = rule.optimize(adaptive, &config()).unwrap(); let outer = result - .as_any() .downcast_ref::() .expect("result should be AdaptiveDatafusionExec"); assert!( outer .input() - .as_any() .downcast_ref::() .is_none(), "existing AdaptiveDatafusionExec should not be wrapped in another one" @@ -605,15 +536,12 @@ mod tests { ) .unwrap(); let exchange1 = result1 - .as_any() .downcast_ref::() .unwrap() .input() - .as_any() .downcast_ref::() .unwrap() .children()[0] - .as_any() .downcast_ref::() .unwrap(); 
assert_eq!( @@ -628,15 +556,12 @@ mod tests { ) .unwrap(); let exchange2 = result2 - .as_any() .downcast_ref::() .unwrap() .input() - .as_any() .downcast_ref::() .unwrap() .children()[0] - .as_any() .downcast_ref::() .unwrap(); assert_eq!( diff --git a/ballista/scheduler/src/state/aqe/optimizer_rule/propagate_empty.rs b/ballista/scheduler/src/state/aqe/optimizer_rule/propagate_empty.rs index e888976673..de5b9e5c50 100644 --- a/ballista/scheduler/src/state/aqe/optimizer_rule/propagate_empty.rs +++ b/ballista/scheduler/src/state/aqe/optimizer_rule/propagate_empty.rs @@ -34,7 +34,7 @@ use std::sync::Arc; macro_rules! is_empty_exec { ($e:expr) => { - $e.as_any().downcast_ref::().is_some() + $e.downcast_ref::().is_some() }; } @@ -57,41 +57,41 @@ impl PropagateEmptyExecRule { fn transform( plan: Arc, ) -> datafusion::error::Result>> { - if let Some(filter) = plan.as_any().downcast_ref::() + if let Some(filter) = plan.downcast_ref::() && is_empty_exec!(filter.input()) { Ok(Transformed::yes(filter.input().clone())) - } else if let Some(coalesce) = plan.as_any().downcast_ref::() + } else if let Some(coalesce) = plan.downcast_ref::() && is_empty_exec!(coalesce.input()) { Ok(Transformed::yes(coalesce.input().clone())) - } else if let Some(exchange) = plan.as_any().downcast_ref::() + } else if let Some(exchange) = plan.downcast_ref::() && is_empty_exec!(exchange.input()) { Ok(Transformed::yes(exchange.input().clone())) - } else if let Some(projection) = plan.as_any().downcast_ref::() + } else if let Some(projection) = plan.downcast_ref::() && is_empty_exec!(projection.input()) { empty_exec!(projection) - } else if let Some(limit) = plan.as_any().downcast_ref::() + } else if let Some(limit) = plan.downcast_ref::() && is_empty_exec!(limit.input()) { Ok(Transformed::yes(limit.input().clone())) - } else if let Some(limit) = plan.as_any().downcast_ref::() + } else if let Some(limit) = plan.downcast_ref::() && is_empty_exec!(limit.input()) { 
Ok(Transformed::yes(limit.input().clone())) - } else if let Some(aggregation) = plan.as_any().downcast_ref::() + } else if let Some(aggregation) = plan.downcast_ref::() && is_empty_exec!(aggregation.input()) { empty_exec!(aggregation) - } else if let Some(hash_join) = plan.as_any().downcast_ref::() + } else if let Some(hash_join) = plan.downcast_ref::() // TODO: - we need other joins, this one is used for testing cancellation && hash_join.join_type == Inner && (is_empty_exec!(hash_join.left) || is_empty_exec!(hash_join.right)) { empty_exec!(hash_join) - } else if let Some(exchange) = plan.as_any().downcast_ref::() { + } else if let Some(exchange) = plan.downcast_ref::() { let stats = exchange.partition_statistics(None)?; match stats.num_rows { Precision::Exact(0) => empty_exec!(plan), diff --git a/ballista/scheduler/src/state/aqe/planner.rs b/ballista/scheduler/src/state/aqe/planner.rs index 03361dc3c5..0107a0cc6b 100644 --- a/ballista/scheduler/src/state/aqe/planner.rs +++ b/ballista/scheduler/src/state/aqe/planner.rs @@ -166,8 +166,8 @@ impl AdaptivePlanner { .as_ref() .map(|stage| { ( - stage.as_any().downcast_ref::(), - stage.as_any().downcast_ref::(), + stage.downcast_ref::(), + stage.downcast_ref::(), ) }) { Some((Some(stage), None)) => { @@ -338,7 +338,7 @@ impl AdaptivePlanner { if !runnable_stages.is_empty() { let mut runnable = Vec::new(); for exec in runnable_stages.into_iter() { - match exec.as_any().downcast_ref::() { + match exec.downcast_ref::() { Some(exchange) if exchange.inactive_stage => continue, Some(exchange) if exchange.stage_id().is_none() => { exchange.set_stage_id(self.stage_id_generator); @@ -361,9 +361,7 @@ impl AdaptivePlanner { } Ok(Some(runnable)) - } else if let Some(root) = - self.plan.as_any().downcast_ref::() - { + } else if let Some(root) = self.plan.downcast_ref::() { // shuffle writer has finished // there is no more runnable stages if root.shuffle_created() { @@ -405,8 +403,7 @@ impl AdaptivePlanner { runnable_stages 
.into_iter() .map(|exec| { - exec.as_any() - .downcast_ref::() + exec.downcast_ref::() .ok_or_else(|| { datafusion::common::DataFusionError::Plan( "ExchangeExec expected".into(), @@ -473,7 +470,7 @@ impl AdaptivePlanner { node: &Arc, runnable_stages: &mut Vec>, ) -> bool { - if let Some(exchange) = node.as_any().downcast_ref::() + if let Some(exchange) = node.downcast_ref::() && exchange.shuffle_created() { // we found exchange which has partitions resolved or this stage is not @@ -481,7 +478,7 @@ impl AdaptivePlanner { // all runnable children has been run false - } else if let Some(exchange) = node.as_any().downcast_ref::() + } else if let Some(exchange) = node.downcast_ref::() && !exchange.shuffle_created() { // we found exchange which has not been resolved (run) diff --git a/ballista/scheduler/src/state/aqe/test/alter_stages.rs b/ballista/scheduler/src/state/aqe/test/alter_stages.rs index 419a780696..7661eacc0e 100644 --- a/ballista/scheduler/src/state/aqe/test/alter_stages.rs +++ b/ballista/scheduler/src/state/aqe/test/alter_stages.rs @@ -37,7 +37,6 @@ use datafusion::physical_plan::test::exec::StatisticsExec; use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, PhysicalExpr, PlanProperties, }; -use std::any::Any; use std::collections::HashSet; use std::fmt::Formatter; use std::sync::Arc; @@ -210,17 +209,17 @@ async fn should_support_join_re_ordering() -> datafusion::error::Result<()> { planner.finalise_stage_internal(1, small_statistics_exchange())?; // join ordering changes as build side is bigger than probe side - // after exchange statistic updated. + // after exchange statistic updated. DataFusion 54 fuses the trailing + // ProjectionExec into the HashJoinExec via the `projection` field. 
assert_plan!(planner.current_plan(), @ r" AdaptiveDatafusionExec: is_final=false, plan_id=2, stage_id=pending, stage_resolved=false - ProjectionExec: expr=[big_col@1 as big_col, big_col@0 as big_col] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(big_col@0, big_col@0)] - ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=1, stage_id=1, stage_resolved=true - CooperativeExec - MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] - ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=0, stage_id=0, stage_resolved=true - CooperativeExec - MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(big_col@0, big_col@0)], projection=[big_col@1, big_col@0] + ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=1, stage_id=1, stage_resolved=true + CooperativeExec + MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] + ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=0, stage_id=0, stage_resolved=true + CooperativeExec + MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] "); let stages = planner.runnable_stages()?.unwrap(); @@ -228,28 +227,26 @@ async fn should_support_join_re_ordering() -> datafusion::error::Result<()> { assert_plan!(planner.current_plan(), @ r" AdaptiveDatafusionExec: is_final=true, plan_id=2, stage_id=2, stage_resolved=false - ProjectionExec: expr=[big_col@1 as big_col, big_col@0 as big_col] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(big_col@0, big_col@0)] - ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=1, stage_id=1, stage_resolved=true - CooperativeExec - MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] - ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=0, stage_id=0, 
stage_resolved=true - CooperativeExec - MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(big_col@0, big_col@0)], projection=[big_col@1, big_col@0] + ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=1, stage_id=1, stage_resolved=true + CooperativeExec + MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] + ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=0, stage_id=0, stage_resolved=true + CooperativeExec + MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] "); planner.finalise_stage_internal(2, small_statistics_exchange())?; assert_plan!(planner.current_plan(), @ r" AdaptiveDatafusionExec: is_final=true, plan_id=2, stage_id=2, stage_resolved=true - ProjectionExec: expr=[big_col@1 as big_col, big_col@0 as big_col] - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(big_col@0, big_col@0)] - ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=1, stage_id=1, stage_resolved=true - CooperativeExec - MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] - ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=0, stage_id=0, stage_resolved=true - CooperativeExec - MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(big_col@0, big_col@0)], projection=[big_col@1, big_col@0] + ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=1, stage_id=1, stage_resolved=true + CooperativeExec + MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), Bytes=Exact(2097152), [(Col[0]:)]] + ExchangeExec: partitioning=Hash([big_col@0], 2), plan_id=0, stage_id=0, stage_resolved=true + CooperativeExec + MockPartitionedScan: num_partitions=2, statistics=[Rows=Exact(262144), 
Bytes=Exact(2097152), [(Col[0]:)]] "); Ok(()) @@ -649,10 +646,6 @@ impl ExecutionPlan for MockPartitionedScan { "MockPartitionedScan" } - fn as_any(&self) -> &dyn Any { - self - } - fn properties(&self) -> &Arc { &self.plan_properties } @@ -679,7 +672,7 @@ impl ExecutionPlan for MockPartitionedScan { fn partition_statistics( &self, _partition: Option, - ) -> datafusion::common::Result { - Ok(self.statistics.clone()) + ) -> datafusion::common::Result> { + Ok(Arc::new(self.statistics.clone())) } } diff --git a/ballista/scheduler/src/state/aqe/test/plan_to_stages.rs b/ballista/scheduler/src/state/aqe/test/plan_to_stages.rs index 9e1313d3cf..a9dc152738 100644 --- a/ballista/scheduler/src/state/aqe/test/plan_to_stages.rs +++ b/ballista/scheduler/src/state/aqe/test/plan_to_stages.rs @@ -25,8 +25,8 @@ use crate::state::aqe::test::{ use ballista_core::execution_plans::SortShuffleWriterExec; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::common::ColumnStatistics; -use datafusion::physical_plan::Statistics; use datafusion::physical_plan::test::exec::StatisticsExec; +use datafusion::physical_plan::{ExecutionPlan, Statistics}; use std::collections::HashSet; use std::sync::Arc; @@ -451,7 +451,7 @@ async fn should_use_sort_shuffle_when_enabled() -> datafusion::error::Result<()> let plan = stages.first().unwrap().plan.as_ref(); assert!( - plan.as_any() + (plan as &dyn ExecutionPlan) .downcast_ref::() .is_some(), "expected SortShuffleWriterExec when sort shuffle is enabled, got plan: {plan:?}" @@ -478,7 +478,7 @@ async fn should_use_sort_shuffle_by_default() -> datafusion::error::Result<()> { let plan = stages.first().unwrap().plan.as_ref(); assert!( - plan.as_any() + (plan as &dyn ExecutionPlan) .downcast_ref::() .is_some(), "expected SortShuffleWriterExec by default, got plan: {plan:?}" diff --git a/ballista/scheduler/src/state/distributed_explain.rs b/ballista/scheduler/src/state/distributed_explain.rs index fc31580329..b42a930889 100644 --- 
a/ballista/scheduler/src/state/distributed_explain.rs +++ b/ballista/scheduler/src/state/distributed_explain.rs @@ -194,7 +194,7 @@ pub(crate) async fn handle_explain_plan( plan: Arc, ) -> ballista_core::error::Result> { if let LogicalPlan::Explain(explain_plan) = &logical_plan - && let Some(explain) = plan.as_any().downcast_ref::() + && let Some(explain) = plan.downcast_ref::() { let inner_plan = explain_plan.plan.clone(); let plans = explain.stringified_plans(); diff --git a/ballista/scheduler/src/state/execution_graph.rs b/ballista/scheduler/src/state/execution_graph.rs index 8f0c30cfa5..86222476ac 100644 --- a/ballista/scheduler/src/state/execution_graph.rs +++ b/ballista/scheduler/src/state/execution_graph.rs @@ -1625,14 +1625,12 @@ impl ExecutionPlanVisitor for ExecutionStageBuilder { plan: &dyn ExecutionPlan, ) -> std::result::Result { // Handle both ShuffleWriterExec and SortShuffleWriterExec - if let Some(shuffle_write) = plan.as_any().downcast_ref::() { + if let Some(shuffle_write) = plan.downcast_ref::() { self.current_stage_id = shuffle_write.stage_id(); - } else if let Some(shuffle_write) = - plan.as_any().downcast_ref::() - { + } else if let Some(shuffle_write) = plan.downcast_ref::() { self.current_stage_id = shuffle_write.stage_id(); } else if let Some(unresolved_shuffle) = - plan.as_any().downcast_ref::() + plan.downcast_ref::() { if let Some(output_links) = self.output_links.get_mut(&unresolved_shuffle.stage_id) @@ -1702,18 +1700,14 @@ impl TaskDescription { /// Returns the number of output partitions this task will produce. 
pub fn get_output_partition_number(&self) -> usize { // Try ShuffleWriterExec first - if let Some(shuffle_writer) = - self.plan.as_any().downcast_ref::() - { + if let Some(shuffle_writer) = self.plan.downcast_ref::() { return shuffle_writer .shuffle_output_partitioning() .map(|partitioning| partitioning.partition_count()) .unwrap_or(1); } // Try SortShuffleWriterExec - if let Some(shuffle_writer) = - self.plan.as_any().downcast_ref::() - { + if let Some(shuffle_writer) = self.plan.downcast_ref::() { return shuffle_writer .shuffle_output_partitioning() .partition_count(); @@ -1887,15 +1881,18 @@ mod test { let mut join_graph = test_join_plan(4).await; // With the improvement of https://github.com/apache/arrow-datafusion/pull/4122, - // unnecessary RepartitionExec can be removed + // unnecessary RepartitionExec can be removed. DataFusion 54 took this + // a step further and now broadcasts one side of the join, so the + // graph has a single leaf stage (the scan of "left") with 2 tasks + // instead of the previous Y-shape with two 2-task leaves. 
assert_eq!(join_graph.stage_count(), 4); assert_eq!(join_graph.available_tasks(), 0); - // Call revive to move the two leaf Resolved stages to Running + // Call revive to move the leaf Resolved stage to Running join_graph.revive(); assert_eq!(join_graph.stage_count(), 4); - assert_eq!(join_graph.available_tasks(), 4); + assert_eq!(join_graph.available_tasks(), 2); // Complete the first stage revive_graph_and_complete_next_stage_with_executor(&mut join_graph, &executor1)?; @@ -1917,9 +1914,14 @@ mod test { let reset = join_graph.reset_stages_on_lost_executor(&executor1.id)?; - // Two stages were reset, 1 Running stage rollback to Unresolved and 1 Completed stage move to Running - assert_eq!(reset.0.len(), 2); - assert_eq!(join_graph.available_tasks(), 2); + // With the new linear plan, the running stage (stage 3) reads from + // stage 2 which was completed by executor2, so losing executor1 only + // resets the tasks that executor1 itself ran (1 completed + 1 + // in-flight on stage 3). No upstream stages are rolled back because + // their outputs are already consumed. After the reset, stage 3 has + // all 4 tasks pending again (2 reset + 2 that hadn't been popped). + assert_eq!(reset.0.len(), 1); + assert_eq!(join_graph.available_tasks(), 4); drain_tasks(&mut join_graph)?; assert!(join_graph.is_successful(), "Failed to complete join plan"); @@ -1930,36 +1932,31 @@ mod test { #[tokio::test] async fn test_reset_resolved_stage_executor_lost() -> Result<()> { let executor1 = mock_executor("executor-id1".to_string()); - let executor2 = mock_executor("executor-id2".to_string()); + let _executor2 = mock_executor("executor-id2".to_string()); let mut join_graph = test_join_plan(4).await; assert_eq!(join_graph.stage_count(), 4); assert_eq!(join_graph.available_tasks(), 0); - // Call revive to move the two leaf Resolved stages to Running + // Call revive to move the leaf Resolved stage to Running. 
See + // test_reset_completed_stage_executor_lost for why DataFusion 54 + // produces a single leaf instead of two. join_graph.revive(); assert_eq!(join_graph.stage_count(), 4); - assert_eq!(join_graph.available_tasks(), 4); + assert_eq!(join_graph.available_tasks(), 2); - // Complete the first stage + // Complete the first stage with executor1. Do NOT complete the second + // stage: this leaves stage 2 in Resolved state holding inputs that + // live on executor1, which is exactly the scenario this test wants + // to exercise. assert_eq!(revive_graph_and_complete_next_stage(&mut join_graph)?, 2); - // Complete the second stage - assert_eq!( - revive_graph_and_complete_next_stage_with_executor( - &mut join_graph, - &executor2 - )?, - 2 - ); - - // There are 0 tasks pending schedule now - assert_eq!(join_graph.available_tasks(), 0); - let reset = join_graph.reset_stages_on_lost_executor(&executor1.id)?; - // Two stages were reset, 1 Resolved stage rollback to Unresolved and 1 Completed stage move to Running + // Stage 2 (Resolved) rolls back to Unresolved because its input came + // from executor1, and stage 1 (Successful) is resubmitted so its + // output can be recomputed. 
assert_eq!(reset.0.len(), 2); assert_eq!(join_graph.available_tasks(), 2); diff --git a/ballista/scheduler/src/state/execution_graph_dot.rs b/ballista/scheduler/src/state/execution_graph_dot.rs index e1a24c4da5..9f48160ddb 100644 --- a/ballista/scheduler/src/state/execution_graph_dot.rs +++ b/ballista/scheduler/src/state/execution_graph_dot.rs @@ -148,7 +148,7 @@ fn write_plan_recursive( let node_name = format!("{prefix}_{i}"); let display_name = get_operator_name(plan); - if let Some(reader) = plan.as_any().downcast_ref::() { + if let Some(reader) = plan.downcast_ref::() { for part in &reader.partition { for loc in part { state @@ -156,7 +156,7 @@ fn write_plan_recursive( .insert(node_name.clone(), loc.partition_id.stage_id); } } - } else if let Some(reader) = plan.as_any().downcast_ref::() { + } else if let Some(reader) = plan.downcast_ref::() { state.readers.insert(node_name.clone(), reader.stage_id); } @@ -230,9 +230,9 @@ fn sanitize(str: &str, max_len: Option) -> String { } #[allow(deprecated)] fn get_operator_name(plan: &dyn ExecutionPlan) -> String { - if let Some(exec) = plan.as_any().downcast_ref::() { + if let Some(exec) = plan.downcast_ref::() { format!("Filter: {}", exec.predicate()) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { let expr = exec .expr() .iter() @@ -241,7 +241,7 @@ fn get_operator_name(plan: &dyn ExecutionPlan) -> String { .collect::>() .join(", "); format!("Projection: {}", sanitize_dot_label(&expr)) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { let sort_expr = exec .expr() .iter() @@ -257,7 +257,7 @@ fn get_operator_name(plan: &dyn ExecutionPlan) -> String { .collect::>() .join(", "); format!("Sort: {}", sanitize_dot_label(&sort_expr)) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { let group_exprs_with_alias = 
exec.group_expr().expr(); let group_expr = group_exprs_with_alias .iter() @@ -277,19 +277,19 @@ aggr=[{}]", sanitize_dot_label(&group_expr), sanitize_dot_label(&aggr_expr) ) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!("CoalesceBatches [batchSize={}]", exec.target_batch_size()) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!( "CoalescePartitions [{}]", format_partitioning(exec.properties().output_partitioning().clone()) ) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!( "RepartitionExec [{}]", format_partitioning(exec.properties().output_partitioning().clone()) ) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { let join_expr = exec .on() .iter() @@ -308,49 +308,46 @@ filter_expr={}", sanitize_dot_label(&join_expr), sanitize_dot_label(&filter_expr) ) - } else if plan.as_any().downcast_ref::().is_some() { + } else if plan.downcast_ref::().is_some() { "CrossJoin".to_string() - } else if plan.as_any().downcast_ref::().is_some() { + } else if plan.downcast_ref::().is_some() { "Union".to_string() - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!("UnresolvedShuffleExec [stage_id={}]", exec.stage_id) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!("ShuffleReader [{} partitions]", exec.partition.len()) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!( "ShuffleWriter [{} partitions]", exec.input_partition_count() ) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!( "SortShuffleWriter [{} 
partitions]", exec.input_partition_count() ) - } else if let Some(exec) = plan.as_any().downcast_ref::() { - let config = if let Some(config) = - exec.data_source().as_any().downcast_ref::() - { - get_file_scan(config) - } else if let Some(_config) = exec - .data_source() - .as_any() - .downcast_ref::() - { - "Memory".to_string() - } else { - "Unknown".to_string() - }; + } else if let Some(exec) = plan.downcast_ref::() { + let config = + if let Some(config) = exec.data_source().downcast_ref::() { + get_file_scan(config) + } else if let Some(_config) = + exec.data_source().downcast_ref::() + { + "Memory".to_string() + } else { + "Unknown".to_string() + }; let parts = exec.properties().output_partitioning().partition_count(); format!("DataSourceExec: ({config}) [{parts} partitions]") - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!( "GlobalLimit(skip={}, fetch={:?})", exec.skip(), exec.fetch() ) - } else if let Some(exec) = plan.as_any().downcast_ref::() { + } else if let Some(exec) = plan.downcast_ref::() { format!("LocalLimit({})", exec.fetch()) } else { debug!("Unknown physical operator when producing DOT graph: {plan:?}"); @@ -426,53 +423,43 @@ mod tests { let dot = ExecutionGraphDot::generate(&graph) .map_err(|e| BallistaError::Internal(format!("{e:?}")))?; + // DataFusion 54's physical planner picks a more efficient join plan + // for this query: `baz` is scanned inside the join stage instead of + // being given its own shuffle stage. That collapses the previous + // 5-stage plan into 3 stages. 
let expected = r#"digraph G { subgraph cluster0 { label = "Stage 1 [Resolved]"; - stage_1_0 [shape=box, label="SortShuffleWriter [2 partitions]"] + stage_1_0 [shape=box, label="ShuffleWriter [2 partitions]"] stage_1_0_0 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] stage_1_0_0 -> stage_1_0 } subgraph cluster1 { label = "Stage 2 [Resolved]"; - stage_2_0 [shape=box, label="SortShuffleWriter [2 partitions]"] + stage_2_0 [shape=box, label="ShuffleWriter [2 partitions]"] stage_2_0_0 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] stage_2_0_0 -> stage_2_0 } subgraph cluster2 { label = "Stage 3 [Unresolved]"; - stage_3_0 [shape=box, label="SortShuffleWriter [48 partitions]"] + stage_3_0 [shape=box, label="ShuffleWriter [2 partitions]"] stage_3_0_0 [shape=box, label="HashJoin -join_expr=a@0 = a@0 +join_expr=b@1 = b@3 filter_expr="] stage_3_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] stage_3_0_0_0 -> stage_3_0_0 - stage_3_0_0_1 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_3_0_0_1 [shape=box, label="HashJoin +join_expr=a@0 = a@0 +filter_expr="] + stage_3_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_3_0_0_1_0 -> stage_3_0_0_1 + stage_3_0_0_1_1 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] + stage_3_0_0_1_1 -> stage_3_0_0_1 stage_3_0_0_1 -> stage_3_0_0 stage_3_0_0 -> stage_3_0 } - subgraph cluster3 { - label = "Stage 4 [Resolved]"; - stage_4_0 [shape=box, label="SortShuffleWriter [2 partitions]"] - stage_4_0_0 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] - stage_4_0_0 -> stage_4_0 - } - subgraph cluster4 { - label = "Stage 5 [Unresolved]"; - stage_5_0 [shape=box, label="ShuffleWriter [48 partitions]"] - stage_5_0_0 [shape=box, label="HashJoin -join_expr=b@3 = b@1 -filter_expr="] - stage_5_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"] - stage_5_0_0_0 -> stage_5_0_0 - stage_5_0_0_1 [shape=box, label="UnresolvedShuffleExec [stage_id=4]"] - 
stage_5_0_0_1 -> stage_5_0_0 - stage_5_0_0 -> stage_5_0 - } stage_1_0 -> stage_3_0_0_0 - stage_2_0 -> stage_3_0_0_1 - stage_3_0 -> stage_5_0_0_0 - stage_4_0 -> stage_5_0_0_1 + stage_2_0 -> stage_3_0_0_1_0 } "#; assert_eq!(expected, &dot); @@ -486,13 +473,19 @@ filter_expr="] .map_err(|e| BallistaError::Internal(format!("{e:?}")))?; let expected = r#"digraph G { - stage_3_0 [shape=box, label="SortShuffleWriter [48 partitions]"] + stage_3_0 [shape=box, label="ShuffleWriter [2 partitions]"] stage_3_0_0 [shape=box, label="HashJoin -join_expr=a@0 = a@0 +join_expr=b@1 = b@3 filter_expr="] stage_3_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] stage_3_0_0_0 -> stage_3_0_0 - stage_3_0_0_1 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_3_0_0_1 [shape=box, label="HashJoin +join_expr=a@0 = a@0 +filter_expr="] + stage_3_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_3_0_0_1_0 -> stage_3_0_0_1 + stage_3_0_0_1_1 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] + stage_3_0_0_1_1 -> stage_3_0_0_1 stage_3_0_0_1 -> stage_3_0_0 stage_3_0_0 -> stage_3_0 } @@ -507,46 +500,43 @@ filter_expr="] let dot = ExecutionGraphDot::generate(&graph) .map_err(|e| BallistaError::Internal(format!("{e:?}")))?; + // DataFusion 54 collapses the join graph for this query into a single + // distributed stage that absorbs the third scan as the inner side of a + // broadcast hash join. The previous 4-stage shape was a planning + // artifact, not a Ballista requirement. 
let expected = r#"digraph G { subgraph cluster0 { label = "Stage 1 [Resolved]"; - stage_1_0 [shape=box, label="SortShuffleWriter [2 partitions]"] + stage_1_0 [shape=box, label="ShuffleWriter [2 partitions]"] stage_1_0_0 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] stage_1_0_0 -> stage_1_0 } subgraph cluster1 { label = "Stage 2 [Resolved]"; - stage_2_0 [shape=box, label="SortShuffleWriter [2 partitions]"] + stage_2_0 [shape=box, label="ShuffleWriter [2 partitions]"] stage_2_0_0 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] stage_2_0_0 -> stage_2_0 } subgraph cluster2 { - label = "Stage 3 [Resolved]"; - stage_3_0 [shape=box, label="SortShuffleWriter [2 partitions]"] - stage_3_0_0 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] - stage_3_0_0 -> stage_3_0 - } - subgraph cluster3 { - label = "Stage 4 [Unresolved]"; - stage_4_0 [shape=box, label="ShuffleWriter [48 partitions]"] - stage_4_0_0 [shape=box, label="HashJoin -join_expr=a@1 = a@0 + label = "Stage 3 [Unresolved]"; + stage_3_0 [shape=box, label="ShuffleWriter [2 partitions]"] + stage_3_0_0 [shape=box, label="HashJoin +join_expr=a@0 = a@1 filter_expr="] - stage_4_0_0_0 [shape=box, label="HashJoin + stage_3_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] + stage_3_0_0_0 -> stage_3_0_0 + stage_3_0_0_1 [shape=box, label="HashJoin join_expr=a@0 = a@0 filter_expr="] - stage_4_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] - stage_4_0_0_0_0 -> stage_4_0_0_0 - stage_4_0_0_0_1 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] - stage_4_0_0_0_1 -> stage_4_0_0_0 - stage_4_0_0_0 -> stage_4_0_0 - stage_4_0_0_1 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"] - stage_4_0_0_1 -> stage_4_0_0 - stage_4_0_0 -> stage_4_0 + stage_3_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_3_0_0_1_0 -> stage_3_0_0_1 + stage_3_0_0_1_1 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] + stage_3_0_0_1_1 -> stage_3_0_0_1 + 
stage_3_0_0_1 -> stage_3_0_0 + stage_3_0_0 -> stage_3_0 } - stage_1_0 -> stage_4_0_0_0_0 - stage_2_0 -> stage_4_0_0_0_1 - stage_3_0 -> stage_4_0_0_1 + stage_1_0 -> stage_3_0_0_0 + stage_2_0 -> stage_3_0_0_1_0 } "#; assert_eq!(expected, &dot); @@ -556,25 +546,25 @@ filter_expr="] #[tokio::test] async fn query_stage_optimized() -> Result<()> { let graph = test_graph_optimized().await?; - let dot = ExecutionGraphDot::generate_for_query_stage(&graph, 4) + let dot = ExecutionGraphDot::generate_for_query_stage(&graph, 3) .map_err(|e| BallistaError::Internal(format!("{e:?}")))?; let expected = r#"digraph G { - stage_4_0 [shape=box, label="ShuffleWriter [48 partitions]"] - stage_4_0_0 [shape=box, label="HashJoin -join_expr=a@1 = a@0 + stage_3_0 [shape=box, label="ShuffleWriter [2 partitions]"] + stage_3_0_0 [shape=box, label="HashJoin +join_expr=a@0 = a@1 filter_expr="] - stage_4_0_0_0 [shape=box, label="HashJoin + stage_3_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] + stage_3_0_0_0 -> stage_3_0_0 + stage_3_0_0_1 [shape=box, label="HashJoin join_expr=a@0 = a@0 filter_expr="] - stage_4_0_0_0_0 [shape=box, label="UnresolvedShuffleExec [stage_id=1]"] - stage_4_0_0_0_0 -> stage_4_0_0_0 - stage_4_0_0_0_1 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] - stage_4_0_0_0_1 -> stage_4_0_0_0 - stage_4_0_0_0 -> stage_4_0_0 - stage_4_0_0_1 [shape=box, label="UnresolvedShuffleExec [stage_id=3]"] - stage_4_0_0_1 -> stage_4_0_0 - stage_4_0_0 -> stage_4_0 + stage_3_0_0_1_0 [shape=box, label="UnresolvedShuffleExec [stage_id=2]"] + stage_3_0_0_1_0 -> stage_3_0_0_1 + stage_3_0_0_1_1 [shape=box, label="DataSourceExec: (Memory) [2 partitions]"] + stage_3_0_0_1_1 -> stage_3_0_0_1 + stage_3_0_0_1 -> stage_3_0_0 + stage_3_0_0 -> stage_3_0 } "#; assert_eq!(expected, &dot); diff --git a/ballista/scheduler/src/state/execution_stage.rs b/ballista/scheduler/src/state/execution_stage.rs index bb702c2898..ea2c6b6c07 100644 --- a/ballista/scheduler/src/state/execution_stage.rs +++ 
b/ballista/scheduler/src/state/execution_stage.rs @@ -1005,11 +1005,11 @@ impl Debug for FailedStage { /// will be different. Here, we should use the input partition count. fn get_stage_partitions(plan: Arc) -> usize { // Try ShuffleWriterExec first - if let Some(shuffle_writer) = plan.as_any().downcast_ref::() { + if let Some(shuffle_writer) = plan.downcast_ref::() { return shuffle_writer.input_partition_count(); } // Try SortShuffleWriterExec - if let Some(shuffle_writer) = plan.as_any().downcast_ref::() { + if let Some(shuffle_writer) = plan.downcast_ref::() { return shuffle_writer.input_partition_count(); } // Fallback to output partitioning diff --git a/ballista/scheduler/src/test_utils.rs b/ballista/scheduler/src/test_utils.rs index 93ca29d752..2e900f4d71 100644 --- a/ballista/scheduler/src/test_utils.rs +++ b/ballista/scheduler/src/test_utils.rs @@ -19,7 +19,6 @@ use ballista_core::JobStatusSubscriber; use ballista_core::error::{BallistaError, Result}; use ballista_core::extension::SessionConfigExt; use datafusion::catalog::Session; -use std::any::Any; use std::collections::HashMap; use std::future::Future; use std::sync::Arc; @@ -82,10 +81,6 @@ pub struct ExplodingTableProvider; #[async_trait] impl TableProvider for ExplodingTableProvider { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { Arc::new(Schema::empty()) }