From 81394a28673b5ef7f567b347ff4265c2b9607cda Mon Sep 17 00:00:00 2001 From: Hu Shenggang Date: Thu, 28 May 2026 00:06:00 +0800 Subject: [PATCH] [fix](be) Preserve null probe rows in mark anti join ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: Correlated NOT IN subqueries under disjunction can be rewritten to a mark null-aware left anti join with additional join conjuncts. When the probe join key is NULL, the hash table lookup advanced the probe index before the caller could run the null-probe handling path. As a result, the probe row was skipped before the mark column was evaluated by the outer disjunction, producing incomplete query results. This change keeps the probe index on the NULL row so the null-aware join path can emit the correct mark value. ### Release note Fix incorrect results for correlated NOT IN subqueries combined with disjunctions. ### Check List (For Author) - Test: - Regression test: `doris-local-regression.sh --network 10.26.20.3/24 run -d correctness -s test_subquery_in_disjunction -forceGenOut` - Regression test: `doris-local-regression.sh --network 10.26.20.3/24 run -d correctness -s test_subquery_in_disjunction` - Manual test: verified the NOT IN + OR reproducer before and after the fix on a local FE/BE cluster - Build: `./build.sh --be` - Behavior changed: Yes. Corrects query result semantics for affected null-aware mark anti joins. - Does this need documentation: No --- .../exec/common/hash_table/join_hash_table.h | 5 +- .../test_subquery_in_disjunction.out | 12 +++ .../test_subquery_in_disjunction.groovy | 81 ++++++++++++++++--- 3 files changed, 83 insertions(+), 15 deletions(-) diff --git a/be/src/exec/common/hash_table/join_hash_table.h b/be/src/exec/common/hash_table/join_hash_table.h index 9b63040330bdf2..bc737352f41e90 100644 --- a/be/src/exec/common/hash_table/join_hash_table.h +++ b/be/src/exec/common/hash_table/join_hash_table.h @@ -480,7 +480,8 @@ class JoinHashTable { /// If the probe key is null if constexpr (has_null_map) { if (null_map[probe_idx]) { - probe_idx++; + build_idx = 0; + picking_null_keys = false; break; } } @@ -512,4 +513,4 @@ class JoinHashTable { template using JoinHashMap = JoinHashTable; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/regression-test/data/correctness/test_subquery_in_disjunction.out b/regression-test/data/correctness/test_subquery_in_disjunction.out index 65dbd980472118..7b253ee410ddb4 100644 --- a/regression-test/data/correctness/test_subquery_in_disjunction.out +++ b/regression-test/data/correctness/test_subquery_in_disjunction.out @@ -66,6 +66,18 @@ 1 2 3 10 20 30 +-- !not_in_nullable_mark_join -- +1 0 +11 \N + +-- !not_in_nullable_mark_join_in_disjunction -- +1 0 +11 \N + +-- !not_in_nullable_mark_join_in_disjunction_build_null -- +1 0 +11 \N + -- !mark_join_with_other_conjuncts1 -- 1 2 1 3 diff --git a/regression-test/suites/correctness/test_subquery_in_disjunction.groovy b/regression-test/suites/correctness/test_subquery_in_disjunction.groovy index 2decf0583f2800..384103d2c1415c 100644 --- a/regression-test/suites/correctness/test_subquery_in_disjunction.groovy +++ b/regression-test/suites/correctness/test_subquery_in_disjunction.groovy @@ -97,22 +97,21 @@ suite("test_subquery_in_disjunction") { SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1; """ - // TODO: enable this after DORIS-7051 and DORIS-7052 is fixed - // qt_hash_join_with_other_conjuncts5 """ - // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1; - // """ + qt_hash_join_with_other_conjuncts5 """ + SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1; + """ - // qt_hash_join_with_other_conjuncts6 """ - // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1; - // """ + qt_hash_join_with_other_conjuncts6 """ + SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1; + """ - // qt_hash_join_with_other_conjuncts7 """ - // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1; - // """ + qt_hash_join_with_other_conjuncts7 """ + SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1; + """ - // qt_hash_join_with_other_conjuncts8 """ - // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1; - // """ + qt_hash_join_with_other_conjuncts8 """ + SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1; + """ qt_same_subquery_in_conjuncts """ SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1; @@ -122,6 +121,62 @@ suite("test_subquery_in_disjunction") { SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 IN (SELECT c2 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1; """ + sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_outer """ + sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_inner """ + sql """ + CREATE TABLE `test_sq_dj_nullable_outer` ( + `id` int(11) NULL, + `a` int(11) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ + CREATE TABLE `test_sq_dj_nullable_inner` ( + `id` int(11) NULL, + `a` int(11) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ INSERT INTO test_sq_dj_nullable_outer VALUES (1, 0), (11, NULL) """ + sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (1, 10) """ + + order_qt_not_in_nullable_mark_join """ + SELECT id, a FROM test_sq_dj_nullable_outer o + WHERE o.a NOT IN ( + SELECT i.a FROM test_sq_dj_nullable_inner i + WHERE i.id > o.id AND i.a IS NOT NULL + ) + ORDER BY id; + """ + + order_qt_not_in_nullable_mark_join_in_disjunction """ + SELECT id, a FROM test_sq_dj_nullable_outer o + WHERE o.a NOT IN ( + SELECT i.a FROM test_sq_dj_nullable_inner i + WHERE i.id > o.id AND i.a IS NOT NULL + ) OR o.id IN (1, 11) + ORDER BY id; + """ + + sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (20, NULL) """ + + order_qt_not_in_nullable_mark_join_in_disjunction_build_null """ + SELECT id, a FROM test_sq_dj_nullable_outer o + WHERE o.a NOT IN ( + SELECT i.a FROM test_sq_dj_nullable_inner i + WHERE i.id > o.id + ) OR o.id IN (1, 11) + ORDER BY id; + """ + // test mark join that one probe row matches multiple build rows sql """drop table if exists sub_query_correlated_subquery1;""" sql """create table if not exists sub_query_correlated_subquery1