Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions be/src/exec/common/hash_table/join_hash_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,8 @@ class JoinHashTable {
/// If the probe key is null
if constexpr (has_null_map) {
if (null_map[probe_idx]) {
probe_idx++;
build_idx = 0;
picking_null_keys = false;
break;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still does not preserve the NULL probe row correctly when the build side contains NULL keys. For null-aware joins with other conjuncts, HashJoinBuild::run() sets keep_null_key = true, so build_idx_map[probe_idx] can be first[bucket_size] for a NULL probe. After this change the loop breaks with that nonzero build_idx, then line 492 applies probe_idx -= (build_idx != 0), returning the previous probe row instead of the NULL row. The caller therefore misses _process_probe_null_key(probe_idx) for this row, or resumes probing the previous row against the NULL bucket. A repro is the new nullable test shape without filtering NULLs out of the subquery build side, e.g. inserting an inner (1, NULL) and removing i.a IS NOT NULL. Please clear build_idx before breaking on a NULL probe, or otherwise skip the continuation rewind for this case, and add coverage with build-side NULL keys.

}
}
Expand Down Expand Up @@ -512,4 +513,4 @@ class JoinHashTable {

template <typename Key, typename Hash, bool DirectMapping>
using JoinHashMap = JoinHashTable<Key, Hash, DirectMapping>;
} // namespace doris
} // namespace doris
12 changes: 12 additions & 0 deletions regression-test/data/correctness/test_subquery_in_disjunction.out
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@
1 2 3
10 20 30

-- !not_in_nullable_mark_join --
1 0
11 \N

-- !not_in_nullable_mark_join_in_disjunction --
1 0
11 \N

-- !not_in_nullable_mark_join_in_disjunction_build_null --
1 0
11 \N

-- !mark_join_with_other_conjuncts1 --
1 2
1 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,22 +97,21 @@ suite("test_subquery_in_disjunction") {
SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
"""

// TODO: enable this after DORIS-7051 and DORIS-7052 is fixed
// qt_hash_join_with_other_conjuncts5 """
// SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
// """
qt_hash_join_with_other_conjuncts5 """
SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
"""

// qt_hash_join_with_other_conjuncts6 """
// SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
// """
qt_hash_join_with_other_conjuncts6 """
SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
"""

// qt_hash_join_with_other_conjuncts7 """
// SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
// """
qt_hash_join_with_other_conjuncts7 """
SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
"""

// qt_hash_join_with_other_conjuncts8 """
// SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
// """
qt_hash_join_with_other_conjuncts8 """
SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
"""

qt_same_subquery_in_conjuncts """
SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1;
Expand All @@ -122,6 +121,62 @@ suite("test_subquery_in_disjunction") {
SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 IN (SELECT c2 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1;
"""

sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_outer """
sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_inner """
sql """
CREATE TABLE `test_sq_dj_nullable_outer` (
`id` int(11) NULL,
`a` int(11) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
"""
sql """
CREATE TABLE `test_sq_dj_nullable_inner` (
`id` int(11) NULL,
`a` int(11) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
"""
sql """ INSERT INTO test_sq_dj_nullable_outer VALUES (1, 0), (11, NULL) """
sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (1, 10) """

order_qt_not_in_nullable_mark_join """
SELECT id, a FROM test_sq_dj_nullable_outer o
WHERE o.a NOT IN (
SELECT i.a FROM test_sq_dj_nullable_inner i
WHERE i.id > o.id AND i.a IS NOT NULL
)
ORDER BY id;
"""

order_qt_not_in_nullable_mark_join_in_disjunction """
SELECT id, a FROM test_sq_dj_nullable_outer o
WHERE o.a NOT IN (
SELECT i.a FROM test_sq_dj_nullable_inner i
WHERE i.id > o.id AND i.a IS NOT NULL
) OR o.id IN (1, 11)
ORDER BY id;
"""

sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (20, NULL) """

order_qt_not_in_nullable_mark_join_in_disjunction_build_null """
SELECT id, a FROM test_sq_dj_nullable_outer o
WHERE o.a NOT IN (
SELECT i.a FROM test_sq_dj_nullable_inner i
WHERE i.id > o.id
) OR o.id IN (1, 11)
ORDER BY id;
"""

// test mark join that one probe row matches multiple build rows
sql """drop table if exists sub_query_correlated_subquery1;"""
sql """create table if not exists sub_query_correlated_subquery1
Expand Down
Loading