Skip to content

Commit 9346491

Browse files
author
Peng Ren
committed
Fix sql parser bugs
1 parent 4af6f16 commit 9346491

File tree

3 files changed

+247
-6
lines changed

3 files changed

+247
-6
lines changed

pymongosql/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if TYPE_CHECKING:
77
from .connection import Connection
88

9-
__version__: str = "0.4.5"
9+
__version__: str = "0.4.6"
1010

1111
# Globals https://www.python.org/dev/peps/pep-0249/#globals
1212
apilevel: str = "2.0"

pymongosql/sql/handler.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@
2626
"NOT IN": "$nin",
2727
}
2828

29+
# Pattern to count all comparison operations in ANTLR getText() output (no spaces).
30+
# Includes standard operators and SQL keywords (IN, LIKE, BETWEEN, IS [NOT] NULL).
31+
_COMPARISON_COUNT_PATTERN = re.compile(
32+
r">=|<=|!=|<>|=|<|>|IN\(|LIKE['\"]|BETWEEN|ISNOTNULL|ISNULL",
33+
re.IGNORECASE,
34+
)
35+
2936

3037
class ContextUtilsMixin:
3138
"""Mixin providing common context utility methods"""
@@ -194,8 +201,10 @@ def can_handle(self, ctx: Any) -> bool:
194201
text = self.get_context_text(ctx)
195202
text_upper = text.upper()
196203

197-
# Count comparison operators
198-
comparison_count = sum(1 for op in COMPARISON_OPERATORS if op in text)
204+
# Count comparison operator *occurrences* (not distinct types) so that
205+
# "B=trueANDA=1" is correctly seen as having 2 comparisons even though
206+
# both use the same "=" operator. Also counts IN (...), LIKE, BETWEEN, and IS [NOT] NULL.
207+
comparison_count = len(_COMPARISON_COUNT_PATTERN.findall(text))
199208

200209
# If there are multiple comparisons and logical operators, it's a logical expression
201210
has_logical_ops = any(op in text_upper for op in LOGICAL_OPERATORS)
@@ -550,8 +559,23 @@ def _find_operator_positions(self, text: str, operator: str) -> List[int]:
550559
and i + len(operator) < len(text)
551560
and text[i + len(operator)].isalpha()
552561
):
553-
i += len(operator)
554-
continue
562+
# ANTLR getText() concatenates tokens without spaces, so
563+
# "B=true AND A=1" becomes "B=trueANDA=1". We must still
564+
# recognise AND/OR here when the preceding text ends with a
565+
# SQL literal keyword (true/false/null) that is itself
566+
# preceded by a comparison-operator character (=, <, >, or !).
567+
# This avoids false positives like "category" containing "OR".
568+
before_upper = text[:i].upper()
569+
is_value_boundary = False
570+
for keyword in ("TRUE", "FALSE", "NULL"):
571+
if before_upper.endswith(keyword):
572+
prefix = text[: i - len(keyword)]
573+
if prefix and prefix[-1] in ("=", "<", ">", "!"):
574+
is_value_boundary = True
575+
break
576+
if not is_value_boundary:
577+
i += len(operator)
578+
continue
555579

556580
# Check parentheses and quote depth
557581
if self._is_at_valid_split_position(text, i):
@@ -583,7 +607,7 @@ def _has_logical_operators(self, ctx: Any) -> bool:
583607
# Count comparison operator occurrences, not just distinct operator types
584608
# so that "a = 1 OR b = 2" counts as 2 comparisons and is treated
585609
# as a logical expression instead of a single comparison.
586-
comparison_count = len(re.findall(r"(>=|<=|!=|<>|=|<|>)", text))
610+
comparison_count = len(_COMPARISON_COUNT_PATTERN.findall(text))
587611
has_logical_ops = any(op in text for op in ["AND", "OR"])
588612
return has_logical_ops and comparison_count >= 2
589613
except Exception:
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
# -*- coding: utf-8 -*-
2+
import datetime
3+
4+
from pymongosql.sql.parser import SQLParser
5+
6+
7+
class TestWhereClauseFieldOrdering:
8+
"""Test that WHERE clause conditions produce correct filters regardless of field order.
9+
10+
Regression tests for a bug where literal keywords (true/false/null) adjacent to
11+
AND/OR operators in ANTLR getText() output caused the logical operator to be
12+
missed, e.g. "B=trueANDA=1" was not split on AND because the word boundary
13+
check treated "trueAND" as a single word.
14+
"""
15+
16+
# --- Boolean + Numeric (various orderings) ---
17+
18+
def test_bool_first_3_conditions(self):
19+
sql = "SELECT * FROM col WHERE active=true AND age=30 AND score=100"
20+
plan = SQLParser(sql).get_execution_plan()
21+
assert plan.filter_stage == {"$and": [{"active": True}, {"age": 30}, {"score": 100}]}
22+
23+
def test_bool_middle_3_conditions(self):
24+
sql = "SELECT * FROM col WHERE age=30 AND active=true AND score=100"
25+
plan = SQLParser(sql).get_execution_plan()
26+
assert plan.filter_stage == {"$and": [{"age": 30}, {"active": True}, {"score": 100}]}
27+
28+
def test_bool_last_3_conditions(self):
29+
sql = "SELECT * FROM col WHERE age=30 AND score=100 AND active=true"
30+
plan = SQLParser(sql).get_execution_plan()
31+
assert plan.filter_stage == {"$and": [{"age": 30}, {"score": 100}, {"active": True}]}
32+
33+
# --- Boolean + String ---
34+
35+
def test_bool_and_string_bool_first(self):
36+
sql = "SELECT * FROM col WHERE active=true AND name='John'"
37+
plan = SQLParser(sql).get_execution_plan()
38+
assert plan.filter_stage == {"$and": [{"active": True}, {"name": "John"}]}
39+
40+
def test_bool_and_string_string_first(self):
41+
sql = "SELECT * FROM col WHERE name='John' AND active=true"
42+
plan = SQLParser(sql).get_execution_plan()
43+
assert plan.filter_stage == {"$and": [{"name": "John"}, {"active": True}]}
44+
45+
def test_false_and_string(self):
46+
sql = "SELECT * FROM col WHERE deleted=false AND status='pending'"
47+
plan = SQLParser(sql).get_execution_plan()
48+
assert plan.filter_stage == {"$and": [{"deleted": False}, {"status": "pending"}]}
49+
50+
# --- Boolean + String + Numeric (4 conditions) ---
51+
52+
def test_4_mixed_types(self):
53+
sql = "SELECT * FROM col WHERE active=true AND name='Alice' AND age=25 AND score>90"
54+
plan = SQLParser(sql).get_execution_plan()
55+
assert plan.filter_stage == {"$and": [{"active": True}, {"name": "Alice"}, {"age": 25}, {"score": {"$gt": 90}}]}
56+
57+
def test_4_mixed_types_bool_last(self):
58+
sql = "SELECT * FROM col WHERE name='Alice' AND age=25 AND score>90 AND active=true"
59+
plan = SQLParser(sql).get_execution_plan()
60+
assert plan.filter_stage == {"$and": [{"name": "Alice"}, {"age": 25}, {"score": {"$gt": 90}}, {"active": True}]}
61+
62+
# --- Datetime with value function ---
63+
64+
def test_bool_and_datetime_func(self):
65+
sql = "SELECT * FROM col WHERE active=true AND created_at>str_to_datetime('2024-01-01','%Y-%m-%d')"
66+
plan = SQLParser(sql).get_execution_plan()
67+
expected_dt = datetime.datetime(2024, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
68+
assert plan.filter_stage == {"$and": [{"active": True}, {"created_at": {"$gt": expected_dt}}]}
69+
70+
def test_datetime_func_and_bool(self):
71+
sql = "SELECT * FROM col WHERE created_at>str_to_datetime('2024-01-01','%Y-%m-%d') AND active=true"
72+
plan = SQLParser(sql).get_execution_plan()
73+
expected_dt = datetime.datetime(2024, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
74+
assert plan.filter_stage == {"$and": [{"created_at": {"$gt": expected_dt}}, {"active": True}]}
75+
76+
# --- Bracketed / parenthesized groups ---
77+
78+
def test_brackets_bool_and_num_or_string(self):
79+
sql = "SELECT * FROM col WHERE (active=true AND age>25) OR status='admin'"
80+
plan = SQLParser(sql).get_execution_plan()
81+
assert plan.filter_stage == {"$or": [{"$and": [{"active": True}, {"age": {"$gt": 25}}]}, {"status": "admin"}]}
82+
83+
def test_brackets_string_or_bool_and_num(self):
84+
sql = "SELECT * FROM col WHERE status='admin' OR (active=true AND age>25)"
85+
plan = SQLParser(sql).get_execution_plan()
86+
assert plan.filter_stage == {"$or": [{"status": "admin"}, {"$and": [{"active": True}, {"age": {"$gt": 25}}]}]}
87+
88+
def test_brackets_bool_and_string_and_num(self):
89+
sql = "SELECT * FROM col WHERE (active=true AND name='John') AND age>30"
90+
plan = SQLParser(sql).get_execution_plan()
91+
assert plan.filter_stage == {"$and": [{"$and": [{"active": True}, {"name": "John"}]}, {"age": {"$gt": 30}}]}
92+
93+
def test_nested_brackets(self):
94+
sql = "SELECT * FROM col WHERE ((active=true AND age>25) OR status='admin') AND score>50"
95+
plan = SQLParser(sql).get_execution_plan()
96+
assert plan.filter_stage == {
97+
"$and": [
98+
{"$or": [{"$and": [{"active": True}, {"age": {"$gt": 25}}]}, {"status": "admin"}]},
99+
{"score": {"$gt": 50}},
100+
]
101+
}
102+
103+
# --- 5+ conditions ---
104+
105+
def test_5_conditions_all_and(self):
106+
sql = "SELECT * FROM col WHERE active=true AND name='Bob' AND age>20 AND score<100 AND tier=3"
107+
plan = SQLParser(sql).get_execution_plan()
108+
assert plan.filter_stage == {
109+
"$and": [
110+
{"active": True},
111+
{"name": "Bob"},
112+
{"age": {"$gt": 20}},
113+
{"score": {"$lt": 100}},
114+
{"tier": 3},
115+
]
116+
}
117+
118+
def test_5_conditions_bool_scattered(self):
119+
sql = "SELECT * FROM col WHERE name='Eve' AND active=true AND age=28 AND deleted=false AND score>=50"
120+
plan = SQLParser(sql).get_execution_plan()
121+
assert plan.filter_stage == {
122+
"$and": [
123+
{"name": "Eve"},
124+
{"active": True},
125+
{"age": 28},
126+
{"deleted": False},
127+
{"score": {"$gte": 50}},
128+
]
129+
}
130+
131+
# --- OR with multiple booleans ---
132+
133+
def test_or_with_3_bool_conditions(self):
134+
sql = "SELECT * FROM col WHERE active=true OR deleted=false OR verified=true"
135+
plan = SQLParser(sql).get_execution_plan()
136+
assert plan.filter_stage == {"$or": [{"active": True}, {"deleted": False}, {"verified": True}]}
137+
138+
# --- Complex mixed AND/OR with brackets ---
139+
140+
def test_brackets_two_and_groups_with_or(self):
141+
sql = "SELECT * FROM col WHERE (active=true AND age>25) OR (deleted=false AND status='archived')"
142+
plan = SQLParser(sql).get_execution_plan()
143+
assert plan.filter_stage == {
144+
"$or": [
145+
{"$and": [{"active": True}, {"age": {"$gt": 25}}]},
146+
{"$and": [{"deleted": False}, {"status": "archived"}]},
147+
]
148+
}
149+
150+
def test_bool_and_bracketed_or(self):
151+
sql = "SELECT * FROM col WHERE active=true AND (name='John' OR age>30)"
152+
plan = SQLParser(sql).get_execution_plan()
153+
assert plan.filter_stage == {"$and": [{"active": True}, {"$or": [{"name": "John"}, {"age": {"$gt": 30}}]}]}
154+
155+
# --- Comparison operators with booleans ---
156+
157+
def test_bool_not_equal_and_comparison(self):
158+
sql = "SELECT * FROM col WHERE active!=false AND age>25"
159+
plan = SQLParser(sql).get_execution_plan()
160+
assert plan.filter_stage == {"$and": [{"active": {"$ne": False}}, {"age": {"$gt": 25}}]}
161+
162+
# --- null mixed with bool ---
163+
164+
def test_null_bool_and_string(self):
165+
sql = "SELECT * FROM col WHERE deleted_at=null AND active=true AND name='test'"
166+
plan = SQLParser(sql).get_execution_plan()
167+
assert plan.filter_stage == {"$and": [{"deleted_at": None}, {"active": True}, {"name": "test"}]}
168+
169+
# --- LIKE with bool ---
170+
171+
def test_like_and_bool(self):
172+
sql = "SELECT * FROM col WHERE name LIKE '%john%' AND active=true"
173+
plan = SQLParser(sql).get_execution_plan()
174+
f = plan.filter_stage
175+
assert "$and" in f
176+
assert {"active": True} in f["$and"]
177+
assert any("name" in cond and "$regex" in cond.get("name", {}) for cond in f["$and"])
178+
179+
def test_bool_and_like(self):
180+
sql = "SELECT * FROM col WHERE active=true AND name LIKE '%john%'"
181+
plan = SQLParser(sql).get_execution_plan()
182+
f = plan.filter_stage
183+
assert "$and" in f
184+
assert {"active": True} in f["$and"]
185+
assert any("name" in cond and "$regex" in cond.get("name", {}) for cond in f["$and"])
186+
187+
# --- IN with bool ---
188+
189+
def test_in_and_bool(self):
190+
sql = "SELECT * FROM col WHERE status IN ('a','b','c') AND active=true"
191+
plan = SQLParser(sql).get_execution_plan()
192+
f = plan.filter_stage
193+
assert "$and" in f
194+
assert {"active": True} in f["$and"]
195+
assert any("status" in cond and "$in" in cond.get("status", {}) for cond in f["$and"])
196+
197+
def test_bool_and_in(self):
198+
sql = "SELECT * FROM col WHERE active=true AND status IN ('a','b','c')"
199+
plan = SQLParser(sql).get_execution_plan()
200+
f = plan.filter_stage
201+
assert "$and" in f
202+
assert {"active": True} in f["$and"]
203+
assert any("status" in cond and "$in" in cond.get("status", {}) for cond in f["$and"])
204+
205+
# --- 6 conditions with brackets ---
206+
207+
def test_6_conditions_with_brackets(self):
208+
sql = (
209+
"SELECT * FROM col WHERE (active=true AND deleted=false) "
210+
"AND (age>20 AND age<60) AND (name='X' OR name='Y')"
211+
)
212+
plan = SQLParser(sql).get_execution_plan()
213+
f = plan.filter_stage
214+
flat = str(f)
215+
for key in ["active", "deleted", "age", "name"]:
216+
assert key in flat, f"Missing expected key '{key}' in filter: {f}"
217+
assert "$or" in flat

0 commit comments

Comments
 (0)