Skip to content

Commit 119d626

Browse files
author
Peng Ren
committed
Ensure the result set returns the correct data types to Superset
1 parent bec839d commit 119d626

9 files changed

Lines changed: 584 additions & 97 deletions

File tree

pymongosql/cursor.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,17 @@ def execute(self: _T, operation: str, parameters: Optional[Any] = None) -> _T:
106106
# For SELECT/QUERY operations, use the execution plan directly
107107
if isinstance(self._current_execution_plan, QueryExecutionPlan):
108108
execution_plan_for_rs = self._current_execution_plan
109-
self._result_set = self._result_set_class(
109+
110+
# Use PreProcessedResultSet for rows from intermediate storage (SQLite)
111+
# These rows are already formatted and don't need projection processing
112+
if getattr(execution_plan_for_rs, "from_intermediate_storage", False):
113+
from .result_set import PreProcessedResultSet
114+
115+
result_set_class = PreProcessedResultSet
116+
else:
117+
result_set_class = self._result_set_class
118+
119+
self._result_set = result_set_class(
110120
command_result=result,
111121
execution_plan=execution_plan_for_rs,
112122
database=self.connection.database,

pymongosql/result_set.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,5 +426,26 @@ def _format_result(self, doc: Dict[str, Any]) -> Dict[str, Any]:
426426
return doc
427427

428428

429+
class PreProcessedResultSet(ResultSet):
430+
"""Result set for pre-formatted rows that don't need projection processing.
431+
432+
Used when rows are already projected and formatted (e.g., from SQLite intermediate storage).
433+
Skips the document projection step and goes directly to formatting as tuples.
434+
"""
435+
436+
def _process_and_cache_batch(self, batch: List[Dict[str, Any]]) -> None:
437+
"""Process and cache a batch of pre-processed documents.
438+
439+
Unlike the base ResultSet, this skips projection processing and directly formats results.
440+
"""
441+
if not batch:
442+
return
443+
# Skip projection processing - rows are already in final form
444+
# Just format to output format (tuple)
445+
formatted_batch = [self._format_result(doc) for doc in batch]
446+
self._cached_results.extend(formatted_batch)
447+
self._total_fetched += len(batch)
448+
449+
429450
# For backward compatibility
430451
MongoResultSet = ResultSet

pymongosql/sql/projection_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def convert_value(self, value: Any, format_param: Optional[str] = None) -> Any:
9292
return value
9393

9494
def get_type_code(self) -> str:
95-
return "date"
95+
return "datetime"
9696

9797

9898
class TimestampFunction(ProjectionFunction):
@@ -171,7 +171,7 @@ def convert_value(self, value: Any, format_param: Optional[str] = None) -> Any:
171171
return value
172172

173173
def get_type_code(self) -> str:
174-
return "timestamp"
174+
return "datetime"
175175

176176

177177
class DatetimeFunction(ProjectionFunction):

pymongosql/superset_mongodb/detector.py

Lines changed: 80 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,50 @@ class QueryInfo:
1919
class SubqueryDetector:
2020
"""Detects and analyzes SQL subqueries in query strings"""
2121

22-
# Pattern to detect wrapped subqueries: SELECT ... FROM (SELECT ...) AS alias
23-
WRAPPED_SUBQUERY_PATTERN = re.compile(
24-
r"SELECT\s+.*?\s+FROM\s*\(\s*(SELECT\s+.*?)\s*\)\s+(?:AS\s+)?(\w+)",
25-
re.IGNORECASE | re.DOTALL,
26-
)
27-
2822
# Pattern to detect simple SELECT start
2923
SELECT_PATTERN = re.compile(r"^\s*SELECT\s+", re.IGNORECASE)
3024

25+
@classmethod
26+
def _extract_balanced_subquery(cls, query: str) -> Optional[Tuple[str, str]]:
27+
"""
28+
Extract subquery with balanced parentheses.
29+
30+
Returns:
31+
Tuple of (subquery_text, alias) or None if not found
32+
"""
33+
# Find FROM ( pattern
34+
from_match = re.search(r"FROM\s*\(\s*", query, re.IGNORECASE)
35+
if not from_match:
36+
return None
37+
38+
start_pos = from_match.end()
39+
paren_count = 1
40+
pos = start_pos
41+
42+
# Balance parentheses to find the matching closing paren
43+
while pos < len(query) and paren_count > 0:
44+
if query[pos] == "(":
45+
paren_count += 1
46+
elif query[pos] == ")":
47+
paren_count -= 1
48+
pos += 1
49+
50+
if paren_count != 0:
51+
return None # Unbalanced parentheses
52+
53+
# Extract subquery text (between opening and closing parens)
54+
subquery_text = query[start_pos : pos - 1].strip()
55+
56+
# Extract alias after the closing paren
57+
rest_of_query = query[pos:].strip()
58+
alias_match = re.match(r"(?:AS\s+)?(\w+)", rest_of_query, re.IGNORECASE)
59+
if alias_match:
60+
alias = alias_match.group(1)
61+
else:
62+
alias = "subquery_result"
63+
64+
return subquery_text, alias
65+
3166
@classmethod
3267
def detect(cls, query: str) -> QueryInfo:
3368
"""
@@ -41,14 +76,10 @@ def detect(cls, query: str) -> QueryInfo:
4176
"""
4277
query = query.strip()
4378

44-
# Check for wrapped subquery pattern (most common Superset case)
45-
match = cls.WRAPPED_SUBQUERY_PATTERN.search(query)
46-
if match:
47-
subquery_text = match.group(1)
48-
subquery_alias = match.group(2)
49-
50-
if subquery_alias is None or subquery_alias == "":
51-
subquery_alias = "subquery_result"
79+
# Check for wrapped subquery pattern using balanced parentheses
80+
result = cls._extract_balanced_subquery(query)
81+
if result:
82+
subquery_text, subquery_alias = result
5283

5384
return QueryInfo(
5485
has_subquery=True,
@@ -91,48 +122,45 @@ def extract_outer_query(cls, query: str) -> Optional[Tuple[str, str]]:
91122
if not info.is_wrapped:
92123
return None
93124

94-
# Pattern to capture: SELECT <columns> FROM ( <subquery> ) AS <alias> <rest>
95-
# Matches both SELECT col1, col2 and SELECT col1 AS alias1, col2 AS alias2 formats
96-
pattern = re.compile(
97-
r"(SELECT\s+.+?)\s+FROM\s*\(\s*(?:select|SELECT)\s+.+?\s*\)\s+(?:AS\s+)?(\w+)(.*)",
98-
re.IGNORECASE | re.DOTALL,
99-
)
100-
101-
match = pattern.search(query)
102-
if match:
103-
select_clause = match.group(1).strip()
104-
table_alias = match.group(2)
105-
rest_of_query = match.group(3).strip()
106-
107-
if rest_of_query:
108-
outer = f"{select_clause} FROM {table_alias} {rest_of_query}"
109-
else:
110-
outer = f"{select_clause} FROM {table_alias}"
111-
112-
return outer, table_alias
113-
114-
# If pattern doesn't match exactly, fall back to preserving SELECT clause
115-
# Extract from SELECT to FROM keyword
116-
select_match = re.search(r"(SELECT\s+.+?)\s+FROM", query, re.IGNORECASE | re.DOTALL)
117-
if not select_match:
125+
# Use balanced parenthesis extraction to find subquery boundaries
126+
result = cls._extract_balanced_subquery(query)
127+
if not result:
118128
return None
119129

120-
select_clause = select_match.group(1).strip()
121-
122-
# Extract table alias and rest of query after the closing paren
123-
rest_match = re.search(r"\)\s+(?:AS\s+)?(\w+)(.*)", query, re.IGNORECASE | re.DOTALL)
124-
if rest_match:
125-
table_alias = rest_match.group(1)
126-
rest_of_query = rest_match.group(2).strip()
130+
_, table_alias = result
127131

128-
if rest_of_query:
129-
outer = f"{select_clause} FROM {table_alias} {rest_of_query}"
130-
else:
131-
outer = f"{select_clause} FROM {table_alias}"
132-
133-
return outer, table_alias
132+
# Find the FROM ( pattern to locate where subquery starts
133+
from_match = re.search(r"FROM\s*\(", query, re.IGNORECASE)
134+
if not from_match:
135+
return None
134136

135-
return None
137+
# Extract SELECT clause (everything before the "FROM (" match)
138+
select_clause = query[: from_match.start()].strip()
139+
140+
# Find where the subquery ends (matching closing paren)
141+
start_pos = from_match.end()
142+
paren_count = 1
143+
pos = start_pos
144+
145+
while pos < len(query) and paren_count > 0:
146+
if query[pos] == "(":
147+
paren_count += 1
148+
elif query[pos] == ")":
149+
paren_count -= 1
150+
pos += 1
151+
152+
# Extract rest of query after the closing paren and alias
153+
rest_of_query = query[pos:].strip()
154+
# Remove the AS alias part if present
155+
rest_of_query = re.sub(r"^(?:AS\s+)?\w+\s*", "", rest_of_query, flags=re.IGNORECASE).strip()
156+
157+
# Construct outer query with table alias replacing subquery
158+
if rest_of_query:
159+
outer = f"{select_clause} FROM {table_alias} {rest_of_query}"
160+
else:
161+
outer = f"{select_clause} FROM {table_alias}"
162+
163+
return outer, table_alias
136164

137165
@classmethod
138166
def is_simple_select(cls, query: str) -> bool:

0 commit comments

Comments
 (0)