@@ -19,15 +19,50 @@ class QueryInfo:
1919class SubqueryDetector :
2020 """Detects and analyzes SQL subqueries in query strings"""
2121
22- # Pattern to detect wrapped subqueries: SELECT ... FROM (SELECT ...) AS alias
23- WRAPPED_SUBQUERY_PATTERN = re .compile (
24- r"SELECT\s+.*?\s+FROM\s*\(\s*(SELECT\s+.*?)\s*\)\s+(?:AS\s+)?(\w+)" ,
25- re .IGNORECASE | re .DOTALL ,
26- )
27-
2822 # Pattern to detect simple SELECT start
2923 SELECT_PATTERN = re .compile (r"^\s*SELECT\s+" , re .IGNORECASE )
3024
@classmethod
def _extract_balanced_subquery(cls, query: str) -> Optional[Tuple[str, str]]:
    """
    Extract the first parenthesised subquery that follows a FROM keyword.

    The query is searched (case-insensitively) for the first ``FROM (``.
    From there the text is scanned character by character to find the
    matching closing parenthesis. Parentheses that appear inside quoted
    regions ('string literals', "quoted identifiers" or `backtick
    identifiers`) are ignored, so a literal such as ')' does not end the
    subquery early. A doubled quote inside a quoted region ('it''s') is
    handled naturally: the region closes and immediately reopens.
    Backslash escapes are not interpreted.

    Args:
        query: SQL text to inspect.

    Returns:
        Tuple of (subquery_text, alias), or None when there is no
        ``FROM (``, the parentheses are unbalanced, or a quoted region is
        never closed. ``subquery_text`` is the stripped text between the
        parentheses. ``alias`` is the first word after the closing
        parenthesis (an optional leading ``AS`` is skipped), or
        "subquery_result" when no word follows. Note that the word is
        taken as-is, so an SQL keyword directly after the parenthesis is
        reported as the alias.
    """
    # Find FROM ( pattern; trailing whitespace after "(" is consumed so the
    # scan starts at the first significant character of the subquery.
    from_match = re.search(r"FROM\s*\(\s*", query, re.IGNORECASE)
    if not from_match:
        return None

    start_pos = from_match.end()
    depth = 1  # the "(" matched above is already open
    open_quote = None  # quote character of the region being skipped, if any
    close_pos = -1

    # Balance parentheses to find the matching closing paren, ignoring
    # anything that sits inside a quoted region.
    for pos in range(start_pos, len(query)):
        char = query[pos]
        if open_quote is not None:
            if char == open_quote:
                open_quote = None
            continue
        if char in "'\"`":
            open_quote = char
        elif char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                close_pos = pos
                break

    if close_pos < 0:
        return None  # Unbalanced parentheses or unterminated quote

    # Extract subquery text (between opening and closing parens)
    subquery_text = query[start_pos:close_pos].strip()

    # Extract alias after the closing paren
    rest_of_query = query[close_pos + 1:].strip()
    alias_match = re.match(r"(?:AS\s+)?(\w+)", rest_of_query, re.IGNORECASE)
    alias = alias_match.group(1) if alias_match else "subquery_result"

    return subquery_text, alias
65+
3166 @classmethod
3267 def detect (cls , query : str ) -> QueryInfo :
3368 """
@@ -41,14 +76,10 @@ def detect(cls, query: str) -> QueryInfo:
4176 """
4277 query = query .strip ()
4378
44- # Check for wrapped subquery pattern (most common Superset case)
45- match = cls .WRAPPED_SUBQUERY_PATTERN .search (query )
46- if match :
47- subquery_text = match .group (1 )
48- subquery_alias = match .group (2 )
49-
50- if subquery_alias is None or subquery_alias == "" :
51- subquery_alias = "subquery_result"
79+ # Check for wrapped subquery pattern using balanced parentheses
80+ result = cls ._extract_balanced_subquery (query )
81+ if result :
82+ subquery_text , subquery_alias = result
5283
5384 return QueryInfo (
5485 has_subquery = True ,
@@ -91,48 +122,45 @@ def extract_outer_query(cls, query: str) -> Optional[Tuple[str, str]]:
91122 if not info .is_wrapped :
92123 return None
93124
94- # Pattern to capture: SELECT <columns> FROM ( <subquery> ) AS <alias> <rest>
95- # Matches both SELECT col1, col2 and SELECT col1 AS alias1, col2 AS alias2 formats
96- pattern = re .compile (
97- r"(SELECT\s+.+?)\s+FROM\s*\(\s*(?:select|SELECT)\s+.+?\s*\)\s+(?:AS\s+)?(\w+)(.*)" ,
98- re .IGNORECASE | re .DOTALL ,
99- )
100-
101- match = pattern .search (query )
102- if match :
103- select_clause = match .group (1 ).strip ()
104- table_alias = match .group (2 )
105- rest_of_query = match .group (3 ).strip ()
106-
107- if rest_of_query :
108- outer = f"{ select_clause } FROM { table_alias } { rest_of_query } "
109- else :
110- outer = f"{ select_clause } FROM { table_alias } "
111-
112- return outer , table_alias
113-
114- # If pattern doesn't match exactly, fall back to preserving SELECT clause
115- # Extract from SELECT to FROM keyword
116- select_match = re .search (r"(SELECT\s+.+?)\s+FROM" , query , re .IGNORECASE | re .DOTALL )
117- if not select_match :
125+ # Use balanced parenthesis extraction to find subquery boundaries
126+ result = cls ._extract_balanced_subquery (query )
127+ if not result :
118128 return None
119129
120- select_clause = select_match .group (1 ).strip ()
121-
122- # Extract table alias and rest of query after the closing paren
123- rest_match = re .search (r"\)\s+(?:AS\s+)?(\w+)(.*)" , query , re .IGNORECASE | re .DOTALL )
124- if rest_match :
125- table_alias = rest_match .group (1 )
126- rest_of_query = rest_match .group (2 ).strip ()
130+ _ , table_alias = result
127131
128- if rest_of_query :
129- outer = f"{ select_clause } FROM { table_alias } { rest_of_query } "
130- else :
131- outer = f"{ select_clause } FROM { table_alias } "
132-
133- return outer , table_alias
132+ # Find the FROM ( pattern to locate where subquery starts
133+ from_match = re .search (r"FROM\s*\(" , query , re .IGNORECASE )
134+ if not from_match :
135+ return None
134136
135- return None
137+ # Extract SELECT clause (everything before FROM ()
138+ select_clause = query [: from_match .start ()].strip ()
139+
140+ # Find where the subquery ends (matching closing paren)
141+ start_pos = from_match .end ()
142+ paren_count = 1
143+ pos = start_pos
144+
145+ while pos < len (query ) and paren_count > 0 :
146+ if query [pos ] == "(" :
147+ paren_count += 1
148+ elif query [pos ] == ")" :
149+ paren_count -= 1
150+ pos += 1
151+
152+ # Extract rest of query after the closing paren and alias
153+ rest_of_query = query [pos :].strip ()
154+ # Remove the AS alias part if present
155+ rest_of_query = re .sub (r"^(?:AS\s+)?\w+\s*" , "" , rest_of_query , flags = re .IGNORECASE ).strip ()
156+
157+ # Construct outer query with table alias replacing subquery
158+ if rest_of_query :
159+ outer = f"{ select_clause } FROM { table_alias } { rest_of_query } "
160+ else :
161+ outer = f"{ select_clause } FROM { table_alias } "
162+
163+ return outer , table_alias
136164
137165 @classmethod
138166 def is_simple_select (cls , query : str ) -> bool :
0 commit comments