Skip to content

Commit f97287a

Browse files
committed
fix: cleanup sql query handing, import serialized expression code
1 parent 524aab5 commit f97287a

6 files changed

Lines changed: 428 additions & 188 deletions

File tree

pyproject.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ dependencies = [
1515
"sentry-sdk>=2.16.0",
1616
"zstandard>=0.23.0",
1717
"fuzzywuzzy>=0.18.0",
18-
"duckdb-query-tools @ git+https://github.com/Query-farm/duckdb-query-tools.git@master",
19-
"sqlglot>=25.24.5",
20-
"duckdb>=1.1.1",
21-
"immutables>=0.21",
2218
"python-levenshtein>=0.26.0",
2319
"mypy-boto3-s3>=1.35.42",
2420
"msgpack>=1.1.0",

requirements-dev.lock

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@ coverage==7.8.0
3434
# via pytest-cov
3535
cryptography==44.0.3
3636
# via moto
37-
duckdb==1.2.2
38-
# via duckdb-query-tools
39-
# via query-farm-server-base
40-
duckdb-query-tools @ git+https://github.com/Query-farm/duckdb-query-tools.git@5e27018771eefce8e4690e2f758ff84bb18f7823
41-
# via query-farm-server-base
4237
execnet==2.1.1
4338
# via pytest-xdist
4439
filelock==3.18.0
@@ -47,8 +42,6 @@ fuzzywuzzy==0.18.0
4742
# via query-farm-server-base
4843
idna==3.10
4944
# via requests
50-
immutables==0.21
51-
# via query-farm-server-base
5245
iniconfig==2.1.0
5346
# via pytest
5447
jinja2==3.1.6
@@ -118,9 +111,6 @@ sentry-sdk==2.27.0
118111
# via query-farm-server-base
119112
six==1.17.0
120113
# via python-dateutil
121-
sqlglot==26.16.4
122-
# via duckdb-query-tools
123-
# via query-farm-server-base
124114
structlog==25.3.0
125115
# via query-farm-server-base
126116
tblib==3.1.0

requirements.lock

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,8 @@ certifi==2025.4.26
2323
# via sentry-sdk
2424
click==8.1.8
2525
# via query-farm-server-base
26-
duckdb==1.2.2
27-
# via duckdb-query-tools
28-
# via query-farm-server-base
29-
duckdb-query-tools @ git+https://github.com/Query-farm/duckdb-query-tools.git@5e27018771eefce8e4690e2f758ff84bb18f7823
30-
# via query-farm-server-base
3126
fuzzywuzzy==0.18.0
3227
# via query-farm-server-base
33-
immutables==0.21
34-
# via query-farm-server-base
3528
jmespath==1.0.1
3629
# via boto3
3730
# via botocore
@@ -63,9 +56,6 @@ sentry-sdk==2.27.0
6356
# via query-farm-server-base
6457
six==1.17.0
6558
# via python-dateutil
66-
sqlglot==26.16.4
67-
# via duckdb-query-tools
68-
# via query-farm-server-base
6959
structlog==25.3.0
7060
# via query-farm-server-base
7161
typing-extensions==4.13.2
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
from typing import Any
2+
3+
4+
def _quote_string(value: str) -> str:
5+
return f"'{value}'"
6+
7+
8+
comparison_type_to_operator: dict[str, str] = {
9+
"COMPARE_EQUAL": "=",
10+
"COMPARE_NOTEQUAL": "!=",
11+
"COMPARE_LESSTHAN": "<",
12+
"COMPARE_GREATERTHAN": ">",
13+
"COMPARE_LESSTHANOREQUALTO": "<=",
14+
"COMPARE_GREATERTHANOREQUALTO": ">=",
15+
"COMPARE_DISTINCT_FROM": "IS DISTINCT FROM",
16+
"COMPARE_NOT_DISTINCT_FROM": "IS NOT DISTINCT FROM",
17+
}
18+
19+
20+
def comparison_type_to_string_(comparison_type: str) -> str:
21+
result = comparison_type_to_operator.get(comparison_type)
22+
if result is not None:
23+
return result
24+
raise NotImplementedError(f"Comparison type {comparison_type} is not supported")
25+
26+
27+
simple_types = {
28+
"BIGINT",
29+
"BIT",
30+
"BLOB",
31+
"BOOLEAN",
32+
"DATE",
33+
"DOUBLE",
34+
"FLOAT",
35+
"HUGEINT",
36+
"INTEGER",
37+
"INTERVAL",
38+
"NULL",
39+
"SMALLINT",
40+
"TIME",
41+
"TIME WITH TIME ZONE",
42+
"TIMESTAMP_MS",
43+
"TIMESTAMP_NS",
44+
"TIMESTAMP_S",
45+
"TINYINT",
46+
"UBIGINT",
47+
"UHUGEINT",
48+
"UINTEGER",
49+
"USMALLINT",
50+
"UTINYINT",
51+
"UUID",
52+
"VARCHAR",
53+
"VARINT",
54+
}
55+
56+
57+
def _type_to_sql_type(type: dict[str, Any]) -> str:
58+
"""
59+
Convert the serialied type information into SQL that can be
60+
used to recreate the type.
61+
"""
62+
if type["id"] == "STRUCT":
63+
return (
64+
"STRUCT("
65+
+ ",".join(
66+
[
67+
f'"{child["first"]}" {_type_to_sql_type(child["second"])}'
68+
for child in type["type_info"]["child_types"]
69+
]
70+
)
71+
+ ")"
72+
)
73+
elif type["id"] in simple_types:
74+
return type["id"]
75+
elif type["id"] == "DECIMAL":
76+
return f"DECIMAL({type['type_info']['width']}, {type['type_info']['scale']})"
77+
elif type["id"] == "LIST":
78+
return _type_to_sql_type(type["type_info"]["child_type"]) + "[]"
79+
else:
80+
raise NotImplementedError(f"Type {type['id']} is not supported")
81+
82+
83+
def expression_to_string(
84+
*, expression: dict[str, Any], bound_column_names: list[str], bound_column_types: dict[str, Any]
85+
) -> str:
86+
"""
87+
Convert a DuckDB serialized expression back into SQL, with the types of the
88+
columns tracked.
89+
"""
90+
91+
def e_to_s(expr: dict[str, Any]) -> str:
92+
return expression_to_string(
93+
expression=expr,
94+
bound_column_names=bound_column_names,
95+
bound_column_types=bound_column_types,
96+
)
97+
98+
if expression["expression_class"] == "BOUND_COLUMN_REF":
99+
column_name = bound_column_names[expression["binding"]["column_index"]]
100+
bound_column_types[column_name] = expression["return_type"]
101+
return f'"{column_name}"'
102+
elif expression["expression_class"] == "BOUND_CAST":
103+
return (
104+
f"CAST({e_to_s(expression['child'])} AS {_type_to_sql_type(expression['return_type'])})"
105+
)
106+
elif expression["expression_class"] == "BOUND_CONSTANT":
107+
if expression["value"]["is_null"]:
108+
return "null"
109+
if expression["value"]["type"]["id"] in "VARCHAR":
110+
return _quote_string(expression["value"]["value"])
111+
elif expression["value"]["type"]["id"] == "BOOLEAN":
112+
return "True" if expression["value"]["value"] else "False"
113+
elif expression["value"]["type"]["id"] == "NULL":
114+
return "null"
115+
elif expression["value"]["type"]["id"] in (
116+
"DATE",
117+
"DECIMAL",
118+
"BIGINT",
119+
"INTEGER",
120+
"FLOAT",
121+
"DOUBLE",
122+
):
123+
return str(expression["value"]["value"])
124+
elif expression["value"]["type"]["id"] == "TIMESTAMP_S":
125+
return f"make_timestamp({expression['value']['value']}::bigint*1000000)"
126+
elif expression["value"]["type"]["id"] == "TIMESTAMP WITH TIME ZONE":
127+
return f"make_timestamp({expression['value']['value']}::bigint)"
128+
elif expression["value"]["type"]["id"] == "LIST":
129+
if expression["type"] == "VALUE_CONSTANT":
130+
# So the children in this case aren't expressions, they are constants.
131+
return (
132+
"["
133+
+ ", ".join(
134+
[
135+
e_to_s(
136+
{
137+
"type": "VALUE_CONSTANT",
138+
"expression_class": "BOUND_CONSTANT",
139+
"value": child,
140+
}
141+
)
142+
for child in expression["value"]["value"]["children"]
143+
]
144+
)
145+
+ "]"
146+
)
147+
else:
148+
return (
149+
"["
150+
+ ", ".join(
151+
[e_to_s(child) for child in expression["value"]["value"]["children"]]
152+
)
153+
+ "]"
154+
)
155+
elif expression["value"]["type"]["id"] == "STRUCT":
156+
if expression["type"] == "VALUE_CONSTANT":
157+
names = [
158+
child["first"]
159+
for child in expression["value"]["type"]["type_info"]["child_types"]
160+
]
161+
values = expression["value"]["value"]["children"]
162+
return (
163+
"{"
164+
+ ",".join(
165+
[
166+
f"'{name}':"
167+
+ e_to_s(
168+
{
169+
"type": "VALUE_CONSTANT",
170+
"expression_class": "BOUND_CONSTANT",
171+
"value": value,
172+
}
173+
)
174+
for name, value in zip(names, values, strict=True)
175+
]
176+
)
177+
+ "}"
178+
)
179+
else:
180+
raise NotImplementedError("STRUCTs that aren't value constants are not supported")
181+
else:
182+
raise NotImplementedError(
183+
f"Constant type {expression['value']['type']['id']} is not supported"
184+
)
185+
elif expression["expression_class"] == "BOUND_COMPARISON":
186+
return f"{e_to_s(expression['left'])} {comparison_type_to_string_(expression['type'])} {e_to_s(expression['right'])}"
187+
elif expression["expression_class"] == "BOUND_OPERATOR":
188+
if expression["type"] in ("OPERATOR_IS_NULL", "OPERATOR_IS_NOT_NULL"):
189+
operation = "IS NULL" if expression["type"] == "OPERATOR_IS_NULL" else "IS NOT NULL"
190+
return e_to_s(expression["children"][0]) + " " + operation
191+
elif expression["type"] in ("COMPARE_IN", "COMPARE_NOT_IN"):
192+
first, *rest = expression["children"]
193+
operation = "IN" if expression["type"] == "COMPARE_IN" else "NOT IN"
194+
return f"{e_to_s(first)} {operation} ({', '.join([e_to_s(child) for child in rest])})"
195+
elif expression["type"] == "OPERATOR_NOT":
196+
assert len(expression["children"]) == 1
197+
return f"NOT {e_to_s(expression['children'][0])}"
198+
else:
199+
raise NotImplementedError(f"Operator type {expression['type']} is not supported")
200+
elif expression["expression_class"] == "BOUND_FUNCTION":
201+
if expression["name"] == "struct_pack":
202+
return (
203+
expression["name"]
204+
+ "("
205+
+ ", ".join(
206+
[
207+
f"{child_type['first']} := {e_to_s(child)}"
208+
for child, child_type in zip(
209+
expression["children"],
210+
expression["function_data"]["variable_return_type"]["type_info"][
211+
"child_types"
212+
],
213+
strict=True,
214+
)
215+
]
216+
)
217+
+ ")"
218+
)
219+
else:
220+
return f"{expression['name']}({', '.join([e_to_s(child) for child in expression['children']])})"
221+
elif expression["expression_class"] == "BOUND_CASE":
222+
case_checks = [
223+
f"WHEN {e_to_s(case_check['when_expr'])} THEN {e_to_s(case_check['then_expr'])}"
224+
for case_check in expression["case_checks"]
225+
]
226+
if expression["else_expr"] is not None:
227+
case_checks.append(f"ELSE {e_to_s(expression['else_expr'])}")
228+
229+
return "CASE " + " ".join(case_checks) + " END"
230+
elif expression["expression_class"] == "BOUND_BETWEEN":
231+
return f"{e_to_s(expression['input'])} BETWEEN {e_to_s(expression['lower'])} AND {e_to_s(expression['upper'])}"
232+
elif expression["expression_class"] == "BOUND_CONJUNCTION":
233+
if expression["type"] == "CONJUNCTION_AND":
234+
operator = "AND"
235+
elif expression["type"] == "CONJUNCTION_OR":
236+
operator = "OR"
237+
else:
238+
raise NotImplementedError(f"Conjunction type {expression['type']} is not supported")
239+
240+
return f"({f' {operator} '.join([e_to_s(child) for child in expression['children']])})"
241+
else:
242+
raise NotImplementedError(
243+
f"Expression class {expression['expression_class']} is not supported"
244+
)
245+
246+
247+
def convert_to_sql(
248+
source: list[dict[str, Any]], bound_column_names: list[str]
249+
) -> tuple[str, dict[str, Any]]:
250+
bound_column_types: dict[str, Any] = {}
251+
sql = " AND ".join(
252+
[
253+
expression_to_string(
254+
expression=filter,
255+
bound_column_names=bound_column_names,
256+
bound_column_types=bound_column_types,
257+
)
258+
for filter in source
259+
]
260+
)
261+
return sql, bound_column_types
262+
263+
264+
def convert_type_to_sql(fields_with_type_info: dict[str, Any]) -> dict[str, str]:
265+
return {
266+
field_name: _type_to_sql_type(type_info)
267+
for field_name, type_info in fields_with_type_info.items()
268+
}

0 commit comments

Comments
 (0)