-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPDColumn.py
More file actions
315 lines (248 loc) · 8.66 KB
/
PDColumn.py
File metadata and controls
315 lines (248 loc) · 8.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import copy
class PDColumn(object):
    """
    Node of a column expression tree.

    A PDColumn is either a leaf (a named column, optionally attached to a
    table) or the result of combining operands with a binary operator, in
    which case the operands are stored in ``_children``. Aggregate, unary and
    null-check operations never modify the column they are called on: each
    returns a new PDColumn carrying the extra operation.

    Instance attributes set by ``__init__``:
        name       -- column name; '*' marks the star column.
        table      -- optional base table object (``__str__`` reads
                      ``table.name``).
        agg        -- name of the aggregate applied, or None.
        unary_op   -- name of the most recently applied unary op, or None.
        _binary_op -- name of the binary op joining ``_children``, or None.
        null       -- True for is_null(), False for not_null(), None if unset.
        _count     -- True when name is '*'; also set to True by count().
        _children  -- operands of a binary op (PDColumns or raw values).
        ops        -- aggregate/unary op names in the order they were applied.
    """

    # General list available to all PDColumn instances
    aggregate_list = (
        '_sum', '_avg', '_count', '_first', '_last', '_max', '_min'
    )

    # Unary mathematical operators.
    #
    # These will be available both as chained methods, e.g.,
    # col.abs() and as built in functions, e.g., abs(col)
    unary_list = (
        '_abs', '_ceil', '_floor', '_round', '_not'
    )

    # Binary operators.
    #
    # These will be available using raw python syntax when possible, e.g.,
    # col1 + col2, with the exception of methods like concat which have no
    # raw equivalent.
    binary_list = (
        '_add', '_sub', '_mul', '_div', '_mod', '_concat', '_or', '_and',
        '_eq', '_ne', '_lt', '_gt', '_le', '_ge', '_in', '_between', '_like'
    )

    def __init__(self, name='Column', table=None):
        """
        Initializes column to be empty. Optional table argument allows
        creator to specify the base table if necessary.
        """
        self.agg = None
        self.unary_op = None
        self._binary_op = None
        self.null = None
        # The star column starts with its count flag already raised.
        self._count = (name == '*')
        self.table = table
        self.name = name
        self._children = []
        self.ops = []

    def _clone(self):
        """
        Returns a copy of this column that owns its own ``ops`` and
        ``_children`` lists.

        copy.copy() alone is shallow, so the copy would share those lists
        with the original and appending to one would silently change the
        other.
        """
        new_col = copy.copy(self)
        new_col.ops = list(self.ops)
        new_col._children = list(self._children)
        return new_col

    ################################################################
    # Evaluation methods
    ################################################################

    def __str__(self):
        """
        Returns string representation of this column (in terms of table+column
        for users).
        """
        s = self.name
        if self.table:
            s += '(' + str(self.table.name) + ')'
        if self.agg:
            s += ' agg:' + self.agg
        if self.unary_op:
            s += ' unary:' + self.unary_op
        if self._binary_op:
            s += ' binary:' + self._binary_op
        return s

    def _repr_helper(self, level=0):
        """
        Returns an indented, one-node-per-line dump of the expression tree
        rooted at this column. ``level`` is the number of tabs to indent
        this node by; children are indented one level deeper.
        """
        lines = ["\t" * level + str(self) + "\n"]
        for child in self._children:
            if isinstance(child, PDColumn):
                lines.append(child._repr_helper(level=level + 1))
            else:
                # Raw (non-column) operands get their own line too, so the
                # next sibling does not run onto the same line.
                lines.append("\t" * (level + 1) + repr(child) + "\n")
        return "".join(lines)

    def __repr__(self):
        """
        Returns the same one-line text as __str__. Use _repr_helper() for an
        indented dump of the whole expression tree.
        """
        return str(self)

    def __unicode__(self):
        """
        Same as __str__.
        """
        return str(self)

    ################################################################
    # Aggregation Methods
    #
    # Invariant: Only one can be active at a time.
    #            Can only be used in SELECT and HAVING.
    ################################################################

    def has_aggregate(self):
        """
        Returns true if column has an aggregate function set. False otherwise.
        """
        return bool(self.agg)

    def _set_aggregate(self, agg):
        """
        Helper function to set aggregate function, validating and doing any
        bookkeeping necessary.

        Returns a new column with the aggregate applied; this column is left
        untouched. Raises Exception if an aggregate is already set or if
        ``agg`` is not in ``aggregate_list``.
        """
        if self.has_aggregate():
            # Adjacent literals instead of a backslash continuation inside
            # the string, which would embed the next line's indentation in
            # the message.
            raise Exception('Attempting to assign multiple aggregate '
                            'functions to same column')
        if agg not in PDColumn.aggregate_list:
            raise Exception('Attempting to assign invalid aggregate function')
        new_col = self._clone()
        new_col.agg = agg
        setattr(new_col, agg, True)
        new_col.ops.append(agg)
        return new_col

    def sum(self):
        return self._set_aggregate('_sum')

    def avg(self):
        return self._set_aggregate('_avg')

    def count(self):
        return self._set_aggregate('_count')

    def first(self):
        return self._set_aggregate('_first')

    def last(self):
        return self._set_aggregate('_last')

    def max(self):
        return self._set_aggregate('_max')

    def min(self):
        return self._set_aggregate('_min')

    ################################################################
    # Unary Math Methods
    ################################################################

    def has_unary(self):
        """
        Returns true if column has a unary op set. False otherwise.
        """
        return bool(self.unary_op)

    def _set_unary(self, op):
        """
        Helper function to set unary math ops, validating and doing any
        bookkeeping necessary.

        Returns a new column with the op applied; this column is left
        untouched. Unary ops may be chained: ``unary_op`` holds the latest
        one and ``ops`` keeps the full sequence. Raises Exception if ``op``
        is not in ``unary_list``.
        """
        if op not in PDColumn.unary_list:
            raise Exception('Attempting to assign invalid unary function')
        new_col = self._clone()
        new_col.unary_op = op
        setattr(new_col, op, True)
        new_col.ops.append(op)
        return new_col

    def __abs__(self):
        return self._set_unary('_abs')

    def abs(self):
        return self._set_unary('_abs')

    def __ceil__(self):
        return self._set_unary('_ceil')

    def ceil(self):
        return self._set_unary('_ceil')

    def __floor__(self):
        return self._set_unary('_floor')

    def floor(self):
        return self._set_unary('_floor')

    def __round__(self):
        return self._set_unary('_round')

    def round(self):
        return self._set_unary('_round')

    def __invert__(self):
        return self._set_unary('_not')

    ################################################################
    # Binary Math Methods
    #
    # Returns a new PDColumn object that represents the combined
    # operation. For instance, col1 + col2 will return a new
    # PDColumn instance that points to copies of col1 and col2 as
    # its two children.
    #
    # Invariant: Only one binary op exists, exactly 2 children
    ################################################################

    def has_binary(self):
        """
        Returns true if column has a binary op set. False otherwise.
        """
        return bool(self._binary_op)

    def _set_binary(self, op, other):
        """
        Helper function to set binary ops, validating and doing any
        bookkeeping necessary.

        Returns a new, default-named PDColumn whose two children are copies
        of ``self`` and ``other`` (``other`` may be a PDColumn or any raw
        value). Raises Exception if ``op`` is not in ``binary_list``.
        """
        if op not in PDColumn.binary_list:
            raise Exception('Attempting to assign invalid binary function')
        new_col = PDColumn()
        new_col._binary_op = op
        setattr(new_col, op, True)
        left = self._clone()
        if isinstance(other, PDColumn):
            right = other._clone()
        else:
            right = copy.copy(other)
        new_col._children.append(left)
        new_col._children.append(right)
        return new_col

    def __add__(self, other):
        return self._set_binary('_add', other)

    def __sub__(self, other):
        return self._set_binary('_sub', other)

    def __mul__(self, other):
        return self._set_binary('_mul', other)

    def __div__(self, other):
        return self._set_binary('_div', other)

    # Python 3 dispatches the "/" operator to __truediv__, not __div__.
    __truediv__ = __div__

    def __mod__(self, other):
        return self._set_binary('_mod', other)

    def __and__(self, other):
        return self._set_binary('_and', other)

    def __or__(self, other):
        return self._set_binary('_or', other)

    def concat(self, other):
        return self._set_binary('_concat', other)

    def __eq__(self, other):
        return self._set_binary('_eq', other)

    def __ne__(self, other):
        return self._set_binary('_ne', other)

    # Defining __eq__ makes Python 3 set __hash__ to None. __eq__ here builds
    # an expression rather than testing equality, so keep identity hashing
    # and let columns remain usable as dict keys and set members.
    __hash__ = object.__hash__

    def __lt__(self, other):
        return self._set_binary('_lt', other)

    def __gt__(self, other):
        return self._set_binary('_gt', other)

    def __le__(self, other):
        return self._set_binary('_le', other)

    def __ge__(self, other):
        return self._set_binary('_ge', other)

    # in_ is used because 'in' is a keyword in python. You can override
    # the __contains__ operator, but it always casts results to bools.
    def in_(self, other):
        return self._set_binary('_in', other)

    def between(self, other):
        return self._set_binary('_between', other)

    def like(self, other):
        return self._set_binary('_like', other)

    ################################################################
    # Null Checking Methods
    #
    # Invariant: Only one null checking method can be called
    ################################################################

    def has_null(self):
        """
        Returns true if column has null set. False otherwise.
        """
        return self.null is not None

    def _set_is_null(self, null):
        """
        Helper function to set is null, validating and doing any
        bookkeeping necessary.

        Returns a new column with the null check recorded; this column is
        left untouched. Raises Exception if a null check is already set.
        """
        if self.has_null():
            raise Exception('Already checking column for null values')
        new_col = self._clone()
        new_col.null = null
        return new_col

    def is_null(self):
        return self._set_is_null(True)

    def not_null(self):
        return self._set_is_null(False)