-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvectorizer.py
More file actions
38 lines (34 loc) · 838 Bytes
/
vectorizer.py
File metadata and controls
38 lines (34 loc) · 838 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
## Vectorize the test set
def vectorize_statement2(statement, dictionary):
    """Return a binary presence vector of *statement* over *dictionary*.

    Args:
        statement: Iterable of words; every element is looked up.
        dictionary: Sequence of words sorted in ascending order (the lookup
            is a binary search, so unsorted input gives unreliable results).

    Returns:
        A list of ``len(dictionary)`` ints where position ``i`` is 1 if
        ``dictionary[i]`` occurs in ``statement`` and 0 otherwise. Words
        that are not in the dictionary are ignored. An empty dictionary
        yields an empty list.
    """
    # Function-scope import: the file has no top-level import block.
    from bisect import bisect_right

    res = [0] * len(dictionary)
    for word in statement:
        # bisect_right returns the exclusive upper bound of the entries
        # <= word, so the only candidate match sits just before it.
        # Integer arithmetic throughout (the former `(lo + hi)/2` midpoint
        # produced a float index on Python 3).
        upper = bisect_right(dictionary, word)
        if upper and dictionary[upper - 1] == word:
            res[upper - 1] = 1
    return res
def vectorize_statement(statement, dictionary):
    """Vectorize a statement while keeping its first two entries as-is.

    The first two elements of ``statement`` are copied through unchanged
    (presumably identifier/label fields -- confirm against the caller);
    only ``statement[2:]`` is treated as words to look up.

    Args:
        statement: List whose elements from index 2 onward are words. It is
            expected to be a list so that its two-element prefix can be
            concatenated with the resulting vector.
        dictionary: Sequence of words sorted in ascending order (the lookup
            is a binary search, so unsorted input gives unreliable results).

    Returns:
        ``statement[0:2]`` followed by ``len(dictionary)`` ints, where the
        int for ``dictionary[i]`` is 1 if that word occurs in
        ``statement[2:]`` and 0 otherwise. With an empty dictionary only the
        two-element prefix is returned.
    """
    # Function-scope import: the file has no top-level import block.
    from bisect import bisect_right

    res = [0] * len(dictionary)
    for word in statement[2:]:
        # bisect_right returns the exclusive upper bound of the entries
        # <= word, so the only candidate match sits just before it.
        # Integer arithmetic throughout (the former `(lo + hi)/2` midpoint
        # produced a float index on Python 3).
        upper = bisect_right(dictionary, word)
        if upper and dictionary[upper - 1] == word:
            res[upper - 1] = 1
    return statement[0:2] + res
def vectorize_statements(statements_stemmed, dictionary):
    """Vectorize every statement with ``vectorize_statement``.

    Prints a progress message, then applies ``vectorize_statement`` to each
    element of ``statements_stemmed`` in order.

    Args:
        statements_stemmed: Iterable of statements, each in the form
            accepted by ``vectorize_statement``.
        dictionary: Sorted word sequence passed through to
            ``vectorize_statement`` for every statement.

    Returns:
        A list with one vectorized statement per input statement. The list
        is built eagerly so the result can be indexed, measured with
        ``len`` and iterated more than once on both Python 2 and Python 3
        (a bare ``map`` is a one-shot lazy iterator on Python 3).
    """
    print("Vectorizing statements ...")
    return [
        vectorize_statement(statement, dictionary)
        for statement in statements_stemmed
    ]