-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmatrix_creation.py
More file actions
153 lines (129 loc) · 6.16 KB
/
matrix_creation.py
File metadata and controls
153 lines (129 loc) · 6.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python3
import csv
from collections import deque
import sys
import argparse
import numpy as np
def get_connected_equations(word_equations_dict):
"""
Uses a bfs method to find the connected variables in the dictionary.
:param word_equations_dict: A dictionary mapping each word to a dictionary mapping variables to their factors.
:return:
"""
temp_dict = dict(word_equations_dict)
connected_word_equations_dict = {}
queue = deque(["high_prop"])
while queue:
top = queue.popleft()
try:
connected_word_equations_dict.update({top: temp_dict[top]})
except KeyError:
sys.exit("high_prop is not in equations csv")
connected_words = list(temp_dict[top].keys())
temp_dict.pop(top)
for word in connected_words:
if word in temp_dict and word not in queue:
queue.append(word)
return connected_word_equations_dict
def create_dict_from_equations_file(equations_csv_path, include_deduced):
"""
Creates a dictionary mapping each word to a dictionary mapping variables to their factors.
:param equations_csv_path: A string with the path to the csv containing the equations
:param include_deduced: If true, includes all words. Else, only includes words connected to the variable high_prop.
:return: A dictionary mapping each word to a dictionary mapping variables to their factors.
"""
word_equation_dict = {} # map from word to variable to an int representing the factor
with open(equations_csv_path, 'r') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
word = row["Word"]
variable = row["Variable"]
factor = row["Factor"]
deduced = row["Deduced"]
if deduced == "Yes" and not include_deduced:
continue
if word in word_equation_dict:
word_equation_dict[word][variable] = float(factor)
else:
word_equation_dict[word] = {variable: float(factor)}
return word_equation_dict
def build_matrix(equations_csv_path, variables, connected_equations_dict):
"""
Creates a m x n matrix, where m is the number of equations in the equations csv file and n is the number of words.
:param equations_csv_path: A string with the path to the csv containing the equations.
:param variables: A list of the words that correspond to the matrix columns.
:param connected_equations_dict: A dictionary mapping each word to a dictionary mapping variables to their factors.
All the entries in this parameter are interconnected.
:return: A m x n matrix.
"""
size = len(variables)
matrix = [[0] * size for i in range(size)]
with open(equations_csv_path, 'r') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
word = row["Word"]
variable = row["Variable"]
factor = float(row["Factor"])
deduced = row["Deduced"]
if deduced != "Yes" and word in connected_equations_dict:
matrix[variables.index(word)][variables.index(word)] += 1
matrix[variables.index(word)][variables.index(variable)] += -1.0 * factor
matrix[variables.index("high_prop")][variables.index("high_prop")] = 1
return matrix
def order_adjectives(property_name, equations_csv_path, results_path, include_all):
"""
Orders the adjectives using least squares linear regression.
:param equations_csv_path: A string with the path to the csv containing the equations.
:param include_all: If true, includes all words. Else, only includes words connected to the variable high_prop.
:return: A list of (adj, score) tuples in order of ascending score.
"""
all_word_equations_dict = create_dict_from_equations_file(equations_csv_path, True)
connected_word_equations_dict = get_connected_equations(all_word_equations_dict)
if include_all:
variables = sorted(all_word_equations_dict.keys())
equations_dict = all_word_equations_dict
else:
variables = sorted(connected_word_equations_dict.keys())
equations_dict = connected_word_equations_dict
A = build_matrix(equations_csv_path, variables, equations_dict)
num_rows = len(A)
b = [0] * num_rows
b[variables.index("high_prop")] = 10
b = np.array(b)
# find the least squares
x = np.round(np.linalg.lstsq(A, b)[0], 2)
word_score_tuples = list(zip(variables, x))
# sort the attributes
sorted_word_score_tuples = sorted(word_score_tuples, key=lambda tup: tup[1])
with open(results_path, 'w') as csvfile:
A_indices = ['A' + str(i) for i in range(num_rows)]
fieldnames = A_indices + ['x', 'b', 'results']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
variables_header = dict(zip(A_indices, variables))
writer.writerow(variables_header)
for i in range(num_rows):
map = dict(zip(A_indices, A[i]))
map.update({'x': variables[i], 'b': b[i],
'results': (sorted_word_score_tuples[i][0], "%.2f" % sorted_word_score_tuples[i][1])})
print(map['results'][0], i + 1, map['results'][1], sep=",")
writer.writerow(map)
return sorted_word_score_tuples
if __name__ == '__main__':
# example:
# > python3 matrix_creation.py temperature
# creates the file temperature_results.csv or overwrites existing file
parser = argparse.ArgumentParser()
parser.add_argument("input_term", help='A string containing an attribute i.e. "temperature"')
parser.add_argument("equations_path", help="""
Input path to the equations file.
Expected csv header: Word,Variable,Factor,Definition,Deduced
Output from `equation_creation.py`
""")
parser.add_argument("--output", help="Output path for the equations csv file. Defaults to `input_term`_results.csv", type=str)
args = parser.parse_args()
if args.output is None:
output = args.input_term + "_results.csv"
else:
output = args.output
ordered_adjectives = order_adjectives(args.input_term, args.equations_path, output, False)