-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathexperiments.py
More file actions
159 lines (136 loc) · 5.55 KB
/
experiments.py
File metadata and controls
159 lines (136 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#Perform experiments
#Select network
#Select methods
#Study performance of all methods on the network
import numpy as np
import data.simulate_networks as sim
import local_prediction.local_moi as moi
import local_prediction.hoc_prediction as hoc
import matrix_completion.matrix_prediction as mf
import cluster.clustering as clust
import analytics.stats as stats
import time
#Run experiments on different algorithms on the same network
def _print_results(header, results):
    """Print one algorithm's evaluation summary followed by a blank line.

    header: line printed before the statistics (e.g. "ALS results:").
    results: 6-tuple (avg_acc, stderr_acc, avg_fpr, stderr_fpr, avg_time,
        stderr_time) as returned by the evaluation pipelines called in
        run_experiment.
    """
    avg_acc, stderr_acc, avg_fpr, stderr_fpr, avg_time, stderr_time = results
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(header)
    print("Accuracy: average %f with standard error %f" % (avg_acc, stderr_acc))
    print("False positive rate: average %f with standard error %f" % (avg_fpr, stderr_fpr))
    print("Model running time: average %f with standard error %f" % (avg_time, stderr_time))
    print("")


def _load_network(data_file_name):
    """Load the adjacency matrix stored as a single object in a .npy file.

    Raises ValueError (with a readable, single-string message) if the file
    cannot be read or unpacked.
    """
    try:
        # The matrix is saved as a pickled object inside a 0-d array, so
        # recent NumPy versions need allow_pickle=True to read it back.
        # NOTE(review): unpickling can execute arbitrary code -- only load
        # data files that come from a trusted source.
        return np.load(data_file_name, allow_pickle=True).item()
    except Exception as e:
        # Format the cause into the message; passing it as a second
        # argument made the error render as a tuple.
        raise ValueError("could not load adj matrix from file: %s" % (e,))


def run_experiment(simulated=False, real=True, use_moi=True, use_hoc=True,
                   use_svp=True, use_sgd_sh=False, use_sgd_sig=False,
                   use_als=True,
                   data_file_name="data/Preprocessed Data/small_network.npy"):
    """Run the selected prediction algorithms on one network and print results.

    The defaults reproduce the original hard-coded configuration, so calling
    run_experiment() with no arguments behaves as before.

    Parameters:
        simulated: build the network with sim.sample_network (takes
            precedence over ``real`` when both are set).
        real: load the network from ``data_file_name``.
        use_moi, use_hoc, use_sgd_sh, use_sgd_sig, use_als, use_svp:
            switches for the individual algorithms; they run in that order.
        data_file_name: path of the .npy file used when ``real`` is set
            (another data set used with this script is
            "data/Preprocessed Data/wiki_elections_csr.npy").

    Raises:
        ValueError: if the data file cannot be loaded, or if neither
            ``simulated`` nor ``real`` is set.
    """
    if simulated:
        cluster_sizes = [100, 200, 300, 400]
        sparsity_level = 0.01175
        noise_prob = 0
        print("creating adjacency matrix...")
        adj_matrix = sim.sample_network(cluster_sizes, sparsity_level, noise_prob)
    elif real:
        adj_matrix = _load_network(data_file_name)
    else:
        # Fail early instead of handing None to every algorithm below.
        raise ValueError("no network selected: set simulated or real to True")

    if use_moi:
        print("performing MOI...")
        max_cycle_order_moi = 10
        # One discount factor per cycle order from 3 up to the maximum.
        # (Alternative setting tried: max_cycle_order_moi = np.inf with a
        # scalar discount of 0.0001.)
        discount = [0.5**i for i in range(3, max_cycle_order_moi + 1)]
        num_folds = 5
        _print_results(
            "MOI results: ",
            moi.kfoldcv_moi(adj_matrix, discount, max_cycle_order_moi, num_folds))

    if use_hoc:
        print("performing HOC...")
        max_cycle_order_hoc = 5
        # NOTE(review): a fold count of 10 used to be assigned here but was
        # never passed to hoc_learning_pipeline, so it has been dropped.
        _print_results(
            "HOC results:",
            hoc.hoc_learning_pipeline(adj_matrix, max_cycle_order_hoc))

    # settings if using SGD
    if use_sgd_sh or use_sgd_sig:
        # Parameters used for this experiment
        # https://www.cs.uic.edu/~liub/KDD-cup-2007/proceedings/Regular-Paterek.pdf
        learning_rate = 1000  # 0.05 for square hinge
        # Floor division keeps the Python 2 integer result on Python 3 too.
        tol = adj_matrix.nnz // 10
        max_iter = 20
        reg_param = 10  # 0.5 for square hinge
        dim = 100
        num_folds_mf = 10
        sgd_variants = (
            (use_sgd_sh, "squarehinge",
             "performing SGD with square-hinge loss...", "SGD_SH results:"),
            (use_sgd_sig, "sigmoid",
             "performing SGD with sigmoid loss...", "SGD_SIG results:"),
        )
        for enabled, loss_type, start_msg, header in sgd_variants:
            if not enabled:
                continue
            # Bundle up these parameters and use this algorithm
            alg_params = (learning_rate, loss_type, tol, max_iter, reg_param, dim)
            print(start_msg)
            _print_results(
                header,
                mf.kfold_CV_pipeline(adj_matrix, "sgd", alg_params, num_folds_mf))

    # settings if using als
    if use_als:
        # Parameters used for this experiment
        max_iter = 2
        dim = 40
        alg_params = (max_iter, dim)
        num_folds_mf = 10
        print("performing ALS...")
        _print_results(
            "ALS results:",
            mf.kfold_CV_pipeline(adj_matrix, "als", alg_params, num_folds_mf))

    # settings if using SVP
    if use_svp:
        # Parameters used for this experiment
        rank = 40
        tol = 100
        max_iter = 5
        step_size = 1
        alg_params = (rank, tol, max_iter, step_size)
        num_folds_mf = 10
        print("performing SVP...")
        _print_results(
            "SVP results:",
            mf.kfold_CV_pipeline(adj_matrix, "svp", alg_params, num_folds_mf))
# Script entry point: run the experiments only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_experiment()