#Source: Signed-Network-Analysis/experiments.py (markheimann/Signed-Network-Analysis, master branch)

#Perform experiments
#Select network
#Select methods
#Study performance of all methods on the network

import numpy as np

import data.simulate_networks as sim
import local_prediction.local_moi as moi
import local_prediction.hoc_prediction as hoc
import matrix_completion.matrix_prediction as mf
import cluster.clustering as clust
import analytics.stats as stats

import time

#Run experiments on different algorithms on the same network
#Print the summary statistics produced by one evaluation pipeline
def _print_results(header, results):
  """Print accuracy, false positive rate and running time for one method.

  header:  line printed before the statistics (e.g. "ALS results:")
  results: 6-item sequence unpacked as
           (avg_acc, stderr_acc, avg_fpr, stderr_fpr, avg_time, stderr_time)
  """
  avg_acc, stderr_acc, avg_fpr, stderr_fpr, avg_time, stderr_time = results
  print(header)
  print("Accuracy: average %f with standard error %f" % (avg_acc, stderr_acc))
  print("False positive rate: average %f with standard error %f" % (avg_fpr, stderr_fpr))
  print("Model running time: average %f with standard error %f" % (avg_time, stderr_time))
  #blank line separating this report from the next one
  print("")


#Create (simulated) or load (real) the adjacency matrix to experiment on
def _get_adj_matrix(simulated, real, data_file_name):
  """Return the adjacency matrix selected by the flags.

  simulated takes precedence over real, as in the original if/elif chain.
  Raises ValueError if the file cannot be loaded or if no source is selected.
  """
  if simulated:
    cluster_sizes = [100,200,300,400]
    sparsity_level = 0.01175
    noise_prob = 0
    print("creating adjacency matrix...")
    return sim.sample_network(cluster_sizes, sparsity_level, noise_prob)

  if real:
    try:
      #.item() unwraps the single Python object stored in the .npy file.
      #Such object arrays are pickled, and NumPy >= 1.16.3 refuses to load
      #them unless allow_pickle=True is given.
      #NOTE: unpickling can execute arbitrary code - only load trusted files.
      return np.load(data_file_name, allow_pickle=True).item()
    except Exception as e:
      #one formatted message (the original passed two args, giving a tuple)
      raise ValueError("could not load adj matrix from file %s: %s"
                       % (data_file_name, e))

  #fail early instead of handing None to the prediction methods
  raise ValueError("no network selected: set simulated or real to True")


def run_experiment(simulated=False, real=True,
                   use_moi=True, use_hoc=True, use_svp=True,
                   use_sgd_sh=False, use_sgd_sig=False, use_als=True,
                   data_file_name="data/Preprocessed Data/small_network.npy"):
  """Evaluate the selected prediction methods on a single network.

  Called without arguments this runs the same configuration the script has
  always used; the keyword parameters only expose the former hard-coded
  switches.

  simulated:      sample a synthetic network via sim.sample_network
  real:           load the network from data_file_name (used only when
                  simulated is False)
  use_moi:        run moi.kfoldcv_moi
  use_hoc:        run hoc.hoc_learning_pipeline
  use_sgd_sh:     run mf.kfold_CV_pipeline with "sgd" and square-hinge loss
  use_sgd_sig:    run mf.kfold_CV_pipeline with "sgd" and sigmoid loss
  use_als:        run mf.kfold_CV_pipeline with "als"
  use_svp:        run mf.kfold_CV_pipeline with "svp"
  data_file_name: .npy file holding the network, e.g.
                  "data/Preprocessed Data/wiki_elections_csr.npy"

  Methods run in the order MOI, HOC, SGD (square hinge), SGD (sigmoid), ALS,
  SVP, and each prints its own summary. Returns None.
  Raises ValueError if no network could be created or loaded.
  """
  adj_matrix = _get_adj_matrix(simulated, real, data_file_name)

  if use_moi:
    print("performing MOI...")
    max_cycle_order_moi = 10
    #one discount factor per cycle order from 3 up to the maximum
    #(alternative tried earlier: max_cycle_order_moi = np.inf with a single
    #discount of 0.0001)
    discount = [0.5**i for i in range(3, max_cycle_order_moi + 1)]
    num_folds = 5
    _print_results("MOI results: ",
        moi.kfoldcv_moi(adj_matrix, discount, max_cycle_order_moi, num_folds))

  if use_hoc:
    print("performing HOC...")
    max_cycle_order_hoc = 5
    #NOTE(review): the original also set num_folds = 10 here but never passed
    #it to hoc_learning_pipeline; confirm whether that call should take it
    _print_results("HOC results:",
        hoc.hoc_learning_pipeline(adj_matrix, max_cycle_order_hoc))

  #settings if using SGD
  if use_sgd_sh or use_sgd_sig:
    #https://www.cs.uic.edu/~liub/KDD-cup-2007/proceedings/Regular-Paterek.pdf
    #NOTE(review): the original comments list 0.05 (learning rate) and 0.5
    #(regularization) "for square hinge", yet both losses share the values
    #below - confirm which is intended
    learning_rate = 1000
    #floor division keeps the integer tolerance Python 2's "/" produced
    tol = adj_matrix.nnz // 10
    max_iter = 20
    reg_param = 10
    dim = 100
    num_folds_mf = 10

    #(enabled, loss type, progress message, results header) per SGD variant
    sgd_variants = [
      (use_sgd_sh, "squarehinge",
       "performing SGD with square-hinge loss...", "SGD_SH results:"),
      (use_sgd_sig, "sigmoid",
       "performing SGD with sigmoid loss...", "SGD_SIG results:"),
    ]
    for enabled, loss_type, message, header in sgd_variants:
      if not enabled:
        continue
      #Bundle up these parameters and use this algorithm
      alg_params = (learning_rate, loss_type, tol, max_iter, reg_param, dim)
      print(message)
      _print_results(header,
          mf.kfold_CV_pipeline(adj_matrix, "sgd", alg_params, num_folds_mf))

  #settings if using als
  if use_als:
    max_iter = 2
    dim = 40
    num_folds_mf = 10

    #Bundle up these parameters and use this algorithm
    alg_params = (max_iter, dim)
    print("performing ALS...")
    _print_results("ALS results:",
        mf.kfold_CV_pipeline(adj_matrix, "als", alg_params, num_folds_mf))

  #settings if using SVP
  if use_svp:
    rank = 40
    tol = 100
    max_iter = 5
    step_size = 1
    num_folds_mf = 10

    #Bundle up these parameters and use this algorithm
    alg_params = (rank, tol, max_iter, step_size)
    print("performing SVP...")
    _print_results("SVP results:",
        mf.kfold_CV_pipeline(adj_matrix, "svp", alg_params, num_folds_mf))

#Run the experiments only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
  run_experiment()