full_eval.py
import numpy as np
from sklearn.metrics import (precision_recall_fscore_support, average_precision_score,
                             roc_auc_score, precision_score, recall_score)

def f1_score(probs, labels, thres, average='micro'):
    '''Return (precision, recall, F1 score) for a batch of predictions
    (probabilities thresholded at `thres`) given a batch of labels, so that
    callers can also macro-average the scores across batches.'''
    preds = (probs >= thres).astype(np.int32)
    p, r, f, _ = precision_recall_fscore_support(labels, preds, average=average,
                                                 warn_for=())
    return p, r, f

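# For example, probs = np.array([[0.2, 0.8]]) with thres = 0.5 gives
# preds = [[0, 1]]; 'micro' pools every label decision before computing
# precision/recall/F1, while 'macro' averages the per-label scores.
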
def auc_pr(probs, labels, average='micro'):
    '''Precision integrated over all thresholds (area under the precision-recall curve).'''
    if average == 'macro' or average is None:
        # Drop label columns that are constant over the batch (all 0s or all
        # 1s); per-class average precision is not well defined for them.
        sums = labels.sum(0)
        nz_indices = np.logical_and(sums != labels.shape[0], sums != 0)
        probs = probs[:, nz_indices]
        labels = labels[:, nz_indices]
    return average_precision_score(labels, probs, average=average)

def auc_roc(probs, labels, average='micro'):
    '''Area under the ROC curve.'''
    if average == 'macro' or average is None:
        # Same guard as in auc_pr: roc_auc_score cannot score a class whose
        # labels are all identical, so constant columns are removed first.
        sums = labels.sum(0)
        nz_indices = np.logical_and(sums != labels.shape[0], sums != 0)
        probs = probs[:, nz_indices]
        labels = labels[:, nz_indices]
    return roc_auc_score(labels, probs, average=average)

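# Concretely: with labels = [[1, 0], [1, 1]] the first column is all ones
# (sums = [2, 1], batch size 2), so nz_indices = [False, True] and only the
# second column survives into the macro average.
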
def precision_at_k(probs, labels, k, average='micro'):
    '''Precision over the k highest-scoring labels per example.'''
    k = min(k, probs.shape[1])  # guard: k cannot exceed the number of labels
    indices = np.argpartition(-probs, k - 1, axis=1)[:, :k]
    preds = np.zeros(probs.shape, dtype=np.int32)  # np.int was removed in NumPy 1.24
    preds[np.arange(preds.shape[0])[:, np.newaxis], indices] = 1
    return precision_score(labels, preds, average=average)

def recall_at_k(probs, labels, k, average='micro'):
    '''Recall over the k highest-scoring labels per example.'''
    k = min(k, probs.shape[1])  # guard: k cannot exceed the number of labels
    indices = np.argpartition(-probs, k - 1, axis=1)[:, :k]
    preds = np.zeros(probs.shape, dtype=np.int32)  # np.int was removed in NumPy 1.24
    preds[np.arange(preds.shape[0])[:, np.newaxis], indices] = 1
    return recall_score(labels, preds, average=average)

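# Concretely: with probs = [[0.1, 0.9, 0.4]] and k = 2, the two largest
# scores sit in columns 1 and 2, so preds = [[0, 1, 1]]; precision@2 asks how
# many of those two picks are true labels, recall@2 how many of the true
# labels were recovered.
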
def full_evaluate(pred, gold, thres=0.5):
    '''Compute every metric above under micro and macro averaging and return
    the results as a printable report string.'''
    pred = np.array(pred)
    gold = np.array(gold)
    out = 'f1 micro: ' + str(f1_score(pred, gold, thres, average='micro')) + '\n'
    out += 'f1 macro: ' + str(f1_score(pred, gold, thres, average='macro')) + '\n'
    out += 'auc_pr micro: ' + str(auc_pr(pred, gold, average='micro')) + '\n'
    out += 'auc_pr macro: ' + str(auc_pr(pred, gold, average='macro')) + '\n'
    out += 'auc_roc micro: ' + str(auc_roc(pred, gold, average='micro')) + '\n'
    out += 'auc_roc macro: ' + str(auc_roc(pred, gold, average='macro')) + '\n'
    out += 'precision_at_k 8: ' + str(precision_at_k(pred, gold, 8, average='micro')) + '\n'
    out += 'precision_at_k 40: ' + str(precision_at_k(pred, gold, 40, average='micro')) + '\n'
    out += 'recall_at_k 8: ' + str(recall_at_k(pred, gold, 8, average='micro')) + '\n'
    out += 'recall_at_k 40: ' + str(recall_at_k(pred, gold, 40, average='micro')) + '\n'
    return out

if __name__ == '__main__':
    exp1 = [0.3, 0.9]
    exp2 = [0.8, 0.8]
    gold1 = [1, 1]
    gold2 = [0, 0]
    print(full_evaluate([exp1, exp2], [gold1, gold2]))
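    # A further sanity check, not part of the original script: score random
    # probabilities against random multi-label targets. 50 labels are assumed
    # here so that the k=8 and k=40 metrics in full_evaluate are meaningful.
    rng = np.random.default_rng(0)
    rand_probs = rng.random((16, 50))
    rand_gold = (rng.random((16, 50)) > 0.7).astype(np.int32)
    print(full_evaluate(rand_probs, rand_gold))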