import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.ticker import FixedLocator, FixedFormatter
from sklearn import metrics
from sklearn.metrics import confusion_matrix
## Evaluates the performance of a binary classifier against labelled data.
# @param data - a dataframe containing 3 columns: label (binary values), predicted (binary values) and
# probability (the prediction the model made for each row before thresholding to a binary class, a number between 0 and 1).
# @returns - Confusion matrix plot, ROC curve plot, and a small report.
def model_evaluator(data):
    # partition processed data into vectors
    actualClass = data.label
    predictedClass = data.predicted
    probability = data.probability
    # build a confusion matrix
    cm = confusion_matrix(actualClass, predictedClass, labels=[0, 1])
    TruePositive = cm[1, 1]
    TrueNegative = cm[0, 0]
    FalsePositive = cm[0, 1]
    FalseNegative = cm[1, 0]
    numberOfPositives = TruePositive + FalseNegative
    numberOfNegatives = TrueNegative + FalsePositive
    # calculate the null accuracy (accuracy of always predicting class 0, assumed to be the majority class)
    null_accuracy = 1 - actualClass.mean()
    # define the model accuracy
    model_accuracy = metrics.accuracy_score(actualClass, predictedClass)
    # generate a metrics report
    report = metrics.classification_report(actualClass, predictedClass, output_dict=True)
    # calculate the model performance over the null accuracy
    performance_over_null = model_accuracy - null_accuracy
    # calculate the specificity of the model
    specificity = TrueNegative / (TrueNegative + FalsePositive)
    # calculate the true positive rate, false positive rate, and thresholds to plot a ROC curve
    fpr, tpr, thresholds = metrics.roc_curve(actualClass, probability)
    # calculate the area under the ROC curve
    rocAuc = metrics.roc_auc_score(actualClass, probability)
    # generate figure
    fig = plt.figure(figsize=(12, 20))
    spec = gridspec.GridSpec(ncols=2, nrows=1, wspace=0.5, width_ratios=[1, 1], figure=fig)
    # plot confusion matrix in position 0,0
    confusionMatrixLabels = ['Normal Traffic', 'Intrusion']
    confusionMatrixColourMap = plt.cm.Blues
    confusionMatrix = fig.add_subplot(spec[0, 0])
    confusionMatrix.set_aspect('equal')
    confusionMatrix.imshow(cm, interpolation='nearest', cmap=confusionMatrixColourMap)
    confusionMatrix.set(ylabel='True class', xlabel='Predicted class')
    formatter = FixedFormatter(confusionMatrixLabels)
    locator = FixedLocator([0, 1])
    confusionMatrix.yaxis.set_major_formatter(formatter)
    confusionMatrix.yaxis.set_major_locator(locator)
    confusionMatrix.xaxis.set_major_formatter(formatter)
    confusionMatrix.xaxis.set_major_locator(locator)
    # annotate each cell with its count; use white text on high-count (dark) cells,
    # thresholded against the total number of positive labels
    tot = sum(data.label)
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            confusionMatrix.text(j, i, format(cm[i, j]), ha='center', va='baseline',
                                 color='white' if cm[i, j] > tot else 'black', size='larger')
    # label each cell with its role in the matrix (rows: actual, columns: predicted)
    cmLabels = ['TN', 'FP', 'FN', 'TP']
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            confusionMatrix.text(j + 0.3, i + 0.4, cmLabels[i * cm.shape[1] + j], ha='center', va='baseline',
                                 color='white' if cm[i, j] > tot else 'black', size='larger')
    # print the total of each actual class to the right of its row
    for i, rowTotal in enumerate([numberOfNegatives, numberOfPositives]):
        confusionMatrix.text(1.8, i, 'Total:\n %d' % rowTotal, ha='center', va='center',
                             color='black', size='larger')
    # plot ROC curve in position 0,1
    rocCurve = fig.add_subplot(spec[0, 1])
    rocCurve.set_aspect('equal')
    rocCurve.plot(fpr, tpr, color='red', lw=2, label='ROC area = %0.5f' % rocAuc)
    rocCurve.set(xlabel='False Positive Rate (1 - Specificity)', ylabel='True Positive Rate (Sensitivity)')
    rocCurve.legend(loc='lower right')
    # print report
    print('The performance of this model over the null accuracy is %2.2f%%\nModel Sensitivity: %2.6f%%\nModel Specificity: %2.6f%%\nModel F1 Score: %2.6f'
          % (performance_over_null * 100, report['1']['recall'] * 100, specificity * 100, report['1']['f1-score']))
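
# Minimal usage sketch (an illustration, not part of the original module): the column
# names match the ones model_evaluator reads, but the synthetic data, the 0.5 decision
# threshold, and the random seed are assumptions made purely for this demo.
if __name__ == '__main__':
    import pandas as pd

    rng = np.random.default_rng(0)
    labels = rng.integers(0, 2, size=200)                         # ground-truth binary classes
    scores = np.clip(labels * 0.6 + rng.random(200) * 0.5, 0, 1)  # fake model scores in [0, 1]
    demo = pd.DataFrame({
        'label': labels,
        'predicted': (scores > 0.5).astype(int),                  # threshold scores into binary predictions
        'probability': scores,
    })
    model_evaluator(demo)
    plt.show()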