-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_rep.py
More file actions
126 lines (115 loc) · 5.05 KB
/
data_rep.py
File metadata and controls
126 lines (115 loc) · 5.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#implementation from https://www.python-graph-gallery.com/heatmap/ and
#https://www.kite.com/python/docs/seaborn.heatmap
#https://blog.quantinsti.com/creating-heatmap-using-python-seaborn/
#https://towardsdatascience.com/heatmap-basics-with-pythons-seaborn-fb92ea280a6c
import seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#datafile = "output_files/analysis/analysis1-5.txt"
#outfile = "output_files/analysis/heatmap1-5.png"
def main():
    """Script entry point; currently a placeholder that draws nothing.

    Returns:
        The constant ``1``.
    """
    # The call below was already disabled; no ``heatmap`` function is
    # defined in the visible part of this file.
    # heatmap(datafile, outfile)
    return 1
# TODO: consolidate the per-analysis heatmap functions below into one and just pass in the analysis name
def pcaHeatmap(datafile, outfile):
    """Render the PCA rank-correlation grid in *datafile* as a heatmap image.

    The CSV is read with its second line as the header row (``header=1``)
    and restricted to its first four columns. It must provide ``alpha1``,
    ``alpha2`` and ``correlation`` columns; the table is pivoted so that
    ``alpha2`` indexes the rows and ``alpha1`` the columns. The pivoted
    table is also printed to stdout.

    Args:
        datafile: CSV input accepted by ``pandas.read_csv``.
        outfile: Destination passed to ``Figure.savefig``.
    """
    df = pd.read_csv(datafile, header=1, usecols=[0, 1, 2, 3])
    shapedDF = df.pivot(index='alpha2', columns='alpha1', values='correlation')
    print(shapedDF)

    fig, ax = plt.subplots(figsize=(11, 9))
    try:
        # Draw on the axes created above rather than relying on pyplot's
        # implicit "current axes".
        seaborn.heatmap(shapedDF, cmap="Blues", linewidth=0.3,
                        cbar_kws={"shrink": .8}, ax=ax)
        # Flip the vertical axis relative to seaborn's default orientation.
        ax.invert_yaxis()
        title = 'PCA Rank Correlations\n'.upper()
        ax.set_title(title, loc='left')
        fig.savefig(outfile)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
def zachKNNHeatmap(datafile, outfile):
    """Render the Zach KNN accuracy grid in *datafile* as a heatmap image.

    The CSV is read with its second line as the header row (``header=1``)
    and restricted to its first three columns. It must provide ``alpha1``,
    ``alpha2`` and ``accuracy`` columns; the table is pivoted so that
    ``alpha2`` indexes the rows and ``alpha1`` the columns. The pivoted
    table is also printed to stdout.

    Args:
        datafile: CSV input accepted by ``pandas.read_csv``.
        outfile: Destination passed to ``Figure.savefig``.
    """
    df = pd.read_csv(datafile, header=1, usecols=[0, 1, 2])
    shapedDF = df.pivot(index='alpha2', columns='alpha1', values='accuracy')
    print(shapedDF)

    fig, ax = plt.subplots(figsize=(11, 9))
    try:
        # Draw on the axes created above rather than relying on pyplot's
        # implicit "current axes".
        seaborn.heatmap(shapedDF, cmap="Blues", linewidth=0.3,
                        cbar_kws={"shrink": .8}, ax=ax)
        # Flip the vertical axis relative to seaborn's default orientation.
        ax.invert_yaxis()
        title = 'Zach KNN Average Accuracy\n'.upper()
        ax.set_title(title, loc='left')
        fig.savefig(outfile)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
def KNNHeatmap(datafile, outfile):
    """Render the KNN result grid in *datafile* as a heatmap image.

    The CSV is read with its second line as the header row (``header=1``)
    and restricted to its first three columns. It must provide ``alpha1``,
    ``alpha2`` and ``accuracy`` columns; the table is pivoted so that
    ``alpha2`` indexes the rows and ``alpha1`` the columns. The pivoted
    table is also printed to stdout.

    Args:
        datafile: CSV input accepted by ``pandas.read_csv``.
        outfile: Destination passed to ``Figure.savefig``.
    """
    df = pd.read_csv(datafile, header=1, usecols=[0, 1, 2])
    # NOTE(review): the plotted column is named 'accuracy' while the title
    # below says "Mean Squared Error" -- confirm which one the data holds.
    shapedDF = df.pivot(index='alpha2', columns='alpha1', values='accuracy')
    print(shapedDF)

    fig, ax = plt.subplots(figsize=(11, 9))
    try:
        # Draw on the axes created above rather than relying on pyplot's
        # implicit "current axes".
        seaborn.heatmap(shapedDF, cmap="Blues", linewidth=0.3,
                        cbar_kws={"shrink": .8}, ax=ax)
        # Flip the vertical axis relative to seaborn's default orientation.
        ax.invert_yaxis()
        title = 'KNN Average Mean Squared Error\nAcross K Folds\n'.upper()
        ax.set_title(title, loc='left')
        fig.savefig(outfile)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
def randomForestHeatmap(datafile, outfile):
    """Render the random-forest result grid in *datafile* as a heatmap image.

    The CSV is read with its second line as the header row (``header=1``)
    and restricted to its first four columns. It must provide ``alpha1``,
    ``alpha2`` and ``mean`` columns; the table is pivoted so that
    ``alpha2`` indexes the rows and ``alpha1`` the columns. The pivoted
    table is also printed to stdout.

    Args:
        datafile: CSV input accepted by ``pandas.read_csv``.
        outfile: Destination passed to ``Figure.savefig``.
    """
    df = pd.read_csv(datafile, header=1, usecols=[0, 1, 2, 3])
    shapedDF = df.pivot(index='alpha2', columns='alpha1', values='mean')
    print(shapedDF)

    fig, ax = plt.subplots(figsize=(11, 9))
    try:
        # Draw on the axes created above rather than relying on pyplot's
        # implicit "current axes".
        seaborn.heatmap(shapedDF, cmap="Blues", linewidth=0.3,
                        cbar_kws={"shrink": .8}, ax=ax)
        # Flip the vertical axis relative to seaborn's default orientation.
        ax.invert_yaxis()
        title = 'Random Forests Average Mean Squared Error\nAcross K Folds\n'.upper()
        ax.set_title(title, loc='left')
        fig.savefig(outfile)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
def SVRHeatmap(datafile, outfile):
    """Render the SVR result grid in *datafile* as a heatmap image.

    The CSV is read with its second line as the header row (``header=1``)
    and restricted to its first four columns. It must provide ``alpha1``,
    ``alpha2`` and ``mean`` columns; the table is pivoted so that
    ``alpha2`` indexes the rows and ``alpha1`` the columns. The pivoted
    table is also printed to stdout.

    Args:
        datafile: CSV input accepted by ``pandas.read_csv``.
        outfile: Destination passed to ``Figure.savefig``.
    """
    df = pd.read_csv(datafile, header=1, usecols=[0, 1, 2, 3])
    shapedDF = df.pivot(index='alpha2', columns='alpha1', values='mean')
    print(shapedDF)

    fig, ax = plt.subplots(figsize=(11, 9))
    try:
        # Draw on the axes created above rather than relying on pyplot's
        # implicit "current axes".
        seaborn.heatmap(shapedDF, cmap="Blues", linewidth=0.3,
                        cbar_kws={"shrink": .8}, ax=ax)
        # Flip the vertical axis relative to seaborn's default orientation.
        ax.invert_yaxis()
        title = 'SVR Average Mean Squared Error Across K Folds\n'.upper()
        ax.set_title(title, loc='left')
        fig.savefig(outfile)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
def dummyHeatmap(datafile, outfile):
    """Render the dummy-model result grid in *datafile* as a heatmap image.

    The CSV is read with its second line as the header row (``header=1``)
    and restricted to its first four columns. It must provide ``alpha1``,
    ``alpha2`` and ``mean`` columns; the table is pivoted so that
    ``alpha2`` indexes the rows and ``alpha1`` the columns. The pivoted
    table is also printed to stdout.

    Args:
        datafile: CSV input accepted by ``pandas.read_csv``.
        outfile: Destination passed to ``Figure.savefig``.
    """
    df = pd.read_csv(datafile, header=1, usecols=[0, 1, 2, 3])
    shapedDF = df.pivot(index='alpha2', columns='alpha1', values='mean')
    print(shapedDF)

    fig, ax = plt.subplots(figsize=(11, 9))
    try:
        # Draw on the axes created above rather than relying on pyplot's
        # implicit "current axes".
        seaborn.heatmap(shapedDF, cmap="Blues", linewidth=0.3,
                        cbar_kws={"shrink": .8}, ax=ax)
        # Flip the vertical axis relative to seaborn's default orientation.
        ax.invert_yaxis()
        title = 'Dummy Average Mean Squared Error Across K Folds\n'.upper()
        ax.set_title(title, loc='left')
        fig.savefig(outfile)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
def _saveVsDummyHeatmap(ratio_df, valueColumn, analysisName, outfile):
    """Pivot *valueColumn* of *ratio_df* onto the alpha grid and save the heatmap."""
    shaped_result = ratio_df.pivot(index='alpha2', columns='alpha1', values=valueColumn)
    print(shaped_result)

    fig, ax = plt.subplots(figsize=(11, 9))
    try:
        seaborn.heatmap(shaped_result, cmap="Blues", linewidth=0.3,
                        cbar_kws={"shrink": .8}, ax=ax)
        # Flip the vertical axis relative to seaborn's default orientation.
        ax.invert_yaxis()
        # Parenthesised so .upper() covers the analysis name too; without
        # the parentheses it only applied to the string literal, leaving
        # the name in mixed case unlike every other title in this file.
        title = (analysisName + ' Average Mean Squared Error \nDivided by Dummy Mean Squared Error\n').upper()
        ax.set_title(title, loc='left')
        fig.savefig(outfile)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)


def vsDummyHeatmap(analysisName, realfile, dummyfile, outfile):
    """Save a heatmap comparing one analysis against the dummy baseline.

    For ``'ZachKNN'`` and ``'KNN'`` the real file's ``accuracy`` column is
    used; for ``'RandomForest'`` and ``'SVR'`` its ``mean`` column. In both
    cases the plotted value is the dummy file's ``mean`` divided by that
    column, row by row. ``'PCA'`` is silently skipped; any other name prints
    a message and produces no image.

    Both CSVs are read with their second line as the header row
    (``header=1``) and restricted to their first three columns.

    Args:
        analysisName: One of the analysis names listed above.
        realfile: CSV with the real analysis results.
        dummyfile: CSV with the dummy baseline results.
        outfile: Destination passed to ``Figure.savefig``.
    """
    # Resolve the analysis name before touching any file, so the no-op and
    # unsupported cases do not depend on the inputs existing.
    if analysisName == 'PCA':
        return
    if analysisName in ('ZachKNN', 'KNN'):
        valueColumn = 'accuracy'
    elif analysisName in ('RandomForest', 'SVR'):
        valueColumn = 'mean'
    else:
        # Previously this branch was the `else` of the second `if` only, so
        # the message was also printed after a successful ZachKNN/KNN plot.
        print("unable to make vsDummy heatmap for", analysisName)
        return

    # NOTE(review): only the first three columns are loaded here, whereas the
    # standalone heatmaps that pivot on 'mean' load four -- confirm 'mean'
    # is among the first three columns of these files.
    real_df = pd.read_csv(realfile, header=1, usecols=[0, 1, 2])
    dummy_df = pd.read_csv(dummyfile, header=1, usecols=[0, 1, 2])

    # NOTE(review): this computes dummy / real, while the title reads
    # "... Divided by Dummy ..." -- confirm the intended direction.
    real_df[valueColumn] = dummy_df['mean'] / real_df[valueColumn]
    _saveVsDummyHeatmap(real_df, valueColumn, analysisName, outfile)
# Run the entry point only when executed as a script, not when imported.
if __name__ == "__main__":
    main()