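"""lecture_12_overfitting.py

Lecture 12: overfitting experiments. Plots the first few Legendre
polynomials, then measures the average out-of-sample error gap
E_out(g_10) - E_out(g_2) over a grid of sample sizes against either the
noise level sigma^2 or the target complexity Q_f, saving each result
grid as a CSV file and a heatmap figure.
"""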
import time
import numpy as np
import pandas as pd
from Models.Experiments import run_experiments, generate_legendre_polynomial
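# Assumed signatures, inferred from the calls below (not verified against
# Models.Experiments): run_experiments(n_points, noise_level,
# target_complexity, n_experiments) returns a single averaged
# E_out(g_10) - E_out(g_2) value, and generate_legendre_polynomial(degree)
# returns a callable P_degree(x).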
import matplotlib.pyplot as plt
import seaborn as sns
import multiprocessing as mp
USE_MULTIPROCESSING = True  # run the experiment grid across worker processes
N_THREADS = 8               # number of worker processes for mp.Pool
N_EXPERIMENTS = 10000       # trials averaged per cell of the parameter grid
def plot_legendre_polynomials(max_n: int = 5):
    # Create a figure
    fig, ax = plt.subplots(figsize=(10, 8), tight_layout=True)
    fig.suptitle('Legendre Polynomials', fontsize=16)
    # Plot the polynomials
    x = np.linspace(-1, 1, 1000)
    for i in range(max_n):
        polynomial = generate_legendre_polynomial(degree=i)
        ax.plot(x, polynomial(x), label=f'$P_{i}(x)$')
    # Add a legend
    ax.legend()
    # Save the figure
    fig.savefig('Figures/lecture_12_legendre_polynomials.png')
    # Show the figure
    plt.show()
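# Note: Legendre polynomials are orthogonal on [-1, 1]; given the
# target_complexity parameter used below, they presumably serve as the basis
# for the random target functions inside run_experiments (an assumption,
# since Models.Experiments is not shown here).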
def experiments_noise_level():
    # Get the current time
    start_time = time.time()
    # Set up the parameters for the experiments
    n_points = np.arange(40, 121, 5)
    noise_levels = np.arange(0.0, 2.1, 0.1)
    print(f'Running {len(n_points) * len(noise_levels)} experiments...')
    if USE_MULTIPROCESSING:
        # Run the experiments in parallel
        settings = [(n_point, noise_level, 10, N_EXPERIMENTS)
                    for noise_level in noise_levels for n_point in n_points]
        with mp.Pool(N_THREADS) as pool:
            values = pool.starmap(run_experiments, settings)
        results = np.array(values).reshape(len(noise_levels), len(n_points))
    else:
        # Create a matrix to store the results
        results = np.zeros((len(noise_levels), len(n_points)))
        # Run the experiments
        experiment_count = 1
        for i, noise_level in enumerate(noise_levels):
            for j, n_point in enumerate(n_points):
                print(f'Running experiment {experiment_count} of {len(n_points) * len(noise_levels)}')
                results[i, j] = run_experiments(n_points=n_point, noise_level=noise_level,
                                                target_complexity=10, n_experiments=N_EXPERIMENTS)
                experiment_count += 1
    # Flip the results so the lowest noise level sits at the bottom of the heatmap
    results = results[::-1, :]
    # Save the results as a csv file
    pd.DataFrame(results, index=noise_levels[::-1], columns=n_points).to_csv('Experiments/noise_level.csv',
                                                                             index_label='Noise Level')
    # Plot the results
    fig, ax = plt.subplots(figsize=(10, 8), tight_layout=True)
    fig.suptitle('Experiments with Noise Level $\\sigma^2$', fontsize=16)
    sns.heatmap(results, ax=ax,
                xticklabels=[str(round(n, 2)) for n in n_points],
                yticklabels=[str(round(n, 2)) for n in noise_levels[::-1]],
                cmap='coolwarm')
    ax.set(xlabel='Number of points', ylabel='Noise level',
           title='Average $E_{out}(g_{10}) - E_{out}(g_{2})$')
    # Save the figure before showing it so the saved file is not blank
    fig.savefig('Figures/lecture_12_noise_level.png')
    plt.show()
    # Print the time it took to run the experiments
    print(f'Time taken: {round(time.time() - start_time, 2)} seconds')
def experiments_target_complexity():
    # Get the current time
    start_time = time.time()
    # Set up the parameters for the experiments
    n_points = np.arange(40, 121, 5)
    target_complexities = np.arange(0, 31, 2)
    print(f'Running {len(n_points) * len(target_complexities)} experiments...')
    if USE_MULTIPROCESSING:
        # Run the experiments in parallel
        settings = [(n_point, 0.5, target_complexity, N_EXPERIMENTS)
                    for target_complexity in target_complexities for n_point in n_points]
        with mp.Pool(N_THREADS) as pool:
            values = pool.starmap(run_experiments, settings)
        results = np.array(values).reshape(len(target_complexities), len(n_points))
    else:
        # Create a matrix to store the results
        results = np.zeros((len(target_complexities), len(n_points)))
        # Run the experiments
        experiment_count = 1
        for i, target_complexity in enumerate(target_complexities):
            for j, n_point in enumerate(n_points):
                print(f'Running experiment {experiment_count} of {len(n_points) * len(target_complexities)}')
                # Use the same noise level (0.5) as the parallel branch
                results[i, j] = run_experiments(n_points=n_point, noise_level=0.5,
                                                target_complexity=target_complexity,
                                                n_experiments=N_EXPERIMENTS)
                experiment_count += 1
    # Flip the results so the lowest complexity sits at the bottom of the heatmap
    results = results[::-1, :]
    # Save the results as a csv file
    pd.DataFrame(results, index=target_complexities[::-1], columns=n_points).to_csv('Experiments/target_complexity.csv',
                                                                                    index_label='Target Complexity')
    # Plot the results
    fig, ax = plt.subplots(figsize=(10, 8), tight_layout=True)
    fig.suptitle('Experiments with Target Complexity $Q_{f}$', fontsize=16)
    sns.heatmap(results, ax=ax,
                xticklabels=[str(round(n, 2)) for n in n_points],
                yticklabels=[str(round(n, 2)) for n in target_complexities[::-1]],
                cmap='coolwarm')
    ax.set(xlabel='Number of points', ylabel='Target complexity',
           title='Average $E_{out}(g_{10}) - E_{out}(g_{2})$')
    # Save the figure before showing it so the saved file is not blank
    fig.savefig('Figures/lecture_12_target_complexity.png')
    plt.show()
    # Print the time it took to run the experiments
    print(f'Time taken: {round(time.time() - start_time, 2)} seconds')
if __name__ == '__main__':
    plot_legendre_polynomials()
    experiments_noise_level()
    experiments_target_complexity()
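# Quick sanity check of the saved results (a sketch, assuming the experiment
# functions above have already been run so the CSV files exist):
#     df = pd.read_csv('Experiments/noise_level.csv', index_col='Noise Level')
#     print(df.describe())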