custom.py
import time
from collections import OrderedDict

import numpy as np

# specifying the GPU to use
# import theano.sandbox.cuda
# theano.sandbox.cuda.use('gpu1')
import theano
import theano.tensor as T
import lasagne
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams


# Given a dataset and a model, this function trains the model on the dataset
# for several epochs (there is no default trainer function in Lasagne yet).
def train(train_fn, val_fn,
          model,
          batch_size,
          LR_start, LR_decay,
          num_epochs,
          X_train, y_train,
          X_val, y_val,
          X_test, y_test,
          save_path=None,
          shuffle_parts=1):

    # Shuffle a dataset in place, one chunk at a time, so that a large
    # dataset does not need a second full copy in memory.
    def shuffle(X, y):

        chunk_size = len(X) // shuffle_parts  # integer division (Python 3 safe)
        shuffled_range = list(range(chunk_size))  # list, so np.random.shuffle can permute it in place

        X_buffer = np.copy(X[0:chunk_size])
        y_buffer = np.copy(y[0:chunk_size])

        for k in range(shuffle_parts):

            np.random.shuffle(shuffled_range)

            for i in range(chunk_size):
                X_buffer[i] = X[k * chunk_size + shuffled_range[i]]
                y_buffer[i] = y[k * chunk_size + shuffled_range[i]]

            X[k * chunk_size:(k + 1) * chunk_size] = X_buffer
            y[k * chunk_size:(k + 1) * chunk_size] = y_buffer

        return X, y

    # This function trains the model for one full epoch (on the whole
    # dataset) and returns the mean training loss.
    def train_epoch(X, y, LR):

        loss = 0
        batches = len(X) // batch_size

        for i in range(batches):
            loss += train_fn(X[i * batch_size:(i + 1) * batch_size],
                             y[i * batch_size:(i + 1) * batch_size],
                             LR)

        loss /= batches

        return loss

    # This function evaluates the model on a whole dataset and returns the
    # mean error rate (as a percentage) and the mean loss.
    def val_epoch(X, y):

        err = 0
        loss = 0
        batches = len(X) // batch_size

        for i in range(batches):
            new_loss, new_err = val_fn(X[i * batch_size:(i + 1) * batch_size],
                                       y[i * batch_size:(i + 1) * batch_size])
            err += new_err
            loss += new_loss

        err = err / batches * 100
        loss /= batches

        return err, loss

    # shuffle the train set
    X_train, y_train = shuffle(X_train, y_train)
    best_val_err = 100
    best_epoch = 1
    LR = LR_start

    # We iterate over epochs:
    for epoch in range(num_epochs):

        start_time = time.time()

        train_loss = train_epoch(X_train, y_train, LR)
        X_train, y_train = shuffle(X_train, y_train)

        val_err, val_loss = val_epoch(X_val, y_val)

        # Test whether the validation error went down; if so, evaluate on
        # the test set and (optionally) save the model parameters.
        if val_err <= best_val_err:

            best_val_err = val_err
            best_epoch = epoch + 1

            test_err, test_loss = val_epoch(X_test, y_test)

            if save_path is not None:
                np.savez(save_path, *lasagne.layers.get_all_param_values(model))

        epoch_duration = time.time() - start_time

        # Then we print the results for this epoch:
        print("Epoch " + str(epoch + 1) + " of " + str(num_epochs) + " took " + str(epoch_duration) + "s")
        print("  LR:                         " + str(LR))
        print("  training loss:              " + str(train_loss))
        print("  validation loss:            " + str(val_loss))
        print("  validation error rate:      " + str(val_err) + "%")
        print("  best epoch:                 " + str(best_epoch))
        print("  best validation error rate: " + str(best_val_err) + "%")
        print("  test loss:                  " + str(test_loss))
        print("  test error rate:            " + str(test_err) + "%")

        # decay the LR
        LR *= LR_decay
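

# ---------------------------------------------------------------------------
# Usage sketch (illustration only): one plausible way to build the
# train_fn / val_fn pair that train() expects, using a small Lasagne MLP.
# The network architecture, the hyper-parameter values and the
# load_dataset() helper below are assumptions, not part of the original file.
if __name__ == "__main__":

    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')
    LR_var = T.scalar('LR')

    # A small MLP; any Lasagne network would do.
    network = lasagne.layers.InputLayer(shape=(None, 784), input_var=input_var)
    network = lasagne.layers.DenseLayer(network, num_units=256)
    network = lasagne.layers.DenseLayer(network, num_units=10,
                                        nonlinearity=lasagne.nonlinearities.softmax)

    # Training expressions: train_fn takes (inputs, targets, LR) and returns
    # the loss, which is what train_epoch() above expects.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=LR_var)
    train_fn = theano.function([input_var, target_var, LR_var], loss, updates=updates)

    # Evaluation expressions: val_fn returns (loss, error rate), matching the
    # "new_loss, new_err = val_fn(...)" unpacking in val_epoch().
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var).mean()
    test_err = T.mean(T.neq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    val_fn = theano.function([input_var, target_var], [test_loss, test_err])

    # load_dataset() is a hypothetical helper returning float32/int32 numpy arrays.
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    train(train_fn, val_fn, network,
          batch_size=100,
          LR_start=0.001, LR_decay=0.99,
          num_epochs=50,
          X_train=X_train, y_train=y_train,
          X_val=X_val, y_val=y_val,
          X_test=X_test, y_test=y_test)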