-
Notifications
You must be signed in to change notification settings - Fork 0
/
modelReorganization.py
416 lines (338 loc) · 15.2 KB
/
modelReorganization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
import numpy as np
import tensorflow as tf
import h5py
import time
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import sys
# Functions for initializing neural nets parameters
def init_weight_variable(shape, nameIn):
    '''
    Build a float32 tf.Variable of the given shape, named nameIn, whose initial
    values are drawn from a truncated normal distribution with stddev 0.1.
    '''
    return tf.Variable(
        tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32),
        name=nameIn)
def init_bias_variable(shape, nameIn):
    '''
    Build a float32 tf.Variable of the given shape, named nameIn, with every
    entry initialised to the constant 0.1.
    '''
    return tf.Variable(
        tf.constant(0.1, shape=shape, dtype=tf.float32),
        name=nameIn)
def conv2d(x, W):
    '''
    Convolve input x with filter W using a stride of 1 in every dimension
    and 'VALID' padding (no zero padding is added around the input).
    '''
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='VALID')
def monitorStepSize(cost_function, vars):
    '''
    Placeholder for monitoring how large each training step is relative to
    the weights, i.e. how much (as a percentage) the weights change per step.

    - cost_function --- the cost function to evaluate to find the gradient
    - vars --- the tensorflow variables that you want to track

    This function currently has no body: it ignores both arguments and
    returns None.
    '''
def printGradsAndVars(grads_and_vars, X, y_):
    '''
    Placeholder for printing gradients next to the variables they belong to.

    grads_and_vars is a list of two-element tuples:
    - the first element is a tf.Tensor that holds gradients
    - the second element is the tf.Variable those gradients correspond to

    Idea taken from: https://stackoverflow.com/questions/40654881/how-to-print-current-variables-gradients-values-with-vars-names-in-tensorflow

    This function currently has no body: it ignores its arguments and
    returns None.
    '''
    # The intended implementation assumes an interactive session has already
    # been started, so the gradient tensors can be evaluated directly rather
    # than through a sess.run() call.
def printNestedList(lst):
    '''
    Walk a (possibly nested) numpy array and print the type of every leaf.

    Anything whose type is not exactly np.ndarray is treated as a leaf; this
    is an exact type match, so ndarray subclasses are not descended into.
    If lst itself is not an np.ndarray, nothing is printed.
    '''
    if type(lst) is not np.ndarray:
        return
    for item in lst:
        if type(item) is np.ndarray:
            # still an array: keep descending
            printNestedList(item)
        else:
            print(type(item))
#self, X_train, y_train, X_val, y_val, num_freq, filter_row, filter_col, k1, k2, learningRate, pooling_strategy):
# Decorator that turns a method into a property whose value is computed only once.
# We use this to avoid adding the same tensors to the graph multiple times.
import functools
def lazy_property(function):
    '''
    Decorator: expose `function` as a read-only property that is evaluated
    at most once per instance.

    The first access calls function(self) and stores the result on the
    instance under '_cache_<function name>'; later accesses return the
    stored value without calling function again.
    '''
    cache_name = '_cache_' + function.__name__

    @functools.wraps(function)
    def getter(self):
        already_built = hasattr(self, cache_name)
        if not already_built:
            setattr(self, cache_name, function(self))
        return getattr(self, cache_name)

    return property(getter)
class Model(object):
    '''
    Single-convolutional-layer classifier built with the TensorFlow 1.x graph API.

    Graph structure (see y_conv): the flat input is reshaped to
    [-1, num_freq, num_frames, 1], passed through one convolution with k1
    filters of size filter_row x filter_col, a ReLU, flattened, and fed to a
    linear layer that produces num_classes un-normalised scores (logits).

    Every tensor is exposed through a lazy_property, so it is added to the
    graph once, the first time it is accessed.
    '''

    def __init__(self, num_freq, filepath, filter_row, filter_col, k1, k2, learningRate, pooling_strategy, debug):
        '''
        Load the data and build the graph.

        - num_freq --- number of features per frame; used to reshape the input
          (num_frames = total_features / num_freq)
        - filepath --- path of the HDF5 file read by loadData
        - filter_row, filter_col --- size of the first convolutional kernel
        - k1 --- number of filters in the first convolutional layer
        - k2 --- stored on the instance; not used by the graph built here
        - learningRate --- learning rate given to the Adam optimizer
        - pooling_strategy --- stored on the instance; not used by the graph built here
        - debug --- when true, also build grads_and_vars, and train() appends
          step-size diagnostics to its debug file
        '''
        # start by loading the data
        [self.X_train, self.y_train, self.X_val, self.y_val] = Model.loadData(filepath)

        # store the parameters sent to init that define our model
        self.num_freq = num_freq
        self.filter_row = filter_row
        self.filter_col = filter_col
        self.k1 = k1
        self.k2 = k2
        self.learningRate = learningRate
        self.pooling_strategy = pooling_strategy
        self.debug = debug

        # find num_training_vec, total_features, num_frames, num_classes, and l
        # from the shape of the data and store them
        self.storeParamsFromData()

        # Set-up and store the input and output placeholders
        self.x = tf.placeholder(tf.float32, [None, self.total_features])
        self.y_ = tf.placeholder(tf.float32, [None, self.num_classes])

        # Setup and store the tensors that perform the one-hot encoding
        self.y_train_OHEnc = tf.one_hot(self.y_train.copy(), self.num_classes)
        self.y_val_OHEnc = tf.one_hot(self.y_val.copy(), self.num_classes)

        # Touch each lazy_property once: the first access is what adds the
        # corresponding tensors to the graph.
        self.y_conv
        self.cross_entropy
        self.train_step
        self.accuracy

        # properties for use in debugging
        if self.debug:
            self.grads_and_vars

        # print to the user that the network has been set up, along with its properties
        print("Setting up Neural net with %g x %g filters, k1 = %g, k2 = %g, learningRate = %g"%(filter_row, filter_col, k1, k2, learningRate))

    def storeParamsFromData(self):
        '''
        Calculate and store parameters from the raw data
        total_features: The number of CQT coefficients total (includes all context frames)
        num_training_vec: The number of training examples in your dataset
        num_frames: The number of context frames in each training example (total_features / num_freq)
        num_classes: The number of songs we're distinguishing between in our output
        l: The length of our second convolutional kernel - for now, it's equal to num_frames
        '''
        num_training_vec, total_features = self.X_train.shape
        num_frames = int(total_features / self.num_freq)
        print('-- Num frames: {}'.format(num_frames))
        # labels are class indices, so the class count is the largest label + 1
        num_classes = int(max(self.y_train.max(), self.y_val.max()) + 1)

        # store what will be helpful later
        self.total_features = total_features
        self.num_training_vec = num_training_vec
        self.num_frames = num_frames
        self.num_classes = num_classes
        self.l = num_frames

    @lazy_property
    def y_conv(self):
        '''
        Create the tensor that computes the network output (logits) from the
        placeholder x. The output is un-normalised; apply a softmax to get an
        actual prediction.
        This tensor can be accessed using: self.y_conv
        '''
        # reshape the input into the form of a spectrograph
        x_image = tf.reshape(self.x, [-1, self.num_freq, self.num_frames, 1])
        x_image = tf.identity(x_image, name="x_image")

        # first convolutional layer parameters
        W_conv1 = init_weight_variable([self.filter_row, self.filter_col, 1, self.k1], "W_conv1")
        # one bias per filter; use the instance's k1 rather than a module-level global
        b_conv1 = init_bias_variable([self.k1], "b_conv1")

        # tensor that computes the output of the first convolutional layer
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        h_conv1 = tf.identity(h_conv1, name="h_conv_1")

        # number of values per example after the convolution: each spatial
        # dimension shrinks by (filter size - 1), and there are k1 channels
        flat_size = (self.num_freq - self.filter_row + 1) * (self.num_frames - self.filter_col + 1) * self.k1

        # flatten out the output of the first convolutional layer to pass to the softmax layer
        h_conv1_flat = tf.reshape(h_conv1, [-1, flat_size])
        h_conv1_flat = tf.identity(h_conv1_flat, name="h_conv1_flat")

        # softmax layer parameters
        W_sm = init_weight_variable([flat_size, self.num_classes], "W_sm")
        b_sm = init_bias_variable([self.num_classes], "b_sm")

        # the output of the layer - un-normalized and without a non-linearity
        # since cross_entropy_with_logits takes care of that
        y_conv = tf.matmul(h_conv1_flat, W_sm) + b_sm
        y_conv = tf.identity(y_conv, name="y_conv")
        return y_conv

    @lazy_property
    def cross_entropy(self):
        '''
        Create a tensor that computes the cross entropy cost
        Use the placeholder y_ as the labels, with input y_conv
        Note that softmax_cross_entropy_with_logits takes care of normalizing
        y_conv to make it a probability distribution
        This tensor can be accessed using: self.cross_entropy
        '''
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.y_, logits=self.y_conv))
        cross_entropy = tf.identity(cross_entropy, name="cross_entropy")
        return cross_entropy

    @lazy_property
    def optimizer(self):
        '''
        Create the Adam optimizer, using self.learningRate as its learning rate.
        It can be accessed using: self.optimizer
        '''
        return tf.train.AdamOptimizer(learning_rate=self.learningRate)

    @lazy_property
    def train_step(self):
        '''
        Creates the operation that represents a single training step. It
        can be passed a feed_dict that has x and y_, and it will compute the gradients
        and perform a single step.
        This operation can be accessed using: self.train_step
        '''
        return self.optimizer.minimize(self.cross_entropy)

    @lazy_property
    def accuracy(self):
        '''
        Create a tensor that computes the accuracy, using the placeholder y_ as the labeled data
        and y_conv for the predictions of the network.
        This tensor can be accessed using: self.accuracy
        '''
        correct_prediction = tf.equal(tf.argmax(self.y_conv, 1), tf.argmax(self.y_, 1))
        return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Properties that we'll use to do debugging print outs

    @lazy_property
    def grads_and_vars(self):
        '''
        Create the list of (gradient tensor, variable) pairs of the cross entropy
        with respect to every trainable variable.
        This list can be accessed using: self.grads_and_vars
        '''
        return self.optimizer.compute_gradients(self.cross_entropy, tf.trainable_variables())

    def train(self, batch_size, num_epochs, debug_out='debug.txt'):
        '''
        Train the network and log accuracy and cost after each epoch.

        - batch_size --- number of training examples per training step
        - num_epochs --- number of passes over the full training set
        - debug_out --- file that step-size diagnostics are appended to when
          self.debug is true

        Returns a list of seven lists:
        [train_acc_list, val_acc_list, train_err_list, val_err_list,
         train_acc_on_batch_list, train_err_on_batch_list, epoch_numbers]
        The "on_batch" lists are measured on the last batch of the epoch.
        epoch_numbers holds the zero-based index of each logged epoch.
        '''
        # set how often (in epochs) you want to print out the status of the NN
        print_freq = 1

        # Starting an interactive session and initializing the parameters.
        # The .run() / .eval() calls below are made without an explicit
        # session, so they rely on this session. It is not closed here.
        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())

        # Evaluate the one-hot encodings once, up front.
        # assumes the labels are stored as a column, so one_hot yields an extra
        # middle axis that index 0 removes --- TODO confirm against the data file
        y_trainOH = sess.run(self.y_train_OHEnc)[:, 0, :]
        y_valOH = sess.run(self.y_val_OHEnc)[:, 0, :]

        # feed dicts for the two full data sets, reused at every evaluation
        full_train_feed = {self.x: self.X_train, self.y_: y_trainOH}
        full_val_feed = {self.x: self.X_val, self.y_: y_valOH}

        # lists to record accuracy at several points during training
        train_acc_list = []
        val_acc_list = []
        train_acc_on_batch_list = []
        # lists to record the error at several points during training
        train_err_list = []
        val_err_list = []
        train_err_on_batch_list = []
        # track which epochs you record data during
        epoch_numbers = []

        # record the start time
        t_start = time.time()
        for epoch in range(num_epochs):
            epochStart = time.time()
            # train by systematically pulling batches of batch_size from
            # the training set and taking a training step on each batch
            for i in range(0, self.num_training_vec, batch_size):
                batch_end_point = min(i + batch_size, self.num_training_vec)
                train_batch_data = self.X_train[i : batch_end_point]
                train_batch_label = y_trainOH[i : batch_end_point]
                self.train_step.run(feed_dict={self.x: train_batch_data, self.y_: train_batch_label})
            epochEnd = time.time()

            # print and record data now that we've trained on our full training set
            if (epoch + 1) % print_freq == 0:
                # timing for the measurements of cost and accuracy
                evaluationStart = time.time()
                last_batch_feed = {self.x: train_batch_data, self.y_: train_batch_label}

                # compute training (on the most recent batch and the full data set)
                # and validation cost and accuracy, then print them and add them to the list
                # we start with accuracy:
                train_acc = self.accuracy.eval(feed_dict=full_train_feed)
                train_acc_list.append(train_acc)
                val_acc = self.accuracy.eval(feed_dict=full_val_feed)
                val_acc_list.append(val_acc)
                train_acc_on_batch = self.accuracy.eval(feed_dict=last_batch_feed)
                train_acc_on_batch_list.append(train_acc_on_batch)

                # Now we compute the error on each set:
                train_err = self.cross_entropy.eval(feed_dict=full_train_feed)
                train_err_list.append(train_err)
                val_err = self.cross_entropy.eval(feed_dict=full_val_feed)
                val_err_list.append(val_err)
                train_err_on_batch = self.cross_entropy.eval(feed_dict=last_batch_feed)
                train_err_on_batch_list.append(train_err_on_batch)

                # keep track of which epochs we have data for
                epoch_numbers.append(epoch)
                # this marks the end of our evaluation
                evaluationEnd = time.time()

                # print a summary of our NN at this epoch
                print("epoch: %d, time (train, evaluation): (%g, %g), t acc, v acc, t cost, v cost: %.5f, %.5f, %g, %g"%(epoch+1, epochEnd - epochStart, evaluationEnd - evaluationStart, train_acc, val_acc, train_err, val_err))

                # debugging print outs
                if self.debug:
                    # print out step / current value ratio for each parameter in our network
                    # based on training data from the most recent batch
                    # to the file with name debug_out
                    self.debug_WriteGradAndVar(train_batch_data, train_batch_label, epoch, debug_out)

        # record the total time spent training the neural network
        t_end = time.time()
        print('--Time elapsed for training for %g epochs: %g'%(num_epochs, t_end - t_start))

        # return the lists of logged data
        return [train_acc_list, val_acc_list, train_err_list, val_err_list, train_acc_on_batch_list, train_err_on_batch_list, epoch_numbers]

    def debug_WriteGradAndVar(self, xDebug, yDebug, epoch, debug_out):
        '''
        Append, to the file debug_out, the ratio |gradient * learning rate / value|
        for every element of every variable in self.grads_and_vars, followed by
        summary statistics (min, first quartile, median, mean, third quartile, max)
        for each variable.

        - xDebug, yDebug --- inputs and one-hot labels used to evaluate the gradients
        - epoch --- epoch number written at the top of this section of the file
        - debug_out --- path of the file to append to

        Requires a default session (the gradients and variables are evaluated
        with .eval()).
        '''
        # the learning rate is used with the gradient to find the step size;
        # it is the same value the optimizer was constructed with
        curLearningRate = self.learningRate

        # the context manager closes the file even if an evaluation below raises
        with open(debug_out, 'a+') as file_object:
            # record which epoch this is
            file_object.write("Epoch: %d\n"%(epoch))

            # the gradients are stored in tuples, where the first element is a tensor
            # that computes the gradient, and the second is the parameter that gradient
            # is associated with
            for gv in self.grads_and_vars:
                curGrads = gv[0].eval(feed_dict={self.x: xDebug, self.y_: yDebug})
                curSteps = curGrads * curLearningRate # scale down the gradient by the learning rate
                curVars = gv[1].eval()

                # How much, compared to the magnitude of the weight, are we stepping
                stepToVarRatio = np.absolute(np.divide(curSteps, curVars))

                # print the name of the variable, then all the step ratios (step amount / current value)
                file_object.write("Variable: " + gv[1].name + "\n")
                file_object.writelines(
                    str(index) + ": " + str(step) + "\n"
                    for index, step in np.ndenumerate(stepToVarRatio))

                # print summary statistics for this layer
                minVal = np.amin(stepToVarRatio)
                firstQuartile = np.percentile(stepToVarRatio, 25)
                median = np.median(stepToVarRatio)
                mean = np.mean(stepToVarRatio)
                thirdQuartile = np.percentile(stepToVarRatio, 75)
                maxVal = np.amax(stepToVarRatio)
                file_object.write("Statistics: (%g, %g, %g, %g, %g, %g)\n"%(minVal, firstQuartile, median, mean, thirdQuartile, maxVal))

            file_object.write("---------------------------------------\n")

    @staticmethod
    def loadData(filepath):
        '''
        Load and return four variables from the file with path filepath
        X_train: input data for training
        y_train: labels for X_train
        X_val: input data for validation
        y_val: labels for X_val

        The file is opened read-only and is closed before returning.
        Returns the list [X_train, y_train, X_val, y_val] of numpy arrays.
        '''
        print('==> Experiment 2l')
        print('==> Loading data from {}'.format(filepath))
        # benchmark
        t_start = time.time()

        # reading data; np.array copies each dataset into memory, so the
        # arrays stay valid after the file is closed
        with h5py.File(filepath, 'r') as f:
            X_train = np.array(f.get('trainingFeatures'))
            y_train = np.array(f.get('trainingLabels'))
            X_val = np.array(f.get('validationFeatures'))
            y_val = np.array(f.get('validationLabels'))

        t_end = time.time()
        print('--Time elapsed for loading data: {t:.2f} seconds'.format(t = t_end - t_start))

        print('-- Number of training samples: {}'.format(X_train.shape[0]))
        print('-- Number of validation samples: {}'.format(X_val.shape[0]))
        print('Shape of X_train: %s'%str(X_train.shape))
        print('Shape of y_train: %s'%str(y_train.shape))
        print('Shape of X_val: %s'%str(X_val.shape))
        print('Shape of y_val: %s'%str(y_val.shape))

        return [X_train, y_train, X_val, y_val]
# Our main, example: build a Model from command-line parameters and train it.
#
# Command-line parameters, in order: filterRow, filterCol, k1, k2, learningRate,
# then the filename for the plot and the hardware type.

# The first five parameters are needed to build the model, so stop with a
# usage message if any is missing or malformed instead of carrying on with
# undefined names.
try:
    filterRow = int(sys.argv[1])
    filterCol = int(sys.argv[2])
    k1 = int(sys.argv[3])
    k2 = int(sys.argv[4])
    learningRate = float(sys.argv[5])
except (IndexError, ValueError) as e:
    print('-- {}'.format(e))
    sys.exit('usage: %s filterRow filterCol k1 k2 learningRate [plotFile [hardwareType]]' % sys.argv[0])

# The last two parameters are read but not used below, so their absence is
# only reported.
plotFile = None
hardwareType = None
try:
    plotFile = str(sys.argv[6])
    hardwareType = str(sys.argv[7]) # string that tells us what hardware it's running on - k80, p100, or CPU
except IndexError as e:
    print('-- {}'.format(e))

batchSize = 1000
numEpochs = 30
poolingStrategy = 'MAX'
filepath = '/pylon2/ci560sp/cstrong/exp2/taylorswift_smallDataset_71_7.mat'

m = Model(121, filepath, filterRow, filterCol, k1, k2, learningRate, poolingStrategy, True) # set it to debug
# NOTE(review): train is called with a batch size of 5000 and 10 epochs;
# batchSize and numEpochs above are not what is passed --- confirm which is intended.
m.train(5000, 10)