# Attain required imports

In [1]:
import numpy as np 
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from keras.utils import to_categorical, plot_model #imports training engine/ ResNet class
from keras import backend as K
from time import time 
from models import ResNet
from collections import Counter
import matplotlib.pyplot as plt
import os
%matplotlib inline
from sklearn.metrics import classification_report, confusion_matrix #reports losses,scores and utils functions to measure classification report

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Load array data from npy files

In [2]:
X_train = np.load('X_train.npy')
Y_train = np.load('Y_train.npy')
X_val = np.load('X_val.npy')
Y_val = np.load('Y_val.npy') #training/ validation data

# Establish word classes

In [3]:
classes = ['yes', 'no', 
           'up', 'down', 
           'left', 'right', 
           'on', 'off', 
           'stop', 'go', 
           'silence', 'unknown']

all_classes = [x for x in classes[:11]]
for ind, cl in enumerate(os.listdir('train/audio/')):
    if cl not in classes:
        all_classes.append(cl)
print(all_classes) #words transcribed from audio recordings

['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go', 'silence', 'dog', '.ipynb_checkpoints', 'bird', 'sheila', 'eight', 'five', 'cat', 'zero', 'six', 'house', 'tree', 'bed', 'wow', 'four', 'three', 'nine', 'two', 'seven', 'marvin', 'one', 'happy']


# Training data is unbalanced with larger unknown class. Weights will give higher penalties to misclassifications.

In [4]:
def get_class_weights(y):
    counter = Counter(y)
    majority = max(counter.values())
    return  {cls: float(majority/count) for cls, count in counter.items()}  

class_weights = get_class_weights(Y_train) #balances training data because of high amounts of "unknown" class

In [5]:
class_weights

{10: 101.98891966759003,
 11: 1.0,
 3: 17.574224343675418,
 6: 17.449289099526066,
 5: 17.441023211747986,
 1: 17.490736342042755,
 8: 17.253045923149017,
 2: 17.4080378250591,
 9: 17.432765151515152,
 7: 17.52403617325083,
 0: 17.399810964083176,
 4: 17.482431149097817}

# Declare input size and batch size

In [6]:
input_size = X_train.shape[1:]
batch_size = 196

# Declare output size and filter list

In [7]:
filters_list = [8,16,32]
output_size = 12

In [8]:
#adjust these strings for organizeing the saved files
date = '1003'
arch = 'resnet8_16_32'

# Build the model

In [9]:
#build the model
sr = ResNet(filters_list, input_size, output_size)
sr.build()
sr.m.compile(loss='categorical_crossentropy', 
             optimizer='adadelta', 
             metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.


# Save png of model 

In [10]:
# to save a png of the model you need pydot and graphviz installed
plot_model(sr.m, 
           to_file = 'models/{}_{}.png'.format(arch,date), 
           show_shapes = True)

OSError: `pydot` failed to call GraphViz.Please install GraphViz (https://www.graphviz.org/) and ensure that its executables are in the $PATH.

# Establish where to save model checkpoints and logs

In [None]:
#callbacks, remember to make folders to store files 
checkpointer = ModelCheckpoint(filepath='models/{}_{}_best.h5'.format(arch, date),
                               verbose=0,
                               save_best_only=True)
   
earlystopping = EarlyStopping()

tensorboard = TensorBoard(log_dir = 'logs/{}_{}'.format(date, time()), 
                          histogram_freq = 0, 
                          write_graph = True, 
                          write_images = True)

# Store model history 

In [None]:
history = sr.m.fit(X_train, #train the models history object stores training data for later access
                   to_categorical(Y_train), 
                   batch_size = batch_size, 
                   epochs = 5, 
                   verbose = 1, shuffle = True, 
                   class_weight = class_weights,
                   validation_data = (X_val, to_categorical(Y_val)), 
                   callbacks = [checkpointer]) # add more callbacks if you want

# Save weights

In [None]:
sr.m.save_weights("models/{}_{}_last.h5".format(arch, date)) 

# Visualize training 

In [None]:
#%% visualize training
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('graphs/{}_{}_acc.png'.format(arch, date),bbox_inches='tight')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('graphs/{}_{}_loss.png'.format(arch, date), bbox_inches='tight')
plt.show() #plots training graphs then saves them

In [None]:
val_pred = sr.m.predict(X_val, batch_size = batch_size, verbose = 1) #predicts the validation classification and score

In [None]:
print(classification_report(Y_val, np.argmax(val_pred, axis = 1), target_names = classes, digits = 3))

In [None]:
print(confusion_matrix(Y_val, np.argmax(val_pred, axis = 1)))

# Build Connectionist Temporal Classification (CTC) model

In [None]:
#CTC Model or Connectionist Temporal Classification speech to text

from models import CTC, ctc_lambda_func #used in the CTC build method
from ctc_utils import char_map, index_map, text_to_int, get_intseq, get_ctc_params

In [None]:
# dummy loss
def ctc(y_true, y_pred):
    return y_pred

In [None]:
sr_ctc = CTC((122,85), 28)
sr_ctc.build()

In [None]:
sr_ctc.m.compile(loss = ctc, optimizer = 'adam', metrics = ['accuracy'])
sr_ctc.tm.compile(loss = ctc, optimizer = 'adam')

# Load Y train files

In [None]:
Y_train_all = np.load('Y_train_all.npy')
Y_val_all = np.load('Y_val_all.npy')

In [None]:
labels, input_length, label_length = get_ctc_params(Y = Y_train_all, classes_list = all_classes)
labels_val, input_length_val, label_length_val = get_ctc_params(Y = Y_val_all, classes_list = all_classes)

# Save checkpoints

In [None]:
checkpointer = ModelCheckpoint(filepath="models/ctc_{}_best.h5".format(date),
                               verbose=0,
                               save_best_only=True)

# Train the model and save history

In [None]:
#training sequence
history = sr_ctc.m.fit([np.squeeze(X_train), 
                            labels, 
                            input_length, 
                            label_length], 
                       np.zeros([len(Y_train_all)]), 
                       batch_size = 128, 
                       epochs = 10, 
                       validation_data = ([np.squeeze(X_val), 
                                           labels_val, 
                                           input_length_val, 
                                           label_length_val],
                                          np.zeros([len(Y_val_all)])), 
                       callbacks = [checkpointer], 
                       verbose = 1, shuffle = True)

In [None]:

sr_ctc.m.save_weights('models/ctc_{}.h5'.format(date))
sr_ctc.tm.load_weights('models/ctc_{}_best.h5'.format(date)) #save and load weights

# Plot results to test model for accuracy

In [None]:
# plots accuracy and loss
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('graphs/ctc_{}_acc.png'.format(date),bbox_inches='tight')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('graphs/ctc_{}_loss.png'.format(date), bbox_inches='tight')
plt.show()

# Decode the output of predictions into strings

In [None]:

def str_out(dataset = X_val):
    k_ctc_out = K.ctc_decode(sr_ctc.tm.predict(np.squeeze(dataset), 
                                                verbose = 1), 
                             np.array([28 for _ in dataset]))
    decoded_out = K.eval(k_ctc_out[0][0])
    str_decoded_out = []
    for i, _ in enumerate(decoded_out):     
        str_decoded_out.append("".join([index_map[c] for c in decoded_out[i] if not c == -1]))
        
    return str_decoded_out # decodes the output of predictions into strings

In [None]:
y_pred_val = str_out()

# Show comparison between predicted values and real values

In [None]:
print('PREDICTED: \t REAL:')
for i in range(10):
    print(y_pred_val[i], '\t\t',all_classes[Y_val_all[i]]) #what the predicted words were and what the computer deciphered

# Analyze Precision

In [None]:
print(classification_report([all_classes[Y_val_all[i]] for i, _ in enumerate(Y_val_all)], 
                            y_pred_val, labels = all_classes)) #depicts classification report

# Generate figure which analyzes results/accuracy

In [None]:

plt.figure(figsize = (8,8))
plt.imshow(confusion_matrix([all_classes[Y_val_all[i]] for i, _ in enumerate(Y_val_all)], 
                            y_pred_val, labels = all_classes))
plt.xticks(np.arange(0, len(all_classes)), all_classes, rotation = 'vertical', size = 12)
plt.yticks(np.arange(0, len(all_classes)), all_classes, size = 12)
plt.show()