<a href="https://colab.research.google.com/github/venetisgr/Vception/blob/master/vception.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import os 

base = " "  # root directory of the dataset, one sub-folder per class (set this before running)

# os.listdir returns entries in arbitrary, platform-dependent order. Sorting makes
# the class -> integer label assignment identical on every run and every machine.
classes = sorted(os.listdir(base))  # names of the class folders
no_classes = len(classes)
print(no_classes)

In [0]:
labels = {}  # maps each sample ID ("<class>/<file>") to its integer class index
IDs = []     # every sample ID, in the order the class folders are visited

# The per-class listing is sorted so that the order of IDs (and therefore any
# seeded shuffle applied to it) does not depend on the file system's listing order.
# os.path.join is used so `base` works with or without a trailing separator.
for i, c in enumerate(classes):  # i is the integer label of class folder c
    for image in sorted(os.listdir(os.path.join(base, c))):
        sample_id = c + "/" + image  # e.g. 'A.J._Buckley/0.npy'
        IDs.append(sample_id)
        labels[sample_id] = i        # e.g. {'A.J._Buckley/0.npy': 0, 'A.J._Buckley/1.npy': 0}

In [0]:
import random #validation split

batch_size = 32

# Shuffle a copy instead of IDs itself: the in-place shuffle made this cell
# non-idempotent (re-running it reshuffled an already shuffled list and silently
# produced a different train/validation split). Seeding immediately before the
# shuffle keeps the permutation of the first run unchanged.
shuffled_ids = list(IDs)
random.seed(7)
random.shuffle(shuffled_ids)

split = int(0.85 * len(shuffled_ids))  # 85% training / 15% validation

train_ids = shuffled_ids[:split]
valid_ids = shuffled_ids[split:]


In [0]:
# Spot-check one training sample: its relative path, then its integer class label.
sample_id = train_ids[1]
print(sample_id)
print(labels[sample_id])

In [0]:
# Dataset size summary, one value per line: number of classes, total samples,
# training samples, validation samples.
for count in (no_classes, len(IDs), len(train_ids), len(valid_ids)):
    print(count)

We need a generator that feeds the neural network its data in batches. The generator also normalizes each sample as it is loaded.

In [0]:
import keras
from keras.utils import to_categorical


class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads ``.npy`` samples from disk in batches.

    Every sample is loaded from ``base`` joined with its ID, cast to float32,
    normalised along axis 1 (each row gets zero mean and unit standard
    deviation) and given a trailing channel axis. Labels are returned
    one-hot encoded.

    Args:
        IDs: list of sample IDs (paths relative to ``base``).
        labels: dict mapping each sample ID to its integer class index.
        base: directory the sample IDs are relative to.
        batch_size: number of samples per batch.
        dim: shape of one sample before the channel axis is added.
        n_channels: size of the trailing channel axis of a batch.
        n_classes: number of classes used for the one-hot encoding.
        shuffle: if true, reshuffle the sample order at the end of every epoch.
        no_samples: sample count used to compute the number of batches per
            epoch; defaults to ``len(IDs)`` when omitted.
    """

    def __init__(self, IDs, labels, base, batch_size=32, dim=(64,601), n_channels=1,
                 n_classes=1211, shuffle=True, no_samples=None):
        """Store the configuration and build the initial sample order."""
        self.dim = dim
        self.batch_size = batch_size
        self.IDs = IDs
        self.labels = labels
        self.base = base
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        # The default None used to crash __len__ with a TypeError; fall back to
        # the size of the ID list instead.
        self.no_samples = len(IDs) if no_samples is None else no_samples
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch."""
        return int(self.no_samples // self.batch_size)

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        # Positions (into self.IDs) of the samples that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into sample IDs
        list_IDs_temp = [self.IDs[k] for k in indexes]

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load and normalise the given samples.

        Returns:
            ``X`` of shape ``(n_samples, *dim, n_channels)`` (float32) and the
            one-hot encoded labels of shape ``(n_samples, n_classes)``.
        """
        # Size the buffers by the actual number of samples so a short batch never
        # carries uninitialised rows from np.empty.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels), dtype=np.float32)
        y = np.empty(n_samples, dtype=int)

        # Guards the division below against rows with zero standard deviation.
        eps = np.finfo(np.float32).eps

        for i, ID in enumerate(list_IDs_temp):
            sample = np.load(os.path.join(self.base, ID)).astype(np.float32)

            # Mean / std normalisation of every row along axis 1. keepdims=True
            # keeps the statistics broadcastable against the sample; the previous
            # code used mean[0] / std[0], i.e. the statistics of the first row
            # only, for the whole sample.
            mean = np.mean(sample, axis=1, keepdims=True)
            std = np.std(sample, axis=1, keepdims=True)
            sample = (sample - mean) / np.maximum(std, eps)

            X[i,] = np.expand_dims(sample, axis=2)  # append the channel axis

            # Integer class index of this sample
            y[i] = self.labels[ID]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [0]:
# One-hot encoding example: class index 5 with 10 classes gives a length-10
# vector whose only non-zero entry is at position 5.
to_categorical(5, num_classes=10)

In [0]:
# Build the training and validation generators. Both share every setting except
# the list of sample IDs and the matching sample count.
generator_options = dict(batch_size=batch_size, n_classes=no_classes)

train_generator = DataGenerator(train_ids, labels, base,
                                no_samples=len(train_ids), **generator_options)
valid_generator = DataGenerator(valid_ids, labels, base,
                                no_samples=len(valid_ids), **generator_options)

In [0]:
# Sanity check: pull the first training batch and show the shapes of its
# inputs (x) and one-hot targets (y).
for x, y in train_generator:
    for batch_part in (x, y):
        print(batch_part.shape)
    break

In [0]:
# Show the scalar type of a single input value of the batch fetched above.
first_value = x[0, 0, 0, 0]
print(type(first_value))

In [0]:
from __future__ import print_function

import keras
from keras.layers import Input, Lambda, Dense, Flatten
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator


import numpy as np


from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dropout, Input, Conv3D, PReLU,SeparableConv2D, add,GlobalAveragePooling2D
from keras.utils import to_categorical
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras import layers

import os
from glob import glob

#Model Architecture  Vception - [3,5,7]

In [0]:
#Model Architecture  Vception - [3,5,7]
# Entry flow: two plain convolutions followed by three residual blocks built from
# separable convolutions (128, 256 and 728 filters). Statement order is kept as is
# because Keras derives automatic layer names from the order of instantiation.

channel_axis = -1  # BatchNormalization is applied over the last (channel) axis

inpt = layers.Input(shape=(64, 601, 1)) #  mfcc,samples

# Stem convolution 1: 32 filters, 3x3 kernel, stride 2, then BN + ReLU
x = Conv2D(32, (3, 3),strides=(2, 2),use_bias=False)(inpt)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation("relu")(x)

# Stem convolution 2: 64 filters, 3x3 kernel, stride 1, then BN + ReLU
x = Conv2D(64, (3, 3),use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation("relu")(x)

## Residual block 1 (128 filters)
# Shortcut: 1x1 convolution with stride 2 so the shortcut has the same number of
# channels and the same down-sampling as the main branch below.
residual = Conv2D(128, (1, 1),strides=(2, 2),padding='same',use_bias=False)(x)
residual = BatchNormalization(axis=channel_axis)(residual)
# no ReLU on the shortcut

# Main branch: two separable convolutions, then stride-2 max pooling.
# Unlike the later blocks, this one does not start with a ReLU.
x = SeparableConv2D(128, (3, 3), padding='same',use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation('relu')(x)

x = SeparableConv2D(128, (3, 3), padding='same',use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
# no ReLU before pooling
x = MaxPooling2D((3, 3),strides=(2, 2),padding='same')(x)

# Merge main branch and shortcut
x = add([x, residual])

## Residual block 2 (256 filters)
# Shortcut is taken from the block input before the ReLU below is applied.
residual = Conv2D(256, (1, 1),strides=(2, 2),padding='same',use_bias=False)(x)
residual = BatchNormalization(axis=channel_axis)(residual)
# no ReLU on the shortcut

# Main branch: ReLU, two separable convolutions, stride-2 max pooling
x = Activation('relu')(x)

x = SeparableConv2D(256, (3, 3), padding='same',use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation('relu')(x)

x = SeparableConv2D(256, (3, 3), padding='same',use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
# no ReLU before pooling
x = MaxPooling2D((3, 3),strides=(2, 2),padding='same')(x)

# Merge main branch and shortcut
x = add([x, residual])

## Residual block 3 (728 filters)
# Shortcut is taken from the block input before the ReLU below is applied.
residual = Conv2D(728, (1, 1),strides=(2, 2),padding='same',use_bias=False)(x)
residual = BatchNormalization(axis=channel_axis)(residual)
# no ReLU on the shortcut

# Main branch: ReLU, two separable convolutions, stride-2 max pooling
x = Activation('relu')(x)

x = SeparableConv2D(728, (3, 3), padding='same',use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation('relu')(x)

x = SeparableConv2D(728, (3, 3), padding='same',use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
# no ReLU before pooling
x = MaxPooling2D((3, 3),strides=(2, 2),padding='same')(x)

# Merge main branch and shortcut
x = add([x, residual])

##############################################################################

# Middle flow: 8 identical residual blocks. In every block the ReLU-activated input
# feeds three parallel branches that differ only in kernel size (3x3, 5x5, 7x7).
# A branch consists of three SeparableConv2D(728) + BatchNormalization stages with a
# ReLU after the first two stages only (no ReLU and no max pooling at the end).
# The three branch outputs are summed with the un-activated block input.
for i in range(8): #number of main block multiples
    residual = x
    x = Activation('relu')(x)

    branches = []
    for kernel in (3, 5, 7):
        branch = x
        for stage in range(3):
            branch = SeparableConv2D(728, (kernel, kernel), padding='same', use_bias=False)(branch)
            branch = BatchNormalization(axis=channel_axis)(branch)
            if stage < 2:  # the last stage stays linear
                branch = Activation('relu')(branch)
        branches.append(branch)

    # Sum of the 3x3, 5x5 and 7x7 branches plus the identity shortcut
    x = add(branches + [residual])
    
###################################################################################################

##
# Exit flow: one down-sampling residual block (728 -> 1024 filters), two further
# separable convolutions (1536 and 2048 filters), global average pooling and the
# softmax classifier.

# Shortcut: 1x1 convolution with stride 2, matching the pooled main branch
residual = Conv2D(1024, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
residual = BatchNormalization(axis=channel_axis)(residual)

# Main branch
x = Activation('relu')(x)
x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation('relu', name='block13_sepconv2_act')(x)

x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)

x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

# Merge main branch and shortcut
x = add([x, residual])

# Final feature extraction
x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation('relu')(x)

x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation('relu')(x)

# Classifier head
x = GlobalAveragePooling2D()(x)
# The output width must equal the width of the one-hot targets, which the data
# generators build with n_classes=no_classes. The previous hard-coded 1211 only
# worked when the dataset happened to contain exactly 1211 class folders.
x = Dense(no_classes, activation='softmax')(x)


In [0]:
# Build the functional model from its input tensor and its softmax output tensor.
# (This only defines the model; compiling is a separate step.)
model = Model(inputs=inpt, outputs=x)

In [0]:
#Model Architecture Visualization

#model.summary()

For the learning rate, we are going to use a cyclical learning rate (CLR) schedule.

Full paper about it can be found here: https://arxiv.org/abs/1506.01186


In [0]:
#add cycliclr


from keras.callbacks import *

class CyclicLR(Callback):
    """This callback implements a cyclical learning rate policy (CLR).
    The method cycles the learning rate between two boundaries with
    some constant frequency, as detailed in this paper (https://arxiv.org/abs/1506.01186).
    The amplitude of the cycle can be scaled on a per-iteration or 
    per-cycle basis.
    This class has three built-in policies, as put forth in the paper.
    "triangular":
        A basic triangular cycle w/ no amplitude scaling.
    "triangular2":
        A basic triangular cycle that scales initial amplitude by half each cycle.
    "exp_range":
        A cycle that scales initial amplitude by gamma**(cycle iterations) at each 
        cycle iteration.
    For more detail, please see paper.
    
    # Example
        ```python
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., mode='triangular')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```
    
    Class also supports custom scaling functions:
        ```python
            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., scale_fn=clr_fn,
                                scale_mode='cycle')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```    
    # Arguments
        base_lr: initial learning rate which is the
            lower boundary in the cycle.
        max_lr: upper boundary in the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude; therefore 
            max_lr may not actually be reached depending on
            scaling function.
        step_size: number of training iterations per
            half cycle. Authors suggest setting step_size
            2-8 x training iterations in epoch.
        mode: one of {triangular, triangular2, exp_range}.
            Default 'triangular'.
            Values correspond to policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in 'exp_range' scaling function:
            gamma**(cycle iterations)
        scale_fn: Custom scaling policy defined by a single
            argument lambda function, where 
            0 <= scale_fn(x) <= 1 for all x >= 0.
            mode parameter is ignored 
        scale_mode: {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on 
            cycle number or cycle iterations (training
            iterations since start of cycle). Default is 'cycle'.

    # Raises
        ValueError: if scale_fn is None and mode is not one of the
            three built-in policies.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1/(2.**(x-1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma**(x)
                self.scale_mode = 'iterations'
            else:
                # Previously an unknown mode left scale_fn / scale_mode unset and
                # only failed later, inside clr(), with an AttributeError.
                raise ValueError(
                    "mode must be one of 'triangular', 'triangular2' or "
                    "'exp_range' when scale_fn is None, got %r" % (mode,))
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.   # iterations since the last _reset()
        self.trn_iterations = 0.   # iterations since this callback was created
        self.history = {}          # per-batch record of lr, iteration count and logs

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        """Return the learning rate for the current value of clr_iterations."""
        # 1-based index of the current cycle (one cycle = 2 * step_size iterations)
        cycle = np.floor(1+self.clr_iterations/(2*self.step_size))
        # Distance from the peak of the current cycle, in units of step_size
        x = np.abs(self.clr_iterations/self.step_size - 2*cycle + 1)
        # The scaling function receives either the cycle number or the iteration count
        scale_arg = cycle if self.scale_mode == 'cycle' else self.clr_iterations
        return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(scale_arg)

    def on_train_begin(self, logs=None):
        """Set the optimizer's learning rate when training starts."""
        # NOTE(review): K is expected to be the Keras backend module, already in
        # scope in this notebook -- confirm for the Keras version in use.
        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):
        """Record the batch in history and set the learning rate for the next batch.

        The first positional argument keeps its original name `epoch`.
        """
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        K.set_value(self.model.optimizer.lr, self.clr())

In [0]:
# Stage-1 cyclical learning-rate schedule ('triangular2' policy).
clr = CyclicLR(
    base_lr=0.00001,     # lower boundary of the cycle
    max_lr=0.00005,      # upper boundary of the cycle
    step_size=40000*4.,  # number of iterations until the peak is reached #5-6*epoch batches
    mode='triangular2',
)

In [0]:
# Callbacks used during training.

# Stop when val_loss has not improved for 10 epochs and restore the best weights.
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10,
                          verbose=1, restore_best_weights=True)

# Write "vception_cp.h5" only when val_loss reaches a new minimum.
checkpoint = ModelCheckpoint("vception_cp.h5", monitor="val_loss", mode="min",
                             save_best_only=True, verbose=1)

# ReduceLROnPlateau is intentionally not part of the list: the learning rate is
# driven by the cyclical schedule (clr) instead.
callbacks = [earlystop, checkpoint, clr]

In [0]:
# Compile the model. The learning rate given to Adam here is only a starting
# value: the CyclicLR callback overwrites it during training.
adam = Adam(lr=0.00001)  # Adam's default would be 0.001

compile_options = {
    "loss": 'categorical_crossentropy',  # expects one-hot encoded targets
    "optimizer": adam,
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

#Default accuracy is Top-1

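Training takes place in multiple stages: each stage continues training the same model with a new cyclical learning rate schedule whose base (and maximum) learning rate is lower than in the previous stage.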


In [0]:
# Stage 1 training: fit_generator consumes the batch generators directly.
epochs = 50
# batch_size (32) is defined at the top of the notebook
train_steps = len(train_ids) // batch_size  # number of full training batches per epoch
valid_steps = len(valid_ids) // batch_size  # number of full validation batches per epoch

r = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=valid_generator,
    validation_steps=valid_steps,
)
# r contains the results not the model itself

In [0]:
# Keep a copy of the model as it is after the first training stage.
stage1_path = "vception_1.h5"
model.save(stage1_path)
print("Model Saved")

In [0]:
# Stage-2 cyclical learning-rate schedule: same policy and step size as before,
# with lower learning-rate boundaries.
clr = CyclicLR(base_lr=0.000001, max_lr=0.00001,
               step_size=40000*4., mode='triangular2') #step size is the no iterations until it reaches its peak #5-6*epoch batches

# Rebuild the callback list. The existing `callbacks` list still references the
# previous CyclicLR instance, so rebinding `clr` alone would leave the next
# training run on the old schedule and the new boundaries would never be applied.
callbacks = [earlystop, checkpoint, clr]

In [0]:
# Stage 2 training: continue fitting the same model for up to 20 more epochs.
epochs = 20
# batch_size (32) is defined at the top of the notebook
train_steps = len(train_ids) // batch_size  # number of full training batches per epoch
valid_steps = len(valid_ids) // batch_size  # number of full validation batches per epoch

r2 = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=valid_generator,
    validation_steps=valid_steps,
)
# r contains the results not the model itself   

In [0]:
# Keep a copy of the model as it is after the second training stage.
stage2_path = "vception_2.h5"
model.save(stage2_path)
print("Model Saved")