In [1]:
import os
import sys

nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

In [2]:
import tensorflow as tf 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import utils as U
#import opts
import subprocess


In [3]:
os.environ["CUDA_VISIBLE_DEVICES"]="0"    

## Configuration of the architecture 

--> to add in an external file 

In [4]:
#path to specify
#bashCommand = "python esc_gen.py . "
#process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
#output, error = process.communicate()

In [5]:
class Config:
    def __init__(self):
        """Initialize hyperparameters
        """          
        # General settings
        self.data='.'
        #self.data='/home/dl5/team10/Perla/data/ESC-50-master/audio_resampled'
        self.save=None #'Directory to save the results'
    
        # Dataset details
        self.dataset = 'esc50'
        self.nClasses = 50 #classes are encoding from 0 to 49
        self.nFolds = 5
        self.splits = range(1, self.nFolds + 1)
        self.nCrops=10
    
        # Model details
        
        #Define hyperparameters
        self.netType = 'envnet'
        self.fs = 16000
        self.inputLength = 24014
        self.nEpochs =600
        self.LR=0.005
        self.momentum=0.9
        self.weightDecay=5e-4
        self.momentum=0.9
        self.batchSize=64
        self.optimizer='nesterov'
        
        
        self.schedule=[0.5, 0.75]
        self.warmup=0
    
#         if self.save != 'None' and not os.path.isdir(self.save):
#             os.makedirs(self.save)
    
        self.display_info()
    
    def display_info(opt):
        print('+------------------------------+')
        print('| Sound classification')
        print('+------------------------------+')
        print('| dataset  : {}'.format(opt.dataset))
        print('| netType  : {}'.format(opt.netType))
        print('| nEpochs  : {}'.format(opt.nEpochs))
        print('| LRInit   : {}'.format(opt.LR))
        print('| schedule : {}'.format(opt.schedule))
        print('| warmup   : {}'.format(opt.warmup))
        print('| batchSize: {}'.format(opt.batchSize))
        print('| optimize: {}'.format(opt.optimizer))
        print('+------------------------------+')

In [6]:
param = Config()

+------------------------------+
| Sound classification
+------------------------------+
| dataset  : esc50
| netType  : envnet
| nEpochs  : 600
| LRInit   : 0.005
| schedule : [0.5, 0.75]
| warmup   : 0
| batchSize: 64
| optimize: nesterov
+------------------------------+


## Reading and Pre-processing the inputs

Pre-process functions

In [7]:
def one_hot_encoding(labels,param):
    b = np.zeros((len(labels), param.nClasses))
    b[np.arange(len(labels)), labels] = 1
    return b

In [8]:
def random_crop(sounds, size=param.inputLength):
    cropped_sounds=[]
    for s in sounds:
        org_size = len(s)
        start = random.randint(0, org_size - size)
        cropped_sounds.append(s[start: start + size])
    return cropped_sounds

In [9]:
def padding(sounds, pad=param.inputLength // 2):
    return [np.pad(s, pad, 'constant') for s in sounds]

In [None]:
def normalize(sounds, factor=32768.0):
    return [s/factor for s in sounds]

In [None]:
# For testing phase
def multi_crop(sounds,input_length=param.inputLength, n_crops=param.nCrops):
    multi_cropped_sounds=[]
    for s in sounds:
        stride = (len(s) - input_length) // (n_crops - 1)
        multi_cropped_sound = [s[stride * i: stride * i + input_length] for i in range(n_crops)]
        multi_cropped_sounds.append(np.array(multi_cropped_sound))
    return multi_cropped_sounds

TO DO : implement a function of pre-processing (prototype in ESC_vs0)

In [None]:
def setup(param, split):
    dataset = np.load(os.path.join(param.data,'wav16.npz'))
    # Split to train and val
    train_sounds = []
    train_labels = []
    val_sounds = []
    val_labels = []
    for i in range(1, param.nFolds + 1):
        sounds = dataset['fold{}'.format(i)].item()['sounds']
        labels = dataset['fold{}'.format(i)].item()['labels']
        if i == split:
            #val_sounds.extend(preprocess(param,sounds,False))
            val_sounds.extend(sounds)
            val_labels.extend(labels)
        else:
            #train_sounds.extend(preprocess(param,sounds,True))
            train_sounds.extend(sounds)
            train_labels.extend(labels)
            
    train_sounds=normalize(random_crop(padding(train_sounds)))        
    train_labels= one_hot_encoding(train_labels,param)
    
    val_sounds=normalize(random_crop(padding(val_sounds)))
    val_labels= one_hot_encoding(val_labels,param)
    
    return train_sounds,train_labels, val_sounds,val_labels

Test the function with only one split

In [None]:
train_sounds,train_labels,val_sounds,val_labels=setup(param,1)


In [None]:
train_sounds=np.asarray(train_sounds,dtype=np.float32)[:,None,:,None]
train_labels=np.asarray(train_labels)

Doesn't work

In [None]:
# Iterator setup
#train_iter = tf.data.Dataset.from_tensor_slices((np.asarray(train_sounds),np.asarray(train_labels))).repeat().batch(param.batchSize)

#train_iter = tf.data.Dataset.from_tensor_slices((np.asarray(train_sounds,dtype=np.float32)[:,None,:,None],np.asarray(train_labels))).shuffle(True).repeat().batch(param.batchSize)
#val_iter = tf.data.Dataset.from_tensor_slices((np.asarray(val_sounds,dtype=np.float32)[:,None,:,None],np.asarray(val_labels))).shuffle(False).batch(param.batchSize// param.nCrops)

## Definition of EnvNet

TO DO : create a class class EnvNet()

In [None]:
# Define Inputs
def add_placeholders():
    """Define placeholders = entries to computational graph"""
    # shape = (batch size, max length of sentence in batch)
    #self.word_ids = tf.placeholder(tf.int32, shape=[None, None],name="word_ids")
    X = tf.placeholder(tf.float32,[None, 1,param.inputLength,1],name="input_X")
    Y = tf.placeholder(tf.int32, [None, param.nClasses], name="input_Y")
    
    # hyperparameters
    #dropout = tf.placeholder(dtype=tf.float32, shape=[],
    #               name="dropout")
    #lr = tf.placeholder(dtype=tf.float32, shape=[],
    #                name="lr")
    #return dropout, lr
    return X,Y

def conv_bn_relu(inputTensor,out_channels,ksize,pad,initialW,bias,name,stride=(1,1)):
    with tf.variable_scope(name):
        conv = tf.layers.conv2d(inputs=inputTensor,filters=out_channels, kernel_size=ksize,padding=pad,strides=stride,kernel_initializer=initialW,use_bias=bias) #name to configure
        bn = tf.layers.batch_normalization(conv)
        relu=tf.nn.relu(bn)
    return  relu

def net(x,n_classes):
    conv1=conv_bn_relu(inputTensor=x, out_channels=40, ksize=[1,8],pad='valid',initialW=tf.initializers.truncated_normal,bias=False,name='conv1')
    conv2=conv_bn_relu(inputTensor=conv1, out_channels=40, ksize=[1,8],pad='valid',initialW=tf.initializers.truncated_normal,bias=False,name='conv2')
    pool2=tf.layers.max_pooling2d(conv2,pool_size=[1,160],strides=(1,160),padding='valid',name='pool2')
    #by using data_format channels last : we have : batch*N_h*N_w*N_c :(batch*1*150*40)
    x_perm=tf.transpose(pool2, perm=[0,3,2,1])
    conv3=conv_bn_relu(inputTensor=x_perm, out_channels=50, ksize=[8,13],pad='valid',initialW=tf.initializers.truncated_normal,bias=False,name='conv3')
    pool3=tf.layers.max_pooling2d(conv3,pool_size=[3,3],strides=(3,3),padding='valid',name='pool3')
    conv4=conv_bn_relu(inputTensor=pool3, out_channels=50, ksize=[1,5],pad='valid',initialW=tf.initializers.truncated_normal,bias=False,name='conv4')
    pool4=tf.layers.max_pooling2d(conv3,pool_size=[1,3],strides=(1,3),padding='valid',name='pool4')

    #besoin éventuel de vectoriser
    #flatten_pool4_out = tf.contrib.layers.flatten(pool4)
    sha=pool4.get_shape().as_list()
    flatten_pool4_out=tf.reshape(pool4,[-1,np.prod(sha[1:])])
    
    fc5=tf.layers.dense(flatten_pool4_out,4096,name='fc5')
    #fc5=tf.layers.dense(pool4,4096,name='fc5')
    fc5= tf.nn.dropout(tf.nn.relu(fc5),keep_prob=0.5)
    fc6=tf.layers.dense(fc5, 4096,name='fc6')
    fc6= tf.nn.dropout(tf.nn.relu(fc6),keep_prob=0.5)
    fc7=tf.layers.dense(fc6, n_classes,name='fc7')

    return fc7

def add_loss_op(logits,labels):
    losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)
    loss = tf.reduce_mean(losses)
    return loss

def add_train_op(lr_method, lr,momentum, loss):
    """Defines self.train_op that performs an update on a batch

    Args:
        lr_method: (string) sgd method, for example "adam"
        lr: (tf.placeholder) tf.float32, learning rate
        loss: (tensor) tf.float32 loss to minimize
        clip: (python float) clipping of gradient. If < 0, no clipping

    """
    _lr_m = lr_method.lower() # lower to make sure

    with tf.variable_scope("train_step"):
        if _lr_m == 'adam': # sgd method
            optimizer = tf.train.AdamOptimizer(lr)
        elif _lr_m == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(lr)
        elif _lr_m == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(lr)
        elif _lr_m == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(lr)
        elif _lr_m == 'nesterov':
            optimizer = tf.train.MomentumOptimizer(lr,momentum=momentum,use_nesterov=True)
        else:
            raise NotImplementedError("Unknown method {}".format(_lr_m))

        train_op = optimizer.minimize(loss)
        #optimizer.add_hook(chainer.optimizer.WeightDecay(opt.weightDecay))
        #update hook function for regularization called right after the gradient computation
        return train_op


In [None]:
def build(param):
    x,y=add_placeholders()   
    logits=net(x,param.nClasses)
    loss=add_loss_op(logits,y)
    train_op=add_train_op(param.optimizer,param.LR,param.momentum,loss)
    #prediction = tf.nn.softmax(logits, name="prediction")
    #correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    #accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name="accuracy")
    return x,y,loss, train_op
    #, prediction,accuracy

In [None]:
#tf.reset_default_graph()
#iter = train_iter.make_one_shot_iterator()
#x, y = iter.get_next()
#loss, train_op = build(x,y,param)

In [None]:
#with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
#    sess.run(tf.global_variables_initializer())
#    for i in range(param.nEpochs):
#        _, loss_value = sess.run([train_op, loss])
#        print("Iter: {}, Loss: {:.4f}".format(i, loss_value))

In [None]:
tf.reset_default_graph()
x,y,loss, train_op=build(param)
init =tf.global_variables_initializer()
#the model
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.device("/device:GPU:0"):
        sess.run(init)
        #x,y,loss, train_op, prediction,accuracy=build(param)*        
        #Run epoch
        for epoch in range(1, param.nEpochs + 1):
            n_batches = (param.fs // param.batchSize) + 1 
            for i in range(n_batches):
                if i==n_batches-1 :
                    if param.fs-1>i*param.batchSize-1:
                        batch_x=train_sounds[i*param.batchSize:1]
                        batch_y=train_labels[i*param.batchSize:]
                    else :
                        continue
                else:
                    batch_x=train_sounds[i*param.batchSize:(i+1)*param.batchSize-1]
                    batch_y=train_labels[i*param.batchSize:(i+1)*param.batchSize-1,]

                _, train_loss = sess.run((train_op, loss),feed_dict={x: batch_x, y: batch_y})
                #val_top1 = trainer.val() 

            if epoch % 10 == 0:

                print('| Epoch: {}/{} | Train: LR {}  Loss {:.3f}\n'.format(
                        epoch, param.nEpochs, param.LR, train_loss))

| Epoch: 10/600 | Train: LR 0.005  Loss nan

| Epoch: 20/600 | Train: LR 0.005  Loss nan

| Epoch: 30/600 | Train: LR 0.005  Loss nan



TO DO : Execute K-fold cross-validation

In [None]:
train_sounds.shape

In [None]:
train_sounds[0*param.batchSize:1,]

In [None]:
for split in opt.splits:
        print('+-- Split {} --+'.format(split))
#         train(opt, split)