# In [None]:
import pickle
import random
import timeit

import numpy as np
from keras.layers import LSTM, Dense, Activation, Reshape, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import confusion_matrix
from sklearn.utils import class_weight
# Reference point for the elapsed-time report printed at the end of the run.
start = timeit.default_timer()


# Label tables: emotion name -> integer class id.
# Full emotion names.
EMOTION_ANNOTATORS = dict(
    anger=0,
    happiness=1,
    sadness=2,
    neutral=3,
    frustration=4,
    excited=5,
    fear=6,
    surprise=7,
    disgust=8,
    other=9,
)

# Three-letter emotion codes; 'xxx' gets its own id.
EMOTION = dict(
    ang=0,
    hap=1,
    sad=2,
    neu=3,
    fru=4,
    exc=5,
    fea=6,
    sur=7,
    dis=8,
    oth=9,
    xxx=10,
)

# Available classification approaches.
METHOD = dict(audio_feature=0, LSTM=1)

# Approach used for this run.
method = METHOD['LSTM']

# True when the data has already been processed and saved into files, so it
# can simply be reloaded instead of being processed again.
isRawDataProcessed = True

# Development mode: only run with a small amount of data.
dev = True

size_batch = 10
# Number of consecutive frames merged into a single timestep.
window_size = 25
# Number of emotion classes kept for training (length of the one-hot labels).
num_class = 6
if method == METHOD['LSTM']:
    # Load the pre-processed features and labels from disk.  The files are
    # opened with context managers so the handles are closed right after
    # loading.
    # NOTE: unpickling can execute arbitrary code; only load these files when
    # they come from a trusted source.
    with open('processed-data/input.obj', 'rb') as filehandlerInput:
        input = pickle.load(filehandlerInput)
    with open('processed-data/output.obj', 'rb') as filehandlerOutput:
        output = pickle.load(filehandlerOutput)

    # In development mode only a small prefix of the data is used.  The cap
    # is clamped to the number of loaded samples so that data_size always
    # matches what the slices further down really keep.
    if dev:
        data_size = min(4000, len(input))
    else:
        data_size = len(input)

    print("\nThe number of samples ", data_size)
    
    # Transpose every sample (zip(*sample) swaps its two axes) and keep only
    # the first data_size samples.
    # Original note on the resulting layout: (size of samples, ?, f_range = 1025)
    input = [list(map(list, zip(*sample))) for sample in input[0:data_size]]

    # Find the largest value in the whole data set.  The search starts at -1,
    # so -1 is the lowest divisor that can ever be selected.  Empty frames are
    # skipped instead of making max() fail.
    max_val = -1
    for sample in input:
        for frame in sample:
            if len(frame) > 0:
                max_val = max(max_val, max(frame))

    print("\nThis is max_value of input: ", max_val)

    # Scale every value by the global maximum.  When the maximum is 0 the
    # data is left untouched, because dividing by it would fail.
    if max_val != 0:
        input = [[[x / max_val for x in frame] for frame in sample]
                 for sample in input]

    print("\nFinished normalize input.")

    # Widen the data: concatenate every run of window_size consecutive frames
    # into one flat vector, so each sample becomes a list of such vectors.
    for j, sample in enumerate(input):
        windows = [
            [item for frame in sample[k:k + window_size] for item in frame]
            for k in range(0, len(sample), window_size)
        ]
        # The last window is always discarded; it is the only one that can
        # hold fewer than window_size frames.
        # NOTE(review): it is dropped even when it happens to be full.
        input[j] = windows[:-1]

    print("\nFinished widen window_size!")
    
    
    # Keep the labels aligned with the first data_size samples.
    output = output[0:data_size]

    # Count the samples of each label and keep only labels that occur.
    # np.bincount requires the labels to be non-negative integers.
    label_counts = np.bincount(output)
    present_labels = np.nonzero(label_counts)[0]
    a = list(zip(present_labels, label_counts[present_labels]))
    # Size of the most frequent label.
    max_sample = max(count for _, count in a)

    print("EMOTION_ANNOTATE: ", EMOTION_ANNOTATORS)
    print("\nThe quantity of each label: ", a, "\n")

    # Balanced weight of each class.  `classes` and `y` are passed by keyword
    # because recent scikit-learn releases no longer accept them
    # positionally.  The function is imported by its full module path so this
    # code still works after the name `class_weight` has been rebound below
    # (for example when the notebook cell is executed a second time).
    # NOTE: the resulting array is ordered like np.unique(output), i.e. it is
    # indexed by position in that array, not by the label value itself.
    from sklearn.utils.class_weight import compute_class_weight
    class_weight = compute_class_weight('balanced',
                                        classes=np.unique(output),
                                        y=output)
    
    # Remove the samples whose label is not one of the num_class classes the
    # model is trained on (the labels that have a small quantity).  A plain
    # Python filter is used instead of np.delete: the sequences have
    # different lengths, and NumPy cannot turn such a ragged nested list into
    # an array.
    kept_pairs = [(sample, label)
                  for sample, label in zip(input, output)
                  if label < num_class]
    print("\nRemoved samples that have the tiny quantity!")

    # Shuffle samples and labels together so that each pair stays aligned.
    random.shuffle(kept_pairs)
    input, output = zip(*kept_pairs)

    print("\nFinished shuffling all data!")
    
    # Group the samples of each label into one bucket, in a single pass.
    samples_by_label = {}
    for sample, label in zip(input, output):
        samples_by_label.setdefault(label, []).append(sample)
    # Sorted so the buckets are always processed in the same order.
    values = sorted(samples_by_label)
    input_groups = [samples_by_label[label] for label in values]

    # Split every bucket 80/20 into train and test, and oversample the
    # training part of each bucket by the factor max_sample / bucket size.
    train_in = []
    train_out = []
    test_in = []
    test_out = []

    for i, (label, group) in enumerate(zip(values, input_groups)):
        # One-hot target shared by every sample of this bucket.
        out_label = np.zeros(num_class)
        out_label[label] = 1

        len_group = len(group)
        split_at = int(0.8 * len_group)
        new_train_in = group[:split_at]
        new_test_in = group[split_at:]

        ratio_sample = max_sample / len_group
        ratio_sample_int = int(ratio_sample)
        ratio_sample_remain = ratio_sample - ratio_sample_int

        # Whole copies first (each sample repeated back to back), then a
        # prefix of the training part for the fractional remainder.  Plain
        # list operations are used because the sequences differ in length and
        # cannot be stacked into a NumPy array.
        new_train_in_up_sampling = [sample
                                    for sample in new_train_in
                                    for _ in range(ratio_sample_int)]
        number_addition_sample = int(ratio_sample_remain * len(new_train_in))
        new_train_in_up_sampling += new_train_in[:number_addition_sample]

        train_in = train_in + new_train_in_up_sampling

        print("Up sampling sample ", i, "in training set. ", "The new length is: ", len(new_train_in_up_sampling))
        test_in = test_in + new_test_in
        train_out = train_out + [out_label] * len(new_train_in_up_sampling)
        test_out = test_out + [out_label] * len(new_test_in)

    print("\nFinished upsampling data!")
        
    # Convert both label lists to NumPy arrays.
    train_out, test_out = np.array(train_out), np.array(test_out)

    # Shuffle the training sequences together with their labels so that the
    # pairs stay aligned.
    training_pairs = list(zip(train_in, train_out))
    random.shuffle(training_pairs)
    train_in, train_out = zip(*training_pairs)

    print("\nFinished shuffling training set")

    # Width of one timestep: window_size frames of 1025 values each.
    timesteps = None
    data_dim = window_size * 1025
    

    # Expected input data shape: (batch_size, timesteps, data_dim).  The
    # timestep axis is declared as None, so its length is not fixed.
    model = Sequential()
    # Two LSTM layers that return a sequence of 144-dimensional vectors ...
    model.add(LSTM(144, return_sequences=True, input_shape=(None, data_dim)))
    model.add(LSTM(144, return_sequences=True))
    # ... followed by one that returns a single 144-dimensional vector.
    model.add(LSTM(144))

    # Fully connected layer with one unit per class, followed by softmax.
    # Sized from num_class so it always matches the one-hot labels.
    model.add(Dense(num_class))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    def accuracy(model, input, output):
        """Return the fraction of sequences whose predicted class is correct.

        Every sequence is predicted on its own, as a batch of size 1.  The
        predicted class is the index of the largest score returned by
        ``model.predict``; ``predict_classes`` is not used because it is no
        longer available in current Keras releases.

        Args:
            model: object with a Keras-style ``predict(batch, verbose=0)``
                method returning one row of per-class scores per sequence.
            input: sequences to classify.
            output: labels aligned with ``input``; the class of a label is
                the index of its largest entry.

        Returns:
            Number of correct predictions divided by ``len(input)``, or 0.0
            when ``input`` is empty.
        """
        if len(input) == 0:
            return 0.0

        sumTrue = 0
        for x, y in zip(input, output):
            scores = model.predict(np.array([x]), verbose=0)
            if np.argmax(scores, axis=-1)[0] == np.argmax(y):
                sumTrue = sumTrue + 1

        return sumTrue / len(input)
            
    
    # Train for four passes over the training set, one sequence per batch.
    # NOTE(review): no class or sample weights are passed to train_on_batch;
    # the training set was oversampled per class beforehand. Confirm that
    # this is the intended way of balancing.
    try:
        step = 0
        for _ in range(0, 4):
            for seq, label in zip(train_in, train_out):
                step = step + 1

                model.train_on_batch(np.array([seq]), np.array([label]))
                # Report progress after the first step and then every 500
                # steps.  Each report scores the full train and test sets.
                if (step == 1 or step % 500 == 0):

                    print("Trained 500 sample!")
                    print("Score train set: ", accuracy(model, train_in, train_out))
                    print("Score test set; ", accuracy(model, test_in, test_out))
    except Exception as e:
        # Best effort: a failure stops the training but the evaluation below
        # still runs on whatever has been learned so far.
        print("This is error: ", e)


    print("\nAfter training........................................................")
    print("Score train set: ", accuracy(model, train_in, train_out))
    print("Score test set; ", accuracy(model, test_in, test_out))
    

    # Predicted class of every test sequence, one sequence per batch.  The
    # class is the index of the largest score returned by model.predict
    # (predict_classes is no longer available in current Keras releases).
    ps = []
    for x in test_in:
        scores = model.predict(np.array([x]), verbose=0)
        ps.append(int(np.argmax(scores, axis=-1)[0]))
    # True class of every test sequence, decoded from the one-hot labels.
    r = np.argmax(test_out, axis=1)

    # Rows are true classes, columns are predicted classes.
    matrix = confusion_matrix(r, ps)
    print("\nConfusion matrix: \n", matrix)

    diagonal = np.diag(matrix)
    TP = diagonal.tolist()
    print("\nTP: ", TP,"\n")
    # Column total minus the diagonal: predicted as the class but wrong.
    FP = (np.sum(matrix, axis = 0) - diagonal).tolist()
    print("FP: ", FP,"\n")
    # Row total minus the diagonal: samples of the class that were missed.
    FN = (np.sum(matrix, axis = 1) - diagonal).tolist()
    print("FN: ", FN,"\n")

    # A score whose denominator is zero is reported as 0.0 instead of NaN.
    Precision = [tp / (tp + fp) if tp + fp else 0.0 for tp, fp in zip(TP, FP)]
    Recall = [tp / (tp + fn) if tp + fn else 0.0 for tp, fn in zip(TP, FN)]
    F1_score = [2 * prec * rec / (prec + rec) if prec + rec else 0.0
                for prec, rec in zip(Precision, Recall)]

    print("\nPrecision: ", Precision,"\n")
    print("Recall: ", Recall,"\n")
    print("F1_scrore: ", F1_score, "\n")
    
        
    # Report the wall-clock time elapsed since `start` was recorded.
    stop = timeit.default_timer()
    elapsed_seconds = stop - start
    print("Time for training and testing: ", elapsed_seconds, "(s)")