In [1]:
import os
import re
import glob 
import matplotlib.pyplot as plt
import numpy as np
from os.path import basename
import audiosegment
from multiprocessing import Pool
modulePath = 'ChristiansPythonLibrary/src' 
import sys
import numpy
sys.path.append(modulePath)
import generalUtility
import dspUtil
import matplotlibUtil



#Constant
EMOTION_ANNOTATORS = {'anger': 0, 'happiness' : 1, 'sadness' : 2, 'neutral' : 3, 'frustration' : 4, 'excited': 5,
           'fear' : 6,'surprise' : 7,'disgust' : 8, 'other' : 9}

EMOTION = {'ang': 0, 'hap' : 1, 'sad' : 2, 'neu' : 3, 'fru' : 4, 'exc': 5,
           'fea' : 6,'sur' : 7,'dis' : 8, 'oth' : 9, 'xxx':10}



#Define class
class Input:
    ##spectral, prosody, erergy are dict type
    def __init__(self, spectral, prosody, energy):
        self.spectral = spectral
        self.prosody = prosody
        self.energy = energy
        
    def print(self):
        print("spectral  features: ", spectral)
        print("prosody features: ", prosody)
        print("energy: ", energy)
        
    def input2Vec(self):
        features = []
        s = list(self.spectral.values())
        p = list(self.prosody.values())
        e = list(self.energy.values())
        [features.extend(x) for x in [s, p, e]]
        return features

class Output:
    def __init__(self, duration, code, category_origin, category_evaluation, attribute):
        self.duration = duration
        self.code = code
        self.category_origin = category_origin
        self.category_evaluation = category_evaluation
        self.attribute = attribute
        
     
    def print(self):
        print("duration: ", self.duration)
        print("code: ", self.code)
        print("category_origin: ", self.category_origin)
        print("category_evaluation: ", self.category_evaluation)
        print("attribute: ", self.attribute)
        
    def output2Vec(self):
        emotion = EMOTION[self.category_origin]
        return emotion
    
    
    
#Functions for get features from audio file
def amp2Db(samples):
    dbs = []
    for  x in samples:
        if x < 0:
            v = - dspUtil.rmsToDb(np.abs(x))
        elif x == 0:
            v = 0
        else :
            v = dspUtil.rmsToDb(np.abs(x))
        dbs.append(v)
    return dbs

def getF0Features(file):
    features = {}
    sound = audiosegment.from_file(file)
    voiced = sound.filter_silence(duration_s=0.2)
    frame_rate = sound.frame_rate
    frames = sound.dice(0.032)

    f0s = []
    for f in frames:
        f0 = dspUtil.calculateF0once(amp2Db(f.get_array_of_samples()), frame_rate)
        if(f0 != 0):
            f0s.append(f0)
    
    features['f0_min'] = np.min(f0s)
    features['f0_max'] = np.max(f0s)
    features['f0_range'] = np.max(f0s) - np.min(f0s)
    features['f0_mean'] = np.mean(f0s)
    features['f0_median'] = np.median(f0s)
    features['f0_25th'] = np.percentile(f0s, 25)
    features['f0_75th'] = np.percentile(f0s, 75)
    features['f0_std'] = np.std(f0s)
    
  
    return features

def getEnergyFeatures(file):
    features = {}
    sound = audiosegment.from_file(file)
    voiced = sound.filter_silence(duration_s=0.2)
    samples = voiced.get_array_of_samples()
    frame_rate = sound.frame_rate
    frames = sound.dice(0.032)
    
    e = []
    for f in frames:
        e.append(np.abs(f.max_dBFS))
    
    
    features['energy_min'] = np.min(e)
    features['energy_max'] = np.max(e)
    features['energy_range'] = np.max(e) - np.min(e)
    features['energy_mean'] = np.mean(e)
    features['energy_median'] = np.median(e)
    features['energy_25th'] = np.percentile(e, 25)
    features['energy_75th'] = np.percentile(e, 75)
    features['energy_std'] = np.std(e)   

    return features
    
def audio2Features(file):
    spectral = {}
    prosody = {}
    energy = {}
    try:
        prosody = getF0Features(file)
        energy = getEnergyFeatures(file)
        #To be continued....
    
        return Input(spectral, prosody, energy)
    except Exception as e:
        print(e)
        
        
#Function for getting input vector and corresponding output      
def parallel_task(d0, d1):
    # Each input diectory contains many file
    # This fucntion will walk through all valid 'wav'files in this directory and get features like engergy, frequency...
    def parseInput(dir):
        dicts = {} 
        for f in os.listdir(dir):
            if not f.startswith(".") and os.path.splitext(f)[1] == ".wav":
                dicts[os.path.splitext(f)[0]] = audio2Features(dir + "/" + f)


        return dicts
    
    # Each output file contains label of many diffrent 'wav' file.
    # This function will parse content of text file using 'regrex'. Then turn it into label
    def parseOutput(file):
        dict_namefile_output = {}
        # Open file to get all contents excepts the first line.
        f = open(file, 'r')
        content = ""
        index = 0
        for line in f:
            index = index + 1
            if index == 1:
                continue
            content  = content + line

        # Find all matched patterns in the content
        ps = re.findall(r'\[.*?\)\n\n', content, re.DOTALL)

        # Parse each matched pattern into  'Output' object
        try:
            for p in ps:
                ls = p.split("\n")
                ls = list(filter(lambda x: len(x) > 0 ,ls))

                # Split elements of the first line which looks like : 
                # [147.0300 - 151.7101]	Ses01F_impro02_M012	neu	[2.5000, 2.0000, 2.0000]
                ele_line0 = re.search(r'(\[.*?\])(\s)(.*?)(\s)(.*?)(\s)(\[.*?\])', ls[0]).groups()

                # Split time components which looks like:
                # [147.0300 - 151.7101]
                time_dur = ele_line0[0]
                ele_time_dur = re.findall(r"[-+]?\d*\.\d+|\d+", time_dur)
                ele_time_dur = [float(x) for x in ele_time_dur]

                # Get code and category_origin which looks like:
                # Code: Ses01F_impro02_M012
                # Category_origin: neu
                code = ele_line0[2]
                category_origin = ele_line0[4]

                # Split attribute components which looks like:
                # [2.5000, 2.0000, 2.0000]
                attribute = ele_line0[6]
                ele_attribute = re.findall(r"[-+]?\d*\.\d+|\d+", attribute)
                ele_attribute = [float(x) for x in ele_attribute]

                # Get categorial_evaluation:
                lines_categorical = list(filter(lambda x : x[0] == 'C', ls))
                rex = re.compile(r'C.*?:(\s)(.*?)(\s)\(.*?\)')

                category_evaluation = []
                for l in lines_categorical:
                    elements = rex.search(l).groups()
                    cat = elements[1]
                    cat = cat.split(";")
                    cat = map(lambda x: x.lstrip(), cat)
                    cat = list(filter(lambda x: len(x)>0, cat))
                    category_evaluation.extend(cat)


                # Make list distinct
                category_evaluation = list(set(category_evaluation))

                # Make dict {name_file : parsed_output}
                dict_namefile_output[code] = Output(ele_time_dur, code, category_origin, category_evaluation, ele_attribute)
            return dict_namefile_output
        except Exception as e:
            print(e)


    ### Parse input and output files and get input and output as vector
    dicts_in = parseInput(d0)
    dicts_out = parseOutput(d1)
    in_out = []
    
    keys = list(dicts_in.keys())
    for key in keys:
        if(dicts_out[key].category_origin != 'xxx'):
            in_out.append((dicts_in[key].input2Vec(), dicts_out[key].output2Vec()))
    return in_out
    
    
def createInput_Output():
    ### Get directories of input and output
    DATA_DIR = "IEMOCAP_full_release"
    NUM_SESSION = 5
    input_output = []
    for i in range (1, NUM_SESSION + 1):
        name_session = "Session" + str(i)
        root_dir_of_wav = DATA_DIR + "/" + name_session + "/sentences" + "/wav"
        root_dir_of_labels = DATA_DIR + "/" + name_session + "/dialog" + "/EmoEvaluation"

        for x in os.walk(root_dir_of_wav):
            if(x[0] == root_dir_of_wav):
                dirs_of_wav = x[1]
                index = -1
            else:
                index = index + 1
                input_output.append((x[0], root_dir_of_labels + "/" + dirs_of_wav[index] + ".txt"))
                
    
    ds = input_output
    in_out = []
    input = []
    out = []
    
    # Multi processing
    with Pool(processes=8) as pool:
         in_out = pool.starmap(parallel_task, ds)
   
    r = []
    for e in in_out:
        r = r + e
    
    input = [x[0] for x in r]
    out = [x[1] for x in r]
            
    return (input, out)
 



##Get input, normalize input, get output
input, output = createInput_Output()
input = np.array(input)
input = input / input.max(axis=0)
output = np.array(output)

np.savetxt('input.txt', input, fmt='%f')
np.savetxt('output.txt', output, fmt='%d')

    
    

UnboundLocalError: local variable 'r' referenced before assignment

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sknn.mlp import Classifier, Layer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix


def training(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    
    kf = KFold(n_splits=10, random_state=None, shuffle=False)
    i_fold = 0
    accuracy_train_results = []
    accuracy_valid_results = []

    for train_index, valid_index in kf.split(X_train):
        i_fold = i_fold + 1
        
        x_train_sub, x_valid_sub = X_train[train_index], X_train[valid_index]
        y_train_sub, y_valid_sub = y_train[train_index], y_train[valid_index]
        clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(20,), random_state=1)
        clf.fit(x_train_sub, y_train_sub)
        
        score = clf.score(x_train_sub, y_train_sub)
        score1 = clf.score(x_valid_sub, y_valid_sub)
        accuracy_train_results.append(score)
        accuracy_valid_results.append(score1)
        
        print("Score of training set: ", score)
        print("Score of validation set: ", score1)
     
       
    
    avg_accuracy_train_result = np.sum(accuracy_train_results) / len(accuracy_train_results)
    avg_accuracy_valid_result = np.sum(accuracy_valid_results) / len(accuracy_valid_results)
    print("Average accuracy training set, std:", avg_accuracy_train_result, " ",\
          np.std(accuracy_train_results))
    print("Average accuracy validation set, std:", avg_accuracy_valid_result," ", \
          np.std(accuracy_valid_results))     
    
    clf.fit(X_train, y_train)
   

    predicts = clf.predict(X_test)
    pro = clf.predict_proba(X_test)
    print("predicts: ", predicts)
    print("prob: ", pro[0])
    score_test = clf.score(X_test, y_test)
    print("SCore for test set: ", score_test)
    print ("Confusion matrix:..................... ")
    matrix = confusion_matrix(y_test, predicts)
    matrix_ratio = matrix/matrix.sum(1, keepdims=True)
    print(matrix)
    print("##########################")
    print(matrix_ratio)
 
input = np.loadtxt('input.txt', dtype=float)
output = np.loadtxt('output.txt', dtype=int)
training(input, output)





Score of training set:  0.04487179487179487
Score of validation set:  0.06896551724137931
Score of training set:  0.0
Score of validation set:  0.0
Score of training set:  0.0
Score of validation set:  0.0
Score of training set:  0.001282051282051282
Score of validation set:  0.0
Score of training set:  0.001282051282051282
Score of validation set:  0.0
Score of training set:  0.0
Score of validation set:  0.0
Score of training set:  0.0
Score of validation set:  0.0
Score of training set:  0.0012804097311139564
Score of validation set:  0.0
Score of training set:  0.0
Score of validation set:  0.0
Score of training set:  0.0
Score of validation set:  0.0
Average accuracy training set, std: 0.0048716307167011395   0.013345699219885505
Average accuracy validation set, std: 0.006896551724137931   0.02068965517241379
dict_keys(['beta_1', 'tol', 'solver', 'random_state', 'nesterovs_momentum', 'beta_2', 'early_stopping', 'learning_rate_init', 'warm_start', 'alpha', 'batch_size', 'validation

In [27]:
# import numpy as np
# a = np.array([[1.2122,2],[3,4]])
# np.savetxt('test1.txt', a, fmt='%f')
# b = np.loadtxt('test1.txt', dtype=float)
# print(b)

[[1.2122 2.    ]
 [3.     4.    ]]
