In [1]:
import pickle
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras

In [2]:
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

def load_data(data_path, mfcc_path):    
    df = pd.read_csv(data_path)    
    with open(mfcc_path, 'rb') as fp:
        mfcc = pickle.load(fp)

    return df,mfcc

def get_score(model, X_train, X_test, y_train, y_test,target_sex=True):
    optimiser = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimiser,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    if target_sex:
        model.compile(optimizer=optimiser,
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=30, verbose=0)
        y_predict = model.predict(X_test)
        mf1 = f1_score(y_test, y_predict,average='macro')
        return mf1 

    model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=30, verbose=0)
    y_predict = model.predict(X_test)
    y_predict = [np.argmax(x) for x in y_predict]
    mf1 = f1_score(y_test, y_predict,average='macro')
    return mf1 

def cross_validation(df,mfcc,models,target_sex=True):
    kf = KFold(n_splits = 10, shuffle= True)
    X = df.Id
    Y = df.Country - 2 

    if target_sex: 
      Y = df.Sex
    
    for m, values in models.items():
        print(f'Cross Validation for model {m}\n')
        for train_index, test_index in kf.split(X):
          
          X_train_Id, X_test_Id = X.iloc[train_index,], X.iloc[test_index,]
          y_train, y_test = Y[train_index], Y[test_index]
          
          x_train = np.array([mfcc[i] for i in X_train_Id])
          x_test = np.array([mfcc[i] for i in X_test_Id])
          
          values.append(get_score(m, x_train, x_test, y_train, y_test, target_sex))

        print(f'Done model {m}\n')
    print(f'Done')



In [3]:
def fnn_model_without_dropout(input_shape,output=1,function='relu'):
    model = keras.Sequential() 
    model.add(keras.layers.Flatten(input_shape=input_shape))
    model.add(keras.layers.Dense(512, activation=function))
    model.add(keras.layers.Dense(256, activation=function))
    model.add(keras.layers.Dense(64, activation=function))
    model.add(keras.layers.Dense(output, activation='softmax'))
    return model

In [4]:
def fnn_model_with_dropout(input_shape,output=1,function='relu',dropout=0.2):
    model = keras.Sequential() 
    model.add(keras.layers.Flatten(input_shape=input_shape))
    model.add(keras.layers.Dense(512, activation=function))
    model.add(keras.layers.Dense(256, activation=function))
    model.add(keras.layers.Dense(64, activation=function))
    model.add(keras.layers.Dropout(dropout))
    model.add(keras.layers.Dense(output, activation='softmax'))
    return model

In [5]:
def cnn_model(input_shape,output=1):
    model = keras.Sequential()

    # 1st conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 2nd conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 3rd conv layer
    model.add(keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(keras.layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # flatten output and feed it into dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer
    model.add(keras.layers.Dense(output, activation='softmax'))

    return model

In [6]:
models_sex = {
    cnn_model((61,13,1),1):['cnn'],
    fnn_model_with_dropout((61,13,1),1):['fnn_with_drop'],
    fnn_model_without_dropout((61,13,1),1):['fnn_without_drop']
}

models_country = {
    cnn_model((61,13,1),5):['cnn'],
    fnn_model_with_dropout((61,13,1),5):['fnn_with_drop'],
    fnn_model_without_dropout((61,13,1),5):['fnn_without_drop']
}

path_train_csv = 'dataset/Train.csv'
path_train_mfcc = 'dataset/mfcc_train.json'

df,mfcc = load_data(path_train_csv,path_train_mfcc) 

In [7]:
cross_validation(df,mfcc,models_sex,target_sex=True)

Cross Validation for model <keras.engine.sequential.Sequential object at 0x7fd9789c6350>

Done model <keras.engine.sequential.Sequential object at 0x7fd9789c6350>

Cross Validation for model <keras.engine.sequential.Sequential object at 0x7fd8fc1271d0>

Done model <keras.engine.sequential.Sequential object at 0x7fd8fc1271d0>

Cross Validation for model <keras.engine.sequential.Sequential object at 0x7fd8fc1350d0>

Done model <keras.engine.sequential.Sequential object at 0x7fd8fc1350d0>

Done


In [8]:
cross_validation(df,mfcc,models_country,target_sex=False)

Cross Validation for model <keras.engine.sequential.Sequential object at 0x7fd9789c6790>

Done model <keras.engine.sequential.Sequential object at 0x7fd9789c6790>

Cross Validation for model <keras.engine.sequential.Sequential object at 0x7fd8fc14ce10>

Done model <keras.engine.sequential.Sequential object at 0x7fd8fc14ce10>

Cross Validation for model <keras.engine.sequential.Sequential object at 0x7fd8fc0a3950>

Done model <keras.engine.sequential.Sequential object at 0x7fd8fc0a3950>

Done


In [12]:
for m,values in models_sex.items():
    print(f' model {values[0]} macro f1-score mean is {np.mean(values[1:])}')

 model cnn macro f1-score mean is 0.33466902391003506
 model fnn_with_drop macro f1-score mean is 0.3346020091301354
 model fnn_without_drop macro f1-score mean is 0.33461165285919325


In [13]:
for m,values in models_country.items():
    print(f' model {values[0]} macro f1-score mean is {np.mean(values[1:])}')

 model cnn macro f1-score mean is 0.7912803677990046
 model fnn_with_drop macro f1-score mean is 0.3321740560188752
 model fnn_without_drop macro f1-score mean is 0.7886402686473031
