In [None]:
!pip install xlsxwriter
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, MaxPooling1D, Conv1D, Flatten, BatchNormalization, Activation, LSTM
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow.keras as keras
# Short alias for TensorFlow's leaky-ReLU op. Nothing in the visible part of
# this file references it; presumably kept so it can be passed as the
# `activation` argument of the model builders below.
leaky_relu = tf.nn.leaky_relu


def CNN_3l_model(learn_rate=.001, activation="relu", init='glorot_uniform', kernel_regularizer="l1", dropout=0, input_shape=(None, 61)):
    """Build and compile a three-convolution 1-D CNN for binary classification.

    Layout: BatchNormalization -> Conv1D(280) -> MaxPooling1D(2) ->
    Conv1D(128) -> MaxPooling1D(2) -> Conv1D(64) -> Flatten ->
    [Dropout] -> Dense(1, sigmoid). Every convolution uses kernel size 8 and
    "same" padding.

    Args:
        learn_rate: Learning rate handed to the Adam optimizer.
        activation: Activation used by the convolutional layers.
        init: Kernel initializer for the convolutional and output layers.
        kernel_regularizer: Kernel regularizer for the convolutional and
            output layers.
        dropout: Dropout rate applied to the flattened features just before
            the output layer. The default ``0`` adds no Dropout layer, which
            matches the previous behaviour (the argument used to be ignored).
        input_shape: Accepted for signature compatibility only; no layer
            receives it, so the input shape is left to be inferred from the
            data the model is first fit on.
            NOTE(review): deliberately not wired in - the default's unknown
            time dimension (``None``) would presumably leave Flatten's output
            size undefined for the Dense layer.

    Returns:
        A compiled ``Sequential`` model using ``binary_crossentropy`` loss,
        the Adam optimizer and the ``accuracy`` metric.
    """
    model = Sequential()
    model.add(BatchNormalization())
    model.add(Conv1D(280, 8, padding="same", kernel_initializer=init, activation=activation, kernel_regularizer=kernel_regularizer))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, 8, padding="same", kernel_initializer=init, activation=activation, kernel_regularizer=kernel_regularizer))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(64, 8, padding="same", kernel_initializer=init, activation=activation, kernel_regularizer=kernel_regularizer))
    model.add(Flatten())
    if dropout:
        model.add(Dropout(dropout))
    # binary_crossentropy expects a probability in [0, 1]; the output unit
    # therefore always uses a sigmoid instead of reusing `activation`
    # (ReLU by default), which is unbounded above.
    model.add(Dense(1, kernel_initializer=init, activation="sigmoid", kernel_regularizer=kernel_regularizer))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def CNN_2l_model(learn_rate=.001, activation="relu", init='glorot_uniform', kernel_regularizer="l1", dropout=0, input_shape=(None, 61)):
    """Build and compile a two-convolution 1-D CNN for binary classification.

    Layout: BatchNormalization -> Conv1D(128) -> MaxPooling1D(2) ->
    Conv1D(64) -> Flatten -> [Dropout] -> Dense(1, sigmoid). Both
    convolutions use kernel size 8 and "same" padding.

    Args:
        learn_rate: Learning rate handed to the Adam optimizer.
        activation: Activation used by the convolutional layers.
        init: Kernel initializer for the convolutional and output layers.
        kernel_regularizer: Kernel regularizer for the convolutional and
            output layers.
        dropout: Dropout rate applied to the flattened features just before
            the output layer. The default ``0`` adds no Dropout layer, which
            matches the previous behaviour (the argument used to be ignored).
        input_shape: Accepted for signature compatibility only; no layer
            receives it, so the input shape is left to be inferred from the
            data the model is first fit on.
            NOTE(review): deliberately not wired in - the default's unknown
            time dimension (``None``) would presumably leave Flatten's output
            size undefined for the Dense layer.

    Returns:
        A compiled ``Sequential`` model using ``binary_crossentropy`` loss,
        the Adam optimizer and the ``accuracy`` metric.
    """
    model = Sequential()
    model.add(BatchNormalization())
    model.add(Conv1D(128, 8, padding="same", kernel_initializer=init, activation=activation, kernel_regularizer=kernel_regularizer))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(64, 8, padding="same", kernel_initializer=init, activation=activation, kernel_regularizer=kernel_regularizer))
    model.add(Flatten())
    if dropout:
        model.add(Dropout(dropout))
    # binary_crossentropy expects a probability in [0, 1]; the output unit
    # therefore always uses a sigmoid instead of reusing `activation`
    # (ReLU by default), which is unbounded above.
    model.add(Dense(1, kernel_initializer=init, activation="sigmoid", kernel_regularizer=kernel_regularizer))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def CNN_1l_model(learn_rate=.001, activation="relu", init='glorot_uniform', kernel_regularizer="l1", dropout=0, input_shape=(None, 61)):
    """Build and compile a single-convolution 1-D CNN for binary classification.

    Layout: BatchNormalization -> Conv1D(32, kernel size 8, "same" padding)
    -> Flatten -> [Dropout] -> Dense(1, sigmoid).

    Args:
        learn_rate: Learning rate handed to the Adam optimizer.
        activation: Activation used by the convolutional layer.
        init: Kernel initializer for the convolutional and output layers.
        kernel_regularizer: Kernel regularizer for the convolutional and
            output layers.
        dropout: Dropout rate applied to the flattened features just before
            the output layer. The default ``0`` adds no Dropout layer, which
            matches the previous behaviour (the argument used to be ignored).
        input_shape: Accepted for signature compatibility only; no layer
            receives it, so the input shape is left to be inferred from the
            data the model is first fit on.
            NOTE(review): deliberately not wired in - the default's unknown
            time dimension (``None``) would presumably leave Flatten's output
            size undefined for the Dense layer.

    Returns:
        A compiled ``Sequential`` model using ``binary_crossentropy`` loss,
        the Adam optimizer and the ``accuracy`` metric.
    """
    model = Sequential()
    model.add(BatchNormalization())
    model.add(Conv1D(32, 8, padding="same", kernel_initializer=init, activation=activation, kernel_regularizer=kernel_regularizer))
    model.add(Flatten())
    if dropout:
        model.add(Dropout(dropout))
    # binary_crossentropy expects a probability in [0, 1]; the output unit
    # therefore always uses a sigmoid instead of reusing `activation`
    # (ReLU by default), which is unbounded above.
    model.add(Dense(1, kernel_initializer=init, activation="sigmoid", kernel_regularizer=kernel_regularizer))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Model builders evaluated for every (person, sound) split further down,
# ordered from the shallowest to the deepest network.
function_list = [CNN_1l_model, CNN_2l_model, CNN_3l_model]

import warnings

with warnings.catch_warnings():
    warnings.filterwarnings(action="ignore", category=FutureWarning)
    import tensorflow as tf
    from tensorflow import keras


def namestr(obj, namespace):
    """Return the names in *namespace* that are bound to *obj* itself.

    Matching is by identity (``is``), not equality, so two equal but
    distinct objects are not confused. Names are returned in the
    namespace's iteration order; the list is empty when nothing matches.
    """
    bound_names = []
    for candidate in namespace:
        if namespace[candidate] is obj:
            bound_names.append(candidate)
    return bound_names


# Switch eager execution on through the TF1 compatibility API.
# NOTE(review): under TensorFlow 2.x eager mode is presumably already the
# default, which would make this call a no-op - confirm the TF version in use.
tf.compat.v1.enable_eager_execution()
'''
Data Pull
Here I pull the train test data from excel files.
'''

# Common path prefix of the per-feature workbooks; the loop below appends
# "_<feature>.xlsx" to it.
File = "Data/S_10pData_withNoise_1.63sec"

# Feature-set suffixes; one workbook is read (and one result file written)
# per entry.
File_types = ["GFCC", "LFCC","BFCC", "NGCC", "LPC", "RPLP", "chromashift", "melspect", "chromaCqt", "RMS", "specCont", "tonnetz", "MFCC", "delta", "deltadelta"]
def _balanced_subset(df, valid_user, people, sound_mask, window=10):
    """Return the target person's rows plus a capped slice from every other person.

    Args:
        df: Feature table with "Person ID" and "Sound ID" columns.
        valid_user: Person ID treated as the positive class.
        people: All person IDs taking part in the experiment.
        sound_mask: Boolean Series aligned with ``df`` selecting the sounds
            to keep.
        window: Row counts taken from the other people are rounded down to a
            multiple of this value.

    Returns:
        A DataFrame holding every selected row of ``valid_user`` followed by
        at most ``quota`` rows from each other person, where ``quota`` is the
        target's row count split evenly across the other people and rounded
        down to a multiple of ``window``.
    """
    target_rows = df.loc[(df["Person ID"] == valid_user) & sound_mask]
    quota = (len(target_rows) // (len(people) - 1)) // window * window
    parts = [target_rows]
    for other in people:
        if other != valid_user:
            parts.append(df.loc[(df["Person ID"] == other) & sound_mask][:quota])
    return pd.concat(parts)


# For every feature workbook: load and normalise the table, then for each
# (person, left-out sound) pair train each CNN with a grid search on the
# remaining sounds, test on the left-out sound, and finally write the
# collected scores to one Excel file per feature set.
for item in File_types:
    File_Name = File + "_" + item + ".xlsx"

    df = pd.read_excel(File_Name, sheet_name="Sheet1", header=0)

    print(File_Name)

    people = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    sounds = [1, 2, 3, 4, 5, 6]
    window = 10  # rows grouped into one model input sample
    score_columns = ["Target ID", "Sound", "Model", "TP", "FN", "FP", "TN", "best params"]
    validation_columns = ["Target ID", "Sound", "Model", "means", "stds", "params"]
    # Rows are collected in plain lists and turned into DataFrames once per
    # file: DataFrame.append no longer exists in pandas 2.x and copied the
    # whole frame on every call.
    score_rows = []
    validation_rows = []

    # Not referenced anywhere in the visible code; kept so that names other
    # cells may rely on still exist.
    feature_count = 20
    best_model = None
    best_model_performance = 0
    best_model_summary = None

    # Shuffle the rows once up front.
    df = df.sample(frac=1)

    # Scale every feature column by its own maximum.
    for column in df.columns:
        if column not in ("Person ID", "Sound ID"):
            df[column] = df[column] / df[column].max()

    # NOTE(review): this global option is deprecated in recent pandas and is
    # redundant with the explicit replace below; kept only to preserve the
    # script's existing side effects.
    pd.options.mode.use_inf_as_na = True
    df = df.replace([np.inf, -np.inf], np.nan)
    # Keep only the columns that contain no missing values at all.
    df = df.dropna(axis=1, how="any")

    for person in people:
        validUser = person
        for sound in sounds:

            k_values = [30]

            for k_value in k_values:
                print("validUser is " + str(validUser))
                print("sound left out" + str(sound))
                print("K features" + str(k_value))

                # Train on every sound except the left-out one, test on the
                # left-out sound only.
                traindf = _balanced_subset(df, validUser, people, df["Sound ID"] != sound, window)
                testdf = _balanced_subset(df, validUser, people, df["Sound ID"] == sound, window)

                traindf = traindf.sample(frac=1)
                testdf = testdf.sample(frac=1)

                X_train = traindf.drop(columns=["Person ID", "Sound ID"])
                X_test = testdf.drop(columns=["Person ID", "Sound ID"])

                # Binary target: 1 for the target person, 0 for everyone else.
                Y_train = (traindf["Person ID"] == validUser).astype(int)
                Y_test = (testdf["Person ID"] == validUser).astype(int)

                print("Training Size: ", len(X_train), " balance of: ", Y_train.sum(), ":",
                      len(Y_train) - Y_train.sum(),
                      "Testing Size: ", len(X_test), " balance of : ", Y_test.sum(), ":", len(Y_test) - Y_test.sum())

                print("shapes", X_train.shape, Y_train.shape)
                # Group consecutive rows into fixed-size windows. The reshape
                # raises if the row count is not a multiple of `window`.
                # NOTE(review): the rows were shuffled above, so a window can
                # mix rows (and labels) of different people - confirm this is
                # intended.
                X_train = X_train.to_numpy().reshape(X_train.shape[0] // window, window, X_train.shape[1])
                Y_train = Y_train.to_numpy().reshape(Y_train.shape[0] // window, window)
                X_test = X_test.to_numpy().reshape(X_test.shape[0] // window, window, X_test.shape[1])
                Y_test = Y_test.to_numpy().reshape(Y_test.shape[0] // window, window)

                print("final", X_train.shape, Y_train.shape)
                for function in function_list:
                    # The builder's own name identifies the model in the
                    # result tables; the former fixed "RNN" / "CNN (Adam)"
                    # labels could not tell the three CNN variants apart.
                    model_name = function.__name__

                    model = KerasClassifier(build_fn=function, verbose=0)
                    epochs = [50, 100, 150, 200]
                    batches = [25, 50, 75, 100]
                    param_grid = dict(epochs=epochs, batch_size=batches)
                    grid = GridSearchCV(estimator=model, param_grid=param_grid)
                    grid_result = grid.fit(X_train, Y_train)

                    # One validation row per hyper-parameter combination.
                    means = grid_result.cv_results_['mean_test_score']
                    stds = grid_result.cv_results_['std_test_score']
                    params = grid_result.cv_results_['params']
                    for mean, stdev, param in zip(means, stds, params):
                        print("Trainning %s %s %f (%f) with: %r" % (validUser, sound, mean, stdev, param))
                        validation_rows.append([validUser, sound, model_name, mean, stdev, param])

                    pred_keras = grid_result.predict(X_test)
                    # Y_test is already an ndarray here, so it has no `.values`.
                    y_true = np.asarray(Y_test).ravel()
                    y_pred = np.asarray(pred_keras).ravel()
                    # NOTE(review): Y_test carries `window` labels per sample
                    # while the models end in Dense(1); if the classifier
                    # returns one prediction per sample, the two arrays differ
                    # in length and confusion_matrix will reject them -
                    # TODO confirm the intended label granularity.
                    # With labels=[0, 1] sklearn lays the matrix out as
                    # [[TN, FP], [FN, TP]] and always returns a 2x2 result.
                    TN, FP, FN, TP = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
                    score_rows.append([validUser, sound, model_name, TP, FN, FP, TN, grid_result.best_params_])
                    print("Testing %s %s %s %s %s %s" % (validUser, sound, TP, FN, FP, TN))
                    print("Testing %s %s " % (validUser, sound), grid_result.best_params_)

    scores_df = pd.DataFrame(score_rows, columns=score_columns)
    validation_df = pd.DataFrame(validation_rows, columns=validation_columns)

    print("breakpoint save for ", item)
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # is gone in pandas 2.x.
    with pd.ExcelWriter("Kera_Results_for_RNN_" + item + "_11_19.xlsx", engine='xlsxwriter') as writer:
        scores_df.to_excel(writer, sheet_name='Sheet1')
        validation_df.to_excel(writer, sheet_name="sheet2")
    print(scores_df)