In [2]:
import pandas as pd
import numpy as np

import csv   
import math
import matplotlib.pyplot as plt
import datetime

import librosa as lr
import librosa.display as lrd
%matplotlib inline
%load_ext rpy2.ipython





In [2]:
SPLIT_TRAIN_TEST = .8
TARGET_FILE = '../data/noisemaker/noisemaker.csv'
SAMPLES_FOLDER = '../data/noisemaker/noisemaker_samples/'
FOLDER_PREFIX = 'noisemaker'

D = str(datetime.datetime.now())
RESULTS_FILE = 'RESULTS-' + FOLDER_PREFIX + '-' + D[:16].replace(' ','_') + '.csv'
print RESULTS_FILE

HOP_LENGTH = 512

N_MFCC = 20
N_MEL = 32

SUBSET = False
N_EPOCHS = 30
BATCH_SIZE = 32
VERBOSE = 1
VALIDATION = .1

DEPENDENCIES = {}

RESULTS-noisemaker-2018-01-10_03:45.csv


# Prepares datasets

In [3]:
# Prepares labels
Y = pd.read_csv(TARGET_FILE)
if SUBSET:
    Y = Y.head(100)
print Y.head()

len_train = int(Y.shape[0]*.8)
len_test = int(Y.shape[0]*.2)
print len_train, len_test

Y_train = Y.iloc[:len_train,:]
Y_test = Y.iloc[len_train:,:]
print 
print 'Training data:', Y_train.shape
print 'Test data:', Y_test.shape

Y_train, files_train =  Y_train.drop('file', axis=1), Y_train['file']
Y_test, files_test = Y_test.drop('file', axis=1), Y_test['file']
print
print files_train[:5]
print files_test[:5]

   filter_sustain  amp_sustain  lfo1_amount  osc1_pulse_width  amp_decay  \
0        0.629921     1.811024     2.755906          6.377953   1.653543   
1        9.291339     5.748031     0.314961          5.039370   9.212598   
2        7.086614     3.779528     9.921260          2.204724   6.141732   
3        0.314961     6.141732     8.425197          1.181102   7.007874   
4        3.622047     7.165354     5.748031          7.165354   3.464567   

   lfo1_rate  filter_cutoff lfo1_destination osc2_wave  osc2_volume  \
0   8.818898       1.496063           FILTER      Sine     3.464567   
1   0.078740       4.803150        OSC1PITCH      Sine     9.763780   
2   4.015748       2.440945          NOTHING      Sine     7.086614   
3   7.244094       2.204724          NOTHING     Noise     9.606299   
4   2.834646       0.472441          NOTHING  Triangle     1.653543   

   amp_attack  osc1_volume  filter_resonance  filter_attack  filter_decay  \
0    5.590551     8.346457          5.1

In [4]:
# Loads waveforms
sampling_rate = lr.load(SAMPLES_FOLDER + files_train[0])[1]
print 'Sampling rate:', sampling_rate

waveforms_test = []
for i, file_name in enumerate(files_test):
    if i % 100 == 0:
        print 'Loaded testing waveform #' + str(i)
    f = SAMPLES_FOLDER + file_name
    waveforms_test.append(lr.load(f)[0])
X_test = np.stack(waveforms_test)
print X_test.shape


waveforms_train = []
for i, file_name in enumerate(files_train):
    if i % 100 == 0:
        print 'Loaded training waveform #' + str(i)
    f = SAMPLES_FOLDER + file_name
    waveforms_train.append(lr.load(f)[0])
X_train = np.stack(waveforms_train)
print X_train.shape

Sampling rate: 22050
Loaded testing waveform #0
Loaded testing waveform #100
Loaded testing waveform #200
Loaded testing waveform #300
Loaded testing waveform #400
Loaded testing waveform #500
Loaded testing waveform #600
Loaded testing waveform #700
Loaded testing waveform #800
Loaded testing waveform #900
Loaded testing waveform #1000
Loaded testing waveform #1100
Loaded testing waveform #1200
Loaded testing waveform #1300
Loaded testing waveform #1400
Loaded testing waveform #1500
Loaded testing waveform #1600
Loaded testing waveform #1700
Loaded testing waveform #1800
Loaded testing waveform #1900
(2000, 19845)
Loaded training waveform #0
Loaded training waveform #100
Loaded training waveform #200


KeyboardInterrupt: 

# Independent models

## Prepares the features

In [None]:
L_test_mfcc = []
for i in range(X_test.shape[0]):
    S = X_test[i,:]
    mfcc = lr.feature.mfcc(y=S, sr=sampling_rate, hop_length=HOP_LENGTH, n_mfcc=N_MFCC)
    L_test_mfcc.append(mfcc)
X_test_mfcc = np.stack(L_test_mfcc)
print X_test_mfcc.shape

L_train_mfcc = []
for i in range(X_train.shape[0]):
    S = X_train[i,:]
    mfcc = lr.feature.mfcc(y=S, sr=sampling_rate, hop_length=HOP_LENGTH, n_mfcc=N_MFCC)
    L_train_mfcc.append(mfcc)
X_train_mfcc = np.stack(L_train_mfcc)
print X_train_mfcc.shape

In [None]:
plt.figure(figsize=(10, 4))
lr.display.specshow(X_train_mfcc[10,...], x_axis='time')
plt.colorbar()
plt.title('MFCC')
plt.tight_layout()

In [None]:
X_test_mfcc = np.reshape(X_test_mfcc, (X_test_mfcc.shape[0],-1))
X_train_mfcc = np.reshape(X_train_mfcc, (X_train_mfcc.shape[0],-1))
print X_test_mfcc.shape
print X_train_mfcc.shape

## Predictions

In [None]:
import sys
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score,mean_absolute_error
from sklearn.model_selection import GridSearchCV

headers=['algo', 'type', 'param1','param2','regularization',
         'target','metric', 'train_score', 'test_score']
with open(RESULTS_FILE, 'w') as f:
    writer = csv.writer(f)
    writer.writerow(headers)

    
def evaluate_algo(algo_label, grid_reg, grid_class):
    results = []
    for t_i, t_name in enumerate(list(Y_train)):

        print 'predicting feature', t_name
        y_train = Y_train[t_name]
        y_test = Y_test[t_name]
        print y_train.dtype
        
        if t_name in DEPENDENCIES:
            switch = DEPENDENCIES[t_name]
            
            to_keep_train = (Y_train[switch] == 'on')
            print 'Keeping', str(sum(to_keep_train)), 'training examples out of',  str(len(to_keep_train))
            y_train = y_train[to_keep_train]
            X_train= X_train_mfcc[to_keep_train,...]
            
            to_keep_test = (Y_test[switch] == 'on')
            print 'Keeping', str(sum(to_keep_test)), 'training examples out of',  str(len(to_keep_test))
            y_test = y_test[to_keep_test]
            X_test = X_test_mfcc[to_keep_test,...]
        
        else:
            X_train= X_train_mfcc
            X_test= X_test_mfcc
        
        print X_train.shape, y_train.shape
        print X_test.shape, y_test.shape
        
        best_params_C = None
        best_params_R = None
        
        try:
            # Case 1: classification
            if y_train.dtype == 'object':

                metric = 'class'

                # Cross-Validation Score
                grid_class.fit(X_train, y_train)
                cross_val_scores = grid_class.best_score_
                best_params_C = grid_class.best_params_
                print cross_val_scores

                # Test Score
                test_pred = grid_class.predict(X_test)
                test_score = accuracy_score(test_pred, y_test)
                print test_score


            # Case 2: regression
            elif y_train.dtype == 'float64':

                metric = 'reg'

                # Cross-Validation Score
                grid_reg.fit(X_train, y_train)
                cross_val_scores = grid_reg.best_score_ * -1
                best_params_R = grid_reg.best_params_
                print cross_val_scores

                # Test Score
                test_pred = grid_reg.predict(X_test)
                test_score = mean_absolute_error(test_pred, y_test)
                print test_score

            else:
                raise ValueError('Wrong Column Type')

            out = (algo_label, 'per_output', best_params_R, best_params_C,None,
                   t_name, metric, cross_val_scores, test_score)
            print out
            with open(RESULTS_FILE, 'a') as f:
                writer = csv.writer(f,quoting=csv.QUOTE_NONNUMERIC)
                writer.writerow(out)

        except:
            print "Unexpected error:", sys.exc_info()[0]

In [None]:
from sklearn import neighbors

algo_C =neighbors.KNeighborsClassifier()
grid_C = GridSearchCV(algo_C, {"n_neighbors":[3,5,8,16]}, scoring='accuracy', verbose=2)

algo_R =neighbors.KNeighborsRegressor()
grid_R = GridSearchCV(algo_R, {"n_neighbors":[3,5,8,16]}, scoring='neg_mean_absolute_error', verbose=2)

evaluate_algo('kNN', grid_R, grid_C)

In [None]:
from sklearn import tree

algo_C = tree.DecisionTreeClassifier()
grid_C = GridSearchCV(algo_C, {"max_depth":[2,4,8,16,32,64,128]}, scoring='accuracy', verbose=2)

algo_R = tree.DecisionTreeRegressor()
grid_R = GridSearchCV(algo_R, {"max_depth":[2,4,8,16,32,64,128]}, scoring='neg_mean_absolute_error', verbose=2)

evaluate_algo('Decision Tree', grid_R, grid_C)

In [None]:
from sklearn import dummy

algo_C = dummy.DummyClassifier()
grid_C = GridSearchCV(algo_C, {"strategy":["most_frequent"]}, scoring='accuracy', verbose=2)

algo_R = dummy.DummyRegressor()
grid_R = GridSearchCV(algo_R, {"strategy":["mean"]}, scoring='neg_mean_absolute_error', verbose=2)

evaluate_algo('Naive', grid_R, grid_C)

# Joint modelling

In [None]:
import collections

def target_info(df_y):
    infos = collections.OrderedDict()
    
    for i,name in enumerate(list(df_y)):
        print name
        
        y = df_y[[name]].values.flatten()
        infos[name] = {'type':y.dtype}
        
        if y.dtype=='object':
            u_vals = sorted(np.unique(y))
            infos[name]['int2char'] = {i:v for i,v in enumerate(u_vals)}
            infos[name]['char2int'] = {v:i for i,v in enumerate(u_vals)}
            
        elif y.dtype=='float64':
            infos[name]['mean'] = np.mean(y)
            infos[name]['sd'] = np.std(y)
        
    return infos

# Gets target info
Y_info = target_info(Y_train)
print Y_info

In [None]:
# Preps input data
X_train_mfcc = X_train_mfcc.reshape([X_train_mfcc.shape[0],N_MFCC,-1]).transpose(0,2,1)
X_test_mfcc = X_test_mfcc.reshape([X_test_mfcc.shape[0],N_MFCC,-1]).transpose(0,2,1)

print ''
print X_train_mfcc.shape
print X_test_mfcc.shape

In [None]:
from keras.callbacks import EarlyStopping

def format_output(Y):
    out = {}
    for name,infos in Y_info.iteritems():

        # Normalizes/one-hot encodes
        if infos['type'] == 'float64':
            y = (Y[name] - infos['mean']) / infos['sd']
            
        elif infos['type'] == 'object':
            char2int = infos['char2int']
            y = np.zeros((len(Y[name]), len(char2int)))
            for i,yval in enumerate(Y[name]):
                y[i, char2int[yval]] = 1        
        else:
            raise ValueError
        
        # Sets to zero if necessary
        if name in DEPENDENCIES:
            switch = DEPENDENCIES[name]
            is_off = Y[switch] == "off"
            print 'Setting', str(sum(is_off)),'elements to zero'
            y[is_off] = 0
        
        print name, y.shape
        out[name] = y
    
    return out

        
def test_model(label, params1, params2, regul, model, X_train, X_test):
    
    try:
        # Prepares input and output data
        X_mean = np.mean(X_train, axis=(0))
        X_sd = np.std(X_train, axis=(0))

        X_train = (X_train - X_mean) / X_sd
        X_test  = (X_test - X_mean) / X_sd

        Y_dict_train = format_output(Y_train)
        Y_dict_test  = format_output(Y_test)

        # Trains the model
        early_stopping = EarlyStopping(monitor='val_loss', patience=3)
        hist = model.fit(X_train, Y_dict_train, 
                  epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=VERBOSE,
                 validation_split = VALIDATION, callbacks=[early_stopping])

        # Makes predictions
        pred = model.predict(X_test, batch_size=BATCH_SIZE, verbose=VERBOSE)

        # Gets validation and test scores (for each metric)
        Y = {}
        for j, Y_pred in enumerate(pred):
            print 'Output:', j
            t_name = Y_info.keys()[j]
            t_infos = Y_info[t_name]
            truth = Y_test[t_name]
            print t_name
            print Y_pred.shape

            if t_name in DEPENDENCIES:
                switch = DEPENDENCIES[t_name]

                to_keep_test = (Y_test[switch] == 'on')
                print 'Keeping', str(sum(to_keep_test)), 'training examples out of',  str(len(to_keep_test))
                truth = truth[to_keep_test]
                Y_pred = Y_pred[to_keep_test,...]
                print Y_pred.shape, truth.shape

            if t_infos['type'] == 'float64':
                test_metric = 'reg'
                Y_pred = Y_pred.flatten()
                y_pred = Y_pred * t_infos['sd'] + t_infos['mean']
                y_pred = y_pred.tolist()
                test_score = mean_absolute_error(y_pred, truth)

            elif t_infos['type'] == 'object':
                test_metric = 'class'
                y_i = np.argmax(Y_pred, axis=1)
                y_pred = [t_infos['int2char'][y] for y in y_i]
                test_score = accuracy_score(y_pred, truth)

            else:
                raise ValueError

            train_scores = -1 * hist.history['val_loss'][-1]
            out = (label, 'joint', params1, params2, regul,
                   t_name, test_metric, train_scores, test_score)
            print out
            with open(RESULTS_FILE, 'a') as f:
                writer = csv.writer(f,quoting=csv.QUOTE_NONNUMERIC)
                writer.writerow(out)

            Y[t_name] = y_pred
            
    except:
        print "Unexpected error:", sys.exc_info()[0]
        return
    
    return Y

In [None]:
def NN_output(h_layer):
    # Output layers and losses
    out_layers = []
    losses = {}
    metrics = {}
    
    # Adds on/off switches
    switches = []
    for name,infos in Y_info.iteritems():
        if not name in DEPENDENCIES and infos['type'] == 'object':
            out_dim = len(infos['char2int'])
            out_layer = Dense(out_dim, activation='softmax', name=name)(h_layer)
            losses[name] = 'categorical_crossentropy'
            metrics[name] = 'accuracy'
            switches.append(name)
            out_layers.append(out_layer)
            
        if not name in DEPENDENCIES and infos['type'] == 'float64':
            out_layer = Dense(1, name=name)(h_layer)
            losses[name] = 'mean_absolute_error'
            metrics[name] = 'mean_absolute_error'
            switches.append(name)
            out_layers.append(out_layer)
            
    # Adds dependent switches and knobs
    dependent = []
    for name,infos in Y_info.iteritems():
        
        if name in DEPENDENCIES and infos['type'] == 'float64':
            pred_layer = Dense(1)(h_layer)
            
            switch = DEPENDENCIES[name]
            li_switch = switches.index(switch)
            i_on = Y_info[switch]['char2int']['on']
            switch = Lambda(lambda T: T[:,i_on])(out_layers[li_switch])
            out_layer = Multiply(name = name)([pred_layer, switch])
            
            losses[name] = 'mean_absolute_error'
            metrics[name] = 'mean_absolute_error'
            out_layers.append(out_layer)
            dependent.append(name)

        
        if name in DEPENDENCIES and infos['type'] == 'object':
            out_dim = len(infos['char2int'])
            pred_layer = Dense(out_dim, activation='softmax')(h_layer)            
            
            switch = DEPENDENCIES[name]
            li_switch = switches.index(switch)
            i_on = Y_info[switch]['char2int']['on']
            switch = Lambda(lambda T: T[:,i_on])(out_layers[li_switch])
            out_layer = Multiply(name = name)([pred_layer, switch])
            
            losses[name] = 'categorical_crossentropy'
            metrics[name] = 'accuracy'
            out_layers.append(out_layer)
            dependent.append(name)
        
    # Reorders everything
    all_out = switches + dependent
    ord_out = []
    for out_name in Y_info:
        o = out_layers[all_out.index(out_name)]
        ord_out.append(o)

    return ord_out, metrics, losses

In [None]:
# Model 1: 1 layer perceptron
from keras.layers import Input, Dense, Flatten, Multiply, Lambda, RepeatVector
from keras import backend as K
from keras.models import Model
from keras import regularizers

def MLP(n_hidden_units, n_layers, regul, in_shape):
    # Input layer
    in_layer = Input(shape=in_shape)
    
    # Hidden layers
    h_layer = Flatten()(in_layer)
    for i in range(n_layers):
        h_layer = Dense(n_hidden_units, activation='relu')(h_layer)

    out_layers, metrics, losses = NN_output(h_layer)
    model = Model(inputs = in_layer, outputs = out_layers)
    model.compile(loss=losses, optimizer='adam')
    
    print model.summary()
    return model

for N_LAYERS in [1,2]:
    for N_UNITS in [64,90,128]:
        for REGUL in [0]:
            in_dim = X_train_mfcc.shape[1:]
            model = MLP(N_UNITS, N_LAYERS, REGUL, in_dim)
            out = test_model('MLP', N_UNITS, N_LAYERS, REGUL, model, X_train_mfcc, X_test_mfcc)

In [None]:
from keras.layers import LSTM

def modLSTM(n_hidden_units, n_layers, regul, in_shape):
    # Input layer
    in_layer = Input(shape=in_shape)
    
    # Hidden layers
    h_layer = in_layer
    for i in range(n_layers):
        seq = i < n_layers - 1
        h_layer = LSTM(n_hidden_units, return_sequences=seq, dropout=regul)(h_layer)
        
    # Output layers and losses
    out_layers, metrics, losses = NN_output(h_layer)
    model = Model(inputs = in_layer, outputs = out_layers)
    model.compile(loss=losses, optimizer='adam')
    
    print model.summary()
    return model

for N_LAYERS in [1,2]:
    for N_UNITS in [32,64,80,128]:
        for REGUL in [0,0.2]:
            in_dim = X_train_mfcc.shape[1:]
            model = modLSTM(N_UNITS, N_LAYERS, REGUL, in_dim)
            out = test_model('LSTM', N_UNITS, N_LAYERS, REGUL, model, X_train_mfcc, X_test_mfcc)

In [None]:
from keras.layers import Conv1D,MaxPooling1D,GlobalMaxPooling1D

W = 4

def modConvo(n_filters, n_layers, regul, in_shape):
    # Input layer
    print in_shape
    in_layer = Input(shape=in_shape)
    
    # Hidden layers
    h_layer = in_layer
    for i in range(n_layers):
        print i
        conv_layer = Conv1D(n_filters*(i+1), W, 
                          kernel_regularizer=regularizers.l2(regul))(h_layer)
        h_layer = MaxPooling1D(W, strides=2)(conv_layer)
    
    h_layer_pool = GlobalMaxPooling1D()(h_layer)
    h_layer_full = Dense(n_filters, 
                          kernel_regularizer=regularizers.l2(regul))(h_layer_pool)
    
    # Output layers and losses
    out_layers, metrics, losses = NN_output(h_layer_full)
    model = Model(inputs = in_layer, outputs = out_layers)
    model.compile(loss=losses, optimizer='adam')
    
    print model.summary()
    return model

for N_FILTERS in [16,32,48,64]:
    for N_LAYERS in [1,2]:
        for REGUL in [0]:
            in_dim = X_train_mfcc.shape[1:]
            model = modConvo(N_FILTERS, N_LAYERS, REGUL, in_dim)
            out = test_model('Conv', N_FILTERS, N_LAYERS, REGUL, model, X_train_mfcc, X_test_mfcc)

# Mid level features

In [None]:
import collections

def target_info(df_y):
    infos = collections.OrderedDict()
    
    for i,name in enumerate(list(df_y)):
        print name
        
        y = df_y[[name]].values.flatten()
        infos[name] = {'type':y.dtype}
        
        if y.dtype=='object':
            u_vals = sorted(np.unique(y))
            infos[name]['int2char'] = {i:v for i,v in enumerate(u_vals)}
            infos[name]['char2int'] = {v:i for i,v in enumerate(u_vals)}
            
        elif y.dtype=='float64':
            infos[name]['mean'] = np.mean(y)
            infos[name]['sd'] = np.std(y)
        
    return infos

# Gets target info
Y_info = target_info(Y_train)
print Y_info

In [None]:
L_test_mel = []
for i in range(X_test.shape[0]):
    S = X_test[i,:]
    mel = lr.feature.melspectrogram(y=S, sr=sampling_rate, hop_length=HOP_LENGTH, n_mels=N_MEL)
    L_test_mel.append(mel)
X_test_mel = np.stack(L_test_mel)
print X_test_mel.shape

L_train_mel = []
for i in range(X_train.shape[0]):
    S = X_train[i,:]
    mel = lr.feature.melspectrogram(y=S, sr=sampling_rate, hop_length=HOP_LENGTH, n_mels=N_MEL)
    L_train_mel.append(mel)
X_train_mel = np.stack(L_train_mel)
print X_train_mel.shape

In [None]:
plt.figure(figsize=(10, 4))
lr.display.specshow(X_train_mel[1,...], x_axis='time')
plt.colorbar()
plt.title('Mel spectrogram')
plt.tight_layout()

In [None]:
X_train_mel = np.transpose(X_train_mel, (0,2,1))
X_test_mel = np.transpose(X_test_mel, (0,2,1))
print X_train_mel.shape, X_test_mel.shape

In [None]:
for N_LAYERS in [1,2]:
    for N_UNITS in [64,90,128]:
        for REGUL in [0]:
            in_dim = X_train_mel.shape[1:]
            model = MLP(N_UNITS, N_LAYERS, REGUL, in_dim)
            out = test_model('MLP_mid', N_UNITS, N_LAYERS, REGUL, model, X_train_mel, X_test_mel)

In [None]:
for N_LAYERS in [1,2]:
    for N_UNITS in [32,64,80,128]:
        for REGUL in [0,0.2]:
            in_dim = X_train_mel.shape[1:]
            model = modLSTM(N_UNITS, N_LAYERS, REGUL, in_dim)
            out = test_model('LSTM_mid', N_UNITS, N_LAYERS, REGUL, model, X_train_mel, X_test_mel)

In [None]:
for N_FILTERS in [16,32,48,64]:
    for N_LAYERS in [1,2]:
        for REGUL in [0]:
            in_dim = X_train_mel.shape[1:]
            model = modConvo(N_FILTERS, N_LAYERS, REGUL, in_dim)
            out = test_model('Conv_mid', N_FILTERS, N_LAYERS, REGUL, model, X_train_mel, X_test_mel)

# Visualizing the results

In [None]:
%%R -i RESULTS_FILE

library(tidyverse)

# Processing
csv <- read.csv(RESULTS_FILE)

indep_models <- csv %>% filter(type == 'per_output')

joint_models <- csv %>% filter(type == 'joint')
to_keep <- joint_models %>% 
                group_by(algo, type, param1,param2,regularization) %>%
                summarize(train_score = mean(train_score)) %>%
                group_by(algo) %>%
                filter(train_score == max(train_score)) %>% data.frame()
print(to_keep)

joint_models <- semi_join(joint_models, to_keep, by=c("algo","param1","param1","regularization")) %>%
                distinct()

all_models <- rbind(indep_models, joint_models)

# Plotting
to_plot <- all_models %>%
            select(algo, metric, test_score) %>%
            group_by(algo, metric) %>%
            summarize(score = mean(test_score)) %>%
            as.data.frame
print(to_plot)

to_plot$metric <- factor(to_plot$metric)

p <- ggplot(to_plot, aes(x=factor(algo), y=score)) +
    geom_bar(stat='identity') +
    facet_grid(metric~., scales='free')
p


In [3]:
%%R 

RESULTS_FILE <- "RESULTS-noisemaker-2018-01-10_15:47.csv"

library(tidyverse)

# Processing
csv <- read.csv(RESULTS_FILE)

indep_models <- csv %>% filter(type == 'per_output')

joint_models <- csv %>% filter(type == 'joint')
to_keep <- joint_models %>% 
                group_by(algo, type, param1,param2,regularization) %>%
                summarize(train_score = mean(train_score)) %>%
                group_by(algo) %>%
                filter(train_score == max(train_score)) %>% data.frame()
print(to_keep)

joint_models <- semi_join(joint_models, to_keep, by=c("algo","param1","param1","regularization")) %>%
                distinct()

all_models <- rbind(indep_models, joint_models)


# Plotting
to_plot <- all_models %>%
            select(algo, metric, test_score) %>%
            group_by(algo, metric) %>%
            summarize(score = mean(test_score)) %>%
            as.data.frame
to_plot$family <- sapply(to_plot$algo,function(n){
    if (n%in%c('Conv', "LSTM", "MLP")){
        "joint model + MFCC"
    } else if (n%in%c('Conv_mid', "LSTM_mid", "MLP_mid")){
        "joint model + Mel Spectrum"
    } else{
        "Independent + MFCC"
    }
})

print(to_plot)

to_plot$metric <- factor(to_plot$metric)

p <- ggplot(to_plot, aes(x=factor(algo), y=score, color=family, fill=family)) +
    geom_bar(stat='identity') +
    facet_grid(metric~family, scales='free')
print(p)

ggsave('results_guitar.pdf', p, width=10)


[32m<U+221A>[39m [34mtibble [39m 1.4.1     [32m<U+221A>[39m [34mdplyr  [39m 0.7.4
[32m<U+221A>[39m [34mtidyr  [39m 0.7.2     [32m<U+221A>[39m [34mstringr[39m 1.2.0
[32m<U+221A>[39m [34mreadr  [39m 1.1.1     [32m<U+221A>[39m [34mforcats[39m 0.2.0

[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()




      algo  type param1 param2 regularization train_score
1     Conv joint     64      2            0.0   -13.55755
2 Conv_mid joint     16      1            0.0   -15.66745
3     LSTM joint    128      2            0.2   -13.08998
4 LSTM_mid joint     80      2            0.2   -14.91300
5      MLP joint    128      2            0.0   -13.51242
6  MLP_mid joint     64      2            0.0   -15.19052
            algo metric     score                     family
1           Conv  class 0.4479167         joint model + MFCC
2           Conv    reg 2.1559831         joint model + MFCC
3       Conv_mid  class 0.2518333 joint model + Mel Spectrum
4       Conv_mid    reg 2.4841223 joint model + Mel Spectrum
5  Decision Tree  class 0.4008333         Independent + MFCC
6  Decision Tree    reg 2.1478555         Independent + MFCC
7            kNN  class 0.3555000         Independent + MFCC
8            kNN    reg 2.1473486         Independent + MFCC
9           LSTM  class 0.4771667         joi