# CNN librosa

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from tensorflow import keras

In [2]:
def unpickle(filename):
    import pickle
    with open(filename, 'rb') as fp:
        banana = pickle.load(fp)
    return banana

In [3]:
def getXy(signals, mfcconly=False):
    X = []
    y = []
    for element in signals:
        if mfcconly:
            X.append(element["mfccs"])
        else:
            X.append(np.concatenate((element["mfccs"], element["delta"], element["delta2"])).T)
        y.append(element["encodedLabel"])
    return np.array(X), np.array(y)
        

def get_data_splits(signals, test_size = 0.1, validation_size = 0.2, mfcconly=False):
    X,y = getXy(signals, mfcconly)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=validation_size)
    
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]
    
    return X_train, X_validation, X_test, y_train, y_validation, y_test

![img](https://miro.medium.com/max/486/1*jgWOhDiGjVp-NCSPa5abmg.png)

L2 - Ridge regression

In [26]:
def build_tf_model(input_shape, learning_rate, num_speakers, error="sparse_categorical_crossentropy"):
    model = keras.Sequential()
    
    #convolution layer 1
    model.add(keras.layers.Conv2D(64, (3,3), activation="relu", input_shape=input_shape, kernel_regularizer=keras.regularizers.l2(0.001)))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPool2D((3,3), strides=(2,2), padding="same"))
    
    #clayer 2
    model.add(keras.layers.Conv2D(32, (3,3), activation="relu", kernel_regularizer=keras.regularizers.l2(0.001)))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPool2D((3,3), strides=(2,2), padding="same"))
    
    #clayer 3
    model.add(keras.layers.Conv2D(32, (2,2), activation="relu", kernel_regularizer=keras.regularizers.l2(0.001)))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPool2D((2,2), strides=(2,2), padding="same"))
    
    #flatten -> feed to dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation="relu"))
    model.add(keras.layers.Dropout(0.3))
    
    #softmax
    model.add(keras.layers.Dense(num_speakers, activation="softmax"))
    
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    
    model.compile(optimizer = optimizer, loss=error, metrics=["accuracy"])
    
    
    model.summary()
    
    return model

In [4]:
#global vars
LR = 0.0001
BATCH_SIZE=32
EPOCHS=80

#load data from pickles
librosa_signals = unpickle("librosa_signals.pickle")
labelEncoder = unpickle("labelEncoder.pickle")


In [11]:
X_train.shape

(80, 232, 39, 1)

In [32]:
X_train, X_validation, X_test, y_train, y_validation, y_test = get_data_splits(librosa_signals, mfcconly=False)

input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])

model = build_tf_model(input_shape, LR, len(labelEncoder.classes_))

model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_validation,y_validation))

test_err, test_acc = model.evaluate(X_test, y_test)
print(f"Test error: {test_err}, test acc: {test_acc}")

# model.save()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_19 (Conv2D)          (None, 230, 37, 64)       640       
                                                                 
 batch_normalization_19 (Bat  (None, 230, 37, 64)      256       
 chNormalization)                                                
                                                                 
 max_pooling2d_19 (MaxPoolin  (None, 115, 19, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_20 (Conv2D)          (None, 113, 17, 32)       18464     
                                                                 
 batch_normalization_20 (Bat  (None, 113, 17, 32)      128       
 chNormalization)                                                
                                                      

In [34]:
model.save("cnn_librosa_91.h5")

# Knn librosa

In [67]:

#load data from pickles
librosa_signals = unpickle("librosa_signals.pickle")
labelEncoder = unpickle("labelEncoder.pickle")


[]

In [73]:
def getSpkData():
    X = []
    speakerData = dict()
    for element in librosa_signals:
        trackEncodings = np.concatenate((element["mfccs"], element["delta"], element["delta2"])).T
        if element["encodedLabel"] in list(speakerData.keys()):
            speakerData[element["encodedLabel"]] = np.vstack((speakerData[element["encodedLabel"]], trackEncodings))
        else:
            speakerData[element["encodedLabel"]] = trackEncodings
            
    return speakerData

In [76]:
data[6].shape

(2320, 39)

In [99]:
X = [element for element in data.values()]

In [97]:
len()

11

In [86]:
y = [element for element in list(data.keys())]

In [90]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

In [92]:
from sklearn.neighbors import KNeighborsClassifier

In [94]:
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train)

  array = np.asarray(array, order=order, dtype=dtype)


ValueError: Expected 2D array, got 1D array instead:
array=[array([[-433.55999758,   28.21585382,   21.16017777, ...,   -2.8878411 ,
           -0.89592522,   -2.2099711 ],
        [-221.60884322,   45.5208647 ,  -18.62908575, ...,   -2.8878411 ,
           -0.89592522,   -2.2099711 ],
        [ -82.53987707,   76.35240808,  -32.15127597, ...,   -2.8878411 ,
           -0.89592522,   -2.2099711 ],
        ...,
        [-589.51316059,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-589.51316059,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-589.51316059,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ]])
 array([[-4.18559345e+02,  4.77489625e+01,  2.86784434e+01, ...,
         -1.99636177e+00,  8.93695464e-02, -8.92843034e-01],
        [-4.07122823e+02,  2.14647093e+01,  2.26821090e+01, ...,
         -1.99636177e+00,  8.93695464e-02, -8.92843034e-01],
        [-2.52808410e+02,  2.54899537e+01, -1.67601242e+01, ...,
         -1.99636177e+00,  8.93695464e-02, -8.92843034e-01],
        ...,
        [-6.97343337e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.97343337e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.97343337e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])
 array([[-426.6466052 ,   65.69189919,    7.90904327, ...,   -1.61796658,
           -1.08917951,   -1.81951096],
        [-398.29111336,   64.05284218,    7.43452564, ...,   -1.61796658,
           -1.08917951,   -1.81951096],
        [-405.23589674,   58.41241889,   12.64890014, ...,   -1.61796658,
           -1.08917951,   -1.81951096],
        ...,
        [-554.71577178,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-554.71577178,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-554.71577178,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ]])
 array([[-3.78765802e+02,  9.15331726e+01,  5.47000340e+00, ...,
          6.72488097e-01, -1.26334669e+00, -1.42998430e+00],
        [-3.72600730e+02,  8.75588358e+01,  5.83768535e+00, ...,
          6.72488097e-01, -1.26334669e+00, -1.42998430e+00],
        [-3.84146161e+02,  8.15937326e+01,  2.07913623e+01, ...,
          6.72488097e-01, -1.26334669e+00, -1.42998430e+00],
        ...,
        [-7.04561826e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-7.04561826e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-7.04561826e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])
 array([[-438.562056  ,    6.72683815,   -4.10764499, ...,   -0.74477192,
           -1.83644352,   -1.12161538],
        [-374.42538421,   43.08770338,   -6.30584792, ...,   -0.74477192,
           -1.83644352,   -1.12161538],
        [-248.54562179,   72.15934818,  -14.76521421, ...,   -0.74477192,
           -1.83644352,   -1.12161538],
        ...,
        [-663.92006151,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-663.92006151,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-663.92006151,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ]])
 array([[-460.40021707,   15.06397539,   31.1264421 , ...,   -1.27616605,
            1.48804038,   -1.30483057],
        [-426.11724616,   10.81968483,   28.69611107, ...,   -1.27616605,
            1.48804038,   -1.30483057],
        [-423.10581582,    5.99483255,   25.40627481, ...,   -1.27616605,
            1.48804038,   -1.30483057],
        ...,
        [-706.63431055,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-706.63431055,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ],
        [-706.63431055,    0.        ,    0.        , ...,    0.        ,
            0.        ,    0.        ]])
 array([[-1.85399021e+02,  4.07218782e+01,  1.45835423e+01, ...,
         -2.41090553e-01,  2.15243201e-01,  1.03584545e+00],
        [-1.07306736e+02,  5.56760529e+01,  3.09679146e+01, ...,
         -2.41090553e-01,  2.15243201e-01,  1.03584545e+00],
        [-1.12639203e+02,  7.64117978e+01,  4.80880705e+01, ...,
         -2.41090553e-01,  2.15243201e-01,  1.03584545e+00],
        ...,
        [-6.29880849e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.29880849e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.29880849e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])
 array([[-3.64901958e+02,  7.68217629e+01,  1.82499550e+01, ...,
          6.38915286e-01,  5.00163610e-01, -1.67396517e-03],
        [-2.70713274e+02,  4.47117390e+01, -1.53750049e+01, ...,
          6.38915286e-01,  5.00163610e-01, -1.67396517e-03],
        [-2.13351208e+02,  8.41428735e+01,  3.42067573e+00, ...,
          6.38915286e-01,  5.00163610e-01, -1.67396517e-03],
        ...,
        [-6.51630757e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.51630757e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.51630757e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])   ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.