# CNN librosa

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from tensorflow import keras

In [2]:
def unpickle(filename):
    """Load and return the object stored in the pickle file at ``filename``.

    Only use this on files you created yourself: unpickling can execute
    arbitrary code embedded in the file.
    """
    import pickle

    with open(filename, 'rb') as source:
        return pickle.load(source)

In [3]:
def getXy(signals, mfcconly=False):
    """Turn a sequence of recording dicts into feature and label arrays.

    Each element of ``signals`` must provide the keys ``"mfccs"``,
    ``"delta"``, ``"delta2"`` and ``"encodedLabel"``.

    Args:
        signals: iterable of recording dicts.
        mfcconly: when true, use ``element["mfccs"]`` as-is; otherwise
            concatenate mfccs, delta and delta2 along the first axis and
            transpose the result. Note that the mfcc-only features are
            NOT transposed, so the two modes yield different axis orders.

    Returns:
        ``(X, y)`` as numpy arrays: stacked features and encoded labels.
    """
    feature_rows, labels = [], []
    for item in signals:
        if mfcconly:
            features = item["mfccs"]
        else:
            stacked = np.concatenate((item["mfccs"], item["delta"], item["delta2"]))
            features = stacked.T
        feature_rows.append(features)
        labels.append(item["encodedLabel"])
    return np.array(feature_rows), np.array(labels)
        

def get_data_splits(signals, test_size = 0.1, validation_size = 0.2, mfcconly=False, random_state=None):
    """Split the recordings into train / validation / test arrays for the CNN.

    The data is split twice: first ``test_size`` of all samples is held out
    as the test set, then ``validation_size`` of the REMAINING samples (not
    of the whole data set) is held out as the validation set.

    Args:
        signals: iterable of recording dicts, forwarded to ``getXy``.
        test_size: fraction (or count) of all samples used for testing.
        validation_size: fraction (or count) of the non-test samples used
            for validation.
        mfcconly: forwarded to ``getXy``.
        random_state: seed (or RandomState) forwarded to both
            ``train_test_split`` calls. The default ``None`` keeps the
            previous unseeded behaviour; pass an int for a reproducible
            partition.

    Returns:
        ``X_train, X_validation, X_test, y_train, y_validation, y_test``.
        Each ``X_*`` array gets a trailing axis of length 1 appended.
    """
    X, y = getXy(signals, mfcconly)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # Append a single-channel axis; the model built in this notebook takes
    # its Conv2D input_shape from X_train.shape[1:4].
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test

![img](https://miro.medium.com/max/486/1*jgWOhDiGjVp-NCSPa5abmg.png)

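L2 regularization — the same squared-weight penalty used in ridge regression: a term $\lambda \sum_j w_j^2$ is added to the loss to discourage large weights.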

In [26]:
def build_tf_model(input_shape, learning_rate, num_speakers, error="sparse_categorical_crossentropy"):
    """Build, compile and summarise the speaker-classification CNN.

    Architecture: three Conv2D -> BatchNormalization -> MaxPool2D stages
    (64, 32 and 32 filters, every convolution L2-regularised with 0.001),
    followed by Flatten, a 64-unit ReLU Dense layer, Dropout(0.3) and a
    softmax Dense layer with ``num_speakers`` units.

    Args:
        input_shape: shape of a single sample, handed to the first Conv2D.
        learning_rate: learning rate for the Adam optimizer.
        num_speakers: number of units in the softmax output layer.
        error: loss passed to ``model.compile``.

    Returns:
        The compiled ``keras.Sequential`` model. Its summary is printed as a
        side effect.
    """
    # (filters, kernel size, pooling window) for each convolutional stage
    conv_stages = (
        (64, (3, 3), (3, 3)),
        (32, (3, 3), (3, 3)),
        (32, (2, 2), (2, 2)),
    )

    network = keras.Sequential()
    for stage_index, (filters, kernel, pool) in enumerate(conv_stages):
        # only the very first layer declares the input shape
        first_layer_kwargs = {"input_shape": input_shape} if stage_index == 0 else {}
        network.add(keras.layers.Conv2D(
            filters,
            kernel,
            activation="relu",
            kernel_regularizer=keras.regularizers.l2(0.001),
            **first_layer_kwargs,
        ))
        network.add(keras.layers.BatchNormalization())
        network.add(keras.layers.MaxPool2D(pool, strides=(2, 2), padding="same"))

    # classifier head: flatten -> dense -> dropout -> softmax
    network.add(keras.layers.Flatten())
    network.add(keras.layers.Dense(64, activation="relu"))
    network.add(keras.layers.Dropout(0.3))
    network.add(keras.layers.Dense(num_speakers, activation="softmax"))

    network.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=error,
        metrics=["accuracy"],
    )

    network.summary()

    return network

In [4]:
# Training hyperparameters used by the CNN training cell:
# LR is passed to build_tf_model as the learning rate,
# BATCH_SIZE and EPOCHS are passed to model.fit.
LR = 0.0001
BATCH_SIZE=32
EPOCHS=80

# Load the recordings and the label encoder from pickle files in the working directory.
# librosa_signals is iterated as a collection of per-recording dicts;
# labelEncoder.classes_ is used later to size the network's output layer.
librosa_signals = unpickle("librosa_signals.pickle")
labelEncoder = unpickle("labelEncoder.pickle")


In [11]:
# NOTE(review): X_train is only assigned by the get_data_splits cell further down,
# so this cell fails on a fresh top-to-bottom run; move it below that cell.
X_train.shape

(80, 232, 39, 1)

In [33]:
# NOTE(review): the only assignments to `data` visible in this notebook are in the
# GMM section, and they produce Python lists (which have no .shape). The array
# inspected here presumably came from a cell that has since been removed — confirm.
data.shape

(232, 39, 1)

In [5]:
# Build the train / validation / test arrays from the full feature set (mfccs + delta + delta2).
X_train, X_validation, X_test, y_train, y_validation, y_test = get_data_splits(librosa_signals, mfcconly=False)


In [32]:
# NOTE(review): this repeats the split from the previous cell; no seed is passed,
# so it draws a new random partition every time the cell runs.
X_train, X_validation, X_test, y_train, y_validation, y_test = get_data_splits(librosa_signals, mfcconly=False)

# Shape of one sample: every axis of X_train except the leading sample axis.
input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])

# One softmax output per class known to the label encoder.
model = build_tf_model(input_shape, LR, len(labelEncoder.classes_))

# Train, evaluating on the validation split after each epoch.
model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_validation,y_validation))

# Final check on the held-out test split: evaluate returns the loss followed by
# the accuracy metric the model was compiled with.
test_err, test_acc = model.evaluate(X_test, y_test)
print(f"Test error: {test_err}, test acc: {test_acc}")

# model.save()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_19 (Conv2D)          (None, 230, 37, 64)       640       
                                                                 
 batch_normalization_19 (Bat  (None, 230, 37, 64)      256       
 chNormalization)                                                
                                                                 
 max_pooling2d_19 (MaxPoolin  (None, 115, 19, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_20 (Conv2D)          (None, 113, 17, 32)       18464     
                                                                 
 batch_normalization_20 (Bat  (None, 113, 17, 32)      128       
 chNormalization)                                                
                                                      

In [34]:
# Persist the trained network to an .h5 file in the working directory.
# NOTE(review): the "91" in the name presumably records this run's accuracy — confirm.
model.save("cnn_librosa_91.h5")

# KNN librosa

In general the model is probably overfitted; I should really run cross-validation to confirm this.

Every one of my own recordings gets classified as Tobiasz.

In [6]:
# Load the recordings and the label encoder from pickles
# (the same two files that are loaded earlier in the notebook).
librosa_signals = unpickle("librosa_signals.pickle")
labelEncoder = unpickle("labelEncoder.pickle")

In [7]:
# Length of one raw signal. The first recording is used because any would do:
# the signals were padded earlier (presumably to a common length — confirm).
SIGNAL_LENGTH = len(librosa_signals[0]["signal"])

In [8]:
def getSpkData(signals):
    """Build one flat feature vector per recording, tagged with its speaker.

    For every element the ``"mfccs"``, ``"delta"`` and ``"delta2"`` arrays
    are concatenated along the first axis, transposed and flattened into a
    1-D vector.

    Args:
        signals: iterable of recording dicts providing the keys ``"mfccs"``,
            ``"delta"``, ``"delta2"`` and ``"encodedLabel"``.

    Returns:
        A list of ``{"speaker": encoded label, "data": 1-D numpy array}``
        dicts, in input order. Empty input yields an empty list.
    """
    speaker_records = []
    for element in signals:
        stacked = np.concatenate((element["mfccs"], element["delta"], element["delta2"]))
        # transpose before flattening so the vector is ordered frame by frame
        # (assuming axis 1 of the inputs indexes frames — TODO confirm)
        speaker_records.append({"speaker": element["encodedLabel"], "data": stacked.T.flatten()})
    return speaker_records

In [9]:
# One {"speaker", "data"} record per recording, with "data" flattened to 1-D.
spkData = getSpkData(librosa_signals)

In [10]:
# Separate the flat feature vectors from their speaker labels, then hold out
# 20% of the recordings for testing.
X = [entry["data"] for entry in spkData]
y = [entry["speaker"] for entry in spkData]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [17]:
# Number of features in one flattened recording vector.
len(X[0])

9048

In [11]:
# Sanity check: a training sample has the same number of features as the raw vectors.
len(X_train[0])

9048

In [12]:
# NOTE(review): import sits mid-notebook; consider moving it to the import cell at the top.
from sklearn.neighbors import KNeighborsClassifier
# 3-nearest-neighbour classifier fitted on the flattened feature vectors.
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

In [13]:
# Mean accuracy on the held-out test split (from a single unseeded random split).
neigh.score(X_test,y_test)

0.8695652173913043

In [139]:
# True encoded speaker label of the third test sample.
y_test[2]

0

In [140]:
# Per-class probabilities for that same test sample; with 3 neighbours and the
# default uniform weighting every entry is a multiple of 1/3.
neigh.predict_proba([X_test[2]])

array([[0.33333333, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.66666667,
        0.        ]])

In [76]:
def export_to_pickle(filename, data):
    """Pickle ``data`` to ``filename``, ensuring a ``.pickle`` extension.

    The extension is appended only when ``filename`` does not already end
    with it, so both ``"model"`` and ``"model.pickle"`` write
    ``model.pickle`` (previously the latter produced
    ``model.pickle.pickle``).

    Args:
        filename: target path as a string, with or without ``.pickle``.
        data: any picklable object.
    """
    import pickle

    extension = '.pickle'
    target = filename if filename.endswith(extension) else filename + extension
    with open(target, 'wb') as f:
        pickle.dump(data, f)
        
# Pass the bare name: export_to_pickle supplies the ".pickle" extension itself,
# so the classifier is written to knn78.pickle.
export_to_pickle("knn78", neigh)

# GMM

In [None]:
# NOTE(review): never executed (In [None]); exampleSet and keyValList are not
# defined anywhere in the visible notebook. This looks like a pasted reference for
# the list-filtering pattern used in the cells below — confirm and remove if so.
expectedResult = [d for d in exampleSet if d['type'] in keyValList]

In [7]:
# Feature matrices (mfccs + delta + delta2, concatenated then transposed) of every
# recording whose encoded label is 0.
data = [np.concatenate((x['mfccs'], x['delta'], x['delta2'])).T for x in librosa_signals if x["encodedLabel"] == 0]

In [6]:
# Group the flattened feature vectors by speaker: one {"y": class name, "x": [vectors]}
# dict per class known to the label encoder, in encoded-label order.
speakers_data = []
# enumerate() yields the encoded label together with its class name; the loop
# variable is deliberately not called `iter`, which would shadow the builtin
# for every later cell in the kernel.
for encoded_label, class_name in enumerate(labelEncoder.classes_):
    data = [(np.concatenate((x['mfccs'], x['delta'], x['delta2'])).T).flatten() for x in librosa_signals if x["encodedLabel"] == encoded_label]
    y = class_name
    speaker_data = {"y": y, "x": data}
    speakers_data.append(speaker_data)

In [11]:
# First flattened feature vector of the first speaker.
a = speakers_data[0]['x'][0]

In [12]:
# Display the vector.
a

array([-418.55934491,   47.74896254,   28.67844339, ...,    0.        ,
          0.        ,    0.        ])

In [16]:
# Stack the first two vectors of the first speaker as rows of a 2-D array.
ab = np.vstack((a,speakers_data[0]['x'][1]))

In [18]:
# Display the stacked array (one row per recording).
ab

array([[-418.55934491,   47.74896254,   28.67844339, ...,    0.        ,
           0.        ,    0.        ],
       [-395.52408034,   53.52468454,   -0.99826231, ...,    0.        ,
           0.        ,    0.        ]])

In [1]:
# NOTE(review): this cell carries execution count 1, i.e. it was apparently run on
# a fresh kernel before speakers_data existed, hence the NameError recorded below.
# Run the cell that builds speakers_data first.
import numpy as np
from sklearn.mixture import GaussianMixture
# Fit an 8-component Gaussian mixture to the first speaker's flattened feature vectors.
gmm=GaussianMixture(n_components=8).fit(speakers_data[0]['x'])

NameError: name 'speakers_data' is not defined

In [1]:
# NOTE(review): like the previous cell, this was apparently run on a fresh kernel
# (execution count 1) and fails with NameError until speakers_data has been built.
import numpy as np
from sklearn.mixture import GaussianMixture
# One 4-component Gaussian mixture per speaker, stored alongside the speaker's class name.
speakerModels = []
for speaker in speakers_data:
    gmm = GaussianMixture(n_components = 4).fit(speaker['x'])
    speakerModels.append({'y':speaker['y'], "model": gmm})

NameError: name 'speakers_data' is not defined

In [23]:
# NOTE(review): `gm` is not defined in any visible cell (the mixtures above are bound
# to `gmm` / `speakerModels`), so this relies on leftover kernel state — confirm
# which model is meant before re-running.
gm.predict_proba([X_train[0]])

array([[1., 0.]])