
# Data extraction with librosa

In [1]:
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
from os import listdir
from os.path import isfile,join
from os import walk
import pandas as pd
from sklearn import preprocessing
import scipy
import python_speech_features as psf
from scipy.io.wavfile import read

[librosa default conf](https://librosa.org/doc/0.9.1/generated/librosa.feature.mfcc.html?highlight=mfcc)
(uses [melspectrogram](https://librosa.org/doc/0.9.1/_modules/librosa/feature/spectral.html#melspectrogram), [dct](https://docs.scipy.org/doc/scipy/reference/generated/scipy.fftpack.dct.html#scipy.fftpack.dct))

win_length = defaults to n_fft. If a different win_length is passed, the window is still zero-padded so that it matches n_fft.

n_fft = 2048

hop_length = 512

window => [getWindow](https://librosa.org/doc/main/generated/librosa.filters.get_window.html) => [hann](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.windows.hann.html#scipy.signal.windows.hann); in general it accepts all of the window parameters available in scipy


In [3]:
def readFiles(useLibrosa = True, sr = 16000, root = "samples"):
    """Load every file found under ``root`` into a list of sample records.

    Parameters
    ----------
    useLibrosa : bool
        When true, each file is decoded with ``librosa.load`` and resampled
        to ``sr``. Otherwise it is read with ``scipy.io.wavfile.read`` and
        keeps the sampling rate stored in the file.
    sr : int
        Sampling rate passed to ``librosa.load``. Not used on the scipy path.
    root : str
        Directory that is walked recursively.

    Returns
    -------
    list of dict
        One dict per file with the keys ``"name"`` (file name), ``"signal"``
        (sample array), ``"sr"`` (sampling rate) and ``"label"`` (first two
        characters of the file name, upper-cased).
    """
    signals = []
    for path, subdirs, files in walk(root):
        # Sort in place so the traversal (and therefore signals[0]) does not
        # depend on the order the file system happens to return entries in.
        subdirs.sort()
        for name in sorted(files):
            file_path = join(path, name)
            if useLibrosa:
                signal, sampling_rate = librosa.load(file_path, sr=sr)
            else:
                sampling_rate, signal = scipy.io.wavfile.read(file_path)
            signals.append({
                "name": name,
                "signal": signal,
                "sr": sampling_rate,
                "label": name[:2].upper()
            })
    return signals

def padWithZeros(signals):
    """Right-pad every signal with zeros up to the length of the longest one.

    Parameters
    ----------
    signals : list of dict
        Records holding a NumPy array under the ``"signal"`` key.

    Returns
    -------
    list of dict
        The same list object; each record's ``"signal"`` is replaced by the
        padded array. ``np.append`` flattens its inputs, and the result dtype
        follows NumPy's promotion with the zero array.
    """
    if not signals:
        # max() raises ValueError on an empty sequence; there is nothing to pad.
        return signals
    longest_sample = max(entry["signal"].shape[0] for entry in signals)
    for entry in signals:
        missing = longest_sample - entry["signal"].shape[0]
        entry["signal"] = np.append(entry["signal"], np.zeros(missing))
    return signals

def encodedLabels(signals):
    """Attach an integer class id to every record.

    A ``LabelEncoder`` is fitted on the ``"label"`` values of all records and
    each record receives its encoded value under ``"encodedLabel"``.

    Returns
    -------
    tuple
        ``(signals, le)``: the same list (records updated in place) and the
        fitted encoder.
    """
    all_labels = [record["label"] for record in signals]
    # fit() returns the encoder itself, so construction and fitting can be chained.
    le = preprocessing.LabelEncoder().fit(all_labels)
    for record in signals:
        (code,) = le.transform([record["label"]])
        record["encodedLabel"] = code
    return signals, le

def get_psf_mfcc(y, sr, window=scipy.signal.windows.hann, n_fft = 2048, win_length = None, n_mfcc=13, hop_length=512):
    """Compute MFCCs with python_speech_features using librosa-style arguments.

    Parameters
    ----------
    y : array-like
        Audio signal.
    sr : int
        Sampling rate of ``y``.
    window : callable
        Window function, forwarded as ``winfunc``.
    n_fft : int
        FFT size, forwarded as ``nfft``.
    win_length : int or None
        Analysis window length in samples. When falsy, a 0.025 s window is used.
    n_mfcc : int
        Number of cepstral coefficients, forwarded as ``numcep``.
    hop_length : int or None
        Step between windows in samples. When falsy, a 0.01 s step is used.

    Returns
    -------
    Whatever ``psf.mfcc`` returns for these settings.
    """
    # python_speech_features takes window length and step in seconds, while
    # callers pass sample counts. Only sample counts are divided by sr; the
    # fallback values are already expressed in seconds.
    winlen = win_length / sr if win_length else 0.025
    winstep = hop_length / sr if hop_length else 0.01
    return psf.mfcc(signal=y,
                    samplerate=sr,
                    winlen=winlen,
                    winstep=winstep,
                    numcep=n_mfcc,
                    nfft=n_fft,
                    winfunc=window)

def calcMfccs(signals, window=scipy.signal.windows.hann, n_fft = 2048, hop_length = 512, win_length = None, n_mfcc= 13, useLibrosa = True):
    """Compute MFCCs and their first and second deltas for every record.

    Parameters
    ----------
    signals : list of dict
        Records with ``"signal"`` and ``"sr"`` keys.
    window, n_fft, hop_length, win_length, n_mfcc
        Forwarded to the selected MFCC backend.
    useLibrosa : bool
        ``True`` uses ``librosa.feature.mfcc``; ``False`` uses ``get_psf_mfcc``.

    Returns
    -------
    list of dict
        The same list; each record gains ``"mfccs"``, ``"delta"`` and
        ``"delta2"``. The ``"mfccs"`` array keeps the layout produced by the
        chosen backend.
    """
    # librosa returns (n_mfcc, frames) while python_speech_features returns
    # (frames, numcep), so the time axis differs between the two backends.
    # Deltas must be taken along time, not along the cepstral axis.
    time_axis = -1 if useLibrosa else 0
    for entry in signals:
        if useLibrosa:
            mfccs = librosa.feature.mfcc(y=entry["signal"], n_mfcc=n_mfcc, sr=entry["sr"],
                                        window = window,
                                        n_fft= n_fft,
                                        hop_length = hop_length,
                                        win_length = win_length)
        else:
            # Forward the window too, so both backends honour the same setting.
            mfccs = get_psf_mfcc(y=entry["signal"], sr=entry["sr"], window=window,
                                 n_fft=n_fft, win_length=win_length,
                                 n_mfcc=n_mfcc, hop_length=hop_length)
        entry["mfccs"] = mfccs
        entry["delta"] = librosa.feature.delta(mfccs, axis=time_axis)
        entry["delta2"] = librosa.feature.delta(mfccs, order=2, axis=time_axis)
    return signals

In [4]:
# librosa pipeline: load the files, pad them to a common length, encode the labels.
signals, labelEncoder = encodedLabels(
    padWithZeros(
        readFiles(useLibrosa=True)
    )
)

In [5]:
# calcMfccs adds "mfccs", "delta" and "delta2" to each record and returns the same list.
signals = calcMfccs(signals)

In [None]:
# Handy visual check: MFCC heat map of the first loaded sample.

fig, ax = plt.subplots(figsize=(25,10))
img = librosa.display.specshow(signals[0]["mfccs"], x_axis="time", sr=signals[0]["sr"], ax=ax)
ax.set(title="MFCCs of " + signals[0]["name"], ylabel="MFCC coefficient")
# "%+2.0f" keeps the colorbar ticks as signed whole numbers; the previous
# "%+2f" printed six decimal places on every tick.
fig.colorbar(img, ax=ax, format="%+2.0f")
plt.show()

# To stack the features into one matrix:
# np.concatenate((mfccs, delta_mfccs, delta2_mfccs))

In [14]:
# Show the sampling rate stored for the first record.
# (signals[0].keys() lists every field available on a record.)
signals[0]["sr"]

16000

In [23]:
# python_speech_features pipeline: read with scipy, pad, encode labels, then extract MFCCs.
psf_signals, psf_labelEncoder = encodedLabels(
    padWithZeros(
        readFiles(useLibrosa=False)
    )
)
psf_signals = calcMfccs(signals=psf_signals, useLibrosa=False)

  sampling_rate,signal = scipy.io.wavfile.read(path + "/" + name)


--------
# Feature extraction with python_speech_features

In [1]:
# Frame length: 0.025 s window * 16000 Hz sample rate = 400 samples

In [6]:
def export_to_pickle(filename, data):
    """Pickle ``data`` into the file ``<filename>.pickle``.

    Parameters
    ----------
    filename : str
        Target path without the ``.pickle`` extension, which is appended here.
    data : object
        Any picklable object.
    """
    import pickle

    target_path = filename + '.pickle'
    with open(target_path, 'wb') as handle:
        pickle.dump(data, handle)

In [7]:
# Persist the librosa features and the fitted label encoder.
export_to_pickle(filename="librosa_signals", data=signals)
export_to_pickle(filename="labelEncoder", data=labelEncoder)

In [31]:
# Persist both feature sets together with their label encoders.
for _export_name, _export_obj in (
    ("psf_signals", psf_signals),
    ("psf_le", psf_labelEncoder),
    ("librosa_signals", signals),
    ("labelEncoder", labelEncoder),
):
    export_to_pickle(_export_name, _export_obj)