In [1]:
import os
import librosa
import numpy as np
import pandas as pd 
import warnings
# Install an "ignore" filter with no category, message or module restriction,
# so every warning raised from this point on is suppressed.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below -- consider narrowing the filter to the specific noisy category.
warnings.filterwarnings("ignore")

# Speaker name -> class index. Names not listed here are mapped to "other"
# by extract_name_digit.
PEOPLE = {"jackson": 0, "nicolas": 1, "theo": 2, "other": 3}

# Number of speaker classes, derived from the mapping so the two cannot drift.
N_PEOPLE = len(PEOPLE)

# Number of digit classes per speaker (label slots are speaker * N_DIGITS + digit).
N_DIGITS = 10

In [2]:
def wav2mfcc(file_path, max_pad_len=25):
    """Load an audio file and return an MFCC matrix with a fixed frame count.

    The file is loaded as mono at its native sampling rate, decimated by
    keeping every third sample, and converted to MFCCs. The time axis
    (columns) is then truncated or right-padded with zeros so that the
    result always has exactly ``max_pad_len`` columns.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to read with ``librosa.load``.
    max_pad_len : int, optional
        Number of frames (columns) in the returned matrix.

    Returns
    -------
    numpy.ndarray
        2-D array of shape ``(n_coefficients, max_pad_len)``, where the row
        count is whatever ``librosa.feature.mfcc`` produces by default.
    """
    # The native rate reported by the loader is not used further.
    wave, _ = librosa.load(file_path, mono=True, sr=None)
    wave = wave[::3]

    # `y` must be passed by keyword: librosa >= 0.10 no longer accepts the
    # audio signal as a positional argument.
    # NOTE(review): sr is fixed at 16000 regardless of the file's native rate
    # and the 3x decimation above -- confirm this is the intended setting.
    mfcc = librosa.feature.mfcc(y=wave, sr=16000)

    # Truncate first so the padding amount below can never be negative
    # (np.pad raises on negative widths for clips longer than max_pad_len).
    mfcc = mfcc[:, :max_pad_len]
    pad_width = max_pad_len - mfcc.shape[1]
    return np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')

In [3]:
def extract_name_digit(file):
    """Parse the speaker name and spoken digit out of a recording file name.

    The name is split on underscores: the first field is the digit and the
    second field is the speaker, e.g. ``"7_theo_3.wav"`` -> ``("theo", 7)``.
    Speakers that are not keys of ``PEOPLE`` are reported as ``"other"``.

    Parameters
    ----------
    file : str
        Bare file name (no directory component).

    Returns
    -------
    tuple
        ``(name, digit)`` with ``name`` a key of ``PEOPLE`` and ``digit`` an int.

    Raises
    ------
    ValueError
        If the first field is not an integer.
    IndexError
        If the name contains no underscore.
    """
    parts = file.split("_")
    digit = int(parts[0])
    name = parts[1]
    # Membership test rather than comparing `.get(...)` to None with `==`.
    if name not in PEOPLE:
        name = "other"

    return name, digit

In [4]:
def extract_labels(name, digit):
    """Build the one-hot label row for a (speaker, digit) pair.

    Parameters
    ----------
    name : str
        Speaker name; must be a key of ``PEOPLE``.
    digit : int
        Spoken digit in ``range(N_DIGITS)``.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(1, N_PEOPLE * N_DIGITS)`` that is all zeros
        except for a 1.0 at column ``PEOPLE[name] * N_DIGITS + digit``.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``PEOPLE``.
    ValueError
        If ``digit`` is outside ``range(N_DIGITS)``.
    """
    # An out-of-range digit would otherwise land in another speaker's slots
    # (or wrap around via a negative index) and silently corrupt the label.
    if not 0 <= digit < N_DIGITS:
        raise ValueError(
            f"digit must be in range({N_DIGITS}), got {digit!r}"
        )

    # Direct indexing gives a clear KeyError for unknown speakers instead of
    # a TypeError from multiplying None.
    speaker_index = PEOPLE[name]

    label = np.zeros((1, N_PEOPLE * N_DIGITS), dtype=np.float32)
    label[0, speaker_index * N_DIGITS + digit] = 1.0
    return label

In [5]:
def save_numpy_array_tocsv(file_name, arr, cols):
    """Write an array to a comma-separated file via a pandas DataFrame.

    Parameters
    ----------
    file_name : str or file-like
        Destination handed straight to ``DataFrame.to_csv``.
    arr : array-like
        Data to wrap in a DataFrame; its columns get default integer labels.
    cols : list
        Column labels to write, in order. A header row is written and the
        row index is omitted.
    """
    frame = pd.DataFrame(arr)
    frame.to_csv(file_name, columns=cols, index=False, sep=',')

In [6]:
def wav_info_extractor(dirpath, ext):
    """Build feature, label, digit and path arrays for a directory of recordings.

    Every entry of ``dirpath`` whose name ends with ``ext`` is converted to
    MFCCs with ``wav2mfcc`` and labelled from its file name with
    ``extract_name_digit`` / ``extract_labels``. Entries are processed in
    sorted name order, and row ``i`` of every returned array refers to the
    same file.

    Parameters
    ----------
    dirpath : str
        Directory to scan. Relative paths are resolved against the current
        working directory (prefixed with ``"./"``); absolute paths are used
        unchanged.
    ext : str
        File-name suffix to select, e.g. ``".wav"``.

    Returns
    -------
    features : numpy.ndarray
        Shape ``(n_files, n_coefficients * n_frames)``; each MFCC matrix
        flattened into one row.
    labels : numpy.ndarray
        Shape ``(n_files, N_PEOPLE * N_DIGITS)``; one-hot rows.
    digits : numpy.ndarray
        Shape ``(n_files, 1)``; the spoken digit of each file.
    files : numpy.ndarray
        Shape ``(n_files,)``; the path each row was read from.

    Raises
    ------
    ValueError
        If no entry of ``dirpath`` ends with ``ext``.
    """
    # Relative directories keep the historical "./" prefix so the stored file
    # paths are unchanged; prefixing an absolute path would make it relative.
    path = dirpath if os.path.isabs(dirpath) else "./" + dirpath

    mfcc_vectors = []
    label_vectors = []
    digit_vectors = []
    file_vectors = []
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the exported arrays reproducible across runs and machines.
    for file in sorted(os.listdir(path)):
        if not file.endswith(ext):
            continue
        file_path = os.path.join(path, file)

        mfcc_vectors.append(wav2mfcc(file_path=file_path))

        name, digit = extract_name_digit(file)
        label_vectors.append(extract_labels(name, digit))

        digit_vectors.append(digit)
        file_vectors.append(file_path)

    # Fail with a clear message instead of an IndexError from the reshapes.
    if not file_vectors:
        raise ValueError(f"no files ending with {ext!r} found in {path!r}")

    n_files = len(file_vectors)
    # Flatten each (n_coefficients, n_frames) matrix into a single row.
    features = np.array(mfcc_vectors).reshape(n_files, -1)
    # Each label is (1, n_classes); drop the singleton axis.
    labels = np.array(label_vectors).reshape(n_files, -1)
    digits = np.array(digit_vectors).reshape(n_files, 1)
    files = np.array(file_vectors)

    return features, labels, digits, files

In [7]:
# Scan ./recordings/ for ".wav" files and build the four row-aligned arrays:
# flattened MFCC features, one-hot labels, spoken digits and source paths.
features, labels, digits, files = wav_info_extractor(dirpath="recordings/", ext=".wav")

In [8]:
from sklearn.preprocessing import StandardScaler
# Fit a StandardScaler on the whole feature matrix and overwrite `features`
# with the scaled values; the unscaled matrix is no longer available after
# this cell runs.
# NOTE(review): the scaler is fitted on every row -- if a train/test split
# happens later, confirm that fitting on the full set is intended.
scalar = StandardScaler()
features = scalar.fit_transform(features)

In [9]:
# Write each array to its own CSV in the working directory, keeping every
# column. `features` here is the value produced by the scaling cell.
save_numpy_array_tocsv(file_name="features.csv", arr=features, cols=list(range(features.shape[1])))
save_numpy_array_tocsv(file_name="labels.csv", arr=labels, cols=list(range(labels.shape[1])))
save_numpy_array_tocsv(file_name="digits.csv", arr=digits, cols=list(range(digits.shape[1])))
# `files` is one-dimensional, so its DataFrame has the single column 0.
save_numpy_array_tocsv(file_name="files.csv", arr=files, cols=[0])