In [1]:
import os
import numpy as np
import pandas as pd
import librosa as lb
import librosa.display
import matplotlib.pyplot as plt
import csv
import numpy as np

## Create DataFrame and columns with Names and Numbers of samples

https://github.com/Jakobovski/free-spoken-digit-dataset

6 speakers, 3000 recordings(50 of each digit per speaker)

In [2]:
list_recording_files = (os.listdir('recordings'))

In [3]:
df = pd.DataFrame(list_recording_files)

In [4]:
df = df.rename(columns={ df.columns[0]:'File'})

In [5]:
df['split'] = df['File'].str.split('_')

In [6]:
df

Unnamed: 0,File,split
0,0_george_0.wav,"[0, george, 0.wav]"
1,0_george_1.wav,"[0, george, 1.wav]"
2,0_george_10.wav,"[0, george, 10.wav]"
3,0_george_11.wav,"[0, george, 11.wav]"
4,0_george_12.wav,"[0, george, 12.wav]"
...,...,...
2995,9_yweweler_5.wav,"[9, yweweler, 5.wav]"
2996,9_yweweler_6.wav,"[9, yweweler, 6.wav]"
2997,9_yweweler_7.wav,"[9, yweweler, 7.wav]"
2998,9_yweweler_8.wav,"[9, yweweler, 8.wav]"


In [7]:
df['Number'] = df['split'].str[0]
df['Name'] = df['split'].str[1]
df.drop('split', inplace=True, axis=1)

In [8]:
df

Unnamed: 0,File,Number,Name
0,0_george_0.wav,0,george
1,0_george_1.wav,0,george
2,0_george_10.wav,0,george
3,0_george_11.wav,0,george
4,0_george_12.wav,0,george
...,...,...,...
2995,9_yweweler_5.wav,9,yweweler
2996,9_yweweler_6.wav,9,yweweler
2997,9_yweweler_7.wav,9,yweweler
2998,9_yweweler_8.wav,9,yweweler


In [9]:
df.to_csv('samples.csv', index=False)

## get MFCC to CSV

In [10]:
csvPath = 'samples.csv'
metadata = pd.read_csv(csvPath)
metadata.head()

Unnamed: 0,File,Number,Name
0,0_george_0.wav,0,george
1,0_george_1.wav,0,george
2,0_george_10.wav,0,george
3,0_george_11.wav,0,george
4,0_george_12.wav,0,george


In [11]:
for index, row in metadata.iterrows():
    filename = (row[0])
    number = (row[1])
    name = (row[2])
    y, sr = lb.load(f'./recordings/{filename}')
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    to_append = f'{filename} {number} {name} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    
    file = open('data.csv', 'a', newline='')
    with file:
        writer = csv.writer(file)
        writer.writerow(to_append.split())

In [12]:
data = pd.read_csv('data.csv', names= ['filename', 'number', 'name', 'chroma_stft', 'rmse', 'spec_cent', 'spec_bw', 'rolloff', 'zcr', 'mfcc0', 'mfcc1','mfcc2','mfcc3','mfcc4','mfcc5','mfcc6','mfcc7','mfcc8','mfcc9','mfcc10','mfcc11','mfcc12','mfcc13','mfcc14','mfcc15','mfcc16','mfcc17','mfcc18','mfcc19'])

In [13]:
data

Unnamed: 0,filename,number,name,chroma_stft,rmse,spec_cent,spec_bw,rolloff,zcr,mfcc0,...,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19
0,0_george_0.wav,0,george,0.198667,0.083719,1389.432001,1122.693199,2620.425180,0.060171,-312.483826,...,-24.018995,-43.016975,2.110411,-6.013774,-28.540850,-3.117069,-5.603108,-13.659479,-2.320226,-12.169437
1,0_george_1.wav,0,george,0.273889,0.042972,1034.395917,974.723142,2113.980807,0.046744,-379.066345,...,-19.969303,-40.412582,-2.325323,-6.705203,-25.866652,-7.142958,-10.470287,-16.260077,-5.955035,-14.368776
2,0_george_10.wav,0,george,0.342532,0.032147,1269.075083,1003.382474,2388.227983,0.076956,-424.772461,...,-23.223995,-34.547401,-1.589133,-11.633971,-23.948620,-0.723396,-4.458993,-10.715401,-1.525208,-11.829735
3,0_george_11.wav,0,george,0.255208,0.041286,1218.094283,1049.359689,2413.872070,0.062061,-389.584808,...,-23.147619,-41.559254,-0.668476,-12.243627,-26.992029,1.255178,-5.872051,-13.653750,-2.406699,-15.252200
4,0_george_12.wav,0,george,0.222804,0.041336,1146.683549,994.639615,2244.836426,0.061479,-389.274689,...,-21.835735,-41.768703,-1.458749,-10.668156,-28.250916,-1.996623,-7.960970,-16.239616,-3.517600,-17.073278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,9_yweweler_5.wav,9,yweweler,0.374018,0.008558,985.559078,884.080353,1823.593140,0.058289,-539.953735,...,10.374135,-23.454926,2.178336,0.333495,-16.196772,-3.123700,-9.697893,-11.340939,1.610538,-8.898899
5996,9_yweweler_6.wav,9,yweweler,0.408219,0.011599,1204.300650,911.623800,2174.853516,0.071094,-491.344299,...,11.993781,-14.578753,5.613831,0.955333,-12.609158,-2.106348,-12.828856,-10.495636,5.215742,-9.894784
5997,9_yweweler_7.wav,9,yweweler,0.376584,0.008699,1070.728559,908.047852,1971.633911,0.052460,-511.855133,...,6.611096,-17.942572,2.736619,-5.966835,-16.471325,-2.094696,-15.326060,-16.683109,-1.763444,-13.693180
5998,9_yweweler_8.wav,9,yweweler,0.352586,0.005564,1031.310462,940.535710,1887.145996,0.056478,-559.762512,...,7.000213,-17.065546,7.002769,-3.895181,-14.850399,1.386613,-11.851810,-10.300099,6.005344,-9.889004


In [14]:
data.to_csv('rec_data.csv', index=False)