In [2]:
# Example 1: short-term feature extraction
# Reads one WAV file, extracts short-term features frame by frame,
# lists the feature names and plots the per-frame 'energy' feature.
from pyAudioAnalysis import ShortTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO
import numpy as np
import plotly.graph_objs as go
import plotly
import IPython

wav_path = "data/object.wav"

# read audio data from file
# (returns sampling freq and signal as a numpy array)
fs, s = aIO.read_audio_file(wav_path)

# play the audio file in the notebook:
IPython.display.display(IPython.display.Audio(wav_path))

# print duration in seconds:
duration = len(s) / float(fs)
print(f'duration = {duration} seconds')

# extract short-term features using 50 msec non-overlapping windows
# (window and step are passed to the extractor in samples)
win, step = 0.050, 0.050
win_samples, step_samples = int(fs * win), int(fs * step)
[f, fn] = aF.feature_extraction(s, fs, win_samples, step_samples)
print(f'{f.shape[1]} frames, {f.shape[0]} short-term features')
print('Feature names:')
for i, nam in enumerate(fn):
    print(f'{i}:{nam}')

# plot short-term energy
# time axis in seconds: one value per extracted frame, starting at 0 and
# spaced one hop apart. It is built from the frame count (f.shape[1]) so
# that x and y always have the same length, even when win != step or when
# float rounding would shift the end point of an arange over the duration.
time = np.arange(f.shape[1]) * step_samples / float(fs)
# get the feature whose name is 'energy'
energy = f[fn.index('energy'), :]
mylayout = go.Layout(yaxis=dict(title="frame energy value"),
                     xaxis=dict(title="time (sec)"))
plotly.offline.iplot(go.Figure(data=[go.Scatter(x=time,
                                                y=energy)],
                               layout=mylayout))


duration = 1.03 seconds
20 frames, 68 short-term features
Feature names:
0:zcr
1:energy
2:energy_entropy
3:spectral_centroid
4:spectral_spread
5:spectral_entropy
6:spectral_flux
7:spectral_rolloff
8:mfcc_1
9:mfcc_2
10:mfcc_3
11:mfcc_4
12:mfcc_5
13:mfcc_6
14:mfcc_7
15:mfcc_8
16:mfcc_9
17:mfcc_10
18:mfcc_11
19:mfcc_12
20:mfcc_13
21:chroma_1
22:chroma_2
23:chroma_3
24:chroma_4
25:chroma_5
26:chroma_6
27:chroma_7
28:chroma_8
29:chroma_9
30:chroma_10
31:chroma_11
32:chroma_12
33:chroma_std
34:delta zcr
35:delta energy
36:delta energy_entropy
37:delta spectral_centroid
38:delta spectral_spread
39:delta spectral_entropy
40:delta spectral_flux
41:delta spectral_rolloff
42:delta mfcc_1
43:delta mfcc_2
44:delta mfcc_3
45:delta mfcc_4
46:delta mfcc_5
47:delta mfcc_6
48:delta mfcc_7
49:delta mfcc_8
50:delta mfcc_9
51:delta mfcc_10
52:delta mfcc_11
53:delta mfcc_12
54:delta mfcc_13
55:delta chroma_1
56:delta chroma_2
57:delta chroma_3
58:delta chroma_4
59:delta chroma_5
60:delta chroma_6
61:delta c

In [3]:
# Example 2: short-term feature extraction:
# spectral centroid of two speakers
# Reads one WAV file, extracts short-term features frame by frame and
# plots the per-frame 'spectral_centroid' feature over time.
from pyAudioAnalysis import ShortTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO
import numpy as np
import plotly.graph_objs as go
import plotly
import IPython

wav_path = "data/trump_bugs.wav"

# read audio data from file
# (returns sampling freq and signal as a numpy array)
fs, s = aIO.read_audio_file(wav_path)

# play the audio file in the notebook:
IPython.display.display(IPython.display.Audio(wav_path))

# print duration in seconds:
duration = len(s) / float(fs)
print(f'duration = {duration} seconds')

# extract short-term features using 50 msec non-overlapping windows
# (window and step are passed to the extractor in samples)
win, step = 0.050, 0.050
win_samples, step_samples = int(fs * win), int(fs * step)
[f, fn] = aF.feature_extraction(s, fs, win_samples, step_samples)
print(f'{f.shape[1]} frames, {f.shape[0]} short-term features')

# plot short-term spectral centroid
# time axis in seconds: one value per extracted frame, starting at 0 and
# spaced one hop apart. It is built from the frame count (f.shape[1]) so
# that x and y always have the same length, even when win != step or when
# float rounding would shift the end point of an arange over the duration.
time = np.arange(f.shape[1]) * step_samples / float(fs)
# get the feature whose name is 'spectral_centroid'
centroid = f[fn.index('spectral_centroid'), :]
mylayout = go.Layout(yaxis=dict(title="spectral_centroid value"),
                     xaxis=dict(title="time (sec)"))
plotly.offline.iplot(go.Figure(data=[go.Scatter(x=time,
                                                y=centroid)],
                               layout=mylayout))

duration = 3.812625 seconds
76 frames, 68 short-term features


In [4]:
# Example 3: segment-level feature extraction
# Extracts mid-term (segment) feature statistics together with the
# underlying short-term features, then reports their sizes and names.
from pyAudioAnalysis import MidTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO

# load the recording: sampling frequency and signal (numpy array)
fs, s = aIO.read_audio_file("data/trump_bugs.wav")

# window / step sizes expressed in samples:
# 1-second segments and 50 msec short-term frames, both non-overlapping
mid_window = mid_step = 1 * fs
short_window = short_step = 0.05 * fs

# mid-term statistics, short-term features and mid-term feature names
mid_feats, short_feats, mid_names = aF.mid_feature_extraction(
    s, fs, mid_window, mid_step, short_window, short_step)

signal_duration = len(s) / fs
n_short_dims, n_short_vectors = short_feats.shape
n_mid_dims, n_mid_vectors = mid_feats.shape
print(f'signal duration {signal_duration} seconds')
print(f'{n_short_vectors} {n_short_dims}-D short-term feature vectors extracted')
print(f'{n_mid_vectors} {n_mid_dims}-D segment feature statistic vectors extracted')
print('mid-term feature names')
for idx in range(len(mid_names)):
    print(f'{idx}:{mid_names[idx]}')

signal duration 3.812625 seconds
76 68-D short-term feature vectors extracted
4 136-D segment feature statistic vectors extracted
mid-term feature names
0:zcr_mean
1:energy_mean
2:energy_entropy_mean
3:spectral_centroid_mean
4:spectral_spread_mean
5:spectral_entropy_mean
6:spectral_flux_mean
7:spectral_rolloff_mean
8:mfcc_1_mean
9:mfcc_2_mean
10:mfcc_3_mean
11:mfcc_4_mean
12:mfcc_5_mean
13:mfcc_6_mean
14:mfcc_7_mean
15:mfcc_8_mean
16:mfcc_9_mean
17:mfcc_10_mean
18:mfcc_11_mean
19:mfcc_12_mean
20:mfcc_13_mean
21:chroma_1_mean
22:chroma_2_mean
23:chroma_3_mean
24:chroma_4_mean
25:chroma_5_mean
26:chroma_6_mean
27:chroma_7_mean
28:chroma_8_mean
29:chroma_9_mean
30:chroma_10_mean
31:chroma_11_mean
32:chroma_12_mean
33:chroma_std_mean
34:delta zcr_mean
35:delta energy_mean
36:delta energy_entropy_mean
37:delta spectral_centroid_mean
38:delta spectral_spread_mean
39:delta spectral_entropy_mean
40:delta spectral_flux_mean
41:delta spectral_rolloff_mean
42:delta mfcc_1_mean
43:delta mfcc_2_mea

In [5]:
# Example4: plot 2 features for 10 2-second samples
# from classical and 10 from metal music

from pyAudioAnalysis import MidTermFeatures as aF
import os
import numpy as np
import plotly.graph_objs as go
import plotly

dirs = ["data/music/classical", "data/music/metal"]
# class name = last path component of each directory
class_names = [os.path.basename(d) for d in dirs]
m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05

# segment-level feature extraction, one call per directory (class);
# each call returns (feature matrix, file list, feature names)
extracted = [aF.directory_feature_extraction(d, m_win, m_step,
                                             s_win, s_step)
             for d in dirs]
# one (samples x segment features) matrix per class; shapes printed below
features = [result[0] for result in extracted]
# feature names as returned by the last directory processed
fn = extracted[-1][2]
print(features[0].shape, features[1].shape)

# the two features to compare, and their column indices
x_name, y_name = 'spectral_centroid_mean', 'energy_entropy_mean'
x_col, y_col = fn.index(x_name), fn.index(y_name)

# per-class (2 x samples) matrices holding only the selected features
f1 = np.array([features[0][:, x_col], features[0][:, y_col]])
f2 = np.array([features[1][:, x_col], features[1][:, y_col]])

# plot 2D features: one marker trace per class
plots = [go.Scatter(x=class_feats[0, :], y=class_feats[1, :],
                    name=class_name, mode='markers')
         for class_feats, class_name in zip((f1, f2), class_names)]
mylayout = go.Layout(xaxis=dict(title=x_name),
                     yaxis=dict(title=y_name))
plotly.offline.iplot(go.Figure(data=plots, layout=mylayout))

Analyzing file 1 of 10: data/music/classical/classical.00012.au.wav
Analyzing file 2 of 10: data/music/classical/classical.00017.au.wav
Analyzing file 3 of 10: data/music/classical/classical.00023.au.wav
Analyzing file 4 of 10: data/music/classical/classical.00027.au.wav
Analyzing file 5 of 10: data/music/classical/classical.00030.au.wav
Analyzing file 6 of 10: data/music/classical/classical.00040.au.wav
Analyzing file 7 of 10: data/music/classical/classical.00048.au.wav
Analyzing file 8 of 10: data/music/classical/classical.00052.au.wav
Analyzing file 9 of 10: data/music/classical/classical.00057.au.wav
Analyzing file 10 of 10: data/music/classical/classical.00062.au.wav
Feature extraction complexity ratio: 2.1 x realtime
Analyzing file 1 of 10: data/music/metal/metal.00015.au.wav
Analyzing file 2 of 10: data/music/metal/metal.00023.au.wav
Analyzing file 3 of 10: data/music/metal/metal.00034.au.wav
Analyzing file 4 of 10: data/music/metal/metal.00038.au.wav
Analyzing file 5 of 10: dat

In [6]:
# Example5: plot 2 features for 10 2-second samples
# from classical and 10 from metal music.
# also train an SVM classifier and draw the respective
# decision surfaces

from pyAudioAnalysis import MidTermFeatures as aF
import os
import numpy as np
from sklearn.svm import SVC
import plotly.graph_objs as go
import plotly

dirs = ["data/music/classical", "data/music/metal"]
class_names = [os.path.basename(d) for d in dirs]
m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05

# segment-level feature extraction:
features = []
for d in dirs:  # get feature matrix for each directory (class)
    dir_features, files, fn = aF.directory_feature_extraction(d, m_win, m_step,
                                                              s_win, s_step)
    features.append(dir_features)

# select 2 features and create feature matrices for the two classes:
f1 = np.array([features[0][:, fn.index('spectral_centroid_mean')],
               features[0][:, fn.index('energy_entropy_mean')]])
f2 = np.array([features[1][:, fn.index('spectral_centroid_mean')],
               features[1][:, fn.index('energy_entropy_mean')]])

# plot 2D features
p1 = go.Scatter(x=f1[0, :], y=f1[1, :], name=class_names[0],
                marker=dict(size=10, color='rgba(255, 182, 193, .9)'),
                mode='markers')
p2 = go.Scatter(x=f2[0, :], y=f2[1, :], name=class_names[1],
                marker=dict(size=10, color='rgba(100, 100, 220, .9)'),
                mode='markers')

# training set: rows are samples, columns are the 2 selected features;
# label 0 = first class (dirs[0]), label 1 = second class (dirs[1])
y = np.concatenate((np.zeros(f1.shape[1]), np.ones(f2.shape[1])))
X = np.concatenate((f1.T, f2.T), axis=0)

# train the svm classifier
cl = SVC(kernel='rbf', C=20)
cl.fit(X, y)
# apply the trained model on the points of a grid
# that spans the range of the training data
x_ = np.arange(X[:, 0].min(), X[:, 0].max(), 0.002)
y_ = np.arange(X[:, 1].min(), X[:, 1].max(), 0.002)
xx, yy = np.meshgrid(x_, y_)
Z = cl.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# and visualize the grid on the same plot (decision surfaces).
# zmin/zmax are fixed to the label range so that each class keeps its own
# colour even if the whole grid is predicted as a single class (with the
# automatic colour range a constant Z has no well-defined mapping).
cs = go.Heatmap(x=x_, y=y_, z=Z, zmin=0, zmax=1, showscale=False,
                colorscale=[[0, 'rgba(255, 182, 193, .3)'],
                            [1, 'rgba(100, 100, 220, .3)']])
mylayout = go.Layout(xaxis=dict(title="spectral_centroid_mean"),
                     yaxis=dict(title="energy_entropy_mean"))
plotly.offline.iplot(go.Figure(data=[p1, p2, cs], layout=mylayout))


Analyzing file 1 of 10: data/music/classical/classical.00012.au.wav
Analyzing file 2 of 10: data/music/classical/classical.00017.au.wav
Analyzing file 3 of 10: data/music/classical/classical.00023.au.wav
Analyzing file 4 of 10: data/music/classical/classical.00027.au.wav
Analyzing file 5 of 10: data/music/classical/classical.00030.au.wav
Analyzing file 6 of 10: data/music/classical/classical.00040.au.wav
Analyzing file 7 of 10: data/music/classical/classical.00048.au.wav
Analyzing file 8 of 10: data/music/classical/classical.00052.au.wav
Analyzing file 9 of 10: data/music/classical/classical.00057.au.wav
Analyzing file 10 of 10: data/music/classical/classical.00062.au.wav
Feature extraction complexity ratio: 2.1 x realtime
Analyzing file 1 of 10: data/music/metal/metal.00015.au.wav
Analyzing file 2 of 10: data/music/metal/metal.00023.au.wav
Analyzing file 3 of 10: data/music/metal/metal.00034.au.wav
Analyzing file 4 of 10: data/music/metal/metal.00038.au.wav
Analyzing file 5 of 10: dat

In [7]:
# Example6: use pyAudioAnalysis wrapper
# to extract feature and train SVM classifier
# for 20 music (10 classical/10 metal) song samples

from pyAudioAnalysis.audioTrainTest import extract_features_and_train

# one directory of audio samples per class
dirs = ["data/music/classical", "data/music/metal"]

# the same value is used for window and step at each level
# (passed twice below): 1.0 for mid-term, 0.05 for short-term
mt = 1.0
st = 0.05

# classifier type and the name under which the model is stored
classifier_type = "svm_rbf"
model_name = "svm_classical_metal"

extract_features_and_train(dirs, mt, mt, st, st,
                           classifier_type, model_name)


Analyzing file 1 of 10: data/music/classical/classical.00012.au.wav
Analyzing file 2 of 10: data/music/classical/classical.00017.au.wav
Analyzing file 3 of 10: data/music/classical/classical.00023.au.wav
Analyzing file 4 of 10: data/music/classical/classical.00027.au.wav
Analyzing file 5 of 10: data/music/classical/classical.00030.au.wav
Analyzing file 6 of 10: data/music/classical/classical.00040.au.wav
Analyzing file 7 of 10: data/music/classical/classical.00048.au.wav
Analyzing file 8 of 10: data/music/classical/classical.00052.au.wav
Analyzing file 9 of 10: data/music/classical/classical.00057.au.wav
Analyzing file 10 of 10: data/music/classical/classical.00062.au.wav
Feature extraction complexity ratio: 52.6 x realtime
Analyzing file 1 of 10: data/music/metal/metal.00015.au.wav
Analyzing file 2 of 10: data/music/metal/metal.00023.au.wav
Analyzing file 3 of 10: data/music/metal/metal.00034.au.wav
Analyzing file 4 of 10: data/music/metal/metal.00038.au.wav
Analyzing file 5 of 10: da

Param = 0.50000 - classifier Evaluation Experiment 38 of 100
Param = 0.50000 - classifier Evaluation Experiment 39 of 100
Param = 0.50000 - classifier Evaluation Experiment 40 of 100
Param = 0.50000 - classifier Evaluation Experiment 41 of 100
Param = 0.50000 - classifier Evaluation Experiment 42 of 100
Param = 0.50000 - classifier Evaluation Experiment 43 of 100
Param = 0.50000 - classifier Evaluation Experiment 44 of 100
Param = 0.50000 - classifier Evaluation Experiment 45 of 100
Param = 0.50000 - classifier Evaluation Experiment 46 of 100
Param = 0.50000 - classifier Evaluation Experiment 47 of 100
Param = 0.50000 - classifier Evaluation Experiment 48 of 100
Param = 0.50000 - classifier Evaluation Experiment 49 of 100
Param = 0.50000 - classifier Evaluation Experiment 50 of 100
Param = 0.50000 - classifier Evaluation Experiment 51 of 100
Param = 0.50000 - classifier Evaluation Experiment 52 of 100
Param = 0.50000 - classifier Evaluation Experiment 53 of 100
Param = 0.50000 - classi

Param = 1.00000 - classifier Evaluation Experiment 85 of 100
Param = 1.00000 - classifier Evaluation Experiment 86 of 100
Param = 1.00000 - classifier Evaluation Experiment 87 of 100
Param = 1.00000 - classifier Evaluation Experiment 88 of 100
Param = 1.00000 - classifier Evaluation Experiment 89 of 100
Param = 1.00000 - classifier Evaluation Experiment 90 of 100
Param = 1.00000 - classifier Evaluation Experiment 91 of 100
Param = 1.00000 - classifier Evaluation Experiment 92 of 100
Param = 1.00000 - classifier Evaluation Experiment 93 of 100
Param = 1.00000 - classifier Evaluation Experiment 94 of 100
Param = 1.00000 - classifier Evaluation Experiment 95 of 100
Param = 1.00000 - classifier Evaluation Experiment 96 of 100
Param = 1.00000 - classifier Evaluation Experiment 97 of 100
Param = 1.00000 - classifier Evaluation Experiment 98 of 100
Param = 1.00000 - classifier Evaluation Experiment 99 of 100
Param = 1.00000 - classifier Evaluation Experiment 100 of 100
Param = 5.00000 - class

Param = 10.00000 - classifier Evaluation Experiment 31 of 100
Param = 10.00000 - classifier Evaluation Experiment 32 of 100
Param = 10.00000 - classifier Evaluation Experiment 33 of 100
Param = 10.00000 - classifier Evaluation Experiment 34 of 100
Param = 10.00000 - classifier Evaluation Experiment 35 of 100
Param = 10.00000 - classifier Evaluation Experiment 36 of 100
Param = 10.00000 - classifier Evaluation Experiment 37 of 100
Param = 10.00000 - classifier Evaluation Experiment 38 of 100
Param = 10.00000 - classifier Evaluation Experiment 39 of 100
Param = 10.00000 - classifier Evaluation Experiment 40 of 100
Param = 10.00000 - classifier Evaluation Experiment 41 of 100
Param = 10.00000 - classifier Evaluation Experiment 42 of 100
Param = 10.00000 - classifier Evaluation Experiment 43 of 100
Param = 10.00000 - classifier Evaluation Experiment 44 of 100
Param = 10.00000 - classifier Evaluation Experiment 45 of 100
Param = 10.00000 - classifier Evaluation Experiment 46 of 100
Param = 

Param = 20.00000 - classifier Evaluation Experiment 73 of 100
Param = 20.00000 - classifier Evaluation Experiment 74 of 100
Param = 20.00000 - classifier Evaluation Experiment 75 of 100
Param = 20.00000 - classifier Evaluation Experiment 76 of 100
Param = 20.00000 - classifier Evaluation Experiment 77 of 100
Param = 20.00000 - classifier Evaluation Experiment 78 of 100
Param = 20.00000 - classifier Evaluation Experiment 79 of 100
Param = 20.00000 - classifier Evaluation Experiment 80 of 100
Param = 20.00000 - classifier Evaluation Experiment 81 of 100
Param = 20.00000 - classifier Evaluation Experiment 82 of 100
Param = 20.00000 - classifier Evaluation Experiment 83 of 100
Param = 20.00000 - classifier Evaluation Experiment 84 of 100
Param = 20.00000 - classifier Evaluation Experiment 85 of 100
Param = 20.00000 - classifier Evaluation Experiment 86 of 100
Param = 20.00000 - classifier Evaluation Experiment 87 of 100
Param = 20.00000 - classifier Evaluation Experiment 88 of 100
Param = 

In [8]:
# Example7: use trained model from Example6
# to classify an unknown sample (song)
from pyAudioAnalysis import audioTrainTest as aT

# model name and type, as used when the model was trained
model_name, model_type = "svm_classical_metal", "svm_rbf"

files_to_test = ["data/music/test/classical.00095.au.wav",
                 "data/music/test/metal.00004.au.wav",
                 "data/music/test/rock.00037.au.wav"]

for wav_path in files_to_test:
    print(f'{wav_path}:')
    # c is not used below; p and p_nam are indexed in parallel
    # (value p[k] is reported for class name p_nam[k])
    c, p, p_nam = aT.file_classification(wav_path, model_name, model_type)
    # report the first two classes, one per line
    for k in (0, 1):
        print(f'P({p_nam[k]}={p[k]})')
    print()


data/music/test/classical.00095.au.wav:
P(classical=0.7209757726699049)
P(metal=0.27902422733009513)

data/music/test/metal.00004.au.wav:
P(classical=0.06528456176913883)
P(metal=0.934715438230861)

data/music/test/rock.00037.au.wav:
P(classical=0.1598667500246094)
P(metal=0.8401332499753905)



In [9]:
# Example8: use trained model from Example6
# to classify audio files organized in folders
# and evaluate the predictions, assuming that
# foldernames = classes names as during training

from pyAudioAnalysis import audioTrainTest as aT

# one folder of test files per class
test_folders = ["data/music/test/classical",
                "data/music/test/metal"]
model_name, model_type = "svm_classical_metal", "svm_rbf"
# NOTE(review): presumably the class treated as "positive" when
# computing the evaluation curves - confirm against the library docs
reference_class = "classical"

# kept as the last expression so the notebook displays the returned value
aT.evaluate_model_for_folders(test_folders, model_name,
                              model_type, reference_class)


[[21  2]
 [ 5 17]]
[0.9130434782608695, 0.7727272727272727] [0.8076923076923077, 0.8947368421052632] [0.8571428571428572, 0.8292682926829269] 0.8444444444444444 0.8444444444444444


(array([[21,  2],
        [ 5, 17]]),
 array([0.3129702 , 0.43069888, 0.44629498, 0.45153444, 0.47177612,
        0.5       , 0.51524238, 0.52908438, 0.54182688, 0.5466315 ,
        0.56990164, 0.5723461 , 0.5984971 , 0.65070468, 0.69404812,
        0.6943266 , 0.75915432, 0.79075793, 0.90885072, 0.91644217,
        0.91966662, 0.93012789, 0.94087389, 0.96275864, 0.96945032,
        0.97132719, 0.97797007, 0.98279252, 0.98409325, 0.98788246,
        0.99530983]),
 array([0.74193548, 0.73333333, 0.75862069, 0.75      , 0.77777778,
        0.80769231, 0.84      , 0.83333333, 0.86956522, 0.90909091,
        0.9047619 , 0.95      , 0.94736842, 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        ]),
 array([1.        , 0.95652174, 0.95652174, 0.91304348, 0.91304348,
        0.91304348, 0.

In [12]:
# Example9:
# Train two linear SVM regression models
# that map song segments to pitch and pitch deviation
# The following function searches for .csv files in the
# input folder. For each csv of the format <filename>,<value>
# a separate regression model is trained

from pyAudioAnalysis import audioTrainTest as aT

train_folder = "data/regression/f0/segments_train"
# window and step values passed to the trainer, in call order
mid_window, mid_step = 0.5, 0.5
short_window, short_step = 0.05, 0.05
# regression model type and name prefix of the stored models
model_type, model_name = "svm", "singing"

# kept as the last expression so the notebook displays the returned value;
# the trailing False is the last positional argument of the original call
aT.feature_extraction_train_regression(train_folder,
                                       mid_window, mid_step,
                                       short_window, short_step,
                                       model_type, model_name, False)

Analyzing file 1 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_263.1595.wav
Analyzing file 2 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_264.957.wav
Analyzing file 3 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_301.632.wav
Analyzing file 4 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_328.748.wav
Analyzing file 5 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_331.2835.wav
Analyzing file 6 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_340.691.wav
Analyzing file 7 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_351.20450000000005.wav
Analyzing file 8 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_352.952.wav
Analyzing file 9 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_355.5175.wav
Analyzing file 10 of 120: data/regression/f0/segments_train/CSD_ER_alto_1.wav_segments_355.5800000000000

Analyzing file 89 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_277.908.wav
Analyzing file 90 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_292.791.wav
Analyzing file 91 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_294.52.wav
Analyzing file 92 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_310.715.wav
Analyzing file 93 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_329.986.wav
Analyzing file 94 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_330.364.wav
Analyzing file 95 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_331.14149999999995.wav
Analyzing file 96 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_332.3425.wav
Analyzing file 97 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_364.2785.wav
Analyzing file 98 of 120: data/regression/f0/segments_train/CSD_LI_alto_4.wav_segments_368.4835.w

([323.2007973561379, 689.1165160386663],
 [522.1201211817348, 3087.7069154520095],
 [0.5, 1.0])

In [32]:
# Example10
# load trained regression model for f0 and apply it to a folder
# of WAV files and evaluate (use csv file with ground truths)

import glob
import csv
import os
import numpy as np
import plotly.graph_objs as go
import plotly
from pyAudioAnalysis import audioTrainTest as aT

# read all files in testing folder
# (sorted, because the order returned by glob is not guaranteed)
wav_files_to_test = sorted(glob.glob("data/regression/f0/segments_test/*.wav"))

# ground truths: csv rows of the form <filename>,<value>
ground_truths = {}
# newline='' is the documented way to open files for the csv module
with open('data/regression/f0/segments_test/f0.csv', 'r', newline='') as file:
    reader = csv.reader(file, delimiter=',')
    for row in reader:
        if len(row) < 2:
            # skip blank / incomplete rows instead of failing on row[1]
            continue
        ground_truths[row[0]] = float(row[1])
estimated_val, gt_val = [], []

for w in wav_files_to_test:  # for each audio file
    wav_name = os.path.basename(w)
    # only files with an available ground truth can be evaluated, so
    # skip the others before running the (costly) regression on them
    if wav_name not in ground_truths:
        continue
    # get the estimates for all regression models starting with "singing"
    values, tasks = aT.file_regression(w, "singing", "svm")
    # ... and append ground truth and estimated values
    # for the f0 task
    estimated_val.append(values[tasks.index('f0')])
    gt_val.append(ground_truths[wav_name])

if not gt_val:
    # fail with a clear message rather than with a NaN error value
    # and a "min() arg is an empty sequence" error further below
    raise ValueError('no test WAV file has a ground truth entry in f0.csv')

# compute mean square error:
mse = ((np.array(estimated_val) - np.array(gt_val))**2).mean()
print(f'Testing MSE={mse}')

# plot real vs predicted results
p = go.Scatter(x=gt_val, y=estimated_val, mode='markers')
# reference diagonal (predicted == real) spanning the full value range
all_values = gt_val + estimated_val
lowest, highest = min(all_values), max(all_values)
diagonal = go.Scatter(x=[lowest, highest], y=[lowest, highest])
mylayout = go.Layout(xaxis=dict(title="f0 real"),
                     yaxis=dict(title="f0 predicted"),
                     showlegend=False)
plotly.offline.iplot(go.Figure(data=[p, diagonal], layout=mylayout))

Testing MSE=492.7141430351564
