In [42]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

datadir = "../data"
gui_train = "../ptdb_tug.gui"
gui_test = "../fda_ue.gui"
windowlength = 32
frameshift = 15
padding = 16

In [43]:
def get_zero_crossing(frame):
    """
    Computes zero crossing rate of frame
    """
    count = len(frame)
    countZ = np.sum(np.abs(np.diff(np.sign(frame)))) / 2
    return np.float64(countZ) / np.float64(count-1.0)


def get_energy(frame):
    """
    Computes signal energy of frame
    """
    return np.sum(frame ** 2) / np.float64(len(frame))

In [44]:
from scipy.io import wavfile

def get_data(gui):
    
    wav_files = []
    voicing_results = []
    sample_rate = 0

    with open(gui) as f:
        for line in f:
            line = line.strip()
            if len(line) == 0:
                continue
            file_name = os.path.join(datadir, line + ".wav")
            voicing_file = os.path.join(datadir, line + ".f0ref")
            sample_rate, data = wavfile.read(file_name)

            nSamples = len(data)

            # From miliseconds to samples
            ns_windowlength = int(round((windowlength * sample_rate) / 1000))
            ns_frameshift = int(round((frameshift * sample_rate) / 1000))
            ns_padding = int(round((padding * sample_rate) / 1000))

            frames = []
            for ini in range(-ns_padding, nSamples - ns_windowlength + ns_padding + 1, ns_frameshift):
                first_sample = max(0, ini)
                last_sample = min(nSamples, ini + ns_windowlength)
                frame = data[first_sample:last_sample]
                frames.append(frame)

            wav_files += frames

            with open(voicing_file) as v:
                for line in v:
                    if line == "0":
                        voicing_results.append(False)
                    else:
                        voicing_results.append(True)

    np_wav = np.array(wav_files)
    np_voicing = np.array(voicing_results)
    print("nb_samples_wav", np_wav.size)
    print("nb_samples_voicing", np_voicing.size)
    print("sample_rate", sample_rate)
    np_wav = np_wav[:len(np_voicing)]
    
    formated_data = []
    for data in np_wav:
        zc = get_zero_crossing(data)
        energy = get_energy(data)
        formated_data.append([zc, energy])
    np_formated = np.array(formated_data)
    np_formated[:5]
    
    return formated_data, np_voicing

In [46]:
formated_data_test, np_voicing_test = get_data(gui_test)
formated_data_train, np_voicing_train = get_data(gui_train)

nb_samples_wav 22175
nb_samples_voicing 22140
sample_rate 20000
nb_samples_wav 2308001
nb_samples_voicing 3429011
sample_rate 48000


In [47]:
formated_train = np_formated[:len(np_voicing) - len(np_voicing)//10]
voicing_train = np_voicing[:len(np_voicing) - len(np_voicing)//10]

formated_test = np_formated[len(np_voicing) - len(np_voicing)//10: len(np_voicing)]
voicing_test = np_voicing[len(np_voicing) - len(np_voicing)//10: len(np_voicing)]

In [48]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict

forest_clf = RandomForestClassifier(random_state=42)
forest_clf.fit(formated_train, voicing_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

In [49]:
res = forest_clf.predict(formated_test)

In [50]:
number_failures = 0
for i in range(len(formated_test)):
    if res[i] != voicing_test[i]:
        number_failures += 1
print(number_failures/len(formated_test))

0.0
