# Testing feature extraction

In [3]:
from python_speech_features import mfcc, logfbank
from scipy.io import wavfile
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def plot_signal(signal):
    """Draw `signal` as a line plot against its sample index."""
    samples = list(signal)
    plt.plot(samples)

def plot_fft(fft):
    """Plot a spectrum supplied as a (magnitudes, frequencies) pair.

    The first item of `fft` goes on the y axis and the second on the
    x axis, matching the tuple order returned by `calc_fft`.
    """
    parts = list(fft)
    magnitudes = parts[0]
    frequencies = parts[1]
    plt.plot(frequencies, magnitudes)

def plot_fbank(fbank):
    """Show `fbank` as an image using the 'hot' colormap, without smoothing."""
    rows = list(fbank)
    plt.imshow(rows, interpolation='nearest', cmap='hot')

def plot_mfccs(mfccs):
    """Show `mfccs` as an image using the 'hot' colormap, without smoothing."""
    rows = list(mfccs)
    plt.imshow(rows, interpolation='nearest', cmap='hot')

def calc_fft(signal, rate):
    """Compute the length-normalised magnitude spectrum of a real signal.

    Args:
        signal: sequence of real samples.
        rate: sampling rate; the sample spacing passed to numpy is 1/rate.

    Returns:
        A tuple ``(magnitudes, frequencies)``: the absolute value of the
        real FFT divided by the number of samples, and the frequency of
        each FFT bin.
    """
    sample_total = len(signal)
    magnitudes = abs(np.fft.rfft(signal) / sample_total)
    frequencies = np.fft.rfftfreq(sample_total, d=1/rate)
    return (magnitudes, frequencies)

In [None]:
# Load one clip from the test set as (sample rate, samples) for the exploratory cells.
sample_path = 'data/FSDKaggle2018.audio_test/00326aa9.wav'
rate, signal = wavfile.read(sample_path)

In [None]:
# plot_signal(signal)

In [None]:
# fft = calc_fft(signal, rate)
# plot_fft(fft)

In [None]:
# bank = logfbank(signal[:rate], rate, nfilt=26, nfft=1103).T
# plot_fbank(bank)

In [None]:
# MFCCs of the first `rate` samples of the clip, transposed so that each
# row is one coefficient.
# nfft is 1103 to match every other mfcc/logfbank call in this notebook;
# the previous 1102 was off by one. Assuming 44.1 kHz audio and the
# library's default 25 ms window (1103-sample frames) -- TODO confirm --
# an FFT of 1102 points would truncate every frame.
mel = mfcc(signal[:rate], rate, numcep=13, nfilt=26, nfft=1103).T
print(np.shape(mel))
plot_mfccs(mel)

In [None]:
# Print the MFCC array currently held in `mel`.
print(mel)

## Calculate Raw Value Histogram Bins

In [None]:
import pandas as pd
import math

df = pd.read_csv('data/FSDKaggle2018.meta/test_post_competition_scoring_clips.csv', delimiter=',')
# NOTE(review): only the first 5 rows are used here, whereas the later
# cells process every row -- confirm whether this limit is intentional.
tuples = [tuple(x) for x in df.values][:5]

sample_count = 0
mels = []

# Compute one MFCC matrix per listed clip; the first CSV column is the file name.
for row in tuples:
    file = 'data/FSDKaggle2018.audio_test/' + row[0]
    rate, signal = wavfile.read(file)
    mel = mfcc(signal, rate, numcep=13, nfilt=26, nfft=1103)
    sample_count += np.shape(mel)[0]
    mels.append(mel)

# Pool every coefficient of every frame into one flat vector. The values
# are sorted straight away, so the order in which they are gathered does
# not matter and a vectorised flatten replaces the per-element copy loop.
if mels:
    values = np.concatenate([np.ravel(mel) for mel in mels])
    values = values.astype(np.float64, copy=False)
else:
    values = np.zeros(0)
values = np.sort(values)

# Total number of pooled values (frames x coefficients over all clips).
n = values.size

print(np.shape(values))

# Print the lower edge of each of `bin_count` equally populated bins.
bin_count = 16
bin_size = n // bin_count

for i in range(bin_count):
    print(i, bin_size, values[i*bin_size])

In [None]:
import bisect

# Ascending bin edges for raw feature values: 15 edges split the number
# line into 16 bins (presumably taken from the quantile printout of the
# raw-value cell -- confirm before regenerating).
bins = [
    -29.986096130739355,
    -19.195566310391943,
    -15.00016366240139,
    -10.998076289023,
    -6.748456575899296,
    -3.495771366216079,
    0.12159938915146001,
    3.2165856050505877,
    6.215759303055032,
    8.07164885810958,
    10.26041457182332,
    12.93044695246801,
    15.819326383946992,
    18.75778148814183,
    21.376683980339166,
]


def get_bin(value):
    """Return the index of the bin that `value` falls into.

    The result is the position of the first edge in `bins` that is
    strictly greater than `value`, or ``len(bins)`` when no edge is.
    A value equal to an edge therefore belongs to the bin above it.
    `bins` must be sorted ascending; the lookup is a binary search.
    """
    return bisect.bisect_right(bins, value)

## Calculate Delta Value Histogram Bins

In [None]:
import pandas as pd
import math

df = pd.read_csv('data/FSDKaggle2018.meta/test_post_competition_scoring_clips.csv', delimiter=',')
tuples = [tuple(x) for x in df.values]

sample_count = 0
mels = []

# Compute one MFCC matrix per listed clip; the first CSV column is the file name.
for row in tuples:
    file = 'data/FSDKaggle2018.audio_test/' + row[0]
    rate, signal = wavfile.read(file)
    mel = mfcc(signal, rate, numcep=13, nfilt=26, nfft=1103)
    sample_count += np.shape(mel)[0]
    mels.append(mel)

# Frame-to-frame change of each coefficient, taken separately per clip and
# per coefficient. The earlier running-`last_v` loop was never reset, so it
# also differenced the last frame of one coefficient (or clip) against the
# first frame of the next. The cells that apply these bins restart their
# history for every coefficient, so those cross-boundary deltas did not
# belong to the population being binned. Edges printed below can therefore
# differ slightly from ones recorded with the old loop.
delta_chunks = [np.ravel(np.diff(mel, axis=0)) for mel in mels]
if delta_chunks:
    values = np.sort(np.concatenate(delta_chunks))
else:
    values = np.zeros(0)

print(np.shape(values))

# Print the lower edge of each of `bin_count` equally populated bins.
bin_count = 16
bin_size = len(values) // bin_count

for i in range(bin_count):
    print(i, bin_size, values[i*bin_size])

In [5]:
import bisect

# Ascending bin edges for frame-to-frame delta values: 15 edges, 16 bins.
delta_bins = [
    -7.449361934437546,
    -4.959642638189663,
    -3.5063742894420997,
    -2.4518990367612536,
    -1.6107117954938024,
    -0.9037185755899212,
    -0.3158629551912462,
    -0.007594296904398945,
    0.3127680464479745,
    0.9294957576276741,
    1.644535296152842,
    2.487457184027898,
    3.5417890204116764,
    4.989806561955705,
    7.457516420676731,
]

# Index of the bin that a delta of exactly 0 falls into with the edges above.
delta_zero_bin = 8


def get_delta_bin(value):
    """Return the index of the delta bin that `value` falls into.

    The result is the position of the first edge in `delta_bins` that is
    strictly greater than `value`, or ``len(delta_bins)`` when no edge
    is. `delta_bins` must be sorted ascending; the lookup is a binary
    search.
    """
    return bisect.bisect_right(delta_bins, value)

## Calculate Interval Histogram Bins

In [31]:
import pandas as pd
import math


def get_interval(current_time, history_times, current_delta_bin):
    """Return the time elapsed since the zero-delta bin was last recorded.

    Reads ``history_times[delta_zero_bin]``. If that slot still holds the
    sentinel -1, returns -1; otherwise returns `current_time` minus the
    stored time.

    NOTE(review): `current_delta_bin` is accepted but never used -- the
    lookup always targets `delta_zero_bin`. Confirm this is intended.
    """
    last_zero_time = history_times[delta_zero_bin]
    return -1 if last_zero_time == -1 else current_time - last_zero_time
    
    
df = pd.read_csv('data/FSDKaggle2018.meta/test_post_competition_scoring_clips.csv', delimiter=',')
tuples = [tuple(row) for row in df.values]

sample_count = 0
mels = []

# Compute one MFCC matrix per listed clip; the first CSV column is the file name.
for entry in tuples:
    file = 'data/FSDKaggle2018.audio_test/' + entry[0]
    rate, signal = wavfile.read(file)
    mel = mfcc(signal, rate, numcep=13, nfilt=26, nfft=1103)
    sample_count += np.shape(mel)[0]
    mels.append(mel)

# Every interval observed, over all clips and coefficients.
V = []

for mel in mels:

    for k in range(np.shape(mel)[1]):  # one pass per coefficient column

        # Step at which each delta bin was last seen (-1 = never); the
        # history restarts for every coefficient.
        history_times = np.full((16,), -1)
        t = 0
        pending_bin = -1          # delta bin of the previous step, -1 until one exists
        previous_v = float('nan')  # previous frame's value, NaN until one exists

        for next_v in mel[:, k]:

            if math.isnan(previous_v):
                # No usable predecessor yet: just remember this value.
                previous_v = next_v
                continue

            next_delta_bin = get_delta_bin(next_v - previous_v)

            if pending_bin > -1:
                # Record the interval first, then stamp the bin with the
                # current step and advance the step counter.
                V.append(get_interval(t, history_times, pending_bin))
                history_times[pending_bin] = t
                t += 1

            pending_bin = next_delta_bin
            previous_v = next_v


V1 = np.sort(np.array(V))

print(np.shape(V1))

# Print the lower edge of each of `bin_count` equally populated bins.
bin_count = 16
bin_size = math.floor(len(V1)/bin_count)

for i in range(bin_count):
    print(i, bin_size, V1[i*bin_size])

(10761296,)
0 672581 -1
1 672581 1
2 672581 2
3 672581 3
4 672581 5
5 672581 7
6 672581 10
7 672581 13
8 672581 16
9 672581 20
10 672581 25
11 672581 31
12 672581 38
13 672581 48
14 672581 63
15 672581 89


In [32]:
import bisect

# Ascending lower edges of interval bins 1..15; anything below the first
# edge (including the -1 "never seen" sentinel) belongs to bin 0.
interval_bins = [
    1,
    2,
    3,
    5,
    7,
    10,
    13,
    16,
    20,
    25,
    31,
    38,
    48,
    63,
    89,
]


def get_interval_bin(interval_value):
    """Return the index of the interval bin containing `interval_value`.

    The result is the number of edges in `interval_bins` that are less
    than or equal to `interval_value`: 0 when the value is below every
    edge, ``len(interval_bins)`` when it reaches the last one.
    `interval_bins` must be sorted ascending; the lookup is a binary
    search. `interval_value` is expected to be a real number.
    """
    return bisect.bisect_right(interval_bins, interval_value)


## Generate Transition Histogram

In [33]:
# Transition counts indexed as [feature, last_state, interval, current_state].
hist = np.zeros((13, 16, 16, 16))

import pandas as pd
import math

df = pd.read_csv('data/FSDKaggle2018.meta/test_post_competition_scoring_clips.csv', delimiter=',')
tuples = [tuple(row) for row in df.values]

sample_count = 0
mels = []

# Compute one MFCC matrix per listed clip; the first CSV column is the file name.
for entry in tuples:
    file = 'data/FSDKaggle2018.audio_test/' + entry[0]
    rate, signal = wavfile.read(file)
    mel = mfcc(signal, rate, numcep=13, nfilt=26, nfft=1103)
    sample_count += np.shape(mel)[0]
    mels.append(mel)

for mel in mels:

    for k in range(np.shape(mel)[1]):  # one pass per coefficient column

        # Step at which each delta bin was last seen (-1 = never); the
        # history restarts for every coefficient.
        history_times = np.full((16,), -1)
        t = 0
        pending_bin = -1          # delta bin of the previous step, -1 until one exists
        previous_v = float('nan')  # previous frame's value, NaN until one exists

        for next_v in mel[:, k]:

            if math.isnan(previous_v):
                # No usable predecessor yet: just remember this value.
                previous_v = next_v
                continue

            next_delta_bin = get_delta_bin(next_v - previous_v)

            if pending_bin > -1:
                interval_bin = get_interval_bin(
                    get_interval(t, history_times, pending_bin))

                # Count: previous delta bin, at this interval bin, followed
                # by the new delta bin.
                hist[k, pending_bin, interval_bin, next_delta_bin] += 1

                history_times[pending_bin] = t
                t += 1

            pending_bin = next_delta_bin
            previous_v = next_v


# Display the interval x current_state slice for feature 7, last_state 8.
hist[7, 8]

array([[ 95., 124., 133., 118., 101.,  92.,  66.,  50.,  59.,  75.,  82.,
        117., 104., 116., 108.,  91.],
       [ 30.,  56.,  44.,  72.,  73.,  74.,  68.,  33.,  34.,  71.,  63.,
         60.,  48.,  38.,  38.,  28.],
       [ 31.,  39.,  50.,  46.,  55.,  60.,  54.,  22.,  26.,  55.,  46.,
         63.,  46.,  48.,  41.,  17.],
       [ 70.,  91.,  99.,  98., 111., 124.,  84.,  61.,  54., 102.,  98.,
        106., 102., 101.,  65.,  50.],
       [ 67.,  87., 101.,  81., 100.,  82.,  96.,  54.,  55., 102.,  90.,
        106.,  94., 104.,  64.,  52.],
       [ 74., 107., 101., 115., 112., 148., 128.,  58.,  74., 129., 118.,
        116., 118., 117., 103.,  76.],
       [ 73.,  95., 102.,  98., 113., 121., 104.,  42.,  59.,  98., 121.,
        115., 111., 102.,  87.,  54.],
       [ 66.,  83.,  99., 102.,  97.,  94.,  83.,  63.,  58.,  78., 109.,
        107., 107.,  90.,  87.,  53.],
       [ 91., 106., 129., 120., 116., 114.,  98.,  72.,  56.,  97., 115.,
        108., 116.,  9

In [36]:
# Display the interval x current_state slice for feature 11, last_state 15.
hist[11,15]

array([[ 890.,  496.,  365.,  288.,  215.,  184.,  174.,   84.,   69.,
         166.,  191.,  170.,  181.,  222.,  179.,  235.],
       [ 217.,  135.,  107.,   87.,   66.,   44.,   41.,   23.,   21.,
          44.,   43.,   52.,   29.,   37.,   50.,   36.],
       [ 179.,  167.,  124.,   64.,   80.,   50.,   55.,   28.,   25.,
          43.,   53.,   46.,   59.,   55.,   67.,   43.],
       [ 407.,  266.,  208.,  173.,  147.,  113.,   71.,   38.,   36.,
          85.,   95.,  101.,   93.,   91.,  103.,   99.],
       [ 377.,  263.,  197.,  182.,  155.,  102.,  109.,   34.,   42.,
          95.,   72.,  113.,   85.,   87.,  114.,  100.],
       [ 565.,  382.,  260.,  242.,  166.,  144.,  144.,   72.,   73.,
         134.,  130.,  144.,  138.,  146.,  156.,  133.],
       [ 508.,  352.,  279.,  225.,  173.,  146.,  132.,   62.,   56.,
         116.,  122.,  144.,  122.,  116.,  130.,  116.],
       [ 481.,  333.,  212.,  185.,  142.,  125.,  114.,   55.,   49.,
         111.,  120.,  130