In [1]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
import os
import soundfile as sf
from scipy.io import wavfile #for audio processing

Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [2]:
import tensorflow as tf
print(tf.__version__)

2.1.0


In [3]:
from keras.models import Model , Sequential
from keras.utils import Sequence
import keras

Using TensorFlow backend.


In [4]:
# Make the project folder the working directory; later cells build paths from os.getcwd().
# NOTE(review): absolute, machine-specific path — the notebook only runs where this directory exists.
os.chdir('/home/varun/Desktop/speechReco')

In [5]:
os.getcwd()

'/home/varun/Desktop/speechReco'

In [6]:
# Descend into the dataset folder located under the current working directory.
# NOTE(review): not idempotent — the target is derived from whatever the cwd is
# right now, so running this cell a second time looks for data1 inside data1.
path = '/data1'
os.chdir(os.getcwd() +path)

In [7]:
os.getcwd()

'/home/varun/Desktop/speechReco/data1'

In [8]:
# Word classes handed to DataGenerator when the training generator is built.
all_classes = ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go', 'silence', 'house', 'nine', 'four', 'happy', 'wow', 'five', 'zero', 'tree', 'six', 'one', 'three', 'eight', 'cat', 'two', 'seven', 'marvin', 'dog', 'sheila', 'bed', 'bird']

In [9]:
len(all_classes)

31

In [10]:
# Length that encoded label sequences are padded to (see get_label).
max_label = 23 

In [11]:
# Symbol/code table: one "<symbol> <code>" pair per line.
char_map_str = """
<SPACE> 0
a 1
b 2
c 3
d 4
e 5
f 6
g 7
h 8
i 9
j 10
k 11
l 12
m 13
n 14
o 15
p 16
q 17
r 18
s 19
t 20
u 21
v 22
w 23
x 24
y 25
z 26
' 27
"""

# Parse the table once, then derive both lookup directions from it:
# char_map goes symbol -> code, index_map goes code -> symbol.
_pairs = [entry.split() for entry in char_map_str.strip().split('\n')]
char_map = {symbol: int(code) for symbol, code in _pairs}
index_map = {int(code): symbol for symbol, code in _pairs}

# Code 0 decodes to a real blank instead of the "<SPACE>" placeholder token.
index_map[0] = ' '
def get_label(Y , max_label):
    """Encode a transcript as an array of integer character codes.

    Each character of ``Y`` is looked up in ``char_map``. Characters that are
    not keys of ``char_map`` are dropped, and so is the apostrophe, whose
    code (27) doubles as the padding value here.

    Parameters
    ----------
    Y : iterable of str
        Transcript to encode, one character at a time.
    max_label : int
        Minimum length of the result; shorter encodings are right-padded
        with 27. Longer encodings are returned as they are, not truncated.

    Returns
    -------
    numpy.ndarray
        The character codes followed by any padding.
    """
    codes = [char_map[c] for c in Y if c in char_map and c != "'"]

    # A non-positive shortfall multiplies to an empty list, i.e. no padding.
    codes += [27] * (max_label - len(codes))

    return np.array(codes)

In [12]:
# Collect every recording as "<class_dir>/<file>.wav", relative to the current
# working directory (the dataset folder at this point of the notebook).
list_IDs = []

for direc in os.listdir():
    class_dir = os.path.join(os.getcwd(), direc)
    # The dataset root may also hold plain files; calling os.listdir() on one
    # of those raises NotADirectoryError, so only descend into directories.
    if not os.path.isdir(class_dir):
        continue
    file = [f for f in os.listdir(class_dir) if f.endswith('.wav')]
    for f in file:
        list_IDs.append(direc + '/' + f)

# Sanity check: show one collected ID.
print((list_IDs[1]))

go/80fe1dc7_nohash_0.wav


In [13]:
def graph_spectrogram(wav_file):
    """Compute the spectrogram of a WAV file with ``plt.specgram``.

    Only the first channel of a two-channel recording is analysed.

    Parameters
    ----------
    wav_file : str
        Path of the WAV file; it is read through ``get_wav_info``.

    Returns
    -------
    The first value returned by ``plt.specgram`` (the spectrum array).

    Raises
    ------
    ValueError
        If the decoded audio is neither 1-D nor 2-D. Previously this case
        surfaced as an ``UnboundLocalError`` on the return statement.
    """
    _, data = get_wav_info(wav_file)
    nfft = 200 # Length of each window segment
    # NOTE(review): the sample rate read from the file is ignored in favour of
    # this constant — confirm that is intended.
    fs = 8000 # Sampling frequencies
    noverlap = 120 # Overlap between windows

    nchannels = data.ndim
    if nchannels == 1:
        signal = data
    elif nchannels == 2:
        signal = data[:, 0]
    else:
        raise ValueError(
            "expected 1-D or 2-D audio data, got %d dimensions" % nchannels
        )

    # plt.specgram also draws onto the current matplotlib axes as a side effect.
    pxx, freqs, bins, im = plt.specgram(signal, NFFT=nfft, Fs=fs, noverlap=noverlap)
    return pxx

# Load a wav file
def get_wav_info(wav_file):
    """Read a WAV file and return ``(rate, data)`` as given by ``wavfile.read``.

    Parameters
    ----------
    wav_file : str or file-like
        Passed straight to ``scipy.io.wavfile.read``.

    Returns
    -------
    tuple
        ``(rate, data)``: the sample rate and the decoded samples.
    """
    sample_rate, samples = wavfile.read(wav_file)
    return (sample_rate, samples)

def modify_spectrogram_shape(sample ,shape = (101,198) ):
    """Right-pad a spectrogram with zero columns up to ``shape``.

    Parameters
    ----------
    sample : numpy.ndarray
        2-D array whose row count matches ``shape[0]`` and whose column
        count does not exceed ``shape[1]``.
    shape : tuple of int
        Shape of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``shape`` holding ``sample`` in its leading columns
        and zeros in the remaining ones.
    """
    padded = np.zeros(shape)
    padded[: , :sample.shape[1]] = sample
    # Return the padded copy; the original returned the untouched input, so
    # the padding was silently discarded.
    return padded

def add_noise(sample , noise_factor):
    """Add scaled standard-normal noise to an array.

    Parameters
    ----------
    sample : numpy.ndarray
        Input array of any shape.
    noise_factor : float
        Multiplier applied to the noise before it is added.

    Returns
    -------
    numpy.ndarray
        ``sample + noise_factor * noise`` cast back to ``sample.dtype``
        (for integer inputs the cast discards the fractional part).
    """
    # randn takes the dimensions as separate arguments, so the shape tuple has
    # to be unpacked; passing the tuple itself raises TypeError.
    noise = np.random.randn(*sample.shape)
    augmented_data = sample + noise_factor * noise
    # Use the array's dtype: type(sample[0]) is only a scalar type for 1-D
    # input; for 2-D input it is ndarray, which is not a usable target dtype.
    augmented_data = augmented_data.astype(sample.dtype)
    return augmented_data

def normalise_spectrogram(sample):
    """Standardise an array along axis 0 (zero mean, unit standard deviation).

    Parameters
    ----------
    sample : numpy.ndarray
        Array to normalise; statistics are taken over axis 0.

    Returns
    -------
    numpy.ndarray
        ``(sample - mean) / std``. Where the standard deviation is zero the
        divisor is replaced by 1, so constant slices come out as zeros
        instead of NaN.
    """
    mean = np.mean(sample, axis=0)
    std = np.std(sample, axis=0)
    # Constant columns (for example zero-padded ones) have std == 0; dividing
    # by it would yield 0/0 = NaN and poison the whole batch.
    safe_std = np.where(std == 0, 1, std)
    sample = (sample - mean) / safe_std

    return sample

In [60]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that builds batches of spectrograms and encoded labels.

    Every entry of ``list_IDs`` is a ``"<label>/<file>.wav"`` path relative to
    the current working directory. The text before the first ``/`` is the
    transcript, which is encoded with ``get_label``.

    Parameters
    ----------
    all_classes : list
        Stored on the instance; not otherwise used by this class.
    list_IDs : list of str
        Sample identifiers as described above.
    max_label : int
        Length every encoded label is padded to.
    batch_size : int
        Number of samples per batch.
    noise_factor : float
        Scale handed to ``add_noise`` when ``add_noise`` is enabled.
    add_noise : bool
        Whether to augment each spectrogram with noise.
    normalise : bool
        Whether to run ``normalise_spectrogram`` on each spectrogram.
    dim : tuple of int
        Shape every spectrogram is zero-padded to.
    shuffle : bool
        Whether to reshuffle the sample order at the end of each epoch.
    """
    def __init__(self, all_classes, list_IDs, max_label, batch_size=32, noise_factor = 0.1 , add_noise = False , normalise = False ,dim=(101,198),
                 shuffle=True ):
        """Store the configuration and build the first index order."""
        self.dim = dim
        self.batch_size = batch_size
        self.all_classes = all_classes
        self.list_IDs = list_IDs
        self.shuffle = shuffle
        self.noise_factor = noise_factor
        self.add_noise = add_noise
        self.normalise = normalise
        self.max_label = max_label
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a remainder is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Build batch number ``index``.

        NOTE(review): this returns the *shapes* of the generated arrays, not
        the arrays themselves, which looks like leftover debugging. It is left
        unchanged because the batch layout the model expects is not visible
        here.
        """
        # Positions (into list_IDs) of the samples belonging to this batch
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]

        # Resolve positions to sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Load spectrograms and encoded labels
        X, labels = self.__data_generation(list_IDs_temp)

        # Both length vectors carry max_label once per sample
        input_length = np.full(self.batch_size, self.max_label)
        label_length = np.full(self.batch_size, self.max_label)

        return X.shape, labels.shape , input_length.shape , label_length.shape

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given samples into ``X`` (batch_size, *dim) and ``y`` (batch_size, max_label)."""
        X = np.empty((self.batch_size, *self.dim ))
        # Size the label buffer from the instance setting; the original read
        # the notebook-level global ``max_label`` and only worked while the
        # two happened to be equal.
        y = np.empty((self.batch_size , self.max_label ), dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Spectrogram of the recording, located relative to the cwd
            sample = graph_spectrogram(os.getcwd() + '/' + ID)
            if sample.shape != self.dim:
                # Zero-pad narrower spectrograms on the right up to dim
                padded = np.zeros(self.dim)
                padded[: , :sample.shape[1]] = sample
                sample = padded
            if self.add_noise:
                # Resolves to the module-level add_noise function; the
                # instance attribute of the same name is only the on/off flag.
                sample = add_noise(sample , self.noise_factor)
            if self.normalise:
                sample = normalise_spectrogram(sample)

            X[i,] = sample

            # The directory part of the ID is the transcript
            y[i,] = get_label(ID.split('/')[0] , self.max_label)

        return X, y

In [61]:
# Use the max_label constant defined earlier instead of repeating the literal 23,
# so the generator and the label configuration cannot drift apart.
training_generator = DataGenerator(all_classes ,list_IDs, max_label)
