# Continuous Dev Numpy Generation

In [3]:
!pip install ffmpeg



In [4]:
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
import random
import sys
import io
import os
import glob
import IPython
import wave
import pylab
import pandas as pd
from tf_utils import *
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile

# Import files for trigger-word detection model
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from keras.optimizers import Adam

In [5]:
# --- Raw audio sources -------------------------------------------------------
POSITIVE_DIRECTORY = "./raw_data/positive_data/"
NEGATIVES_DIRECTORY = "./raw_data/google_dataset/"
NEGATIVES_TRUNCATED_DIRECTORY = "./raw_data/google_dataset_truncated/"
BACKGROUND_DIRECTORY = "./raw_data/background_data/"

# --- Continuous dev set ------------------------------------------------------
DEV_DIRECTORY = "./raw_data/dev_dataset/"
# Holds the cont_<id>.wav clips and their cont_<id>.txt segment end times.
DEV_CUT_DIRECTORY = "./raw_data/dev_dataset_cut/"
# Destination of the exported cont_X.npy / cont_Y.npy arrays.
DEV_NPY_DIRECTORY = "./raw_data/dev_npy/"
CONT_EXAMPLE_DIRECTORY = "./cont_example/"

# --- Generated audio examples ------------------------------------------------
AUDIO_EXAMPLES_DIRECTORY = "./audio_examples/"
AUDIO_IGNORED_EXAMPLES_DIRECTORY = "./audio_ignored_examples/"

# --- Stub data and model -----------------------------------------------------
STUB_TRAIN_DIRECTORY = "./stub_data/XY_Train/"
STUB_DEV_DIRECTORY = "./stub_data/XY_Dev/"
STUB_MODEL = "./stub_data/models/tr_model.h5"

# --- Individual file names ---------------------------------------------------
POSITIVE_EXAMPLE = "jh_1.wav"
AUDIO_EXAMPLE = "example_train.wav"

In [6]:
# Sanity check: load one positive recording and play it through the local audio device.
positive_example_path = POSITIVE_DIRECTORY + POSITIVE_EXAMPLE
positive_example_clip = AudioSegment.from_file(positive_example_path)
play(positive_example_clip)

In [7]:
# Number of spectrogram time steps per example (based on created training example).
Tx = 5490
# Number of spectrogram frequency bins (based on created training example).
n_freq = 129
# Number of output/label time steps (based on model.summary() in 1.4 with shape := (Tx, n_freq)).
Ty = 1369

## Some Original Functions

In [8]:
# Original insert_ones(y,segment_end_ms)
# def insert_ones(y, segment_end_ms):
#     """
#     Update the label vector y. The labels of the 50 output steps strictly after the end of the segment 
#     should be set to 1. By strictly we mean that the label of segment_end_y should be 0, while the
#     50 following labels should be ones.
    
    
#     Arguments:
#     y -- numpy array of shape (1, Ty), the labels of the training example
#     segment_end_ms -- the end time of the segment in ms
    
#     Returns:
#     y -- updated labels
#     """
    
#     # duration of the background (in terms of spectrogram time-steps)
#     segment_end_y = int(segment_end_ms * Ty / 10000.0)
#     print(segment_end_y)
    
#     # Add 1 to the correct index in the background label (y)
#     ### START CODE HERE ### (≈ 3 lines)
#     for i in range(segment_end_y + 1, segment_end_y + 51):
#         if i < Ty:
#             y[0, i] = 1
#     ### END CODE HERE ###
    
#     return y

In [9]:
# Original create_training_example(id)
# def create_training_example(id):
#     """
#     Creates a training example with a given background, positives, and negatives with id.
    
#     Arguments:
#     id -- an id is given so that the new files do not replace the previous files
    
#     Returns:
#     x -- the spectrogram of the training example
#     y -- the label at each time step of the spectrogram
#     """
#     global BACKGROUND_AUDIONAMES
#     global POSITIVES_AUDIONAMES
#     global NEGATIVES_AUDIONAMES
    
#     background = random_background(BACKGROUND_AUDIONAMES)
#     positives = random_positives(POSITIVES_AUDIONAMES)
#     negatives = random_negatives(NEGATIVES_AUDIONAMES)
    
#     # Make background quieter
#     background = background - 20
    
#     # Step 1: Initialize y (label vector) of zeros (≈ 1 line)
#     y = np.zeros((1, Ty))

#     # Step 2: Initialize segment times as empty list (≈ 1 line)
#     previous_segments = []    
    
#     # Step 3: Loop over randomly selected "activate" clips and insert in background
#     for random_positive in positives:
#         if cannot_insert(random_positive, previous_segments):
#             return create_training_example(id)
#         # Insert the audio clip on the background
#         background, segment_time = insert_audio_clip(background, random_positive, previous_segments)
#         # Retrieve segment_start and segment_end from segment_time
#         segment_start, segment_end = segment_time
#         # Insert labels in "y"
#         y = insert_ones(y, segment_end_ms=segment_end)

#     # Step 4: Loop over randomly selected negative clips and insert in background
#     for random_negative in negatives:
#         if cannot_insert(random_negative, previous_segments):
#             return create_training_example(id)
#         # Insert the audio clip on the background 
#         background, _ = insert_audio_clip(background, random_negative, previous_segments)

#     # Export new training example 
#     background = background.set_channels(1)
#     background = background.set_frame_rate(123000)
    
#     TRAIN_PREFIX = "train_"
#     file_handle = background.export(AUDIO_EXAMPLES_DIRECTORY + TRAIN_PREFIX + str(id) + ".wav", format="wav")
#     # print("File (train_" + str(id) + ".wav) was saved in your directory.")

#     sample_rate, samples = wavfile.read(AUDIO_EXAMPLES_DIRECTORY + TRAIN_PREFIX + str(id) +".wav")
#     frequencies, times, x = signal.spectrogram(samples, sample_rate)
    
#     return frequencies, times, x, y

In [10]:
# Original create_X_Y(size,start)
# def create_X_Y(size, start):
#     i, X, Y = 1, [], []
#     for i in range(start, start + size):
#         frequencies, times, x, y = create_training_example(i)
#         x = np.transpose(x)
#         y = np.transpose(y)
#         X.append(x)
#         Y.append(y)
#     return (np.array(X), np.array(Y))

## Helper Functions

In [11]:
def insert_ones_for_dev(y, segment_ends, clip_duration_ms=10000.0, n_ones=50):
    """
    Update the label vector y for every segment end of a dev file.

    For each end time in segment_ends, the n_ones output steps strictly after
    the step containing that end time are set to 1. By strictly we mean that
    the label of the end step itself is left untouched while the n_ones
    following labels become ones. Steps that would fall outside y are dropped.

    Arguments:
    y -- numpy array of shape (1, Ty), the labels of the example (modified in place)
    segment_ends -- iterable of segment end times of the dev file, in ms
    clip_duration_ms -- length in ms of the audio clip that the Ty steps of y span
    n_ones -- number of output steps to label after each segment end

    Returns:
    y -- the same array, with updated labels
    """
    # Read the number of output steps from y itself rather than from the
    # notebook-level Ty, so the helper always agrees with the array it fills.
    n_steps = y.shape[1]

    for segment_end_ms in segment_ends:
        # Output step that contains the end of the segment.
        segment_end_y = int(segment_end_ms * n_steps / clip_duration_ms)

        # Clamp both bounds into [0, n_steps]: labels past the end of the clip
        # are dropped, and a negative end time cannot wrap around to the tail
        # of y through negative indexing.
        first = max(segment_end_y + 1, 0)
        last = max(min(segment_end_y + 1 + n_ones, n_steps), 0)
        y[0, first:last] = 1

    return y

In [12]:
def sec_to_ms(sec):
    """Convert a duration given in seconds to milliseconds."""
    ms_per_second = 1000
    return sec * ms_per_second

In [63]:
def create_dev_example(id):
    """
    Build the spectrogram and label vector of one continuous dev recording.

    Reads DEV_CUT_DIRECTORY/cont_<id>.txt (segment end times in seconds, one
    per row; a "NaN" entry marks a clip without any "basically") and
    DEV_CUT_DIRECTORY/cont_<id>.wav.

    Side effect: the wav file is re-exported in place with its frame rate set
    to 123000, i.e. the file on disk is overwritten.

    Arguments:
    id -- id of the dev recording; selects the cont_<id>.txt / cont_<id>.wav pair

    Returns:
    frequencies -- sample frequencies returned by scipy.signal.spectrogram
    times -- segment times returned by scipy.signal.spectrogram
    x -- the spectrogram of the dev example
    y -- numpy array of shape (1, Ty), the label at each output time step
    """
    CONT_PREFIX = "cont_"
    base_path = DEV_CUT_DIRECTORY + CONT_PREFIX + str(id)
    wav_path = base_path + ".wav"

    # Step 1: Initialize y (label vector) of zeros
    y = np.zeros((1, Ty))

    # Step 2: Import end time segments (in seconds).
    # astype(float) replaces the removed np.float alias and still rejects
    # non-numeric rows; dropna() discards the "NaN" placeholder entries.
    end_times_sec = pd.read_csv(base_path + ".txt", header=None)[0].astype(float).dropna()

    # Step 3: Insert ones after every segment end (converted to ms)
    if not end_times_sec.empty:
        segment_ends = end_times_sec.apply(sec_to_ms).tolist()
        y = insert_ones_for_dev(y, segment_ends)

    # Step 4: Resample the clip (same frame rate as the commented-out
    # create_training_example) and overwrite the wav file with the result.
    audio = AudioSegment.from_wav(wav_path)
    audio = audio.set_frame_rate(123000)
    # export() hands back the open output file; close it so the handle is
    # released before the file is read again below.
    audio.export(wav_path, format="wav").close()

    # Step 5: Compute the spectrogram of the re-exported clip
    sample_rate, samples = wavfile.read(wav_path)
    frequencies, times, x = signal.spectrogram(samples, sample_rate)

    return frequencies, times, x, y

In [64]:
# Test case:
# frequencies, times, x, y = create_dev_example(44)
# c = 0
# for i in y[0]:
#     #print(i)
#     if (i == 1):
#        #print(i)
#        c += 1
# print(c)
#IPython.display.Audio(DEV_CUT_DIRECTORY + "cont_44.wav")

In [78]:
def create_dev_X_Y(list_of_ids):
    """
    Assemble the dev-set inputs and labels for the given recordings.

    Arguments:
    list_of_ids -- The list of ids of the desired dev audio files

    Returns:
    X -- np.array holding the transposed spectrogram of every id, in order
    Y -- np.array holding the transposed label vector of every id, in order
    """
    # Lazily build one example per id; frequencies and times are not needed here.
    examples = (create_dev_example(dev_id) for dev_id in list_of_ids)
    transposed = [
        (np.transpose(spectrogram), np.transpose(labels))
        for _, _, spectrogram, labels in examples
    ]

    X = np.array([spectrogram for spectrogram, _ in transposed])
    Y = np.array([labels for _, labels in transposed])
    return (X, Y)

## Create Numpy Arrays for Dev(Continuous) Set

In [87]:
def export_dev_npy(list_of_ids):
    """
    Save the numpy arrays of the dev audios to DEV_NPY_DIRECTORY.

    Builds X and Y with create_dev_X_Y and writes them as cont_X.npy and
    cont_Y.npy, creating DEV_NPY_DIRECTORY first if it does not exist yet.
    Prints the shapes of both arrays and the target directory.

    Arguments:
    list_of_ids -- The list of ids of the desired dev audio files
    """
    # np.save does not create missing parent directories; make sure the target
    # exists up front so the (slow) array generation is not wasted on a
    # FileNotFoundError at the very end.
    os.makedirs(DEV_NPY_DIRECTORY, exist_ok=True)

    X, Y = create_dev_X_Y(list_of_ids)

    CONT_PREFIX = "cont_"
    np.save(DEV_NPY_DIRECTORY + CONT_PREFIX + "X" + ".npy", X)
    np.save(DEV_NPY_DIRECTORY + CONT_PREFIX + "Y" + ".npy", Y)
    print("Dimensions of np.array: " + "X:{} Y:{}".format(X.shape, Y.shape))
    print("np.array of X and Y + saved in " + DEV_NPY_DIRECTORY)

In [88]:
# Ids of every dev audio currently available (each maps to a cont_<id> file pair).
list_of_dev_ids = [
    11, 12, 13, 14,
    20, 21, 22, 23, 24,
    30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 310,
    40, 41, 42, 43, 44,
]

In [89]:
# Generate X/Y for every listed dev id and save them as .npy files in DEV_NPY_DIRECTORY.
export_dev_npy(list_of_dev_ids)

Dimensions of np.array: X:(25, 5490, 129) Y:(25, 1369, 1)
np.array of X and Y + saved in ./raw_data/dev_npy/
