# Loading from `video.dtu.dk`.

In [1]:
import os
import numpy as np
import pandas as pd

#Import torch stuff.
import torch
import torch.nn as nn
import torchaudio
import torchaudio.transforms as T
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

#pip install git+https://github.com/facebookresearch/WavAugment.git
import augment

import IPython.display as ipd
import matplotlib.pyplot as plt

from tqdm.notebook import trange, tqdm

import wakeword_functions as wf

In [None]:
# Load the three data splits from the project helper module; `train` is
# indexed like a pandas DataFrame with an 'ID' column further down.
train, val, test = wf.get_splits()

In [None]:
# Smoke test: look up the duration of the first ID in the training split.
# NOTE(review): duration is presumably in seconds — confirm in wf.get_duration.
ID = train['ID'].iloc[0]
duration = wf.get_duration(ID)

In [None]:
# Sliding-window table kept as four parallel lists: one entry per
# (ID, window) pair.
t1s, t2s, targets, IDs = [], [], [], []

for ID in tqdm(train['ID'].unique()):

    # Total length of the recording behind this ID.
    duration = wf.get_duration(ID)

    # Windows are 2 long and start every 0.25; t holds their midpoints.
    t1 = np.arange(0, duration-2, 0.25)
    t2 = t1 + 2
    t  = (t1 + t2) / 2

    # Midpoint of every annotated (t1, t2) interval belonging to this ID.
    rows_for_id = train.loc[train['ID'] == ID]
    t_wakeword  = rows_for_id[['t1', 't2']].values.mean(axis=1)

    # A window gets target 1 when its midpoint lies within 0.5 of any
    # annotated midpoint, otherwise 0 (compared pairwise via broadcasting).
    is_near    = np.abs(t[:, None] - t_wakeword[None, :]) <= 0.5
    targets_ID = is_near.any(axis=1).astype(t.dtype)

    t1s.extend(t1.tolist())
    t2s.extend(t2.tolist())
    targets.extend(targets_ID.tolist())
    IDs.extend([ID] * len(t1))
    

In [None]:
class WakewordDataset(Dataset):
    '''
    Dataset of audio windows cut on demand from longer recordings.

    Every row of ``dataframe`` describes one window through the columns
    't1' (window start), 't2' (window end), 'target' (label) and 'ID'
    (which recording to cut from).

    Parameters
    ----------
    dataframe : table with the columns 't1', 't2', 'target' and 'ID'.
    f : callable forwarded to ``wf.load_data`` as ``f``; it takes audio and
        returns the spectrogram.
    sr, normalize : accepted for interface compatibility; currently not
        used by this class.
    transforms : forwarded to ``wf.load_data`` as ``transforms``.
    '''
    
    def __init__(self, dataframe, f, sr = 22000, normalize = True, transforms=None):
        #f is the function that takes audio and returns the spectrogram.
        #The columns are copied into plain lists so items are looked up by position.
        self.t1         = dataframe['t1'].tolist()
        self.t2         = dataframe['t2'].tolist()
        self.target     = dataframe['target'].tolist()
        self.ID         = dataframe['ID'].tolist()
        self.transforms = transforms
        self.f          = f
        
    def __len__(self):
        '''Return the number of windows in the dataset.'''
        return len(self.t1)
    
    def __getitem__(self, idx):
        '''
        Cut window ``idx`` to a temporary wav file, load it and clean up.

        Returns the tuple ``(t1, t2, audio, sr, x, target, ID)`` where
        ``audio, sr, x`` are whatever ``wf.load_data`` returns for the clip.
        '''
        t1, t2, ID, target = self.t1[idx], self.t2[idx], self.ID[idx], self.target[idx]
        
        #Temporary clip written to the working directory; idx keeps the name unique per item.
        path = f'{ID}_{idx}.wav'
        
        try:
            wf.clip(t1, t2, ID, path)
            audio, sr, x = wf.load_data(path, f = self.f, transforms=self.transforms)
        finally:
            #Always delete the clip, also when clipping or loading fails,
            #so failed items do not leave wav files behind.
            try:
                os.remove(path)
            except FileNotFoundError:
                #Nothing was written; do not mask the original error.
                pass
        
        return t1, t2, audio, sr, x, target, ID

In [None]:
# Load the window table from disk; WakewordDataset reads the columns
# 't1', 't2', 'target' and 'ID' from it.
df = pd.read_csv('df.csv')

In [None]:
# Pull one item out of the dataset to check that clipping and loading work
# end to end, using a torchaudio Spectrogram as the feature function.
t1, t2, audio, sr, x, target, ID = next(iter(WakewordDataset(df, T.Spectrogram())))

In [None]:
# Wrap the dataset in a DataLoader with a batch size of 124.
# NOTE(review): this rebinds `train`, which earlier held the training split
# returned by wf.get_splits(); cells that expect the split must be re-run
# after wf.get_splits() is called again.
train = DataLoader(WakewordDataset(df, T.Spectrogram()), batch_size=124)

In [None]:
import time

# Measure the wall-clock time needed to fetch the first batch from the loader.
started_at = time.time()
t1, t2, audio, sr, x, target, ID = next(iter(train))
elapsed = time.time() - started_at

print(elapsed)

In [None]:
# Back-of-envelope estimate: number of batches at batch size 124, times 24054.0.
# NOTE(review): 24054.0 is presumably a measured per-batch cost; its origin
# and units are not shown here — confirm.
(df.shape[0]/124) * 24054.0

In [None]:
# Number of batches when 2982696 rows are split into batches of 64.
# NOTE(review): 2982696 is presumably df.shape[0] — confirm.
2982696/64

In [None]:
# 46604.625 equals 2982696/64 (the batch count at batch size 64); scale it by 13.6.
# NOTE(review): 13.6 is presumably the measured seconds per batch — confirm.
46604.625 * 13.6

In [None]:
# Unpack in (train, val, test) order, matching the other wf.get_splits()
# calls in this notebook; the previous (train, test, val) order swapped the
# validation and test splits.
train, val, test = wf.get_splits()

In [2]:
# Reload the splits; this also restores `train` to the training split after
# it was rebound to a DataLoader earlier in the notebook.
train, val, test = wf.get_splits()

In [None]:
# Add negative examples to the training split with ratio=10.
# The helper is called `append_negative_cases` in the cell that was actually
# executed below; `append_negative_classes` looks like a typo.
# NOTE(review): the return value is discarded here — confirm whether the
# helper modifies `train` in place or returns a new table.
wf.append_negative_cases(train, ratio=10)

In [6]:
import time

# Measure the wall-clock time of adding negative cases to the validation split.
started_at = time.time()
new = wf.append_negative_cases(val, ratio=10)
elapsed = time.time() - started_at

print(elapsed)

KeyboardInterrupt: 