In [1]:
import os
import sys
import numpy as np
from random import shuffle
import wavio

In [2]:
# Add the directory two levels above the working directory to the import search path
# so the `Libs` package imported in the next cell can be resolved
# (presumably the repository root — confirm). The path is relative, so this only
# works when the kernel's working directory is this notebook's folder.
sys.path.append("../../")

In [3]:
from Libs.SharedLibs import getFileList;
from Libs.datetime_util import genDataTimeStr;

In [4]:
# Path template for one fold's wav directory; `{}` is a `str.format` placeholder
# (presumably the fold number — confirm).
# NOTE(review): no other cell visible in this notebook references this name.
wav_src_dir = "../../datasets/CurrentUse/wav_files/fold{}"

In [5]:
# Sanity check: reorder a feature matrix and its labels with one shared permutation.
arr = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3], [4, 4, 4]])
tar = np.array([0, 1, 2, 3, 4])

# Show both inputs before anything is reordered.
print("Original array:\n", arr, "\n")
print("Target labels:\n", tar, "\n")

# One random row order, applied to both arrays so every row keeps its own label.
ind = list(range(len(arr)))
shuffle(ind)
arr_new, tar_new = arr[ind], tar[ind]

# Row i of arr_new must still correspond to element i of tar_new.
print("Shuffled array and target label with same order:\n", arr_new, "\n\n", tar_new, "\n")

Original array:
 [[0 0 0]
 [1 1 1]
 [2 2 2]
 [3 3 3]
 [4 4 4]] 

Target labels:
 [0 1 2 3 4] 

Shuffled array and target label with same order:
 [[0 0 0]
 [2 2 2]
 [4 4 4]
 [1 1 1]
 [3 3 3]] 

 [0 2 4 1 3] 



## Generating the Multi-Fold Training Dataset

In [6]:
def _read_trimmed_sound(wav_path):
    """Read the first channel of a wav file and strip leading/trailing zero samples.

    Returns the trimmed 1-D sample array, or ``None`` when every sample is zero
    (there is no non-silent span to keep in that case).
    """
    sound = wavio.read(wav_path).data.T[0]
    nonzero_idx = sound.nonzero()[0]
    if nonzero_idx.size == 0:
        return None
    # nonzero() yields indices in ascending order for a 1-D array, so the first
    # and last entries are the min and max non-zero positions.
    return sound[nonzero_idx[0]: nonzero_idx[-1] + 1]


def _append_class_sounds(class_dir, label, sounds, labels):
    """Append every usable wav listed for ``class_dir`` to ``sounds`` and ``label`` to ``labels``."""
    print(f"work on dir:{class_dir}")
    # getFileList is a project helper; it is assumed to yield paths wavio can read — TODO confirm.
    for wav_path in getFileList(class_dir):
        sound = _read_trimmed_sound(wav_path)
        if sound is None:
            # An all-zero file used to abort the whole run with a ValueError from .min().
            print(f"skip all-zero wav file:{wav_path}")
            continue
        sounds.append(sound)
        labels.append(label)


def create_train_dataset(upper_level_dir=None, fold_dirs=None, p_classes=None, n_classes=None, export_path=None):
    """Collect trimmed wav samples per fold and save them as one ``.npz`` archive.

    For every entry of ``fold_dirs`` the files under
    ``<upper_level_dir>/<fold>/positive/<class>`` and
    ``<upper_level_dir>/<fold>/negative/<class>`` are read, silence-trimmed and
    gathered (positives first, then negatives) together with a string label.

    Parameters
    ----------
    upper_level_dir : str
        Directory that contains the fold sub-directories.
    fold_dirs : iterable of str
        Fold sub-directory names. Required.
    p_classes : iterable of str, optional
        Positive class folder names; the label is the text after the last
        underscore (``"alarm_52"`` -> ``"52"``). ``None`` means no positive classes.
    n_classes : iterable of str, optional
        Negative class folder names; the label is the text before the first
        underscore (``"10_rain_esc50"`` -> ``"10"``). ``None`` means no negative classes.
    export_path : str or path-like
        Destination of the archive. Missing parent directories are created.

    Returns
    -------
    dict
        ``{'fold<fold>': {'sounds': [...], 'labels': [...]}}`` — the same mapping
        that is written to ``export_path``.

    Raises
    ------
    TypeError
        If ``export_path`` is ``None`` (checked before any file is read).

    Notes
    -----
    Each archive entry is a dict, which ``np.savez`` stores as a pickled object
    array; reading it back requires ``np.load(..., allow_pickle=True)`` and
    ``.item()`` on the entry.
    """
    if export_path is None:
        # Fail before reading any audio instead of after all the work is done.
        raise TypeError("export_path must be a file path, not None")

    train_dataset = {}
    for fold in fold_dirs:
        # NOTE(review): with fold names such as "fold1" this key becomes "foldfold1".
        # Kept unchanged because loaders outside this notebook may rely on it — confirm.
        fold_key = 'fold{}'.format(fold)
        train_sounds = []
        train_labels = []

        for p_class in p_classes or ():
            p_current_dir = os.path.join(upper_level_dir, fold, 'positive', p_class)
            p_lbl = p_class.rpartition('_')[2]
            _append_class_sounds(p_current_dir, p_lbl, train_sounds, train_labels)

        for n_class in n_classes or ():
            n_current_dir = os.path.join(upper_level_dir, fold, 'negative', n_class)
            # partition keeps the whole name when there is no underscore; the old
            # slice c[:c.find('_')] silently dropped the last character in that case.
            n_lbl = n_class.partition('_')[0]
            _append_class_sounds(n_current_dir, n_lbl, train_sounds, train_labels)

        train_dataset[fold_key] = {'sounds': train_sounds, 'labels': train_labels}

    # np.savez does not create directories; make sure the target folder exists.
    if isinstance(export_path, (str, os.PathLike)):
        export_dir = os.path.dirname(os.fspath(export_path))
        if export_dir:
            os.makedirs(export_dir, exist_ok=True)

    np.savez(export_path, **train_dataset)
    print(f"Training Data is generated and save at {export_path}")
    return train_dataset

In [41]:
# Inputs for the multi-fold training export: fold sub-directories plus the
# positive / negative class folders expected inside each fold.
fold_dirs = ["fold1", "fold2", "fold3", "fold4", "fold5"]
p_classes = ["alarm_52", "help_mandrain_71"]
n_classes = [
    "10_rain_esc50",
    "14_chirping_birds_esc50",
    "15_water_drop_esc50",
    "17_pouring_water_esc50",
    "18_toilet_flush_esc50",
    "19_thunderstorm_esc50",
]
upper_level_dir = "../../datasets/CurrentUse/wav_files/"
# genDataTimeStr() is a project helper; presumably it returns a date-time string
# that makes each export file name unique — confirm.
output_path = "../CurrentUse/generated_datasets/train/multi_folds_train_{}.npz".format(genDataTimeStr())
# The builder defined in this notebook is `create_train_dataset`; the earlier call
# to `create_dataset` used a name that is not defined here and raises NameError
# on a fresh kernel (Restart & Run All).
dataset = create_train_dataset(
    upper_level_dir=upper_level_dir,
    fold_dirs=fold_dirs,
    p_classes=p_classes,
    n_classes=n_classes,
    export_path=output_path,
)


work on dir:../../datasets/CurrentUse/wav_files/fold1/positive/alarm_52
work on dir:../../datasets/CurrentUse/wav_files/fold1/positive/help_mandrain_71
work on dir:../../datasets/CurrentUse/wav_files/fold1/negative/10_rain_esc50
work on dir:../../datasets/CurrentUse/wav_files/fold1/negative/14_chirping_birds_esc50
work on dir:../../datasets/CurrentUse/wav_files/fold1/negative/15_water_drop_esc50
work on dir:../../datasets/CurrentUse/wav_files/fold1/negative/17_pouring_water_esc50
work on dir:../../datasets/CurrentUse/wav_files/fold1/negative/18_toilet_flush_esc50
work on dir:../../datasets/CurrentUse/wav_files/fold1/negative/19_thunderstorm_esc50
work on dir:../../datasets/CurrentUse/wav_files/fold2/positive/alarm_52
work on dir:../../datasets/CurrentUse/wav_files/fold2/positive/help_mandrain_71
work on dir:../../datasets/CurrentUse/wav_files/fold2/negative/10_rain_esc50
work on dir:../../datasets/CurrentUse/wav_files/fold2/negative/14_chirping_birds_esc50
work on dir:../../datasets/Cu

## Generating the Test Dataset

In [14]:
# test_list = [52 for _ in range(87)]

In [15]:
# print(test_list)

In [16]:
# print(len(test_list))

In [8]:
# test_lbl = 'alarm_52'
# print(test_lbl[test_lbl.rfind('_')+1:])

In [7]:
# test_lbl2 = '12_rainfall'
# print(test_lbl2[:test_lbl2.find('_')])