In [3]:
import sys
import os
import subprocess
import glob
import numpy as np
import wavio
import time;
import random;
from common import opts;
import common.utils as U;

In [4]:
# Root folders used by the conversion and dataset-building cells.
# Source recordings; main() looks for one sub-folder per class underneath it.
src_wav_dir = "./datasets/before_processed_audio/"
# Output roots for the two splits (the train path previously started with a
# stray ".//", which is now normalised to match the other two paths).
train_dest_wav_dir = "./datasets/processing_tmp_audios/train/"
test_dest_wav_dir = "./datasets/processing_tmp_audios/test/"

In [5]:
def convert_sr(src_path, dst_path, sr):
    """Convert every ``*.wav`` directly inside ``src_path`` with ffmpeg.

    Each file is passed to ffmpeg with ``-ac 1`` (one channel) and ``-ar sr``
    (target sampling rate) and written to ``dst_path`` under the same file
    name; ``-y`` overwrites an existing output.

    Args:
        src_path: Directory searched (non-recursively) for ``*.wav`` files.
        dst_path: Output directory; created together with any missing
            parents.
        sr: Sampling rate handed to ffmpeg's ``-ar`` option.

    Returns:
        None. The ffmpeg exit status is not inspected.

    Raises:
        FileNotFoundError: If a file needs converting but the ``ffmpeg``
            executable cannot be found.
    """
    # makedirs (not mkdir) so a nested, not-yet-existing output path works and
    # re-running the cell is harmless.
    os.makedirs(dst_path, exist_ok=True)
    for src_file in sorted(glob.glob(os.path.join(src_path, '*.wav'))):
        # Build the destination from the file name instead of string-replacing
        # the directory prefix, which breaks when the two paths differ in
        # trailing slashes or when src_path occurs elsewhere in the path.
        dst_file = os.path.join(dst_path, os.path.basename(src_file))
        # Argument list (no shell) so spaces or shell metacharacters in file
        # names cannot split or alter the command.
        subprocess.call(['ffmpeg', '-i', src_file, '-ac', '1', '-ar', str(sr),
                         '-loglevel', 'error', '-y', dst_file])

In [6]:
def main(src_dir=None, dest_dir=None, sr=20000):
    """Resample the wav files of every known class folder with ffmpeg.

    For each class folder listed in ``classes_dict`` the ``*.wav`` files under
    ``src_dir/<class folder>`` are converted to one channel at ``sr`` Hz and
    written to ``dest_dir/<class folder>/<name>.wav``.

    Args:
        src_dir: Root folder holding one sub-folder per class. Defaults to the
            notebook-level ``src_wav_dir``.
        dest_dir: Root folder for the converted files. Defaults to the
            notebook-level ``train_dest_wav_dir``; pass ``test_dest_wav_dir``
            to convert the test split instead.
        sr: Target sampling rate handed to ffmpeg's ``-ar`` option.

    Returns:
        None. The ffmpeg exit status is not inspected.
    """
    classes_dict = {
        17: "17_pouring_water",  # pouring_water
        18: "18_toilet_flushing",  # toilet_flushing
        21: "21_sneezing",  # sneezing
        24: "24_coughing",  # coughing
        51: "51_kettle_sound",  # kettle_sound
        52: "52_alarm",  # alarm
        # 53: "53_boiling_water_bubble_sound",  # boiling_water_bubble_sound
        54: "54_ringtone",  # ringtone
        55: "55_shower_water",  # shower_water
        56: "56_pain_sounds",  # pain_sounds
        57: "57_footsteps",  # footsteps
        98: "98_silence",  # silence
        99: "99_other_sounds",  # other_sounds
    }
    # Resolve the defaults at call time so later edits to the notebook-level
    # path variables are picked up.
    if src_dir is None:
        src_dir = src_wav_dir
    if dest_dir is None:
        dest_dir = train_dest_wav_dir

    for class_folder in classes_dict.values():
        cur_src_dir = os.path.join(src_dir, class_folder)
        cur_dest_dir = os.path.join(dest_dir, class_folder)
        print(f"current work directory:{cur_src_dir}\n")
        # ffmpeg does not create missing output folders itself.
        os.makedirs(cur_dest_dir, exist_ok=True)
        for w in sorted(glob.glob(os.path.join(cur_src_dir, '*.wav'))):
            # Output name is everything before the first '.' plus ".wav".
            fname = "{}.wav".format(os.path.basename(w).split('.')[0])
            dest_fname = os.path.join(cur_dest_dir, fname)
            print(f"Convert Sampling Rate:{w} >> {dest_fname}")
            # Argument list (no shell) so unusual characters in file names
            # cannot split or alter the command.
            subprocess.call(['ffmpeg', '-i', w, '-ac', '1', '-ar', str(sr),
                             '-loglevel', 'error', '-y', dest_fname])
    

In [5]:
# main()

In [6]:
def create_dataset(src_path, dst_path):
    """Collect the wav clips of every class folder into a five-fold ``.npz``.

    For each of ``fold1`` .. ``fold5`` every ``*.wav`` under
    ``src_path/<class folder>`` is read with ``wavio``; the first channel is
    trimmed of leading and trailing zero-valued samples and stored together
    with the numeric class key as its label.

    NOTE(review): the fold number only influences the dictionary key and the
    log messages, so all five folds end up with the same clips. Confirm this
    is intended before relying on the folds for cross-validation.

    Args:
        src_path: Root folder holding one sub-folder per class.
        dst_path: Target passed to ``np.savez``.

    Returns:
        None. The archive maps ``'fold<i>'`` to a dict with the keys
        ``'sounds'`` (list of 1-D arrays) and ``'labels'`` (list of class
        keys).
    """
    classes_dict = {
        17: "17_pouring_water",  # pouring_water
        18: "18_toilet_flushing",  # toilet_flushing
        21: "21_sneezing",  # sneezing
        24: "24_coughing",  # coughing
        51: "51_kettle_sound",  # kettle_sound
        52: "52_alarm",  # alarm
        # 53: "53_boiling_water_bubble_sound",  # boiling_water_bubble_sound
        54: "54_ringtone",  # ringtone
        55: "55_shower_water",  # shower_water
        56: "56_pain_sounds",  # pain_sounds
        57: "57_footsteps",  # footsteps
        98: "98_silence",  # silence
        99: "99_other_sounds",  # other_sounds
    }
    my_dataset = {}
    for fold in range(1, 6):
        fold_key = 'fold{}'.format(fold)
        print(f"--Start to preparing fold{fold} dataset...---------------")
        my_sounds = []
        my_labels = []
        for k in classes_dict:
            cur_src_dir = os.path.join(src_path, classes_dict[k])
            print(f"current source directory:{cur_src_dir}")
            for wav_file in sorted(glob.glob(os.path.join(cur_src_dir, '*.wav'))):
                sound = wavio.read(wav_file).data.T[0]
                nonzero_idx = sound.nonzero()[0]
                # An all-zero clip has no non-zero sample to anchor the trim
                # on (min()/max() of an empty array would raise ValueError),
                # so it is kept untrimmed.
                if nonzero_idx.size:
                    # Remove silent sections at both ends.
                    sound = sound[nonzero_idx.min(): nonzero_idx.max() + 1]
                my_sounds.append(sound)
                my_labels.append(k)
                print(f"sound:{wav_file}\nlabel:{k}")
        print(f"--End of preparing fold{fold} dataset-------------------")

        my_dataset[fold_key] = {'sounds': my_sounds, 'labels': my_labels}

    np.savez(dst_path, **my_dataset)

In [7]:
# save_npz_folder = "../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/fsd50k_processed_audios/test_fsd50_20K.npz"
# create_dataset(test_dest_wav_dir,save_npz_folder);

In [8]:
def load_train_npz(train_npz, fold='fold1', index=123):
    """Print the length and label of one stored clip as a quick sanity check.

    Args:
        train_npz: Path of an archive whose entries are dicts with the keys
            ``'sounds'`` and ``'labels'`` (the layout ``create_dataset``
            writes).
        fold: Name of the archive entry to inspect.
        index: Position of the clip inside that entry.

    Returns:
        None; the two values are printed.

    Raises:
        KeyError: If ``fold`` is not present in the archive.
        IndexError: If the entry holds no clip at ``index``.
    """
    # NOTE: allow_pickle=True unpickles the stored dicts, which can execute
    # arbitrary code -- only open archives from a trusted source.
    # The context manager closes the underlying file handle again.
    with np.load(train_npz, allow_pickle=True) as dataset:
        fold_data = dataset[fold].item()
    print(f"len of sound:{len(fold_data['sounds'][index])}")
    print(f"label:{fold_data['labels'][index]}")

In [12]:
#load_train_npz("../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/fsd50k_processed_audios/train_fsd50_20K__202401041450.npz")

In [44]:
class ValGenerator():
    """Builds one batch of preprocessed clips and one-hot labels.

    Only the first ``options.batchSize // options.nCrops`` (sample, label)
    pairs are used; each of them is run through the preprocessing chain and
    its label is repeated for every crop.
    """

    def __init__(self, samples, labels, options):
        """Pair samples with labels and prepare the preprocessing chain."""
        random.seed(42)
        self.data = [(samples[idx], labels[idx]) for idx in range(len(samples))]
        self.opt = options
        self.batch_size = options.batchSize // options.nCrops
        self.preprocess_funcs = self.preprocess_setup()

    def get_data(self):
        """Return the batch with singleton axes inserted at positions 1 and 3.

        The shapes of both returned arrays are printed.
        """
        flat_x, y = self.generate()
        x = np.expand_dims(np.expand_dims(flat_x, axis=1), axis=3)
        print(x.shape)
        print(y.shape)
        return x, y

    def generate(self):
        """Preprocess the first ``batch_size`` clips and flatten the crops.

        Returns a pair of 2-D arrays: all crops stacked row-wise, and the
        matching one-hot label rows.
        """
        batch_sounds = []
        batch_labels = []
        for idx in range(self.batch_size):
            clip, target = self.data[idx]
            crops = self.preprocess(clip).astype(np.float32)
            # Same one-hot row for every crop of this clip.
            onehot = np.zeros((self.opt.nCrops, self.opt.nClasses))
            onehot[:, target] = 1
            batch_sounds.append(crops)
            batch_labels.append(onehot)

        sound_arr = np.asarray(batch_sounds)
        label_arr = np.asarray(batch_labels)

        # Merge the leading two axes (clip, crop) into one row axis.
        flat_sounds = sound_arr.reshape(sound_arr.shape[0] * sound_arr.shape[1],
                                        sound_arr.shape[2])
        flat_labels = label_arr.reshape(label_arr.shape[0] * label_arr.shape[1],
                                        label_arr.shape[2])
        return flat_sounds, flat_labels

    def preprocess_setup(self):
        """Return the ordered preprocessing callables built from ``common.utils``."""
        half_length = self.opt.inputLength // 2
        return [
            U.padding(half_length),
            U.normalize(32768.0),
            U.multi_crop(self.opt.inputLength, self.opt.nCrops),
        ]

    def preprocess(self, sound):
        """Apply every preprocessing callable to ``sound`` in order."""
        result = sound
        for step in self.preprocess_funcs:
            result = step(result)
        return result



In [None]:
def create_test_dataset(src_path, dst_path):
    """Collect the wav clips of every class folder into a single-entry ``.npz``.

    Every ``*.wav`` under ``src_path/<class folder>`` is read with ``wavio``;
    the first channel is trimmed of leading and trailing zero-valued samples
    and stored together with the numeric class key as its label.

    Args:
        src_path: Root folder holding one sub-folder per class.
        dst_path: Target passed to ``np.savez``.

    Returns:
        None. The archive has one entry, ``'testdata'``, a dict with the keys
        ``'sounds'`` (list of 1-D arrays) and ``'labels'`` (list of class
        keys).
    """
    classes_dict = {
        17: "17_pouring_water",  # pouring_water
        18: "18_toilet_flushing",  # toilet_flushing
        21: "21_sneezing",  # sneezing
        24: "24_coughing",  # coughing
        51: "51_kettle_sound",  # kettle_sound
        52: "52_alarm",  # alarm
        # 53: "53_boiling_water_bubble_sound",  # boiling_water_bubble_sound
        54: "54_ringtone",  # ringtone
        55: "55_shower_water",  # shower_water
        56: "56_pain_sounds",  # pain_sounds
        57: "57_footsteps",  # footsteps
        98: "98_silence",  # silence
        99: "99_other_sounds",  # other_sounds
    }
    my_sounds = []
    my_labels = []
    for k in classes_dict:
        cur_src_dir = os.path.join(src_path, classes_dict[k])
        print(f"current source directory:{cur_src_dir}")
        for wav_file in sorted(glob.glob(os.path.join(cur_src_dir, '*.wav'))):
            sound = wavio.read(wav_file).data.T[0]
            nonzero_idx = sound.nonzero()[0]
            # An all-zero clip has no non-zero sample to anchor the trim on
            # (min()/max() of an empty array would raise ValueError), so it is
            # kept untrimmed.
            if nonzero_idx.size:
                # Remove silent sections at both ends.
                sound = sound[nonzero_idx.min(): nonzero_idx.max() + 1]
            my_sounds.append(sound)
            my_labels.append(k)
            print(f"sound:{wav_file}\nlabel:{k}")
    print(f"--End of preparing test dataset-------------------")

    my_dataset = {'testdata': {'sounds': my_sounds, 'labels': my_labels}}

    np.savez(dst_path, **my_dataset)

In [45]:
# Build the test archive from the class folders under test_dest_wav_dir.
# Despite its name, save_test_npz_folder is the path of the .npz file to write,
# not a directory.
save_test_npz_folder = "../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/fsd50k_processed_audios/test_fsd50_20K_single_fold_2.npz"
create_test_dataset(test_dest_wav_dir,save_test_npz_folder);

current source directory:../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/processing_tmp_audios/test/17_pouring_water
sound:../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/processing_tmp_audios/test/17_pouring_water/1-51436-A-17.wav
label:17
sound:../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/processing_tmp_audios/test/17_pouring_water/2-102414-B-17.wav
label:17
sound:../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/processing_tmp_audios/test/17_pouring_water/2-126433-A-17.wav
label:17
sound:../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/processing_tmp_audios/test/17_pouring_water/4-181628-A-17.wav
label:17
sound:../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/processing_tmp_audios/test/17_pouring_water/4-186693-A-17.wav
label:17
sound:../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/processing_tmp_audios/test/17_pouring_water/5-205589-A-17.wav
lab

In [58]:
def load_test_npz(test_npz, key='x'):
    """Print one array stored in an ``.npz`` archive.

    Args:
        test_npz: Path of the archive to open.
        key: Name of the archive entry to print.

    Returns:
        None; the array is printed.

    Raises:
        KeyError: If ``key`` is not present in the archive.
    """
    # NOTE: allow_pickle=True can execute arbitrary code while unpickling --
    # only open archives from a trusted source.
    # The context manager closes the underlying file handle again.
    with np.load(test_npz, allow_pickle=True) as dataset:
        print(dataset[key])

In [59]:
# Print the 'x' array stored in the fold1_test16000.npz archive.
load_test_npz("../../../RLRepo/Works/Projects/TransferLearning_for_ACDNet/datasets/fold1_test16000.npz");

[[[[ 0.00000000e+00]
   [ 0.00000000e+00]
   [ 0.00000000e+00]
   ...
   [ 6.10351562e-05]
   [ 0.00000000e+00]
   [-3.05175781e-05]]]


 [[[-1.54724121e-02]
   [ 9.46044922e-03]
   [ 9.61303711e-03]
   ...
   [-3.05175781e-05]
   [ 9.15527344e-05]
   [-1.22070312e-04]]]


 [[[-3.05175781e-05]
   [ 6.10351562e-05]
   [-6.10351562e-05]
   ...
   [ 0.00000000e+00]
   [-3.05175781e-05]
   [ 3.05175781e-05]]]


 ...


 [[[ 5.43487549e-01]
   [ 4.60784912e-01]
   [-9.30480957e-02]
   ...
   [-4.04571533e-01]
   [-5.11474609e-02]
   [ 3.15582275e-01]]]


 [[[ 3.61938477e-02]
   [-8.50708008e-01]
   [-7.28393555e-01]
   ...
   [ 1.45568848e-02]
   [ 9.88769531e-03]
   [-8.54492188e-03]]]


 [[[ 1.74255371e-01]
   [-1.04034424e-01]
   [-2.41210938e-01]
   ...
   [ 0.00000000e+00]
   [ 0.00000000e+00]
   [ 0.00000000e+00]]]]
