## 說明：
總共要產生三種資料集：
- 訓練集
- 驗證集
- 測試集   
其中驗證集及測試集要經二階段建立。

In [1]:
import sys
import os
import subprocess
import glob
import numpy as np
import wavio
import time;
import random;
sys.path.append(os.path.abspath("../"))
import re

In [2]:
from common.tlopts import display_info;
import common.utils as U;
from Libs.SharedLibs import *

In [3]:
import argparse

In [4]:
from datetime import datetime

## Global Shared Variables

### define sound source folders

In [5]:
# Source folders scanned by create_dataset().
# Class 52 (positive / alarm) and class 99 (negative / other sounds) each have
# one folder for the training set (execFlag == 1) and one for the
# validation/test set (any other execFlag).
training_positive_sounds_src = "../datasets/forOneClassModel_alarm/train/alarm/"
training_negative_sounds_src = "../datasets/forOneClassModel_alarm/train/other_sounds/20K_25Classes_160Items/"
val_positive_sounds_src = "../datasets/forOneClassModel_alarm/test_val/alarm/"
val_negative_sounds_src = "../datasets/forOneClassModel_alarm/test_val/other_sounds/"

### define save paths

In [41]:
# Output path templates. The "{}" placeholder is filled via str.format() with
# the timestamp returned by genDataTimeStr() right before saving.
# Raw (uncropped) training sounds written by create_dataset().
training_npz = "../datasets/forOneClassModel_alarm/train/trainSet_{}.npz"
# Raw (uncropped) validation/test sounds written by create_dataset().
val_src_npz = "../datasets/forOneClassModel_alarm/test_val/src_npz/val_src_{}.npz"
# Final cropped validation set written by create_test_compress_npz().
# NOTE(review): no ".npz" suffix here - presumably np.savez_compressed appends it; confirm.
final_val_npz = "../datasets/forOneClassModel_alarm/test_val/final_val_test_npz/final_valSet_{}"

## Utility Functions

In [7]:
def getFolderList(rootDir=None, recursive=False):
    """Walk ``rootDir`` with ``os.walk`` and report what was found.

    Args:
        rootDir: Directory handed to ``os.walk``.
        recursive: Selects the shape of the result (see Returns).

    Returns:
        When ``recursive`` is false: the first ``(dirpath, dirnames,
        filenames)`` tuple produced by the walk, i.e. the listing of
        ``rootDir`` itself.
        When ``recursive`` is true: a list with the ``dirpath`` of every
        entry produced by the walk, ``rootDir`` included.

    Raises:
        StopIteration: In non-recursive mode when the walk yields nothing.
    """
    walk_iter = os.walk(rootDir)
    if recursive:
        return [entry[0] for entry in walk_iter]
    return next(walk_iter)

In [44]:
# generate opt files for data preparation
def getOpts():
    """Build the option namespace used for data preparation.

    Command-line arguments are parsed first (unknown arguments are ignored),
    then the learning, network and test-data settings are attached to the
    resulting namespace as plain attributes.

    Returns:
        argparse.Namespace: Holds the parsed arguments (``netType``, ``data``,
        ``dataset``, ``BC``, ``strongAugment``) plus the fixed settings
        assigned below (``batchSize``, ``LR``, ``weightDecay``, ``momentum``,
        ``nEpochs``, ``schedule``, ``warmup``, ``device``, ``nClasses``,
        ``nFolds``, ``splits``, ``sr``, ``inputLength``, ``nCrops``,
        ``TLAcdnetConfig``).
    """
    parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet')
    parser.add_argument('--netType', default='ACDNet_TL_Model_Extend', required=False)
    parser.add_argument('--data', default='../datasets/processed/', required=False)
    # The default value must also be a legal explicit choice; previously
    # "--dataset uec_iot" was rejected because only '10' was listed.
    parser.add_argument('--dataset', required=False, default='uec_iot',
                        choices=['uec_iot', '10'])
    # NOTE: default=True together with store_true means these two flags are
    # always True, whether or not they are given on the command line.
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning')
    parser.add_argument('--strongAugment', default=True, action='store_true',
                        help='Add scale and gain augmentation')
    # Inside a notebook parse_args() cannot be used, so parse_known_args()
    # is used and the unrecognised arguments are discarded.
    opt, _unknown = parser.parse_known_args()

    # Learning settings.
    # Recorded best setting (accuracy 96.5): batchSize=32, LR=0.1,
    # weightDecay=5e-3, momentum=0.09, schedule=[0.3, 0.5, 0.9].
    opt.batchSize = 32
    opt.LR = 0.1
    opt.weightDecay = 5e-3
    opt.momentum = 0.09
    opt.nEpochs = 800
    opt.schedule = [0.3, 0.5, 0.9]
    opt.warmup = 10
    # The device is fixed to CPU here; no accelerator detection is performed.
    opt.device = 'cpu'
    print(f"***Use device:{opt.device}")

    # Basic net settings.
    opt.nClasses = 2
    opt.nFolds = 1
    opt.splits = list(range(1, opt.nFolds + 1))
    opt.sr = 20000
    opt.inputLength = 30225

    # Test data.
    opt.nCrops = 2
    opt.TLAcdnetConfig = [8, 64, 32, 64, 64, 128, 128, 256, 256, 512, 512, 2]
    return opt

In [45]:
def genDataTimeStr():
    """Return the current local date and time as a compact digit string.

    The layout is ``YYYYMMDDHHMMSS`` (no separators), which is what the npz
    file-name templates in this notebook are filled with.
    """
    now = datetime.today()
    return now.strftime('%Y%m%d%H%M%S')

In [46]:
def testGetSoundsAndLabels(srcDir):
    """Return the result of ``getFileList(srcDir)`` unchanged.

    Small smoke-test helper used to count the files found in a source folder.
    """
    return getFileList(srcDir)

In [47]:
# Sanity check: report how many files each of the four source folders holds.
_train_pos_total = len(testGetSoundsAndLabels(training_positive_sounds_src))
print(f"get all training positive wavs in {training_positive_sounds_src}:\n total counts:{_train_pos_total}")

_train_neg_total = len(testGetSoundsAndLabels(training_negative_sounds_src))
print(f"get all training negative wavs in {training_negative_sounds_src}:\n total counts:{_train_neg_total}")

_val_pos_total = len(testGetSoundsAndLabels(val_positive_sounds_src))
print(f"get all test positive wavs in {val_positive_sounds_src}:\n total counts:{_val_pos_total}")

_val_neg_total = len(testGetSoundsAndLabels(val_negative_sounds_src))
print(f"get all test negative wavs in {val_negative_sounds_src}:\n total counts:{_val_neg_total}")

get all training positive wavs in ../datasets/forOneClassModel_alarm/train/alarm/:
 total counts:160
get all training negative wavs in ../datasets/forOneClassModel_alarm/train/other_sounds/20K_25Classes_160Items/:
 total counts:165
get all test positive wavs in ../datasets/forOneClassModel_alarm/test_val/alarm/:
 total counts:40
get all test negative wavs in ../datasets/forOneClassModel_alarm/test_val/other_sounds/:
 total counts:48


In [48]:
def create_dataset(dst_path=None, classes_dict=None, execFlag=1):
    """Read the wav files of every class, trim them and save one npz file.

    For each class ID in ``classes_dict`` the matching module-level source
    folder is listed with ``getFileList``. The first channel of every wav
    file is loaded and cut down to the span between its first and last
    non-zero sample. The sounds and their class IDs are stored under
    ``fold1`` and written with ``np.savez``.

    Args:
        dst_path: Output path template with one ``{}`` placeholder, which is
            filled with the timestamp from ``genDataTimeStr()``.
        classes_dict: Mapping whose keys are class IDs. Only 52 (positive /
            alarm) and 99 (negative / other sounds) are supported; the values
            are not used by this function.
        execFlag: 1 reads the training source folders; any other value reads
            the validation/test source folders.

    Returns:
        str: The formatted path that was passed to ``np.savez``.

    Raises:
        ValueError: If ``dst_path`` or ``classes_dict`` is missing, or if
            ``classes_dict`` contains a class ID other than 52 or 99.
    """
    if dst_path is None or classes_dict is None:
        raise ValueError("dst_path and classes_dict are both required")

    # Reject unknown class IDs before any file is read. Previously such a key
    # silently re-used the file list of the preceding class under the wrong
    # label (or crashed on None when it came first).
    unsupported = [k for k in classes_dict if k not in (52, 99)]
    if unsupported:
        raise ValueError(
            f"unsupported class ids {unsupported}; only 52 and 99 are handled")

    is_training = execFlag == 1
    # Only used for the duration shown in the log line below.
    # NOTE(review): assumes every wav file is sampled at 20 kHz - confirm.
    assumed_rate = 20000

    my_sounds = []
    my_labels = []
    print("--Start to preparing training dataset...---------------")
    for k in classes_dict:
        if k == 52:
            if is_training:
                print(f"create training data from {training_positive_sounds_src}")
                wav_list = getFileList(training_positive_sounds_src)
            else:
                print(f"create positive test data from {val_positive_sounds_src}")
                wav_list = getFileList(val_positive_sounds_src)
        else:  # k == 99, guaranteed by the validation above
            if is_training:
                print(f"create negative training data from {training_negative_sounds_src}")
                wav_list = getFileList(training_negative_sounds_src)
            else:
                print(f"create negative test data from {val_negative_sounds_src}")
                wav_list = getFileList(val_negative_sounds_src)

        for wav_file in wav_list:
            sound = wavio.read(wav_file).data.T[0]  # first channel only
            nonzero_idx = sound.nonzero()[0]
            if nonzero_idx.size == 0:
                # A file with no non-zero sample has nothing to keep; taking
                # min()/max() of the empty index array would raise.
                print(f"skip {os.path.basename(wav_file)}: no non-zero samples")
                continue
            # Indices come back in ascending order, so the ends are min/max.
            start = nonzero_idx[0]
            end = nonzero_idx[-1]
            sound = sound[start: end + 1]  # Remove silent sections
            my_sounds.append(sound)
            my_labels.append(k)
            print(f"sound:{os.path.basename(wav_file)} is chopped:\n   lable:{k}\n   from {start} to {end} \n   len:{(end-start)/assumed_rate}\n")
    print("--End of preparing training dataset-------------------")

    my_dataset = {'fold1': {'sounds': my_sounds, 'labels': my_labels}}
    print(f"total len of my_sounds is {len(my_sounds)}")
    print(f"total len of my_labels is {len(my_labels)}")
    npzname = dst_path.format(genDataTimeStr())
    np.savez(npzname, **my_dataset)
    print(f"npz file:{npzname}")
    return npzname

In [49]:
# Class-ID -> name table: 52 is the alarm (positive) class, 99 stands for
# all other sounds (negative class).
map_dict = {52: "alarm_positive_52", 99: "alarm_negtive_99"}

## Create Training npz file

In [50]:
# The following call is commented out because the npz file has already been created
# create_dataset(training_npz,map_dict,1);

## Create Test Src npz file

In [51]:
# create_dataset(val_src_npz,map_dict,2);

## define ValGenerator
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (58,) + inhomogeneous part.
把儲存的型態改成dtype='object'

In [57]:
class ValGenerator():
    """Turn raw validation sounds into fixed-size crops with one-hot labels.

    Every sound is run through the preprocessing chain built by
    ``preprocess_setup`` and cast to float32. Each sound gets a one-hot label
    block of shape ``(nCrops, nClasses)``. ``get_data`` returns the whole set
    as a single batch.

    Args:
        samples: Sequence of raw sounds.
        labels: Sequence of class IDs aligned with ``samples``; every ID must
            be a key of ``map_dict`` (52 or 99).
        options: Option namespace; ``inputLength``, ``nCrops`` and
            ``nClasses`` are read from it.
    """

    def __init__(self, samples, labels, options):
        # Kept for backward compatibility: reseeds the global ``random``
        # module, although nothing in this class draws random numbers.
        random.seed(42)
        print(len(samples))
        self.data = [(sample, labels[i]) for i, sample in enumerate(samples)]
        self.opt = options
        # The complete sample set is emitted as one batch.
        self.batch_size = len(samples)
        print(f"batch_size:{self.batch_size}")
        self.preprocess_funcs = self.preprocess_setup()
        # Class ID -> 1-based class number; generate() subtracts 1 to get the
        # one-hot column (52 -> column 0, 99 -> column 1).
        self.map_dict = {
            52: 1,
            99: 2,
        }

    def get_data(self):
        """Return ``(x, y)`` for the whole set.

        ``x`` is the 2-D crop matrix from ``generate`` with singleton axes
        inserted at positions 1 and 3, giving 4-D
        ``(rows, 1, crop_length, 1)``. ``y`` is the matching one-hot matrix.
        """
        x, y = self.generate()
        x = np.expand_dims(x, axis=1)
        x = np.expand_dims(x, axis=3)
        return x, y

    def generate(self):
        """Preprocess the first ``batch_size`` samples and stack the results.

        Returns:
            tuple: ``(sounds, labels)`` where ``sounds`` is a float32 array
            with one row per crop and ``labels`` holds one one-hot row per
            crop (``nCrops`` rows per sample).

        Raises:
            KeyError: If a sample's class ID is not in ``map_dict``.
            ValueError: If the preprocessed crops do not all share one shape.
        """
        sounds = []
        labels = []
        for i in range(self.batch_size):
            sound, target = self.data[i]
            class_index = self.map_dict[target] - 1
            crops = self.preprocess(sound).astype(np.float32)
            one_hot = np.zeros((self.opt.nCrops, self.opt.nClasses))
            one_hot[:, class_index] = 1
            sounds.append(crops)
            labels.append(one_hot)

        # The reshape below needs a regular 3-D array, so the crops are stacked
        # as a real float32 array. dtype="object" would only hide a ragged
        # input until the reshape fails, and it forces the saved npz to be
        # pickled.
        sounds = np.asarray(sounds, dtype=np.float32)
        labels = np.asarray(labels)
        # Merge the sample and crop axes: (samples, crops, n) -> (samples*crops, n)
        sounds = sounds.reshape(sounds.shape[0] * sounds.shape[1], sounds.shape[2])
        labels = labels.reshape(labels.shape[0] * labels.shape[1], labels.shape[2])

        return sounds, labels

    def preprocess_setup(self):
        """Build the ordered preprocessing chain: pad, normalize, multi-crop.

        The crop count comes from ``opt.nCrops`` so it always matches the
        number of label rows produced per sample in ``generate``.
        """
        # NOTE(review): the exact behaviour of U.padding / U.normalize /
        # U.multi_crop lives in common.utils - confirm there.
        return [
            U.padding(self.opt.inputLength // 2),
            U.normalize(32768.0),
            U.multi_crop(self.opt.inputLength, self.opt.nCrops),
        ]

    def preprocess(self, sound):
        """Apply every function in ``preprocess_funcs`` to ``sound`` in order."""
        for f in self.preprocess_funcs:
            sound = f(sound)

        return sound

In [58]:
def create_test_compress_npz(test_src_npz=None, dest_name=None):
    """Convert a raw validation npz into the final cropped, compressed npz.

    Loads the ``fold1`` sounds and labels from ``test_src_npz``, runs them
    through ``ValGenerator`` and stores the resulting arrays as ``x`` and
    ``y`` with ``np.savez_compressed``.

    Args:
        test_src_npz: Path of the source npz (as written by
            ``create_dataset``).
        dest_name: Output path template with one ``{}`` placeholder, which is
            filled with the timestamp from ``genDataTimeStr()``.

    Returns:
        str: The formatted destination path passed to
        ``np.savez_compressed``.
    """
    opt = getOpts()
    display_info(opt)  # shown before the overrides below are applied
    opt.batchSize = 88
    opt.nCrops = 2
    opt.nClasses = 2
    opt.sr = 20000
    opt.inputLength = 30225

    # NOTE(security): allow_pickle=True is needed because 'fold1' is a pickled
    # dict, but unpickling can execute arbitrary code - only load trusted files.
    # The context manager closes the underlying file once the data is read.
    with np.load(test_src_npz, allow_pickle=True) as dataset:
        start_time = time.perf_counter()
        # Each dataset[...] access re-reads the entry, so fetch 'fold1' once.
        fold = dataset['fold1'].item()
    sounds = fold['sounds']
    labels = fold['labels']
    print(f"len of sounds:{len(sounds)}, len of labels:{len(labels)}")
    # Second report kept so the console output stays the same; the separate
    # copies it used to describe always had these same lengths.
    print(f"len of val_sounds:{len(sounds)}, len of val_labels:{len(labels)}")

    valGen = ValGenerator(sounds, labels, opt)
    valX, valY = valGen.get_data()
    dest_name = dest_name.format(genDataTimeStr())
    np.savez_compressed(dest_name, x=valX, y=valY)
    print('compressed npz generated with\n  shape x:{}\n  y:{}\n  took {:.2f} secs'.format(valX.shape, valY.shape, time.perf_counter()-start_time))
    sys.stdout.flush()
    return dest_name

In [59]:
# Build the final validation set from a previously generated source npz.
_src_npz = "../datasets/forOneClassModel_alarm/test_val/src_npz/val_src_20240119003231.npz"
create_test_compress_npz(test_src_npz=_src_npz, dest_name=final_val_npz)

***Use device:cpu
+------------------------------+
| ACDNet_TL_Model_Extend Sound classification
+------------------------------+
| dataset  : uec_iot
| nEpochs  : 800
| LRInit   : 0.1
| schedule : [0.3, 0.5, 0.9]
| warmup   : 10
| batchSize: 32
| nFolds: 1
| Splits: [1]
+------------------------------+
len of sounds:88, len of labels:88
len of val_sounds:88, len of val_labels:88
88
batch_size:88
compressed npz generated with
  shape x:(176, 1, 30225, 1)
  y:(176, 2)
  took 1.13 secs
