In [1]:
import sys
import os
import torch

In [2]:
import glob;
import math;
import numpy as np;
import time;
from tensorflow import keras;
import tensorflow as tf

In [3]:
sys.path.append(os.path.abspath("../"))
sys.path.append(os.path.abspath("../common/"))

In [55]:
# import common.utils as U;
import common.opts as opts;
import resources.models as models;
# import resources.train_generator as train_generator;

In [56]:
from datetime import datetime
import argparse
import random

In [57]:
def genDataTimeStr():
    return datetime.today().strftime('%Y-%m-%d %H:%M:%S').replace('-',"").replace(' ',"").replace(':',"");

In [58]:
device = 'cpu'

In [59]:
retrain_src_model_path = "./retrained_models_after_pruned/retrained_model_20240124123209_acc_95.45455169677734_795th_epoch.pt"
config = torch.load(retrain_src_model_path, map_location=device)['config']

In [60]:
print(config)

[5, 32, 9, 16, 23, 33, 29, 56, 47, 65, 90, 2]


In [61]:
tf_net = models.GetAcdnetModel(input_length=30225, n_class=2, sr=20000, ch_config=config)

In [62]:
tf_net.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 1, 30225, 1)]     0         
                                                                 
 conv2d_12 (Conv2D)          (None, 1, 15109, 5)       45        
                                                                 
 batch_normalization_12 (Ba  (None, 1, 15109, 5)       20        
 tchNormalization)                                               
                                                                 
 re_lu_12 (ReLU)             (None, 1, 15109, 5)       0         
                                                                 
 conv2d_13 (Conv2D)          (None, 1, 7553, 32)       800       
                                                                 
 batch_normalization_13 (Ba  (None, 1, 7553, 32)       128       
 tchNormalization)                                         

In [136]:
tf_net.save("./keras_models/retrain_pruned_cp_weights.h5")

  saving_api.save_model(


## train the tf model and save

In [63]:
train_data = '../datasets/forOneClassModel_alarm/train/trainSet_20240119002902.npz'
test_data = '../datasets/forOneClassModel_alarm/test_val/final_val_test_npz/final_valSet_20240119004614.npz'

In [71]:
def padding(pad):
    def f(sound):
        return np.pad(sound, pad, 'constant')

    return f


def random_crop(size):
    def f(sound):
        org_size = len(sound)
        start = random.randint(0, org_size - size)
        return sound[start: start + size]

    return f


def normalize(factor):
    def f(sound):
        return sound / factor

    return f


# For strong data augmentation
# def random_scale(max_scale, interpolate='Linear'):
def random_scale(max_scale, interpolate='Nearest'):
    def f(sound):
        scale = np.power(max_scale, random.uniform(-1, 1))
        output_size = int(len(sound) * scale)
        ref = np.arange(output_size) / scale
        print("ref:",ref)
        if interpolate == 'Linear':
            ref1 = ref.astype(np.int32)
            ref2 = np.minimum(ref1 + 1, len(sound) - 1)
            r = ref - ref1
            scaled_sound = sound[ref1] * (1 - r) + sound[ref2] * r
        elif interpolate == 'Nearest':
            scaled_sound = sound[ref.astype(np.int32)]
        else:
            raise Exception('Invalid interpolation mode {}'.format(interpolate))

        return scaled_sound

    return f


def random_gain(db):
    def f(sound):
        return sound * np.power(10, random.uniform(-db, db) / 20.0)

    return f


# For testing phase
def multi_crop(input_length, n_crops):
    def f(sound):
        stride = (len(sound) - input_length) // (n_crops - 1)
        sounds = [sound[stride * i: stride * i + input_length] for i in range(n_crops)]
        return np.array(sounds)

    return f

def single_crop(input_length, n_crops):
    def f(sound):
        stride = (len(sound) - input_length) // (n_crops - 1)
        sounds = [sound[stride * i: stride * i + input_length] for i in range(n_crops)]
        return np.array(sounds)

    return f


# For BC learning
def a_weight(fs, n_fft, min_db=-80.0):
    freq = np.linspace(0, fs // 2, n_fft // 2 + 1)
    freq_sq = np.power(freq, 2)
    freq_sq[0] = 1.0
    weight = 2.0 + 20.0 * (2 * np.log10(12194) + 2 * np.log10(freq_sq)
                           - np.log10(freq_sq + 12194 ** 2)
                           - np.log10(freq_sq + 20.6 ** 2)
                           - 0.5 * np.log10(freq_sq + 107.7 ** 2)
                           - 0.5 * np.log10(freq_sq + 737.9 ** 2))
    weight = np.maximum(weight, min_db)

    return weight


def compute_gain(sound, fs, min_db=-80.0, mode='A_weighting'):
    if fs == 16000 or fs == 20000:
        n_fft = 2048
    elif fs == 44100:
        n_fft = 4096
    else:
        raise Exception('Invalid fs {}'.format(fs))
    stride = n_fft // 2

    gain = []
    #no xrange anymore supported
    for i in range(0, len(sound) - n_fft + 1, stride):
        if mode == 'RMSE':
            g = np.mean(sound[i: i + n_fft] ** 2)
        elif mode == 'A_weighting':
            spec = np.fft.rfft(np.hanning(n_fft + 1)[:-1] * sound[i: i + n_fft])
            power_spec = np.abs(spec) ** 2
            a_weighted_spec = power_spec * np.power(10, a_weight(fs, n_fft) / 10)
            g = np.sum(a_weighted_spec)
        else:
            raise Exception('Invalid mode {}'.format(mode))
        gain.append(g)

    gain = np.array(gain)
    gain = np.maximum(gain, np.power(10, min_db / 10))
    gain_db = 10 * np.log10(gain)

    return gain_db


def mix(sound1, sound2, r, fs):
    gain1 = np.max(compute_gain(sound1, fs))  # Decibel
    gain2 = np.max(compute_gain(sound2, fs))
    t = 1.0 / (1 + np.power(10, (gain1 - gain2) / 20.) * (1 - r) / r)
    sound = ((sound1 * t + sound2 * (1 - t)) / np.sqrt(t ** 2 + (1 - t) ** 2))

    return sound

# Convert time representation
def to_hms(time):
    h = int(time // 3600)
    m = int((time - h * 3600) // 60)
    s = int(time - h * 3600 - m * 60)
    if h > 0:
        line = '{}h{:02d}m'.format(h, m)
    else:
        line = '{}m{:02d}s'.format(m, s)

    return line

In [85]:
class TLGenerator():
    #Generates data for Keras
    def __init__(self, samples=None, labels=None, options=None, classes_dict=None):
        random.seed(42);
        #Initialization
        print(f"length of samples:{len(samples)}")
        self.data = np.asarray([(samples[i], labels[i]) for i in range (0, len(samples))],dtype="object");
        # print(f"self.data type is {type(self.data)}")
        self.opt = options;
        self.batch_size = options.batchSize;
        self.preprocess_funcs = self.preprocess_setup();
        self.mapdict = classes_dict

    def __len__(self):
        #Denotes the number of batches per epoch
        return int(np.floor(len(self.data) / self.batch_size));
        #return len(self.samples);

    def __getitem__(self, batchIndex):
        #Generate one batch of data
        batchX, batchY = self.generate_batch(batchIndex);
        batchX = np.expand_dims(batchX, axis=1);
        batchX = np.expand_dims(batchX, axis=3);
        return batchX, batchY

    def generate_batch(self, batchIndex):
        #Generates data containing batch_size samples
        sounds = [];
        labels = [];
        indexes = None;
        for i in range(self.batch_size):
            # Training phase of BC learning
            # Select two training examples
            while True:
                sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
                sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
                if label1 != label2:
                    break
            print(len(sound1))
            sound1 = self.preprocess(np.asarray(sound1))
            sound2 = self.preprocess(np.asarray(sound2))

            # Mix two examples
            r = np.array(random.random())
            sound = mix(sound1, sound2, r, self.opt.sr).astype(np.float32)
            # print(f"sound length after U.mix is {len(sound)}")
            # print(f"nClasses:{self.opt.nClasses}, type of mapdict:{type(self.mapdict)}, type of label1:{type(label1)}")
            eye = np.eye(self.opt.nClasses)
            idx1 = self.mapdict[label1]- 1
            idx2 = self.mapdict[label2] - 1
            label = (eye[idx1] * r + eye[idx2] * (1 - r)).astype(np.float32)
            # label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)

            #For stronger augmentation
            sound = random_gain(6)(sound).astype(np.float32)
            # print(f"sound length after U.random_gain is {len(sound)}")
            sounds.append(sound);
            labels.append(label);

        sounds = np.asarray(sounds);
        labels = np.asarray(labels);
        print(f"batchIndex is {batchIndex}, total sounds is {len(sounds)}")
        print(f"labels in generate_batch is:\n{labels}")

        return sounds, labels;

    # def preprocess_setup(self):
    #     funcs = []
    #     if self.opt.strongAugment:
    #         funcs += [U.random_scale(1.25)]

    #     funcs += [U.padding(self.opt.inputLength // 2),
    #               U.random_crop(self.opt.inputLength),
    #               U.normalize(32768.0)]
    #     return funcs
    def preprocess_setup(self):
        funcs = []
        if self.opt.strongAugment:
            funcs += [random_scale(1.25)]

        funcs += [padding(self.opt.inputLength // 2),
                  random_crop(self.opt.inputLength),
                  normalize(32768.0)]
        return funcs

    def preprocess(self, sound):
        for f in self.preprocess_funcs:
            sound = f(sound)

        return sound;

In [97]:
def getTrainGen(opt=None, split=None, classes_dict=None):
    # dataset = np.load(os.path.join(opt.data, opt.dataset, 'wav{}.npz'.format(opt.sr // 1000)), allow_pickle=True);
    # dataset = np.load("../datasets/fold1_test16000.npz", allow_pickle=True);
    dataset = np.load(opt.trainData, allow_pickle=True);
    train_sounds = []
    train_labels = []
    # print(len(dataset['x']))
    # for i in range(1, opt.nFolds + 1):

    train_sounds = [list(dataset['fold1'].item()['sounds'][i]) for i in range(len(dataset['fold1'].item()['sounds']))]
    train_labels = dataset['fold{}'.format(1)].item()['labels']#[list(dataset['fold1'].item()['labels'][i]) for i in range(len(dataset['fold1'].item()['labels']))]
    # print(train_sounds)
    # print(train_labels)
    # train_sounds = dataset['fold{}'.format(1)].item()['sounds']
    # train_labels = dataset['fold{}'.format(1)].item()['labels']
    # print(train_labels)

    # trainGen = TLGenerator(train_sounds, train_labels, opt, classes_dict=classes_dict);
    return train_sounds, train_labels

In [132]:
class Trainer:
    def __init__(self, net=None, opt=None, classes_dict=None):
        self.opt = opt;
        # self.trainGen = train_generator.setup(self.opt, self.opt.split);
        self.model = net
        _x, _y = getTrainGen(opt=opt, split=2,classes_dict=classes_dict)
        # self.x, self.y = getTrainGen(opt=opt, split=2,classes_dict=classes_dict)
        input_x = []
        for i in range(11):
            tmp_x = _x[i][:30225]
            tmp_x = np.array(tmp_x).astype('float32')
            tmp_x = np.expand_dims(tmp_x, axis=1);
            tmp_x = np.expand_dims(tmp_x, axis=3);
            input_x.append(tf.convert_to_tensor(tmp_x))
        
        self.x = input_x
        self.y = tf.convert_to_tensor(_y[:11])
        print(self.x)
        print(self.y)
        # print(f"self.x type is {type(self.x[0])}")
        # self.trainGen = getTrainGen(opt=opt, split=2,classes_dict=classes_dict)
        
    def Train(self):
        # model = models.GetAcdnetModel();
        # if(self.trainGen != None):
        #     print("train data generator loaded")
        # else:
        #     print("train data generator load fail")
        self.model.summary();

        loss = 'kullback_leibler_divergence';
        optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=self.opt.LR,decay=self.opt.weightDecay)
        # optimizer = keras.optimizers.SGD(learning_rate=self.opt.LR, weight_decay=self.opt.weightDecay, momentum=self.opt.momentum, nesterov=True)

        self.model.compile(loss=loss, optimizer=optimizer , metrics=['accuracy']);

        # learning schedule callback
        lrate = keras.callbacks.LearningRateScheduler(self.GetLR);
        best_model = keras.callbacks.ModelCheckpoint('./tensorflow_models/retrain_pruned_model_copy_weights/{}_{}.h5'.format(self.opt.model_name, genDataTimeStr()), monitor='val_acc', save_best_only=True, verbose=0);
        custom_evaluator = CustomCallback(self.opt);
        callbacks_list = [lrate, custom_evaluator, best_model];

        # self.model.fit(self.trainGen, epochs=self.opt.nEpochs, steps_per_epoch=len(self.trainGen.data)//self.trainGen.batch_size, callbacks=callbacks_list, verbose=0);
        self.model.fit(self.x,self.y, epochs=self.opt.nEpochs, steps_per_epoch=len(self.x)//5, callbacks=callbacks_list, verbose=0);

    def GetLR(self, epoch):
        divide_epoch = np.array([self.opt.nEpochs * i for i in self.opt.schedule]);
        decay = sum(epoch > divide_epoch);
        if epoch <= self.opt.warmup:
            decay = 1;
        return self.opt.LR * np.power(0.1, decay);

class CustomCallback(keras.callbacks.Callback):
    def __init__(self, opt):
        self.opt = opt;
        self.testX = None;
        self.testY = None;
        self.curEpoch = 0;
        self.curLr = opt.LR;
        self.cur_epoch_start_time = time.time();
        self.bestAcc = 0.0;
        self.bestAccEpoch = 0;

    def on_epoch_begin(self, epoch, logs=None):
        self.curEpoch = epoch+1;
        self.curLr = Trainer(self.opt).GetLR(epoch+1);
        self.cur_epoch_start_time = time.time();

    def on_epoch_end(self, epoch, logs=None):
        train_time = time.time() - self.cur_epoch_start_time;
        self.load_test_data();
        val_acc, val_loss = self.validate(self.model);
        logs['val_acc'] = val_acc;
        logs['val_loss'] = val_loss;
        if val_acc > self.bestAcc:
            self.bestAcc = val_acc;
            self.bestAccEpoch = epoch + 1;
        epoch_time = time.time() - self.cur_epoch_start_time;
        val_time = epoch_time - train_time;
        # print(logs);
        line = 'SP-{}, Epoch: {}/{} | Time: {} (Train {}  Val {}) | Train: LR {}  Loss {:.2f}  Acc {:.2f}% | Val: Loss {:.2f}  Acc(top1) {:.2f}% | HA {:.2f}@{}\n'.format(
            self.opt.split, epoch+1, self.opt.nEpochs, to_hms(epoch_time), to_hms(train_time), to_hms(val_time),
            self.curLr, logs['loss'], logs['accuracy'] if 'accuracy' in logs else logs['acc'], val_loss, val_acc, self.bestAcc, self.bestAccEpoch);
        # print(line)
        sys.stdout.write(line);
        sys.stdout.flush();

    def load_test_data(self):
        if self.testX is None:
            data = np.load(test_data, allow_pickle=True);
            self.testX = data['x'];
            self.testY = data['y'];

    def validate(self, model):
        y_pred = None;
        y_target = None;
        batch_size = (self.opt.batchSize//self.opt.nCrops)*self.opt.nCrops;
        for batchIndex in range(math.ceil(len(self.testX) / batch_size)):
            x = self.testX[batchIndex*batch_size : (batchIndex+1)*batch_size];
            y = self.testY[batchIndex*batch_size : (batchIndex+1)*batch_size];
            scores = model.predict(x, batch_size=len(y), verbose=0);
            y_pred = scores if y_pred is None else np.concatenate((y_pred, scores));
            y_target = y if y_target is None else np.concatenate((y_target, y));
            #break;

        acc, loss = self.compute_accuracy(y_pred, y_target);
        return acc, loss;

    #Calculating average prediction (10 crops) and final accuracy
    def compute_accuracy(self, y_pred, y_target):
        #Reshape y_pred to shape it like each sample comtains 10 samples.
        if self.opt.nCrops > 1:
            y_pred = (y_pred.reshape(y_pred.shape[0]//self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])).mean(axis=1);
            y_target = (y_target.reshape(y_target.shape[0]//self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(axis=1);

        loss = keras.losses.KLD(y_target, y_pred).numpy().mean();

        #Get the indices that has highest average value for each sample
        y_pred = y_pred.argmax(axis=1);
        y_target = y_target.argmax(axis=1);
        accuracy = (y_pred==y_target).mean()*100;

        return accuracy, loss;

In [133]:
def getOpts():
    parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet');
    parser.add_argument('--netType', default='ACDNet_TL_Model_Extend',  required=False);
    parser.add_argument('--data', default='../datasets/processed/',  required=False);
    parser.add_argument('--dataset', required=False, default='uec_iot', choices=['10']);
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning');
    parser.add_argument('--strongAugment', default=True,  action='store_true', help='Add scale and gain augmentation');
    #在ipynb中，不能使用parser.parse，要改用parser.parse_known_args()
    opt, unknown = parser.parse_known_args();
    
    #Leqarning settings
    opt.batchSize = 5;
    opt.LR = 0.1;
    opt.weightDecay = 5e-2#9e-3;#5e-3;#5e-2;#1e-2;#5e-4;
    opt.momentum = 0.09;
    opt.nEpochs = 20;
    opt.schedule = [0.3, 0.6, 0.9];
    opt.warmup = 10;
    # if torch.backends.mps.is_available():
    #     opt.device="mps"; #for apple m2 gpu
    # elif torch.cuda.is_available():
    #     opt.device="cuda:0"; #for nVidia gpu
    # else:
    opt.device="cpu"
    print(f"***Use device:{opt.device}");
    # opt.device = torch.device("cuda:0" if  else "cpu");
    #Basic Net Settings
    opt.nClasses = 2#50;
    opt.nFolds = 1;
    opt.splits = [i for i in range(1, opt.nFolds + 1)];
    opt.sr = 20000;
    opt.inputLength = 30225;
    #Test data
    opt.nCrops = 2;
    opt.TLAcdnetConfig = [8,64,32,64,64,128,128,256,256,512,512,2];
    return opt
    # opt = parser.parse_args();

In [134]:
def main():
    map_dict_train = {
        52:1, #alarm
        99:2, #other_sounds
    };
    opt = getOpts()
    opt.model_name = "retrain_pruned_tfmodel_cp_weights"
    opt.sr = 20000;
    opt.inputLength = 30225;
    opt.trainer = None
    opt.trainData="../datasets/forOneClassModel_alarm/train/trainSet_20240119002902.npz";
    opt.testData="../datasets/forOneClassModel_alarm/test_val/final_val_test_npz/final_valSet_20240119004614.npz";
    trainer = Trainer(net=tf_net, opt=opt,classes_dict=map_dict_train)
    trainer.Train()

In [135]:
main()

***Use device:cpu


AxisError: axis 3 is out of bounds for array of dimension 3

In [45]:
dt = np.load(train_data, allow_pickle=True);


In [48]:
# print(dt['fold1'].item()['sounds'])
listdt = [list(dt['fold1'].item()['sounds'][i]) for i in range(len(dt['fold1'].item()['sounds']))]

In [81]:
s = random_scale(1.25)

In [84]:
s(np.asarray(listdt[0]))

ref: [0.00000000e+00 1.23348739e+00 2.46697479e+00 ... 9.99951225e+04
 9.99963560e+04 9.99975895e+04]


array([-100, -177, -185, ...,  134, -239,  243], dtype=int16)