In [3]:
import sys;
import os;
import glob;
import math;
import random;
import torch;
import torch.nn as nn;
import numpy as np
import copy

In [4]:
print(torch.__version__)

2.1.1+cu121


In [5]:
import common.opts as opts;
# import th.resources.models as models;
import th.resources.calculator as calc;
# import resources.train_generator as train_generator;

In [6]:
import argparse
# import common.tlopts as tlopts

In [7]:
import common.utils as U;
from torch.quantization import QuantStub, DeQuantStub
from tinynn.converter import TFLiteConverter

In [8]:
from datetime import datetime;

In [9]:
seed = 42;
random.seed(seed);
np.random.seed(seed);
torch.manual_seed(seed);
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed);
torch.backends.cudnn.deterministic = True;
torch.backends.cudnn.benchmark = False;

In [10]:
def genDataTimeStr():
    return datetime.today().strftime('%Y-%m-%d %H:%M:%S').replace('-',"").replace(' ',"").replace(':',"");

In [11]:
"""
[8,64,32,64,64,128,128,256,256,512,512,2]
"""

'\n[8,64,32,64,64,128,128,256,256,512,512,2]\n'

In [24]:
class Customed_ACDNetV2(nn.Module):
    def __init__(self, input_length, n_class, sr, ch_conf=None):
        super(Customed_ACDNetV2, self).__init__();
        self.input_length = input_length;
        self.ch_config = ch_conf;

        stride1 = 2;
        stride2 = 2;
        channels = 8;
        k_size = (3, 3);
        n_frames = (sr/1000)*10; #No of frames per 10ms

        sfeb_pool_size = int(n_frames/(stride1*stride2));
        # tfeb_pool_size = (2,2);
        if self.ch_config is None:
            self.ch_config = [channels, channels*8, channels*4, channels*8, channels*8, channels*16, channels*16, channels*32, channels*32, channels*64, channels*64, n_class];
        # avg_pool_kernel_size = (1,4) if self.ch_config[1] < 64 else (2,4);
        fcn_no_of_inputs =  n_class #self.ch_config[-1];
        ch_confing_10 = 512 #8 * 64
        ch_n_class = n_class
        conv1, bn1 = self.make_layers(1, self.ch_config[0], (1, 9), (1, stride1));
        conv2, bn2 = self.make_layers(self.ch_config[0], self.ch_config[1], (1, 5), (1, stride2));
        conv3, bn3 = self.make_layers(1, self.ch_config[2], k_size, padding=1);
        conv4, bn4 = self.make_layers(self.ch_config[2], self.ch_config[3], k_size, padding=1);
        conv5, bn5 = self.make_layers(self.ch_config[3], self.ch_config[4], k_size, padding=1);
        conv6, bn6 = self.make_layers(self.ch_config[4], self.ch_config[5], k_size, padding=1);
        conv7, bn7 = self.make_layers(self.ch_config[5], self.ch_config[6], k_size, padding=1);
        conv8, bn8 = self.make_layers(self.ch_config[6], self.ch_config[7], k_size, padding=1);
        conv9, bn9 = self.make_layers(self.ch_config[7], self.ch_config[8], k_size, padding=1);
        conv10, bn10 = self.make_layers(self.ch_config[8], self.ch_config[9], k_size, padding=1);
        conv11, bn11 = self.make_layers(self.ch_config[9], self.ch_config[10], k_size, padding=1);
        conv12, bn12 = self.make_layers(ch_confing_10, ch_n_class, (1, 1));
        fcn = nn.Linear(fcn_no_of_inputs, ch_n_class);
        nn.init.kaiming_normal_(fcn.weight, nonlinearity='sigmoid') # kaiming with sigoid is equivalent to lecun_normal in keras

        self.sfeb = nn.Sequential(
            #Start: Filter bank
            conv1, bn1, nn.ReLU(),\
            conv2, bn2, nn.ReLU(),\
            nn.MaxPool2d(kernel_size=(1, sfeb_pool_size))
        );

        tfeb_modules = [];
        self.tfeb_width = int(((self.input_length / sr)*1000)/10); # 10ms frames of audio length in seconds
        tfeb_pool_sizes = self.get_tfeb_pool_sizes(self.ch_config[1], self.tfeb_width);
        p_index = 0;
        for i in [3,4,6,8,10]:
            tfeb_modules.extend([eval('conv{}'.format(i)), eval('bn{}'.format(i)), nn.ReLU()]);

            if i != 3:
                tfeb_modules.extend([eval('conv{}'.format(i+1)), eval('bn{}'.format(i+1)), nn.ReLU()]);

            h, w = tfeb_pool_sizes[p_index];
            if h>1 or w>1:
                tfeb_modules.append(nn.MaxPool2d(kernel_size = (h,w)));
            p_index += 1;

        tfeb_modules.append(nn.Dropout(0.2));
        tfeb_modules.extend([conv12, bn12, nn.ReLU()]);
        h, w = tfeb_pool_sizes[-1];
        if h>1 or w>1:
            tfeb_modules.append(nn.AvgPool2d(kernel_size = (2,4)));
        tfeb_modules.extend([nn.Flatten(), fcn]);

        self.tfeb = nn.Sequential(*tfeb_modules);

        self.output = nn.Sequential(
            nn.Softmax(dim=1)
        );
        

    def forward(self, x):
        x = self.sfeb(x);
        #swapaxes
        x = x.permute((0, 2, 1, 3));
        x = self.tfeb(x);
        y = self.output[0](x);
        return y;

    def make_layers(self, in_channels, out_channels, kernel_size, stride=(1,1), padding=0, bias=False):
        conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias);
        nn.init.kaiming_normal_(conv.weight, nonlinearity='relu'); # kaiming with relu is equivalent to he_normal in keras
        bn = nn.BatchNorm2d(out_channels);
        return conv, bn;

    def get_tfeb_pool_sizes(self, con2_ch, width):
        h = self.get_tfeb_pool_size_component(con2_ch);
        w = self.get_tfeb_pool_size_component(width);
        # print(w);
        pool_size = [];
        for  (h1, w1) in zip(h, w):
            pool_size.append((h1, w1));
        return pool_size;

    def get_tfeb_pool_size_component(self, length):
        # print(length);
        c = [];
        index = 1;
        while index <= 6:
            if length >= 2:
                if index == 6:
                    c.append(length);
                else:
                    c.append(2);
                    length = length // 2;
            else:
               c.append(1);

            index += 1;

        return c;

def GetCustomedACDNetModel(input_len=30225, nclass=2, sr=20000, channel_config=None):
    net = Customed_ACDNetV2(input_len, nclass, sr, ch_conf=channel_config);
    return net;

In [2]:
class TLGenerator():
    #Generates data for Keras
    def __init__(self, samples, labels, options):
        random.seed(42);
        #Initialization
        print(f"length of samples:{len(samples)}")
        self.data = [(samples[i], labels[i]) for i in range (0, len(samples))];
        self.opt = options;
        self.batch_size = options.batchSize;
        self.preprocess_funcs = self.preprocess_setup();
        self.mapdict = dict([(52,1),(99,2)])

    def __len__(self):
        #Denotes the number of batches per epoch
        return int(np.floor(len(self.data) / self.batch_size));
        #return len(self.samples);

    def __getitem__(self, batchIndex):
        #Generate one batch of data
        batchX, batchY = self.generate_batch(batchIndex);
        batchX = np.expand_dims(batchX, axis=1);
        batchX = np.expand_dims(batchX, axis=3);
        return batchX, batchY

    def generate_batch(self, batchIndex):
        #Generates data containing batch_size samples
        sounds = [];
        labels = [];
        indexes = None;
        for i in range(self.batch_size):
            # Training phase of BC learning
            # Select two training examples
            while True:
                sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
                sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
                if label1 != label2:
                    break
            sound1 = self.preprocess(sound1)
            sound2 = self.preprocess(sound2)

            # Mix two examples
            r = np.array(random.random())
            sound = U.mix(sound1, sound2, r, self.opt.sr).astype(np.float32)
            # print(f"sound length after U.mix is {len(sound)}")
            eye = np.eye(self.opt.nClasses)
            idx1 = self.mapdict[label1]- 1
            idx2 = self.mapdict[label2] - 1
            label = (eye[idx1] * r + eye[idx2] * (1 - r)).astype(np.float32)
            # label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)

            #For stronger augmentation
            sound = U.random_gain(6)(sound).astype(np.float32)
            # print(f"sound length after U.random_gain is {len(sound)}")
            sounds.append(sound);
            labels.append(label);

        sounds = np.asarray(sounds);
        labels = np.asarray(labels);
        # print(f"labels in generate_batch is:\n{labels}")

        return sounds, labels;

    def preprocess_setup(self):
        funcs = []
        if self.opt.strongAugment:
            funcs += [U.random_scale(1.25)]

        funcs += [U.padding(self.opt.inputLength // 2),
                  U.random_crop(self.opt.inputLength),
                  U.normalize(32768.0)]
        return funcs

    def preprocess(self, sound):
        for f in self.preprocess_funcs:
            sound = f(sound)

        return sound;

In [26]:
def getTrainGen(opt=None, split=None):
    # dataset = np.load(os.path.join(opt.data, opt.dataset, 'wav{}.npz'.format(opt.sr // 1000)), allow_pickle=True);
    # dataset = np.load("../datasets/fold1_test16000.npz", allow_pickle=True);
    dataset = np.load("./datasets/forOneClassModel_alarm/train_test_npz/trainData_20240108153111.npz", allow_pickle=True);
    train_sounds = []
    train_labels = []
    # print(len(dataset['x']))
    # for i in range(1, opt.nFolds + 1):

    # train_sounds = [dataset['x'][i][0] for i in range(len(dataset['x']))]
    # train_labels = [dataset['y'][i][0] for i in range(len(dataset['y']))]
    train_sounds = dataset['fold{}'.format(1)].item()['sounds']
    train_labels = dataset['fold{}'.format(1)].item()['labels']
    # print(train_sounds)

    trainGen = TLGenerator(train_sounds, train_labels, opt);
    return trainGen

In [37]:
def getOpts():
    parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet');
    parser.add_argument('--netType', default='TLACDNet',  required=False);
    parser.add_argument('--data', default='./datasets/forOneClassModel_alarm/train_test_npz/',  required=False);
    parser.add_argument('--dataset', required=False, default='uec_iot', choices=['10']);
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning');
    parser.add_argument('--strongAugment', default=True,  action='store_true', help='Add scale and gain augmentation');
    #在ipynb中，不能使用parser.parse，要改用parser.parse_known_args()
    opt, unknown = parser.parse_known_args()
    #Leqarning settings
    opt.batchSize = 32;
    opt.weightDecay = 5e-3;
    opt.momentum = 0.09;
    opt.nEpochs = 800;#2000;
    opt.LR = 0.1;
    opt.schedule = [0.3, 0.6, 0.9];
    opt.warmup = 10;

    #Basic Net Settings
    opt.nClasses = 2#50;
    opt.nFolds = 1;#5;
    opt.split = 1#[i for i in range(1, opt.nFolds + 1)];
    opt.sr = 20000;
    opt.inputLength = 30225;
    #Test data
    opt.nCrops = 2;
    opt.ch_config = [8,64,32,64,64,128,128,256,256,512,512,2];
    return opt

In [12]:
class Trainer:
    def __init__(self, opt=None, split=0):
        self.opt = opt;
        self.testX = None;
        self.testY = None;
        self.trainX = None;
        self.trainY = None;
        # self.opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu");
        self.opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu");
        self.trainGen = getTrainGen(self.opt)#train_generator.setup(self.opt, self.opt.split);
        self.qunt_nClass = 2;

    def load_train_data(self):
        print('Preparing calibration dataset..');
        x,y = self.trainGen.__getitem__(0);
        self.trainX = torch.tensor(np.moveaxis(x, 3, 1)).to(self.opt.device);
        """
        trainX size:torch.Size([1, 1, 30225]), but must be [1,1,1,30225]
        Due to the reason: raise ValueError("Input shape must be `(N, C, H, W)`!")
        """
        # print(f"trainX[0] shape:{self.trainX[0].shape}")
        self.trainY = torch.tensor(y).to(self.opt.device);
        print('Calibration dataset is ready');
        # self.opt.batchSize = 32;

    def load_test_data(self):
        if(self.testX is None):
            data = np.load('./datasets/forOneClassModel_alarm/train_test_npz/compressed_val_npz_20240109163736.npz', allow_pickle=True);
            dataX = np.moveaxis(data['x'], 3, 1).astype(np.float32);
            self.testX = torch.tensor(dataX).to("cpu");
            self.testY = torch.tensor(data['y']).to("cpu");
            # self.testX = torch.tensor(dataX).to(self.opt.device);
            # self.testY = torch.tensor(data['y']).to(self.opt.device);

    def __validate(self, net, testX, testY):
        net.eval();
        with torch.no_grad():
            y_pred = None;
            batch_size = len(self.testX);
            x = self.testX[:];
            
            # batch_size = (self.opt.batchSize//self.opt.nCrops)*self.opt.nCrops;
            # for idx in range(math.ceil(len(self.testX)/batch_size)):
            #     x = self.testX[idx*batch_size : (idx+1)*batch_size];
            #     #print(x.shape);
            #     # exit();
            #     scores = net(x);
            #     y_pred = scores.data if y_pred is None else torch.cat((y_pred, scores.data));
            # print(f"type of x is {type(x)}")
            x = x.to("cpu");
            scores = net(x);
            y_pred = scores.data if y_pred is None else torch.cat((y_pred, scores.data));
            y_pred = y_pred.to("cpu")
            acc = self.__compute_accuracy(y_pred, self.testY);
        return acc;

    #Calculating average prediction (10 crops) and final accuracy
    def __compute_accuracy(self, y_pred, y_target):
        print(y_pred.shape);
        with torch.no_grad():
            y_pred = (y_pred.reshape(y_pred.shape[0]//self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])).mean(dim=1);
            y_target = (y_target.reshape(y_target.shape[0]//self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(dim=1);

            y_pred = y_pred.argmax(dim=1);
            y_target = y_target.argmax(dim=1);

            acc = (((y_pred==y_target)*1).float().mean()*100).item();
        return acc;

    def __load_model(self, quant=False):
        state = torch.load(self.opt.model_path, map_location=self.opt.device);
        
        net = None;
        if quant:
            net = models.GetACDNetQuantModel(input_len=self.opt.inputLength, nclass=self.qunt_nClass, sr=self.opt.sr, channel_config=state['config']).to(self.opt.device);
        else:
            # net = models.GetACDNetModel(input_len=self.opt.inputLength, nclass=self.qunt_nClass, sr=self.opt.sr, channel_config=self.opt.ch_config).to(self.opt.device);
            net = GetCustomedACDNetModel()#GetTLACDNet(opt=self.opt);
        calc.summary(net, (1,1,self.opt.inputLength));
        net.load_state_dict(state['weight']);
        return net;

    def __calibrate(self, net):
        self.load_train_data();
        net.eval();
        with torch.no_grad():
            for i in range(1,2):
                x_pred = None;
                for idx in range(math.ceil(len(self.trainX)/self.opt.batchSize)):
                    x = self.trainX[idx*self.opt.batchSize : (idx+1)*self.opt.batchSize];
                    #print(x.shape);
                    # exit();
                    scores = net(x);
                    x_pred = scores.data if x_pred is None else torch.cat((x_pred, scores.data));

                x_pred = x_pred.argmax(dim=1);
                x_target = self.trainY.argmax(dim=1);

                acc = (((x_pred==x_target)*1).float().mean()*100).item();
                print('calibrate accuracy is: {:.2f}'.format(acc));
        return acc;

    def QuantizeModel(self):
        # net = self.__load_model(True);
        net = self.__load_model(False);
        config = net.ch_config;
        net.eval();

        #Fuse modules to
        torch.quantization.fuse_modules(net.sfeb, ['0','1','2'], inplace=True);
        torch.quantization.fuse_modules(net.sfeb, ['3','4','5'], inplace=True);

        torch.quantization.fuse_modules(net.tfeb, ['0','1','2'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['4','5','6'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['7','8','9'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['11','12','13'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['14','15','16'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['18','19','20'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['21','22','23'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['25','26','27'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['28','29','30'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['33','34','35'], inplace=True);

        # Specify quantization configuration
        net.qconfig = torch.quantization.get_default_qconfig('qnnpack');
        torch.backends.quantized.engine = 'qnnpack';
        print(net.qconfig);

        torch.quantization.prepare(net, inplace=True);

        # Calibrate with the training data
        self.__calibrate(net);

        # Convert to quantized model
        torch.quantization.convert(net, inplace=True);
        print('Post Training Quantization: Convert done');

        print("Size of model after quantization");
        torch.save(net.state_dict(), "temp.p")
        print('Size (MB):', os.path.getsize("temp.p")/1e6)
        os.remove('temp.p')
        # dummy_input = torch.randn(1, 1, 30225, 1); wrong: RuntimeError: quantized::conv2d (qnnpack): each dimension of output tensor should be greater than 0.
        dummy_input = torch.randn(1, 1, 1, 30225); #correct,workable
        # dummy_input = torch.randn(30225,1,1,1); wrong: RuntimeError: quantized::conv2d (qnnpack): each dimension of output tensor should be greater than 0.
        # dummy_input = torch.randn(1,30225,1,1); wrong:RuntimeError: Input channel size of weight and bias must match.
        converter = TFLiteConverter(net,
                                    dummy_input,
                                    tflite_path="./th/quant_only_models/{}.tflite".format(self.opt.model_name))
        converter.convert()
        self.load_test_data();
        val_acc = self.__validate(net, self.testX, self.testY);
        print('Testing: Acc(top1) {:.2f}%'.format(val_acc));

        torch.jit.save(torch.jit.script(net), '{}/th/quantized_models/{}.pt'.format(os.getcwd(), self.opt.model_name.format()));
        # net.cpu();
        # net.eval();
        
        
    def TestModel(self, quant=False):
        if quant:
            net = torch.jit.load(os.getcwd() + '/th/quantized_models/' + self.opt.model_name + '.pt')
        else:
            net = self.__load_model();
            # calc.summary(net, (1,1,self.opt.inputLength));
        self.load_test_data();
        net.eval();
        val_acc = self.__validate(net, self.testX, self.testY);
        print('Testing: Acc(top1) {:.2f}%'.format(val_acc));

    def GetModelSize(self):
        orig_net_path = self.opt.model_path;
        print('Full precision model size (KB):', os.path.getsize(orig_net_path)/(1024));
        save_onnx_name = "20khz_quant_only_acc_96.55_{}.onnx".format(genDataTimeStr());
        quant_net_path = os.getcwd()+"/"+save_onnx_name;
        print('Quantized model size (KB):', os.path.getsize(quant_net_path)/(1024))

SyntaxError: invalid syntax (2303418543.py, line 78)

In [13]:
# to_convert_model_path = "./th/pruned_models/second_stage_pruned_models/magnitude_pruning/acdnet_tl_hybrid_pruning_magnitude_model_202312281149_80.pt"

In [64]:
def main():
    opt = getOpts();#opts.parse();
    opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu");
    opt.model_path = "./th/trained_models/current_best/acdnet_alarm_20240109012514_acc_96.55171966552734_278th_epoch.pt"
    # valid_path = False;
    # while not valid_path:
    #     model_path = input("Enter the model PATH for 8-bit post training quantization\n:");
    #     file_paths = glob.glob(os.path.join(os.getcwd(), model_path));
    #     if len(file_paths)>0 and os.path.isfile(file_paths[0]):
    #         state = torch.load(file_paths[0], map_location='cpu');
    #         opt.model_path = file_paths[0];
    #         print('Model has been found at: {}'.format(opt.model_path));
    #         valid_path = True;

    opt.model_name = "model_96_55acc_no_pruning_{}".format(genDataTimeStr());
    # valid_model_name = False;
    # while not valid_model_name:
    #     model_name = input('Enter a name that will be used to save the quantized model model: ');
    #     if model_name != '':
    #         opt.model_name = model_name;
    #         valid_model_name = True;
    opt.split = 1;
    opt.hasQuated = False;
    trainer = Trainer(opt);

    print('Testing performance of the provided model.....');
    trainer.TestModel();

    print('Quantization process is started.....');
    trainer.QuantizeModel();
    print('Quantization done');

    print('Testing quantized model.');
    trainer.TestModel(True);
    print('Finished');

In [65]:
main()

length of samples:167
Testing performance of the provided model.....
+----------------------------------------------------------------------------+
+                           Pytorch Model Summary                            +
------------------------------------------------------------------------------
   Layer (type)       Input Shape      Output Shape    Param #      FLOPS #
       Conv2d-1     (1, 1, 30225)     (8, 1, 15109)         72    1,087,848
  BatchNorm2d-2     (8, 1, 15109)     (8, 1, 15109)         16            0
         ReLu-3     (8, 1, 15109)     (8, 1, 15109)          0      120,872
       Conv2d-4     (8, 1, 15109)     (64, 1, 7553)      2,560   19,335,680
  BatchNorm2d-5     (64, 1, 7553)     (64, 1, 7553)        128            0
         ReLu-6     (64, 1, 7553)     (64, 1, 7553)          0      483,392
    MaxPool2d-7     (64, 1, 7553)      (64, 1, 151)          0      483,200
      Permute-8      (64, 1, 151)      (1, 64, 151)          0            0
       Con

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [None]:
# opt2 = getOpts();#opts.parse();
# state2 = torch.load(to_convert_model_path, map_location="cuda:0");
# tmpnet = models.GetACDNetModel(input_len=opt2.inputLength, nclass=6, sr=20000, channel_config=state2['config']).to("cuda:0");
# tmpnet.load_state_dict(state2['weight']);
# print(tmpnet)

In [33]:
# quantnet = torch.jit.load('./th/quantized_models/acdnet_tl_quant_model_202312281348_80.pt')
# print(quantnet)