In [13]:
import sys;
import os;
import glob;
import math;
import random;
import torch;
import torch.nn as nn;
import numpy as np
import copy

In [14]:
# Print the installed PyTorch version so it is recorded next to the results.
print(torch.__version__)

2.1.2


In [15]:
# Make the parent of the current working directory importable so that the
# `common` and `th` packages imported below can be resolved. The membership
# check keeps the cell idempotent: re-running it does not append duplicates.
_repo_root = os.path.abspath("../")
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

In [16]:
import common.opts as opts;
import th.resources.models as models;
import th.resources.calculator as calc;
# import resources.train_generator as train_generator;

In [17]:
import argparse
# import common.tlopts as tlopts

In [18]:
import common.utils as U;
from torch.quantization import QuantStub, DeQuantStub
from tinynn.converter import TFLiteConverter
from tinynn.graph.quantization.quantizer import PostQuantizer
from tinynn.graph.tracer import model_tracer
from tinynn.util.train_util import DLContext, get_device
from tinynn.graph.quantization.algorithm.cross_layer_equalization import cross_layer_equalize
from tinynn.converter import TFLiteConverter

In [19]:
from datetime import datetime;

In [20]:
# Seed every random number generator used in this notebook with the same value.
seed = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)

# Request deterministic cuDNN behaviour and switch off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [21]:
def genDataTimeStr():
    """Return the current local date and time as a compact ``YYYYMMDDHHMMSS`` string."""
    return datetime.today().strftime('%Y%m%d%H%M%S')

In [22]:
class TLGenerator():
    """Keras-style batch generator that yields mixed ("BC learning") training batches.

    Each generated example is a mix of two randomly drawn samples that carry
    different labels; the target is the matching soft label built from the mix
    ratio. Indexing the generator returns ``(batchX, batchY)`` where ``batchX``
    has two singleton axes inserted (positions 1 and 3).

    Args:
        samples: indexable collection of raw sounds.
        labels: indexable collection of labels, parallel to ``samples``. Labels
            must be keys of ``self.mapdict``.
        options: namespace providing ``batchSize``, ``nClasses``, ``sr``,
            ``inputLength`` and ``strongAugment``.
    """

    def __init__(self, samples, labels, options):
        # NOTE: reseeds the *global* `random` module every time a generator is built.
        random.seed(42)
        print(f"length of samples:{len(samples)}")
        self.data = [(samples[i], labels[i]) for i in range(0, len(samples))]
        self.opt = options
        self.batch_size = options.batchSize
        self.preprocess_funcs = self.preprocess_setup()
        # Maps raw dataset labels to 1-based class ids (converted to 0-based on use).
        self.mapdict = dict([(52, 1), (99, 2)])

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.data) / self.batch_size))

    def __getitem__(self, batchIndex):
        """Generate one batch and add the two singleton axes expected downstream."""
        batchX, batchY = self.generate_batch(batchIndex)
        batchX = np.expand_dims(batchX, axis=1)
        batchX = np.expand_dims(batchX, axis=3)
        return batchX, batchY

    def generate_batch(self, batchIndex):
        """Build ``batch_size`` mixed examples.

        ``batchIndex`` is accepted for interface compatibility but not used:
        every call draws fresh random pairs from the whole dataset.

        Returns:
            ``(sounds, labels)`` as numpy arrays.

        Raises:
            ValueError: if the dataset holds fewer than two distinct labels, in
                which case a pair with different labels can never be drawn.
        """
        # Without this guard the rejection-sampling loop below would never end.
        if len({label for _, label in self.data}) < 2:
            raise ValueError(
                "TLGenerator needs samples from at least two distinct labels to mix")

        sounds = []
        labels = []
        last = len(self.data) - 1
        # One-hot rows; loop-invariant, so built once per batch.
        eye = np.eye(self.opt.nClasses)
        for _ in range(self.batch_size):
            # Draw pairs until the two examples come from different classes.
            while True:
                sound1, label1 = self.data[random.randint(0, last)]
                sound2, label2 = self.data[random.randint(0, last)]
                if label1 != label2:
                    break
            sound1 = self.preprocess(sound1)
            sound2 = self.preprocess(sound2)

            # Mix the two examples with a random ratio r and build the soft label.
            r = np.array(random.random())
            sound = U.mix(sound1, sound2, r, self.opt.sr).astype(np.float32)
            idx1 = self.mapdict[label1] - 1
            idx2 = self.mapdict[label2] - 1
            label = (eye[idx1] * r + eye[idx2] * (1 - r)).astype(np.float32)

            # Extra gain augmentation applied to the mixed sound.
            sound = U.random_gain(6)(sound).astype(np.float32)
            sounds.append(sound)
            labels.append(label)

        return np.asarray(sounds), np.asarray(labels)

    def preprocess_setup(self):
        """Return the ordered list of per-sample preprocessing callables."""
        funcs = []
        if self.opt.strongAugment:
            funcs += [U.random_scale(1.25)]

        funcs += [U.padding(self.opt.inputLength // 2),
                  U.random_crop(self.opt.inputLength),
                  U.normalize(32768.0)]
        return funcs

    def preprocess(self, sound):
        """Apply every function in ``self.preprocess_funcs`` to ``sound``, in order."""
        for f in self.preprocess_funcs:
            sound = f(sound)

        return sound

In [23]:
def getTrainGen(opt=None, split=None):
    """Load the training fold from disk and wrap it in a ``TLGenerator``.

    Args:
        opt: options namespace forwarded to ``TLGenerator``.
        split: currently unused; fold 1 of the hard-coded archive is always loaded.

    Returns:
        A ``TLGenerator`` over the fold's ``'sounds'`` and ``'labels'``.
    """
    train_path = "../datasets/forOneClassModel_alarm/train/trainSet_20240119002902.npz"
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted archives.
    # The context manager closes the archive, and the fold entry is read once
    # instead of once per field.
    with np.load(train_path, allow_pickle=True) as dataset:
        fold = dataset['fold{}'.format(1)].item()
    train_sounds = fold['sounds']
    train_labels = fold['labels']

    trainGen = TLGenerator(train_sounds, train_labels, opt)
    return trainGen

In [24]:
def getOpts():
    """Build the options namespace used by the quantization workflow.

    Command-line style flags are parsed with ``parse_known_args`` and the
    remaining settings are attached as plain attributes.

    Returns:
        argparse.Namespace with parsed flags plus learning, network and test settings.
    """
    parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet')
    parser.add_argument('--netType', default='TLACDNet', required=False)
    parser.add_argument('--data', default='./datasets/processed/', required=False)
    # The default must itself be a valid choice, otherwise passing it explicitly is rejected.
    parser.add_argument('--dataset', required=False, default='uec_iot', choices=['10', 'uec_iot'])
    # Both flags are store_true with default=True, so they are always True.
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning')
    parser.add_argument('--strongAugment', default=True, action='store_true', help='Add scale and gain augmentation')
    # In a notebook, parser.parse_args() cannot be used; use
    # parser.parse_known_args(), which tolerates unrecognised arguments.
    opt, unknown = parser.parse_known_args()

    # Learning settings
    opt.batchSize = 32
    opt.weightDecay = 5e-3
    # NOTE(review): 0.09 is unusually low for a momentum value (0.9 is common) -
    # confirm; it is not referenced elsewhere in the visible cells.
    opt.momentum = 0.09
    opt.nEpochs = 800
    opt.LR = 0.1
    opt.schedule = [0.3, 0.6, 0.9]
    opt.warmup = 10

    # Basic net settings
    opt.nClasses = 2
    opt.nFolds = 1
    opt.split = 1
    opt.sr = 20000
    opt.inputLength = 30225

    # Test data
    opt.nCrops = 2
    opt.ch_config = [8, 64, 32, 64, 64, 128, 128, 256, 256, 512, 512, 2]
    return opt

In [51]:
class Trainer:
    """Drives post-training static quantization of a saved ACDNet-style model.

    Workflow (see ``QuantizeModel``): load the checkpoint, fuse modules, attach
    the qnnpack qconfig, calibrate on one generated training batch, convert,
    evaluate, then save the result as TorchScript (.pt) and TFLite (.tflite)
    under ``../th/quantized_models/``.

    Args:
        opt: options namespace (see ``getOpts``). ``model_path`` and
            ``model_name`` must be set before models are loaded or saved.
            ``opt.device`` is overwritten with the CPU device.
        split: accepted for interface compatibility; not used.
    """

    def __init__(self, opt=None, split=0):
        if opt is None:
            raise ValueError("Trainer requires an options namespace (see getOpts())")
        self.opt = opt
        self.testX = None
        self.testY = None
        self.trainX = None
        self.trainY = None
        # Everything in this class runs on the CPU.
        self.opt.device = torch.device("cpu")
        self.trainGen = getTrainGen(self.opt)
        self.qunt_nClass = 2

    def __artifact_path(self, extension):
        """Path of a saved quantized artifact ('pt' or 'tflite') for ``opt.model_name``."""
        return '../th/quantized_models/{}.{}'.format(self.opt.model_name, extension)

    def load_train_data(self):
        """Generate one training batch and store it as the calibration set."""
        print('Preparing calibration dataset..')
        x, y = self.trainGen[0]
        # Move axis 3 to position 1: the network requires 4-D `(N, C, H, W)`
        # input, i.e. [N, 1, 1, inputLength] rather than [N, 1, inputLength].
        self.trainX = torch.tensor(np.moveaxis(x, 3, 1)).to(self.opt.device)
        self.trainY = torch.tensor(y).to(self.opt.device)
        print('Calibration dataset is ready')

    def load_test_data(self):
        """Load the validation archive once; later calls are no-ops."""
        if self.testX is None:
            val_path = '../datasets/forOneClassModel_alarm/test_val/final_val_test_npz/final_valSet_20240119004614.npz'
            # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted archives.
            with np.load(val_path, allow_pickle=True) as data:
                dataX = np.moveaxis(data['x'], 3, 1).astype(np.float32)
                dataY = data['y']
            self.testX = torch.tensor(dataX).to(self.opt.device)
            self.testY = torch.tensor(dataY).to(self.opt.device)

    def __validate(self, net, testX, testY):
        """Run ``net`` on ``testX`` in a single pass and return top-1 accuracy (%).

        Uses the tensors passed in rather than ``self.testX``/``self.testY``.
        """
        net.eval()
        with torch.no_grad():
            scores = net(testX)
            acc = self.__compute_accuracy(scores.detach(), testY)
        return acc

    def __compute_accuracy(self, y_pred, y_target):
        """Average predictions and targets over each group of ``opt.nCrops``
        consecutive rows, then return the percentage of matching argmax classes.

        The number of rows must be divisible by ``opt.nCrops``.
        """
        print(y_pred.shape)
        n_crops = self.opt.nCrops
        with torch.no_grad():
            y_pred = y_pred.reshape(y_pred.shape[0] // n_crops, n_crops, y_pred.shape[1]).mean(dim=1)
            y_target = y_target.reshape(y_target.shape[0] // n_crops, n_crops, y_target.shape[1]).mean(dim=1)

            y_pred = y_pred.argmax(dim=1)
            y_target = y_target.argmax(dim=1)

            acc = (((y_pred == y_target) * 1).float().mean() * 100).item()
        return acc

    def __load_model(self, quant=False):
        """Rebuild the network from the checkpoint at ``opt.model_path``.

        The checkpoint is a dict with a ``'config'`` (channel configuration) and
        a ``'weight'`` (state dict) entry. ``quant`` is currently ignored: the
        quantizable model variant is always constructed.
        """
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        state = torch.load(self.opt.model_path, map_location=self.opt.device)
        print(state['config'])
        net = models.GetACDNetQuantModel(input_len=self.opt.inputLength, nclass=self.qunt_nClass, sr=self.opt.sr, channel_config=state['config']).to(self.opt.device)
        calc.summary(net, (1, 1, self.opt.inputLength))
        net.load_state_dict(state['weight'])
        return net

    def __calibrate(self, net):
        """Feed the calibration batch through ``net`` and return its accuracy (%).

        Called between ``prepare`` and ``convert`` in ``QuantizeModel``.
        """
        self.load_train_data()
        net.eval()
        step = self.opt.batchSize
        with torch.no_grad():
            outputs = [net(self.trainX[start:start + step]).detach()
                       for start in range(0, len(self.trainX), step)]
            x_pred = torch.cat(outputs).argmax(dim=1)
            x_target = self.trainY.argmax(dim=1)

            acc = (((x_pred == x_target) * 1).float().mean() * 100).item()
            print('calibrate accuracy is: {:.2f}'.format(acc))
        return acc

    def QuantizeModel(self):
        """Quantize the checkpointed model and export it as TorchScript and TFLite."""
        net = self.__load_model(True)
        net.eval()

        # Fuse each run of three consecutive modules, identified by the index
        # of its first module inside the Sequential container.
        for first in (0, 3):
            torch.quantization.fuse_modules(net.sfeb, [str(first), str(first + 1), str(first + 2)], inplace=True)
        for first in (0, 4, 7, 11, 14, 18, 21, 25, 28, 33):
            torch.quantization.fuse_modules(net.tfeb, [str(first), str(first + 1), str(first + 2)], inplace=True)

        # Specify quantization configuration
        net.qconfig = torch.quantization.get_default_qconfig('qnnpack')
        torch.backends.quantized.engine = 'qnnpack'
        print(net.qconfig)
        torch.quantization.prepare(net, inplace=True)

        # Calibrate with the training data
        self.__calibrate(net)

        # Convert to quantized model
        torch.quantization.convert(net, inplace=True)
        print('Post Training Quantization: Convert done')

        # Measure the serialized size through a scratch file that is always removed.
        print("Size of model after quantization")
        torch.save(net.state_dict(), "temp.p")
        try:
            print('Size (MB):', os.path.getsize("temp.p") / 1e6)
        finally:
            os.remove('temp.p')

        self.load_test_data()
        val_acc = self.__validate(net, self.testX, self.testY)
        print('Testing: Acc(top1) {:.2f}%'.format(val_acc))

        # Make sure the output directory exists before writing either artifact.
        os.makedirs('../th/quantized_models', exist_ok=True)
        torch.jit.save(torch.jit.script(net), self.__artifact_path('pt'))

        # ************** convert to tflite **********
        with torch.no_grad():
            # Layout must be (1, 1, 1, inputLength). Per the original author's
            # notes, (1, 1, L, 1), (L, 1, 1, 1) and (1, L, 1, 1) all failed with
            # RuntimeErrors during conversion.
            dummy_input = torch.randn(1, 1, 1, self.opt.inputLength)
            # Per the original author's note, additionally passing
            # quantize_input_output_type='int8' would force the input to int8.
            converter = TFLiteConverter(net,
                                        dummy_input,
                                        quantize_target_type='int8',
                                        hybrid_conv=False,
                                        float16_quantization=True,
                                        tflite_path=self.__artifact_path('tflite'))
            converter.convert()

        print(net.state_dict())

    def TestModel(self, quant=False):
        """Evaluate either the saved quantized TorchScript model or the checkpoint.

        Args:
            quant: if True, load ``../th/quantized_models/<model_name>.pt``;
                otherwise rebuild the network from ``opt.model_path``.
        """
        if quant:
            # Select the same engine the model was quantized with, so this call
            # does not rely on QuantizeModel having run earlier in the session.
            torch.backends.quantized.engine = 'qnnpack'
            net = torch.jit.load(self.__artifact_path('pt'))
        else:
            net = self.__load_model()
        self.load_test_data()
        net.eval()
        val_acc = self.__validate(net, self.testX, self.testY)
        print('Testing: Acc(top1) {:.2f}%'.format(val_acc))

    def GetModelSize(self):
        """Print the on-disk size (KB) of the original checkpoint and of an ONNX export.

        NOTE(review): this reads ``../th/onnx_models/<model_name>.onnx``, but
        nothing in this class writes an ONNX file (QuantizeModel saves .pt and
        .tflite under ``../th/quantized_models/``) - confirm it is produced elsewhere.
        """
        orig_net_path = self.opt.model_path
        print('Full precision model size (KB):', os.path.getsize(orig_net_path) / (1024))
        save_onnx_name = "{}.onnx".format(self.opt.model_name)
        quant_net_path = "../th/onnx_models/" + save_onnx_name
        print('Quantized model size (KB):', os.path.getsize(quant_net_path) / (1024))

In [52]:
# to_convert_model_path = "./th/pruned_models/second_stage_pruned_models/magnitude_pruning/acdnet_tl_hybrid_pruning_magnitude_model_202312281149_80.pt"

In [53]:
def main():
    """Evaluate the checkpointed model, quantize it, then evaluate the quantized result.

    The checkpoint path and the output model name are fixed here rather than
    prompted for; the output name carries a timestamp from ``genDataTimeStr``.
    """
    opt = getOpts()
    opt.device = 'cpu'
    opt.model_path = "./retrained_models_after_pruned/retrained_model_20240124123209_acc_95.45455169677734_795th_epoch.pt"
    opt.model_name = f"quant_retrained_model_95.4_{genDataTimeStr()}"
    opt.split = 1
    opt.hasQuated = False

    trainer = Trainer(opt)

    # 1) Baseline accuracy of the provided model.
    print('Testing performance of the provided model.....')
    trainer.TestModel()

    # 2) Post-training quantization and export.
    print('Quantization process is started.....')
    trainer.QuantizeModel()
    print('Quantization done')

    # 3) Accuracy of the saved quantized model.
    print('Testing quantized model.')
    trainer.TestModel(True)
    print('Finished')

In [54]:
main()

length of samples:325
Testing performance of the provided model.....
[5, 32, 9, 16, 23, 33, 29, 56, 47, 65, 90, 2]
+----------------------------------------------------------------------------+
+                           Pytorch Model Summary                            +
------------------------------------------------------------------------------
   Layer (type)       Input Shape      Output Shape    Param #      FLOPS #
       Conv2d-1     (1, 1, 30225)     (5, 1, 15109)         45      679,905
  BatchNorm2d-2     (5, 1, 15109)     (5, 1, 15109)         10            0
         ReLu-3     (5, 1, 15109)     (5, 1, 15109)          0       75,545
       Conv2d-4     (5, 1, 15109)     (32, 1, 7553)        800    6,042,400
  BatchNorm2d-5     (32, 1, 7553)     (32, 1, 7553)         64            0
         ReLu-6     (32, 1, 7553)     (32, 1, 7553)          0      241,696
    MaxPool2d-7     (32, 1, 7553)      (32, 1, 151)          0      241,600
      Permute-8      (32, 1, 151)      (

INFO (tinynn.converter.base) Generated model saved to ../th/quantized_models/quant_retrained_model_95.4_20240130035114.tflite


ACDNetQuant(
  (sfeb): Sequential(
    (0): QuantizedConvReLU2d(1, 5, kernel_size=(1, 9), stride=(1, 2), scale=0.027175312861800194, zero_point=0)
    (1): Identity()
    (2): Identity()
    (3): QuantizedConvReLU2d(5, 32, kernel_size=(1, 5), stride=(1, 2), scale=0.02924538217484951, zero_point=0)
    (4): Identity()
    (5): Identity()
    (6): MaxPool2d(kernel_size=(1, 50), stride=(1, 50), padding=0, dilation=1, ceil_mode=False)
  )
  (tfeb): Sequential(
    (0): QuantizedConvReLU2d(1, 9, kernel_size=(3, 3), stride=(1, 1), scale=0.01393138151615858, zero_point=0, padding=(1, 1))
    (1): Identity()
    (2): Identity()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): QuantizedConvReLU2d(9, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.009488196112215519, zero_point=0, padding=(1, 1))
    (5): Identity()
    (6): Identity()
    (7): QuantizedConvReLU2d(16, 23, kernel_size=(3, 3), stride=(1, 1), scale=0.007174537051469088, zero_poin

In [23]:
# opt2 = getOpts();#opts.parse();
# state2 = torch.load(to_convert_model_path, map_location="cuda:0");
# tmpnet = models.GetACDNetModel(input_len=opt2.inputLength, nclass=6, sr=20000, channel_config=state2['config']).to("cuda:0");
# tmpnet.load_state_dict(state2['weight']);
# print(tmpnet)

## Convert the TFLite model to a C array

In [24]:
# Input .tflite model and output C source file used by the conversion cells below.
# (Earlier variants built these paths from a MODELS_DIR base directory:)
# MODEL_TFLITE = os.path.join(MODELS_DIR, 'model.tflite')
# FLOAT_MODEL_TFLITE = os.path.join(MODELS_DIR, 'float_model.tflite')
# MODEL_TFLITE_MICRO = os.path.join(MODELS_DIR, 'model.cc')
# NOTE(review): this path is hard-coded to a model stamped 20240125130552, not
# the file written by the most recent main() run - confirm it is the intended model.
tflite_model = "../th/quantized_models/quant_retrained_model_95.4_20240125130552.tflite"
# NOTE(review): the stamp in this name (240125120552) differs from the model's
# (20240125130552) - possibly a typo; confirm.
target_c_file ="../th/Model_C_Files/quant_from_retrain_95.4_240125120552.c"

In [25]:
# !apt-get update && apt-get -qq install xxd

In [26]:
# !apt-get update && apt-get -qq install xxd

In [27]:
# Dump the .tflite file as C source (`xxd -i` emits an unsigned char array plus
# a length variable) and redirect it into the target C file.
!xxd -i {tflite_model} > {target_c_file}

In [28]:
# Update variable names
REPLACE_TEXT = "uec_iot_model_alarm";#MODEL_TFLITE.replace('/', '_').replace('.', '_')
!sed -i 's/'{REPLACE_TEXT}'/g_model/g' {target_c_file}