## QAT過程問題集
- can't convert float NaN (actually 0.00000) to int:
  - 與weight-decay設定值可能有關，設定太大倒導致錯誤。
  - 可能是同時開啟三個訓練程式造成記憶體不足造成。


In [1]:
import sys;
import os;
import glob;
import math;
import random;
import torch;
import torch.optim as optim;
import torch.nn as nn;
import numpy as np
import copy

In [2]:
print(torch.__version__)

2.1.2


In [3]:
sys.path.append("../../../")
sys.path.append("../../")

In [4]:
import common.opts as opts;
import common.utils as U;
# import th.resources.models as models;
import th.resources.no_softmax_quant_model as models;
import th.resources.calculator as calc;
from SharedLibs.datestring import getDateStr, genDataTimeStr;

In [5]:
import argparse
import time

In [6]:
from torch.quantization import QuantStub, DeQuantStub
from tinynn.converter import TFLiteConverter
from tinynn.graph.quantization.quantizer import PostQuantizer
from tinynn.graph.tracer import model_tracer
from tinynn.util.train_util import DLContext, get_device
from tinynn.graph.quantization.algorithm.cross_layer_equalization import cross_layer_equalize
from tinynn.converter import TFLiteConverter

In [7]:
seed = 42;
random.seed(seed);
np.random.seed(seed);
torch.manual_seed(seed);
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed);
torch.backends.cudnn.deterministic = True;
torch.backends.cudnn.benchmark = False;

In [8]:
mask8 = 0x4000 # >> 8 : 16384
mask7 = 0x2000 # >> 7 :  8192
mask6 = 0x1000 # >> 6 :  4096
mask5 = 0x0800 # >> 5 :  2048
mask4 = 0x0400 # >> 4 :  1024
mask3 = 0x0200 # >> 3 :   512
mask2 = 0x0100 # >> 2 :   256
mask1 = 0x0080 # >> 1 :   128
mask0 = 0x0040 # >> 0 :    64 below the value, drop the value

In [9]:
def maskOP(x):
    x = np.int16(x)
    # print(f"begin:x:{x}")
    if (mask8&x):
        return x >> 8
    elif (mask7&x):
        return x >> 7
    elif (mask6&x):
        return x >> 6
    elif (mask5&x):
        return x >> 5
    elif (mask4&x):
        return x >> 4
    elif (mask3&x):
        return x >> 3
    elif (mask2&x):
        return x >> 2
    elif (mask1&x):
        return x >> 1
    elif (mask0&x):
        return x
    else:
        return 0;

In [10]:
class TLGenerator():
    #Generates data for Keras
    def __init__(self, samples, labels, options):
        random.seed(42);
        #Initialization
        print(f"length of samples:{len(samples)}")
        self.data = [(samples[i], labels[i]) for i in range (0, len(samples))];
        self.opt = options;
        self.batch_size = options.batchSize;
        self.preprocess_funcs = self.preprocess_setup();
        self.mapdict = dict([('52',1),('56',2),('99',3)])

    def __len__(self):
        #Denotes the number of batches per epoch
        return int(np.floor(len(self.data) / self.batch_size));

    def __getitem__(self, batchIndex):
        #Generate one batch of data
        batchX, batchY = self.generate_batch(batchIndex);
        batchX = np.expand_dims(batchX, axis=1);
        batchX = np.expand_dims(batchX, axis=3);
        return batchX, batchY

    def generate_batch(self, batchIndex):
        #Generates data containing batch_size samples
        sounds = [];
        labels = [];
        indexes = None;
        for i in range(self.batch_size):
            # Training phase of BC learning
            # Select two training examples
            while True:
                sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
                sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
                if label1 != label2:
                    break
            sound1 = self.preprocess(sound1)
            sound2 = self.preprocess(sound2)

            # Mix two examples
            r = np.array(random.random())
            sound = U.mix(sound1, sound2, r, self.opt.sr).astype(np.float32)
            # print(f"sound length after U.mix is {len(sound)}")
            eye = np.eye(self.opt.nClasses)
            idx1 = self.mapdict[str(label1)]- 1
            idx2 = self.mapdict[str(label2)] - 1
            label = (eye[idx1] * r + eye[idx2] * (1 - r)).astype(np.float32)
            # label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)

            #For stronger augmentation
            sound = U.random_gain(6)(sound).astype(np.float32)
            # print(f"sound length after U.random_gain is {len(sound)}")
            sounds.append(sound);
            labels.append(label);

        sounds = np.asarray(sounds);
        labels = np.asarray(labels);
        # print(f"labels in generate_batch is:\n{labels}")

        return sounds, labels;

    def preprocess_setup(self):
        funcs = []
        if self.opt.strongAugment:
            funcs += [U.random_scale(1.25)]

        funcs += [U.padding(self.opt.inputLength // 2),
                  U.random_crop(self.opt.inputLength),
                  U.normalize(32768.0)]
        return funcs

    def preprocess_setup_without_normalization(self):
        funcs = []
        if self.opt.strongAugment:
            funcs += [U.random_scale(1.25)]

        funcs += [U.padding(self.opt.inputLength // 2),
                  U.random_crop(self.opt.inputLength)
                  ]
        return funcs

    def preprocess(self, sound):
        for f in self.preprocess_funcs:
            sound = f(sound)

        return sound;

In [11]:
def getTrainGen(opt=None, split=None):
    dataset = np.load("../../../datasets/CurrentUse/generated_datasets/train/version4_home/single_fold_train_20240506160623.npz", allow_pickle=True);
    train_sounds = []
    train_labels = []
    # train_sounds = [dataset['x'][i][0] for i in range(len(dataset['x']))]
    # train_labels = [dataset['y'][i][0] for i in range(len(dataset['y']))]
    train_sounds = dataset['fold{}'.format(1)].item()['sounds']
    train_labels = dataset['fold{}'.format(1)].item()['labels']
    trainGen = TLGenerator(train_sounds, train_labels, opt);
    return trainGen

In [15]:
def getOpts():
    parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet');
    parser.add_argument('--netType', default='TLACDNet',  required=False);
    parser.add_argument('--data', default='./datasets/processed/',  required=False);
    parser.add_argument('--dataset', required=False, default='uec_iot', choices=['10']);
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning');
    parser.add_argument('--strongAugment', default=True,  action='store_true', help='Add scale and gain augmentation');
    #在ipynb中，不能使用parser.parse，要改用parser.parse_known_args()
    opt, unknown = parser.parse_known_args()
    """
   
    """
    #Leqarning settings
    opt.batchSize =64;
    opt.weightDecay = 5e-4;
    opt.LR = 0.03;
    opt.momentum = 0.1;
    opt.nEpochs = 500;#2000;
    opt.schedule = [0.05, 0.07, 0.09];
    opt.warmup = 10;

    #Basic Net Settings
    opt.nClasses = 3#50;
    opt.nFolds = 1;#5;
    opt.splits = [i for i in range(1, opt.nFolds + 1)];
    opt.sr = 20000;
    opt.inputLength = 30225;
    #Test data
    opt.nCrops = 2;
    # opt.ch_config = [8,64,32,64,64,128,128,256,256,512,512,2];
    return opt

In [16]:
def display_info(opt):
    print('+------------------------------+');
    print('| {} Sound classification'.format(opt.netType));
    print('+------------------------------+');
    print('| dataset  : {}'.format(opt.dataset));
    print('| nEpochs  : {}'.format(opt.nEpochs));
    print('| LRInit   : {}'.format(opt.LR));
    print('| weightDecay   : {}'.format(opt.weightDecay));
    print('| momentum   : {}'.format(opt.momentum));
    print('| schedule : {}'.format(opt.schedule));
    print('| warmup   : {}'.format(opt.warmup));
    print('| batchSize: {}'.format(opt.batchSize));
    print('| nFolds: {}'.format(opt.nFolds));
    print('| Splits: {}'.format(opt.splits));
    print('| Device: {}'.format(opt.device));
    print('| Model Path: {}'.format(opt.model_path));
    print('| Model Name: {}'.format(opt.model_name));
    print('+------------------------------+');

In [17]:
class QATTrainer:
    def __init__(self, opt=None, split=0):
        self.opt = opt;
        self.testX = None;
        self.testY = None;
        self.trainX = None;
        self.trainY = None;
        # self.opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu");
        self.opt.device = torch.device("cpu")
        self.trainGen = getTrainGen(self.opt)#train_generator.setup(self.opt, self.opt.split);
        self.qunt_nClass = opt.nClasses;
        self.bestAcc = 0.0;
        self.bestAccEpoch = 0;

    def load_train_data(self):
        print('Preparing calibration dataset..');
        x,y = self.trainGen.__getitem__(0);
        self.trainX = torch.tensor(np.moveaxis(x, 3, 1)).to(self.opt.device);
        """
        trainX size:torch.Size([1, 1, 30225]), but must be [1,1,1,30225]
        Due to the reason: raise ValueError("Input shape must be `(N, C, H, W)`!")
        """
        # print(f"trainX[0] shape:{self.trainX[0].shape}")
        self.trainY = torch.tensor(y).to(self.opt.device);
        print('Calibration dataset is ready');
        # self.opt.batchSize = 32;

    # def load_test_data(self):
    #     if(self.testX is None):
    #         data = np.load('../../datasets/CurrentUse/forOneClassModel_alarm/test_val/final_val_test_npz/final_valSet_20240119004614.npz', allow_pickle=True);
    #         dataX = np.moveaxis(data['x'], 3, 1).astype(np.float32);
    #         self.testX = torch.tensor(dataX).to(self.opt.device);
    #         self.testY = torch.tensor(data['y']).to(self.opt.device);

    def load_test_data(self):
        testData = '../../../datasets/CurrentUse/generated_datasets/val/version4_home/final_single_val_20240506160843.npz'
        data = np.load(testData, allow_pickle=True);
        print(f"device is :{self.opt.device}")
        print(f"len of Y:{len(data['y'])}")
        # self.testX = torch.tensor(np.moveaxis(data['x'], 3, 1)).to(self.opt.device);
        dataX = np.moveaxis(data['x'], 3, 1).astype(np.float32);
        self.testX = torch.tensor(dataX).to(self.opt.device);
        self.testY = torch.tensor(data['y']).type(torch.float32).to(self.opt.device);

    def __validate_test(self, net, qat_done, testX, testY):
        net.eval();
        # if qat_done:
        #     testX.to('cpu');
        #     testY.to('cpu');
        # else:
        #     testX.to('cuda:0');
        #     testY.to('cuda:0');
            
        with torch.no_grad():
            y_pred = None;
            batch_size = len(self.testX);
            x = self.testX[:];
            scores = net(x);
            y_pred = scores.data if y_pred is None else torch.cat((y_pred, scores.data));
            acc = self.__compute_accuracy_2(y_pred, self.testY);
        return acc;

    
    def __validate(self, net, lossFunc):
        if self.testX is None:
            self.load_test_data();
        net.eval();
        acc=0.0; 
        loss = 0.0;
        with torch.no_grad():
            y_pred = None;
            batch_size = len(self.testX);#(self.opt.batchSize//self.opt.nCrops)*self.opt.nCrops;
            x = self.testX[:];
            try:
                scores = net(x);
                y_pred = scores.data if y_pred is None else torch.cat((y_pred, scores.data));
                acc, loss = self.__compute_accuracy(y_pred, self.testY, lossFunc);
            except ValueError:
                print(f"error data:{x}")
        net.train();
        return acc, loss;

    #Calculating average prediction (10 crops) and final accuracy
    def __compute_accuracy_2(self, y_pred, y_target):
        print(y_pred.shape);
        with torch.no_grad():
            y_pred = (y_pred.reshape(y_pred.shape[0]//self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])).mean(dim=1);
            y_target = (y_target.reshape(y_target.shape[0]//self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(dim=1);

            y_pred = y_pred.argmax(dim=1);
            y_target = y_target.argmax(dim=1);

            acc = (((y_pred==y_target)*1).float().mean()*100).item();
        return acc;
        

    def __compute_accuracy(self, y_pred, y_target, lossFunc):
        print(f"shape of y_pred:{y_pred.shape}");
        print(f"shape of y_target:{y_target.shape}");
        
        with torch.no_grad():
            #Reshape to shape theme like each sample comtains 10 samples, calculate mean and find theindices that has highest average value for each sample
            if self.opt.nCrops == 1:
                y_pred = y_pred.argmax(dim=1);
                y_target = y_target.argmax(dim=1);
            else:
                y_pred = (y_pred.reshape(y_pred.shape[0]//self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])).mean(dim=1).argmax(dim=1);
                y_target = (y_target.reshape(y_target.shape[0]//self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(dim=1).argmax(dim=1);
                print(f"after: len of y_pred:{len(y_pred)}, len of y_target:{len(y_target)}")
            acc = (((y_pred==y_target)*1).float().mean()*100).item();
            # valLossFunc = torch.nn.KLDivLoss();
            loss = lossFunc(y_pred.float().log(), y_target.float()).item();
            # loss = 0.0;
        return acc, loss;
        

    def __load_model(self, quant=True):
        state = torch.load(self.opt.model_path, map_location=self.opt.device);
        print(state['config']);
        net = None;
        net = models.GetACDNetQuantModel(input_len=self.opt.inputLength, nclass=self.qunt_nClass, sr=self.opt.sr, channel_config=state['config']).to(self.opt.device);
        calc.summary(net, (1,1,self.opt.inputLength));
        net.load_state_dict(state['weight']);
        return net;

    
    def __train(self, net):
        self.load_train_data();
        # net.eval();
        # calc.summary(net, (1,1,self.opt.inputLength));
        lossFunc = torch.nn.KLDivLoss(reduction='batchmean');
        optimizer = optim.SGD(net.parameters(), lr=self.opt.LR, weight_decay=self.opt.weightDecay, momentum=self.opt.momentum, nesterov=True);
        train_start_time = time.time();
        for epochIdx in range(self.opt.nEpochs):
            epoch_start_time = time.time();
            optimizer.param_groups[0]['lr'] = self.__get_lr(epochIdx+1);
            cur_lr = optimizer.param_groups[0]['lr'];
            running_loss = 0.0;
            running_acc = 0.0;
            n_batches = math.ceil(len(self.trainGen.data)/self.opt.batchSize);
            for batchIdx in range(n_batches):
                # with torch.no_grad():
                x,y = self.trainGen.__getitem__(batchIdx)
                x = torch.tensor(np.moveaxis(x, 3, 1)).to(self.opt.device);
                y = torch.tensor(y).to(self.opt.device);
                # zero the parameter gradients
                optimizer.zero_grad();

                # forward + backward + optimize
                try:
                    outputs = torch.softmax(input=net(x),dim=1); #need to check float NaN value?
                    running_acc += (((outputs.data.argmax(dim=1) == y.argmax(dim=1))*1).float().mean()).item();
                    loss = lossFunc(outputs.log(), y);
                    loss.backward();
                    optimizer.step();
                    running_loss += loss.item();
                except ValueError:
                    print(f"error label:{y}")
                    print(f"error data:{x}")
                    continue

            tr_acc = (running_acc / n_batches)*100;
            tr_loss = running_loss / n_batches;

            #Epoch wise validation Validation
            epoch_train_time = time.time() - epoch_start_time;

            net.eval();
            val_acc, val_loss = self.__validate(net, lossFunc);
            #Save best model
            self.__chk_bestAcc(val_acc, epochIdx, net);
            self.__on_epoch_end(epoch_start_time, epoch_train_time, epochIdx, cur_lr, tr_loss, tr_acc, val_loss, val_acc);

            running_loss = 0;
            running_acc = 0;
            net.train();

        total_time_taken = time.time() - train_start_time;
        print("Execution finished in: {}".format(U.to_hms(total_time_taken)));


    def __chk_bestAcc(self, acc, epochIdx, net):
        print("__chk_bestAcc is called")
        print(f"current best Acc is {self.bestAcc}")
        print(f"pass in acc is {acc}")
        if acc > self.bestAcc:
            self.bestAcc = acc;
            self.bestAccEpoch = epochIdx +1;
            print(f"model saved....., acc: {acc}")
            
    def __on_epoch_end(self, start_time, train_time, epochIdx, lr, tr_loss, tr_acc, val_loss, val_acc):
        epoch_time = time.time() - start_time;
        val_time = epoch_time - train_time;
        line = 'SP-{} Epoch: {}/{} | Time: {} (Train {}  Val {}) | Train: LR {}  Loss {:.2f}  Acc {:.2f}% | Val: Loss {:.2f}  Acc(top1) {:.2f}% | HA {:.2f}@{}\n'.format(
            self.opt.splits, epochIdx+1, self.opt.nEpochs, U.to_hms(epoch_time), U.to_hms(train_time), U.to_hms(val_time),
            lr, tr_loss, tr_acc, val_loss, val_acc, self.bestAcc, self.bestAccEpoch);
        # print(line)
        sys.stdout.write(line);
        sys.stdout.flush();
        
    
    def __get_lr(self, epoch):
        divide_epoch = np.array([self.opt.nEpochs * i for i in self.opt.schedule]);
        decay = sum(epoch > divide_epoch);
        if epoch <= self.opt.warmup:
            decay = 1;
        return self.opt.LR * np.power(0.1, decay);

    def __get_batch(self, index):
        x = self.trainX[index*self.opt.batchSize : (index+1)*self.opt.batchSize];
        y = self.trainY[index*self.opt.batchSize : (index+1)*self.opt.batchSize];
        return x.to(self.opt.device), y.to(self.opt.device);
    
    
    def __calibrate(self, net):
        self.load_train_data();
        net.eval();
        with torch.no_grad():
            for i in range(1,2):
                x_pred = None;
                for idx in range(math.ceil(len(self.trainX)/self.opt.batchSize)):
                    x = self.trainX[idx*self.opt.batchSize : (idx+1)*self.opt.batchSize];
                    scores = net(x);
                    x_pred = scores.data if x_pred is None else torch.cat((x_pred, scores.data));
                x_pred = x_pred.argmax(dim=1);
                x_target = self.trainY.argmax(dim=1);
                acc = (((x_pred==x_target)*1).float().mean()*100).item();
                print('calibrate accuracy is: {:.2f}'.format(acc));
        return acc;

    def QuantizeModel(self):
        net = self.__load_model(True);
        # net = self.__load_model(False);
        config = net.ch_config;
        net.eval();
        
        #Fuse modules to
        torch.quantization.fuse_modules(net.sfeb, ['0','1','2'], inplace=True);
        torch.quantization.fuse_modules(net.sfeb, ['3','4','5'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['0','1','2'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['4','5','6'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['7','8','9'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['11','12','13'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['14','15','16'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['18','19','20'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['21','22','23'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['25','26','27'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['28','29','30'], inplace=True);
        torch.quantization.fuse_modules(net.tfeb, ['33','34','35'], inplace=True);

        net.train();
        net.qconfig = torch.quantization.get_default_qconfig('qnnpack')
        torch.backends.quantized.engine = 'qnnpack';
        print(f"net.qconfig : {net.qconfig}");
        torch.quantization.prepare_qat(net, inplace=True);
        
        # Calibrate with the training data
        # self.__calibrate(net);
        self.__train(net);

        #place trained model to cpu
        net.to('cpu');
        # Convert to quantized model
        torch.quantization.convert(net, inplace=True);
        print('Post Training Quantization: Convert done');

        print("Size of model after quantization");
        torch.save(net.state_dict(), "temp.p")
        print('Size (MB):', os.path.getsize("temp.p")/1e6)
        os.remove('temp.p')

        self.load_test_data();
        val_acc = self.__validate_test(net, True, self.testX, self.testY);
        print('Testing: Acc(top1) {:.2f}%'.format(val_acc));
        net.to('cpu');
        # torch.jit.save(torch.jit.script(net), '{}/th/quantized_models/{}.pt'.format(os.getcwd(), self.opt.model_name.format()));
        torch.jit.save(torch.jit.script(net), '../../../trained_models/step_6_QAT_and_Convert2TFLite/{}.pt'.format(self.opt.model_name));
        # torch.save({'weight':net.state_dict(), 'config':net.ch_config}, '../has_qat_models/{}.pt'.format(self.opt.full_weight_and_config_model_name));
        
        # **************convert to tflite**********
        with torch.no_grad():
            dummy_input = torch.FloatTensor(quantize_int8(torch.randn(1, 1, 1, 30225).numpy(),3)); #correct,workable
            #the followng setting for TFLiteConverter, especially quantize_input_output_type='int8',fuse_quant_dequant=True,
            #we need to remove softmax layer from ACDQuantModel to satisfy the output is int8 type
            converter = TFLiteConverter(net,
                                        dummy_input,
                                        quantize_input_output_type='int8',#設定此欄，輸入會強制為int8
                                        fuse_quant_dequant=True,
                                        quantize_target_type='int8',
                                        hybrid_conv=False,
                                        float16_quantization=True,
                                        optimize=5,
                                        tflite_path="../../../trained_models/step_6_QAT_and_Convert2TFLite/{}.tflite".format(self.opt.model_name))
            converter.convert()

        
    def TestModel(self, quant=False):
        if quant:
            print(f"the model name:{self.opt.model_name}");
            net = torch.jit.load('../../../trained_models/step_6_QAT_and_Convert2TFLite/{}.pt'.format(self.opt.model_name))
        else:
            print("has not quanted, load unquanted model...");
            net = self.__load_model();
            # calc.summary(net, (1,1,self.opt.inputLength));
        self.load_test_data();
        net.eval();
        val_acc = self.__validate_test(net, False, self.testX, self.testY);
        print('Testing: Acc(top1) {:.2f}%'.format(val_acc));

    def GetModelSize(self):
        orig_net_path = self.opt.model_path;
        print('Full precision model size (KB):', os.path.getsize(orig_net_path)/(1024));
        save_onnx_name = "../../../trained_models/step_6_QAT_and_Convert2TFLite/{}.onnx".format(self.opt.model_name);
        quant_net_path = "../has_qat_models/onnx_models/"+save_onnx_name;
        print('Quantized model size (KB):', os.path.getsize(quant_net_path)/(1024))

In [18]:
def quantize_int8(x, axis):
    len_of_x = len(x[0][0][0])
    print(f"len_of_x:{len_of_x}")
    for i in range(len_of_x):
        nflag = 2; #positive
        print("{}:{}".format(i,x[0][0][0][i]))
        tmp_x = x[0][0][0][i]
        if tmp_x < 0:
            tmp_x = np.abs(tmp_x)
            nflag = 1
        tmp_x = maskOP(tmp_x)
        if(nflag==1):
            tmp_x = -1 * (tmp_x)
        print("{}:{}".format(i,x[0][0][0][i]))
        print("*********************************")
        x[0][0][0][i] = tmp_x
    return x

In [17]:
"""
pruning algo: tylor-pruning
pruning ration : 0.85
final accuracy : 87.46
epoch: 
self.opt.LR = 0.1;
opt.momentum = 0.09;
self.opt.schedule = [0.15, 0.30, 0.45, 0.60, 0.75];
self.opt.warmup = 0;
self.opt.prune_algo = 'tylor-pruning';
self.opt.prune_interval = 1;
self.opt.nEpochs = 1000;
===================================
#this is bad settings
pruning ration : 0.85
final accuracy : 
epoch: 
opt.batchSize = 128;
opt.weightDecay = 5e-4;
opt.LR = 0.5;
opt.momentum = 0.9;
opt.nEpochs = 1000;#2000;
opt.schedule = [0.6, 0.8, 0.9];
opt.warmup = 10;
===================================
pruning ration : 0.85
final accuracy : 
epoch: 
opt.batchSize = 128;
opt.weightDecay = 5e-3;
opt.LR = 0.1;
opt.momentum = 0.9;
opt.nEpochs = 600;#2000;
opt.schedule = [0.6, 0.8, 0.9];
opt.warmup = 10;
############################################Training DataSet Version 4##########################
============================
opt.batchSize = 64;
opt.weightDecay = 5e-4;
opt.LR = 0.05;
opt.momentum = 0.9;
opt.nEpochs = 800;#2000;
opt.schedule = [0.1, 0.8, 0.9];
opt.warmup = 10;
===========================
no use
opt.batchSize = 128;
opt.weightDecay = 5e-4;
opt.LR = 0.5;
opt.momentum = 0.5;
opt.nEpochs = 800;#2000;
opt.schedule = [0.3, 0.6
==========================
pruning ration : 0.85
final accuracy : 
epoch:
test acc:
opt.batchSize = 128;
opt.weightDecay = 5e-3;
opt.LR = 0.1;
opt.momentum = 0.09;
opt.nEpochs = 800;#2000;
opt.schedule = [0.3, 0.6, 0.9];
opt.warmup = 10;
=========================
# good setting
pruning ration : 0.85
final accuracy : 
epoch:
test acc:
opt.batchSize = 64;
opt.weightDecay = 5e-4;
opt.LR = 0.01;
opt.momentum = 0.1;
opt.nEpochs = 1000;#2000;
opt.schedule = [0.03, 0.06, 0.09];
opt.warmup = 10;
========================
not bad workable.
pruning ration : 0.85
final accuracy : 
epoch:
test acc:
opt.batchSize = 32;
opt.weightDecay = 5e-4;
opt.LR = 0.01;
opt.momentum = 0.1;
opt.nEpochs = 1000;#2000;
opt.schedule = [0.03, 0.06, 0.09];
opt.warmup = 10;
=========================
pruning ration : 0.85
training accuracy : 81.69
test acc:90.68
opt.batchSize =64;
opt.weightDecay = 5e-4;
opt.LR = 0.01;
opt.momentum = 0.1;
opt.nEpochs = 300;#2000;
opt.schedule = [0.03, 0.06, 0.09];
opt.warmup = 10;
=========================
training accuracy : 
test acc:
opt.batchSize =64;
opt.weightDecay = 5e-4;
opt.LR = 0.03;
opt.momentum = 0.1;
opt.nEpochs = 500;#2000;
opt.schedule = [0.05, 0.07, 0.09];
opt.warmup = 10;
"""



In [18]:
def main():
    opt = getOpts();#opts.parse();
    opt.device = 'cpu';
    opt.saveInfo = "valacc92.7_tracc_82.9_prunInfo_0.8_0.85_ds_ver4home"
    opt.model_path = "../../../trained_models/step_5_retrain_after_step_4/train_after_second_pruning_prunratio0.85_2024050315/retrained_model_ratio85.0_vaacc92.79661560058594_tracc_82.95454545454545@752epoch_20240503162619.pt"
    timeStr = genDataTimeStr();
    opt.model_name = "qat_model_{}_{}".format(opt.saveInfo,timeStr);
   
    opt.split = 1;
    opt.hasQuated = False;
    display_info(opt);
    trainer = QATTrainer(opt);

    print('Testing performance of the provided model.....');
    trainer.TestModel();

    print('Quantization process is started.....');
    trainer.QuantizeModel();
    print('Quantization done');

    print('Testing quantized model.');
    trainer.TestModel(True);
    print('Finished');

In [19]:
main()

+------------------------------+
| TLACDNet Sound classification
+------------------------------+
| dataset  : uec_iot
| nEpochs  : 300
| LRInit   : 0.01
| weightDecay   : 0.0005
| momentum   : 0.1
| schedule : [0.03, 0.06, 0.09]
| warmup   : 10
| batchSize: 64
| nFolds: 1
| Splits: [1]
| Device: cpu
| Model Path: ../../../trained_models/step_5_retrain_after_step_4/train_after_second_pruning_prunratio0.85_2024050315/retrained_model_ratio85.0_vaacc92.79661560058594_tracc_82.95454545454545@752epoch_20240503162619.pt
| Model Name: qat_model_valacc92.7_tracc_82.9_prunInfo_0.8_0.85_ds_ver4home_20240506220725
+------------------------------+
length of samples:655
Testing performance of the provided model.....
has not quanted, load unquanted model...
[5, 32, 10, 8, 17, 18, 27, 39, 34, 41, 72, 3]
+----------------------------------------------------------------------------+
+                           Pytorch Model Summary                            +
------------------------------------------

INFO (tinynn.converter.base) Generated model saved to ../../../trained_models/step_6_QAT_and_Convert2TFLite/qat_model_valacc92.7_tracc_82.9_prunInfo_0.8_0.85_ds_ver4home_20240506220725.tflite


Quantization done
Testing quantized model.
the model name:qat_model_valacc92.7_tracc_82.9_prunInfo_0.8_0.85_ds_ver4home_20240506220725
device is :cpu
len of Y:472
torch.Size([472, 3])
Testing: Acc(top1) 90.68%
Finished
