# Weight Pruning Records and Notes
## setting
- current pruning ratio : 0.7
- result: to-add how to observe weight-pruning

In [1]:
import sys;
import os;
import glob;
import math;
import numpy as np;
import random;
import time;
import torch
import torch.optim as optim;
import torch.nn as nn;

In [2]:
sys.path.append("../../../src/")

In [3]:
import common.utils as U;
import common.opts as opt;
import th.resources.models as models;
import th.resources.calculator as calc;
import th.resources.train_generator as train_generator;
import th.resources.pruning_tools.weight_pruning as weight_pruner;

In [4]:
import argparse
import common.tlopts as tlopts
import th.resources.calculator as calc;
from datetime import datetime;
import pathlib

In [5]:
#log file object
logObj = None;
def ChkAndCreateSingleDir(dir_path):
    if not pathlib.Path(dir_path).is_dir():
        os.mkdir(dir_path);
        print(f"'{dir_path}' folder is created.");

In [6]:
def genDataTimeStr():
    return datetime.today().strftime('%Y-%m-%d %H:%M:%S').replace('-',"").replace(' ',"").replace(':',"");

def getDateStr():
    return datetime.today().strftime('%Y-%m-%d %H').replace('-',"").replace(' ',"")#.replace(':',"");

In [7]:
class TLGenerator():
    #Generates data for Keras
    def __init__(self, samples, labels, options):
        random.seed(42);
        #Initialization
        print(f"length of samples:{len(samples)}")
        self.data = [(samples[i], labels[i]) for i in range (0, len(samples))];
        self.opt = options;
        self.batch_size = options.batchSize;
        self.preprocess_funcs = self.preprocess_setup();
        self.mapdict = dict([('52',1),('56',2),('99',3)])

    def __len__(self):
        #Denotes the number of batches per epoch
        return int(np.floor(len(self.data) / self.batch_size));
        #return len(self.samples);

    def __getitem__(self, batchIndex):
        #Generate one batch of data
        batchX, batchY = self.generate_batch(batchIndex);
        batchX = np.expand_dims(batchX, axis=1);
        batchX = np.expand_dims(batchX, axis=3);
        return batchX, batchY

    def generate_batch(self, batchIndex):
        #Generates data containing batch_size samples
        sounds = [];
        labels = [];
        indexes = None;
        for i in range(self.batch_size):
            # Training phase of BC learning
            # Select two training examples
            while True:
                sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
                sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
                if label1 != label2:
                    break
            sound1 = self.preprocess(sound1)
            sound2 = self.preprocess(sound2)

            # Mix two examples
            r = np.array(random.random())
            sound = U.mix(sound1, sound2, r, self.opt.sr).astype(np.float32)
            # print(f"sound length after U.mix is {len(sound)}")
            eye = np.eye(self.opt.nClasses)
            idx1 = self.mapdict[str(label1)]- 1
            idx2 = self.mapdict[str(label2)] - 1
            label = (eye[idx1] * r + eye[idx2] * (1 - r)).astype(np.float32)
            # label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)

            #For stronger augmentation
            sound = U.random_gain(6)(sound).astype(np.float32)
            # print(f"sound length after U.random_gain is {len(sound)}")
            sounds.append(sound);
            labels.append(label);

        sounds = np.asarray(sounds);
        labels = np.asarray(labels);
        

        return sounds, labels;

    def preprocess_setup(self):
        funcs = []
        if self.opt.strongAugment:
            funcs += [U.random_scale(1.25)]

        funcs += [U.padding(self.opt.inputLength // 2),
                  U.random_crop(self.opt.inputLength),
                  U.normalize(32768.0)]
        return funcs

    def preprocess(self, sound):
        for f in self.preprocess_funcs:
            sound = f(sound)

        return sound;

In [8]:
class ACDNetV2(nn.Module):
    def __init__(self, input_length, n_class, sr, ch_conf=None):
        super(ACDNetV2, self).__init__();
        self.input_length = input_length;
        self.ch_config = ch_conf;

        stride1 = 2;
        stride2 = 2;
        channels = 8;
        k_size = (3, 3);
        n_frames = (sr/1000)*10; #No of frames per 10ms

        sfeb_pool_size = int(n_frames/(stride1*stride2));
        # tfeb_pool_size = (2,2);
        if self.ch_config is None:
            self.ch_config = [channels, channels*8, channels*4, channels*8, channels*8, channels*16, channels*16, channels*32, channels*32, channels*64, channels*64, n_class];
        # avg_pool_kernel_size = (1,4) if self.ch_config[1] < 64 else (2,4);
        fcn_no_of_inputs = self.ch_config[-1];
        conv1, bn1 = self.make_layers(1, self.ch_config[0], (1, 9), (1, stride1));
        conv2, bn2 = self.make_layers(self.ch_config[0], self.ch_config[1], (1, 5), (1, stride2));
        conv3, bn3 = self.make_layers(1, self.ch_config[2], k_size, padding=1);
        conv4, bn4 = self.make_layers(self.ch_config[2], self.ch_config[3], k_size, padding=1);
        conv5, bn5 = self.make_layers(self.ch_config[3], self.ch_config[4], k_size, padding=1);
        conv6, bn6 = self.make_layers(self.ch_config[4], self.ch_config[5], k_size, padding=1);
        conv7, bn7 = self.make_layers(self.ch_config[5], self.ch_config[6], k_size, padding=1);
        conv8, bn8 = self.make_layers(self.ch_config[6], self.ch_config[7], k_size, padding=1);
        conv9, bn9 = self.make_layers(self.ch_config[7], self.ch_config[8], k_size, padding=1);
        conv10, bn10 = self.make_layers(self.ch_config[8], self.ch_config[9], k_size, padding=1);
        conv11, bn11 = self.make_layers(self.ch_config[9], self.ch_config[10], k_size, padding=1);
        conv12, bn12 = self.make_layers(self.ch_config[10], self.ch_config[11], (1, 1));
        fcn = nn.Linear(fcn_no_of_inputs, n_class);
        nn.init.kaiming_normal_(fcn.weight, nonlinearity='sigmoid') # kaiming with sigoid is equivalent to lecun_normal in keras

        self.sfeb = nn.Sequential(
            #Start: Filter bank
            conv1, bn1, nn.ReLU(),\
            conv2, bn2, nn.ReLU(),\
            nn.MaxPool2d(kernel_size=(1, sfeb_pool_size))
        );

        tfeb_modules = [];
        self.tfeb_width = int(((self.input_length / sr)*1000)/10); # 10ms frames of audio length in seconds
        tfeb_pool_sizes = self.get_tfeb_pool_sizes(self.ch_config[1], self.tfeb_width);
        p_index = 0;
        for i in [3,4,6,8,10]:
            tfeb_modules.extend([eval('conv{}'.format(i)), eval('bn{}'.format(i)), nn.ReLU()]);

            if i != 3:
                tfeb_modules.extend([eval('conv{}'.format(i+1)), eval('bn{}'.format(i+1)), nn.ReLU()]);

            h, w = tfeb_pool_sizes[p_index];
            if h>1 or w>1:
                tfeb_modules.append(nn.MaxPool2d(kernel_size = (h,w)));
            p_index += 1;

        tfeb_modules.append(nn.Dropout(0.2));
        tfeb_modules.extend([conv12, bn12, nn.ReLU()]);
        h, w = tfeb_pool_sizes[-1];
        if h>1 or w>1:
            tfeb_modules.append(nn.AvgPool2d(kernel_size = (h,w)));
        tfeb_modules.extend([nn.Flatten(), fcn]);

        self.tfeb = nn.Sequential(*tfeb_modules);

        self.output = nn.Sequential(
            nn.Softmax(dim=1)
        );
        

    def forward(self, x):
        x = self.sfeb(x);
        #swapaxes
        x = x.permute((0, 2, 1, 3));
        x = self.tfeb(x);
        y = self.output[0](x);
        return y;

    def make_layers(self, in_channels, out_channels, kernel_size, stride=(1,1), padding=0, bias=False):
        conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias);
        nn.init.kaiming_normal_(conv.weight, nonlinearity='relu'); # kaiming with relu is equivalent to he_normal in keras
        bn = nn.BatchNorm2d(out_channels);
        return conv, bn;

    def get_tfeb_pool_sizes(self, con2_ch, width):
        h = self.get_tfeb_pool_size_component(con2_ch);
        w = self.get_tfeb_pool_size_component(width);
        # print(w);
        pool_size = [];
        for  (h1, w1) in zip(h, w):
            pool_size.append((h1, w1));
        return pool_size;

    def get_tfeb_pool_size_component(self, length):
        # print(length);
        c = [];
        index = 1;
        while index <= 6:
            if length >= 2:
                if index == 6:
                    c.append(length);
                else:
                    c.append(2);
                    length = length // 2;
            else:
               c.append(1);

            index += 1;

        return c;

def GetACDNetModel(input_len=30225, nclass=50, sr=20000, channel_config=None):
    net = ACDNetV2(input_len, nclass, sr, ch_conf=channel_config);
    return net;

In [9]:
seed = 42;
random.seed(seed);
np.random.seed(seed);
torch.manual_seed(seed);
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed);
torch.backends.cudnn.deterministic = True;
torch.backends.cudnn.benchmark = False;
###########################################

class Customed_ACDNetV2(nn.Module):
    def __init__(self, input_length, n_class, sr, ch_conf=None):
        super(Customed_ACDNetV2, self).__init__();
        self.input_length = input_length;
        self.ch_config = ch_conf;

        stride1 = 2;
        stride2 = 2;
        channels = 8;
        k_size = (3, 3);
        n_frames = (sr/1000)*10; #No of frames per 10ms

        sfeb_pool_size = int(n_frames/(stride1*stride2));
        # tfeb_pool_size = (2,2);
        if self.ch_config is None:
            self.ch_config = [channels, channels*8, channels*4, channels*8, channels*8, channels*16, channels*16, channels*32, channels*32, channels*64, channels*64, n_class];
        # avg_pool_kernel_size = (1,4) if self.ch_config[1] < 64 else (2,4);
        fcn_no_of_inputs =  n_class #self.ch_config[-1];
        ch_confing_10 = 512 #8 * 64
        ch_n_class = n_class
        conv1, bn1 = self.make_layers(1, self.ch_config[0], (1, 9), (1, stride1));
        conv2, bn2 = self.make_layers(self.ch_config[0], self.ch_config[1], (1, 5), (1, stride2));
        conv3, bn3 = self.make_layers(1, self.ch_config[2], k_size, padding=1);
        conv4, bn4 = self.make_layers(self.ch_config[2], self.ch_config[3], k_size, padding=1);
        conv5, bn5 = self.make_layers(self.ch_config[3], self.ch_config[4], k_size, padding=1);
        conv6, bn6 = self.make_layers(self.ch_config[4], self.ch_config[5], k_size, padding=1);
        conv7, bn7 = self.make_layers(self.ch_config[5], self.ch_config[6], k_size, padding=1);
        conv8, bn8 = self.make_layers(self.ch_config[6], self.ch_config[7], k_size, padding=1);
        conv9, bn9 = self.make_layers(self.ch_config[7], self.ch_config[8], k_size, padding=1);
        conv10, bn10 = self.make_layers(self.ch_config[8], self.ch_config[9], k_size, padding=1);
        conv11, bn11 = self.make_layers(self.ch_config[9], self.ch_config[10], k_size, padding=1);
        conv12, bn12 = self.make_layers(ch_confing_10, ch_n_class, (1, 1));
        fcn = nn.Linear(fcn_no_of_inputs, ch_n_class);
        nn.init.kaiming_normal_(fcn.weight, nonlinearity='sigmoid') # kaiming with sigoid is equivalent to lecun_normal in keras

        self.sfeb = nn.Sequential(
            #Start: Filter bank
            conv1, bn1, nn.ReLU(),\
            conv2, bn2, nn.ReLU(),\
            nn.MaxPool2d(kernel_size=(1, sfeb_pool_size))
        );

        tfeb_modules = [];
        self.tfeb_width = int(((self.input_length / sr)*1000)/10); # 10ms frames of audio length in seconds
        tfeb_pool_sizes = self.get_tfeb_pool_sizes(self.ch_config[1], self.tfeb_width);
        p_index = 0;
        for i in [3,4,6,8,10]:
            tfeb_modules.extend([eval('conv{}'.format(i)), eval('bn{}'.format(i)), nn.ReLU()]);

            if i != 3:
                tfeb_modules.extend([eval('conv{}'.format(i+1)), eval('bn{}'.format(i+1)), nn.ReLU()]);

            h, w = tfeb_pool_sizes[p_index];
            if h>1 or w>1:
                tfeb_modules.append(nn.MaxPool2d(kernel_size = (h,w)));
            p_index += 1;

        tfeb_modules.append(nn.Dropout(0.2));
        tfeb_modules.extend([conv12, bn12, nn.ReLU()]);
        h, w = tfeb_pool_sizes[-1];
        if h>1 or w>1:
            tfeb_modules.append(nn.AvgPool2d(kernel_size = (2,4)));
        tfeb_modules.extend([nn.Flatten(), fcn]);

        self.tfeb = nn.Sequential(*tfeb_modules);

        self.output = nn.Sequential(
            nn.Softmax(dim=1)
        );
        

    def forward(self, x):
        # print(f"sfeb:\n{list(self.sfeb.children())}");
        # print(f"input x shape:{x.size()}");
        """
        input dim should be input x shape:torch.Size([32, 1, 1, 30225])
        if you got input x shape:[32, 30225, 1, 1], that is wrong.
        """
        x = self.sfeb(x);
        #swapaxes
        x = x.permute((0, 2, 1, 3));
        x = self.tfeb(x);
        y = self.output[0](x);
        return y;

    def make_layers(self, in_channels, out_channels, kernel_size, stride=(1,1), padding=0, bias=False):
        conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias);
        nn.init.kaiming_normal_(conv.weight, nonlinearity='relu'); # kaiming with relu is equivalent to he_normal in keras
        bn = nn.BatchNorm2d(out_channels);
        return conv, bn;

    def get_tfeb_pool_sizes(self, con2_ch, width):
        h = self.get_tfeb_pool_size_component(con2_ch);
        w = self.get_tfeb_pool_size_component(width);
        # print(w);
        pool_size = [];
        for  (h1, w1) in zip(h, w):
            pool_size.append((h1, w1));
        return pool_size;

    def get_tfeb_pool_size_component(self, length):
        # print(length);
        c = [];
        index = 1;
        while index <= 6:
            if length >= 2:
                if index == 6:
                    c.append(length);
                else:
                    c.append(2);
                    length = length // 2;
            else:
               c.append(1);

            index += 1;

        return c;

def GetCustomedACDNetModel(input_len=30225, nclass=3, sr=20000, channel_config=None):
    net = Customed_ACDNetV2(input_len, nclass, sr, ch_conf=channel_config);
    return net;

In [10]:
def getOpts():
    parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet');
    parser.add_argument('--netType', default='TLACDNet',  required=False);
    parser.add_argument('--data', default='../datasets/processed/',  required=False);
    parser.add_argument('--dataset', required=False, default='uec_iot', choices=['10']);
    parser.add_argument('--BC', default=True, action='store_true', help='BC learning');
    parser.add_argument('--strongAugment', default=True,  action='store_true', help='Add scale and gain augmentation');
    #在ipynb中，不能使用parser.parse，要改用parser.parse_known_args()
    opt, unknown = parser.parse_known_args()
    #Leqarning settings
   
    opt.batchSize = 64;
    opt.LR = 0.1;
    opt.momentum = 0.9;
    opt.weightDecay = 5e-4;
    opt.schedule = [0.15, 0.30, 0.45, 0.60, 0.75];#default:[0.15, 0.30, 0.45, 0.60, 0.75];
    opt.warmup = 10;
    opt.nEpochs = 500;
    # opt.LR = 0.1;
    # opt.momentum = 0.09;
    # opt.nEpochs = 1000;#2000;
    # opt.schedule = [0.3, 0.6, 0.9];
    # opt.warmup = 10;

    #Basic Net Settings
    opt.nClasses = 3#50;
    opt.nFolds = 1;#5;
    opt.split = 1#[i for i in range(1, opt.nFolds + 1)];
    opt.sr = 20000;
    opt.inputLength = 30225;
    #Test data
    opt.nCrops = 2;
    return opt

- <font size=2 color='#FF6600'>For the accuracy and model generation capacity it is better to add more data to the training and validation datasets.</font>

In [11]:
def getTrainGen(opt=None, split=None):
    # dataset = np.load(os.path.join(opt.data, opt.dataset, 'wav{}.npz'.format(opt.sr // 1000)), allow_pickle=True);
    # dataset = np.load("../datasets/fold1_test16000.npz", allow_pickle=True);
    dataset = np.load("../../../datasets/CurrentUse/generated_datasets/train/version6_office/single_fold_train_20240509143202.npz", allow_pickle=True);
    train_sounds = []
    train_labels = []
    # print(len(dataset['x']))
    # for i in range(1, opt.nFolds + 1):

    # train_sounds = [dataset['x'][i][0] for i in range(len(dataset['x']))]
    # train_labels = [dataset['y'][i][0] for i in range(len(dataset['y']))]
    train_sounds = dataset['fold{}'.format(1)].item()['sounds']
    train_labels = dataset['fold{}'.format(1)].item()['labels']
    # print(train_sounds)

    trainGen = TLGenerator(train_sounds, train_labels, opt);
    return trainGen

In [15]:
class PruningTrainer:
    global logObj
    def __init__(self, opt):
        self.opt = opt;
        #Conditional compression settings
        # self.opt.LR = 0.01;
        # self.opt.momentum = 0.09;
        # self.opt.schedule = [0.15, 0.30, 0.45, 0.60, 0.75];
        # self.opt.warmup = 0;
        self.opt.prune_algo = 'l0norm';
        self.opt.prune_interval = 1;
        # self.opt.nEpochs = 1000;
        self.testX = None;
        self.testY = None;
        self.bestAcc = 0.0;
        self.bestAccEpoch = 0;
        self.trainGen = getTrainGen(opt)#train_generator.setup(self.opt, self.opt.split);
        # if torch.device == "cuda:0":
        #     self.device = '"cuda:0"'
        # elif torch.device == "mps":
        #     self.device = "mps"
        # else:
        #     self.device = "cpu"
        self.device="cuda:0"
        print(f"In PruningTrainer:: current used device:{self.device}")
        # self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu");
        self.start_time = time.time();

    def PruneAndTrain(self):
        self.load_test_data();
        print(self.device);
        loss_func = torch.nn.KLDivLoss(reduction='batchmean');

        #Load saved model dict
        net = GetCustomedACDNetModel()#GetACDNetModel()
        net.load_state_dict(torch.load("../../../trained_models/step_1_base_train/base_train_lr0.1_bs64_wd0.0005_20240510094900/sp_ai_model_3Classes_hacc93.04029083251953_valacc_93.04029083251953_tracc_87.3798076923077_578th_epoch.pt", map_location=self.device)['weight']);
        calc.summary(net, (1,1,self.opt.inputLength))
        net.eval();
        val_acc, val_loss = self.__validate(net, loss_func);
        print('Testing - Val: Loss {:.3f}  Acc(top1) {:.3f}%'.format(val_loss, val_acc));
        net.train();

        optimizer = optim.SGD(net.parameters(), lr=self.opt.LR, weight_decay=self.opt.weightDecay, momentum=self.opt.momentum, nesterov=True)

        weight_name = ["weight"]# if not self.opt.factorize else ["weightA", "weightB", "weightC"]
        layers_n = weight_pruner.layers_n(net, param_name=["weight"])[1];
        all_num = sum(layers_n.values());
        print("\t TOTAL PRUNABLE PARAMS: {}".format(all_num));
        print("\t PRUNE RATIO :{}".format(self.opt.prune_ratio));
        sparse_factor = int(all_num * (1-self.opt.prune_ratio));
        print("\t SPARSE FACTOR: {}".format(sparse_factor));
        model_size = (sparse_factor * 4)/1024**2;
        print("\t MODEL SIZE: {:.2f} MB".format(model_size));
        prune_algo = getattr(weight_pruner, self.opt.prune_algo);
        prune_func = lambda m: prune_algo(m, sparse_factor, param_name=weight_name);

        for epoch_idx in range(self.opt.nEpochs):
            epoch_start_time = time.time();
            optimizer.param_groups[0]['lr'] = self.__get_lr(epoch_idx+1);
            cur_lr = optimizer.param_groups[0]['lr'];
            running_loss = 0.0;
            running_acc = 0.0;
            n_batches = math.ceil(len(self.trainGen.data)/self.opt.batchSize);
            net.train();
            for batch_idx in range(n_batches):
                # with torch.no_grad():
                x,y = self.trainGen.__getitem__(batch_idx)
                x = torch.tensor(np.moveaxis(x, 3, 1)).to(self.device);
                y = torch.tensor(y).to(self.device);

                # zero the parameter gradients
                optimizer.zero_grad();

                # forward + backward + optimize
                # outputs = net(x);#in office and use cpu
                x = x.type(torch.FloatTensor)
                outputs = net(x);
                res_y = y.argmax(dim=1)
                res_y = res_y.type(torch.FloatTensor)
                running_acc += ((( outputs.data.argmax(dim=1) == res_y)*1).float().mean()).item();
                y = y.type(torch.FloatTensor)
                loss = loss_func(outputs.log(), y);

                loss.backward();
                optimizer.step();

                running_loss += loss.item();

                with torch.no_grad():
                    prune_func(net);

            prune_func(net)

            tr_acc = (running_acc / n_batches)*100;
            tr_loss = running_loss / n_batches;

            #Epoch wise validation Validation
            epoch_train_time = time.time() - epoch_start_time;
            net.eval();
            val_acc, val_loss = self.__validate(net, loss_func);
            #Save best model
            self.__save_model(val_acc, tr_acc, epoch_idx, net);

            self.__on_epoch_end(epoch_start_time, epoch_train_time, epoch_idx, cur_lr, tr_loss, tr_acc, val_loss, val_acc);

            running_loss = 0;
            running_acc = 0;
            net.train();

        total_time_taken = time.time() - self.start_time;
        print("Execution finished in: {}".format(U.to_hms(total_time_taken)));

    def load_test_data(self):
        if(self.testX is None):
            data = np.load("../../../datasets/CurrentUse/generated_datasets/val/version6_office/final_single_val_20240509144233.npz", allow_pickle=True);
            dataX = np.moveaxis(data['x'], 3, 1).astype(np.float32);
            self.testX = torch.tensor(dataX).to(self.device);
            self.testY = torch.FloatTensor(data['y']).to(self.device);

    def __get_lr(self, epoch):
        divide_epoch = np.array([self.opt.nEpochs * i for i in self.opt.schedule]);
        decay = sum(epoch > divide_epoch);
        if epoch <= self.opt.warmup:
            decay = 1;
        return self.opt.LR * np.power(0.1, decay);

    def __validate(self, net, lossFunc):
        with torch.no_grad():
            y_pred = None;
            batch_size = (self.opt.batchSize//self.opt.nCrops)*self.opt.nCrops
            for idx in range(math.ceil(len(self.testX)/batch_size)):
                x = self.testX[idx*batch_size : (idx+1)*batch_size];
                x = torch.tensor(x)
                x = x.type(torch.FloatTensor) # use apple mp2
                scores = net(x);
                y_pred = scores.data if y_pred is None else torch.cat((y_pred, scores.data));

            acc, loss = self.__compute_accuracy(y_pred, self.testY, lossFunc);
        return acc, loss;

    #Calculating average prediction (10 crops) and final accuracy
    def __compute_accuracy(self, y_pred, y_target, lossFunc):
        with torch.no_grad():
            #Reshape to shape theme like each sample comtains 10 samples, calculate mean and find theindices that has highest average value for each sample
            y_pred = (y_pred.reshape(y_pred.shape[0]//self.opt.nCrops, self.opt.nCrops, y_pred.shape[1])).mean(dim=1).argmax(dim=1);
            y_target = (y_target.reshape(y_target.shape[0]//self.opt.nCrops, self.opt.nCrops, y_target.shape[1])).mean(dim=1).argmax(dim=1);
            # print(f"y_pred:{type(y_pred)}, y_target:{type(y_target)}")
            y_target = y_target.cpu() #use apple m2, in office use cuda
            acc = (((y_pred==y_target)*1).float().mean()*100).item();
            # valLossFunc = torch.nn.KLDivLoss();
            loss = lossFunc(y_pred.float().log(), y_target.float()).item();
            # loss = 0.0;
        return acc, loss;

    def __on_epoch_end(self, epoch_start_time, train_time, epochIdx, lr, tr_loss, tr_acc, val_loss, val_acc):
        epoch_time = time.time() - epoch_start_time;
        val_time = epoch_time - train_time;
        total_time = time.time() - self.start_time;
        line = '{} Epoch: {}/{} | Time: {} (Train {}  Val {}) | Train: LR {}  Loss {:.2f}  Acc {:.2f}% | Val: Loss {:.2f}  Acc(top1) {:.2f}% | HA {:.2f}@{}\n'.format(
            U.to_hms(total_time), epochIdx+1, self.opt.nEpochs, U.to_hms(epoch_time), U.to_hms(train_time), U.to_hms(val_time),
            lr, tr_loss, tr_acc, val_loss, val_acc, self.bestAcc, self.bestAccEpoch);
        # print(line)
        sys.stdout.write(line);
        sys.stdout.flush();

    def __save_model(self, acc, train_acc, epochIdx, net):
        if acc > self.bestAcc and acc > self.opt.first_save_acc:
            self.bestAcc = acc;
            self.bestAccEpoch = epochIdx +1;
            self.__do_save_model(acc, train_acc, self.bestAccEpoch, net);
        else:
            if acc > self.opt.save_val_acc and train_acc > self.opt.save_train_acc: 
                self.__do_save_model(acc, train_acc, epochIdx, net);
            else:
                pass

    def __do_save_model(self, acc, tr_acc, epochIdx, net):
        save_model_name = self.opt.model_name.format(self.bestAcc, acc, tr_acc, epochIdx, genDataTimeStr());
        save_model_fullpath = self.opt.save_dir + save_model_name;
        print(f"save model to {save_model_fullpath}")
        torch.save({'weight':net.state_dict(), 'config':net.ch_config}, save_model_fullpath);
        logObj.write(f"save model:{self.opt.model_name}, bestAcc:{self.bestAcc}, valAcc:{acc}, trainAcc:{tr_acc}, record@{epochIdx}-epoch");
        logObj.write("\n");
        logObj.flush();

In [16]:
"""
-----------------------------------------------------------------------------------------------
pruning ration : 0.9
final accuracy : 
epoch: 
self.opt.LR = 0.01;
opt.momentum = 0.9;
self.opt.schedule = [0.15, 0.30, 0.45, 0.60, 0.75];
self.opt.warmup = 0;
self.opt.prune_algo = 'l0norm';
self.opt.prune_interval = 1;
self.opt.nEpochs = 1000;
########################## Trainig Data Version 4 ##########################
opt.batchSize = 64;
opt.LR = 0.1;
opt.momentum = 0.9;
opt.weightDecay = 5e-4;
opt.schedule = [0.15, 0.30, 0.45, 0.60, 0.75];#default:[0.15, 0.30, 0.45, 0.60, 0.75];
opt.warmup = 10;
opt.nEpochs = 600;
==============================================================
"""



In [17]:
def main():
    global logObj;
    opt = getOpts()
    opt.sr = 20000;
    opt.inputLength = 30225;
    opt.trainer = None
    opt.prune_ratio = 0.8
    opt.first_save_acc = 91.0;
    opt.save_val_acc = 93.0;
    opt.save_train_acc = 80.0;
    trainStartTime = getDateStr();
    save_dir = "../../../trained_models/step_2_first_stage_pruning/pruning_time_{}_prunratio{}/".format(trainStartTime,opt.prune_ratio*100)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    opt.save_dir = save_dir;
    opt.model_name = "sp_ai_model_first_stage_prun_haacc_{}_valacc{}_tracc{}_epoch_{}_{}.pt";
    print("Initializing PruneAndTrain Object.....")
    trainer = PruningTrainer(opt)#TLTrainer(opt)
    print("Start to pruning.....")
    logSaveDir = "./first_stage_pruning_logs/"
    ChkAndCreateSingleDir(logSaveDir);
    logName = "FirstPruningLog_{}.log".format(trainStartTime);
    logObj = open(os.path.join(logSaveDir,logName),'w');
    trainer.PruneAndTrain();
    logObj.flush();
    logObj.close();

In [None]:
main()

Initializing PruneAndTrain Object.....
length of samples:803
In PruningTrainer:: current used device:cuda:0
Start to pruning.....
cuda:0
+----------------------------------------------------------------------------+
+                           Pytorch Model Summary                            +
------------------------------------------------------------------------------
   Layer (type)       Input Shape      Output Shape    Param #      FLOPS #
       Conv2d-1     (1, 1, 30225)     (8, 1, 15109)         72    1,087,848
  BatchNorm2d-2     (8, 1, 15109)     (8, 1, 15109)         16            0
         ReLu-3     (8, 1, 15109)     (8, 1, 15109)          0      120,872
       Conv2d-4     (8, 1, 15109)     (64, 1, 7553)      2,560   19,335,680
  BatchNorm2d-5     (64, 1, 7553)     (64, 1, 7553)        128            0
         ReLu-6     (64, 1, 7553)     (64, 1, 7553)          0      483,392
    MaxPool2d-7     (64, 1, 7553)      (64, 1, 151)          0      483,200
      Permute-8   

  x = torch.tensor(x)


Testing - Val: Loss nan  Acc(top1) 93.040%


  from .autonotebook import tqdm as notebook_tqdm


	 TOTAL PRUNABLE PARAMS: 4704625
	 PRUNE RATIO :0.8
	 SPARSE FACTOR: 940924
	 MODEL SIZE: 3.59 MB


  x = torch.tensor(x)


save model to ../../../trained_models/step_2_first_stage_pruning/pruning_time_2024051011_prunratio80.0/sp_ai_model_first_stage_prun_haacc_91.20879364013672_valacc91.20879364013672_tracc80.28846153846155_epoch_1_20240510110302.pt
0m19s Epoch: 1/500 | Time: 0m15s (Train 0m13s  Val 0m01s) | Train: LR 0.010000000000000002  Loss 0.41  Acc 80.29% | Val: Loss nan  Acc(top1) 91.21% | HA 91.21@1
0m35s Epoch: 2/500 | Time: 0m15s (Train 0m14s  Val 0m01s) | Train: LR 0.010000000000000002  Loss 0.29  Acc 81.85% | Val: Loss nan  Acc(top1) 89.01% | HA 91.21@1
0m50s Epoch: 3/500 | Time: 0m15s (Train 0m13s  Val 0m01s) | Train: LR 0.010000000000000002  Loss 0.23  Acc 83.05% | Val: Loss nan  Acc(top1) 88.28% | HA 91.21@1
1m06s Epoch: 4/500 | Time: 0m15s (Train 0m13s  Val 0m01s) | Train: LR 0.010000000000000002  Loss 0.22  Acc 81.61% | Val: Loss nan  Acc(top1) 85.35% | HA 91.21@1
1m21s Epoch: 5/500 | Time: 0m15s (Train 0m13s  Val 0m01s) | Train: LR 0.010000000000000002  Loss 0.21  Acc 82.45% | Val: Loss n