In [47]:
import torch.ao.nn.quantized as nnq
from torch.ao.quantization import QConfigMapping
import torch.ao.quantization.quantize_fx
import random;

In [2]:
sys.path.append(os.path.abspath("../"));
import common.utils as U;
import common.opts as opt;

In [3]:
import argparse
import numpy as np

In [4]:
# !pip install -U onnx
# !pip install -U onnx-tf

In [34]:
seed = 42;
random.seed(seed);
np.random.seed(seed);
torch.manual_seed(seed);
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed);
torch.backends.cudnn.deterministic = True;
torch.backends.cudnn.benchmark = False;
class Customed_ACDNetV2(nn.Module):
    def __init__(self, input_length, n_class, sr, ch_conf=None):
        super(ACDNetQuant, self).__init__();
        self.input_length = input_length;
        self.ch_config = ch_conf;

        stride1 = 2;
        stride2 = 2;
        channels = 8;
        k_size = (3, 3);
        n_frames = (sr/1000)*10; #No of frames per 10ms

        sfeb_pool_size = int(n_frames/(stride1*stride2));
        # tfeb_pool_size = (2,2);
        if self.ch_config is None:
            self.ch_config = [channels, channels*8, channels*4, channels*8, channels*8, channels*16, channels*16, channels*32, channels*32, channels*64, channels*64, n_class];
        # avg_pool_kernel_size = (1,4) if self.ch_config[1] < 64 else (2,4);
        fcn_no_of_inputs = self.ch_config[-1];
        conv1, bn1 = self.make_layers(1, self.ch_config[0], (1, 9), (1, stride1));
        conv2, bn2 = self.make_layers(self.ch_config[0], self.ch_config[1], (1, 5), (1, stride2));
        conv3, bn3 = self.make_layers(1, self.ch_config[2], k_size, padding=1);
        conv4, bn4 = self.make_layers(self.ch_config[2], self.ch_config[3], k_size, padding=1);
        conv5, bn5 = self.make_layers(self.ch_config[3], self.ch_config[4], k_size, padding=1);
        conv6, bn6 = self.make_layers(self.ch_config[4], self.ch_config[5], k_size, padding=1);
        conv7, bn7 = self.make_layers(self.ch_config[5], self.ch_config[6], k_size, padding=1);
        conv8, bn8 = self.make_layers(self.ch_config[6], self.ch_config[7], k_size, padding=1);
        conv9, bn9 = self.make_layers(self.ch_config[7], self.ch_config[8], k_size, padding=1);
        conv10, bn10 = self.make_layers(self.ch_config[8], self.ch_config[9], k_size, padding=1);
        conv11, bn11 = self.make_layers(self.ch_config[9], self.ch_config[10], k_size, padding=1);
        conv12, bn12 = self.make_layers(self.ch_config[10], self.ch_config[11], (1, 1));
        fcn = nn.Linear(fcn_no_of_inputs, n_class);
        nn.init.kaiming_normal_(fcn.weight, nonlinearity='sigmoid') # kaiming with sigoid is equivalent to lecun_normal in keras

        self.sfeb = nn.Sequential(
            #Start: Filter bank
            conv1, bn1, nn.ReLU(),\
            conv2, bn2, nn.ReLU(),\
            nn.MaxPool2d(kernel_size=(1, sfeb_pool_size))
        );

        tfeb_modules = [];
        self.tfeb_width = int(((self.input_length / sr)*1000)/10); # 10ms frames of audio length in seconds
        tfeb_pool_sizes = self.get_tfeb_pool_sizes(self.ch_config[1], self.tfeb_width);
        p_index = 0;
        for i in [3,4,6,8,10]:
            tfeb_modules.extend([eval('conv{}'.format(i)), eval('bn{}'.format(i)), nn.ReLU()]);

            if i != 3:
                tfeb_modules.extend([eval('conv{}'.format(i+1)), eval('bn{}'.format(i+1)), nn.ReLU()]);

            h, w = tfeb_pool_sizes[p_index];
            if h>1 or w>1:
                tfeb_modules.append(nn.MaxPool2d(kernel_size = (h,w)));
            p_index += 1;

        tfeb_modules.append(nn.Dropout(0.2));
        tfeb_modules.extend([conv12, bn12, nn.ReLU()]);
        h, w = tfeb_pool_sizes[-1];
        if h>1 or w>1:
            tfeb_modules.append(nn.AvgPool2d(kernel_size = (h,w)));
        tfeb_modules.extend([nn.Flatten(), fcn]);

        self.tfeb = nn.Sequential(*tfeb_modules);

        self.output = nn.Sequential(
            nn.Softmax(dim=1)
        );
        

    def forward(self, x):
        x = self.sfeb(x);
        #swapaxes
        x = x.permute((0, 2, 1, 3));
        x = self.tfeb(x);
        y = self.output[0](x);
        return y;

    def make_layers(self, in_channels, out_channels, kernel_size, stride=(1,1), padding=0, bias=False):
        conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias);
        nn.init.kaiming_normal_(conv.weight, nonlinearity='relu'); # kaiming with relu is equivalent to he_normal in keras
        bn = nn.BatchNorm2d(out_channels);
        return conv, bn;

    def get_tfeb_pool_sizes(self, con2_ch, width):
        h = self.get_tfeb_pool_size_component(con2_ch);
        w = self.get_tfeb_pool_size_component(width);
        # print(w);
        pool_size = [];
        for  (h1, w1) in zip(h, w):
            pool_size.append((h1, w1));
        return pool_size;

    def get_tfeb_pool_size_component(self, length):
        # print(length);
        c = [];
        index = 1;
        while index <= 6:
            if length >= 2:
                if index == 6:
                    c.append(length);
                else:
                    c.append(2);
                    length = length // 2;
            else:
               c.append(1);

            index += 1;

        return c;

def GetCustomedACDNetModel(input_len=30225, nclass=6, sr=20000, channel_config=None):
    net = Customed_ACDNetV2(input_len, nclass, sr, ch_conf=channel_config);
    return net;

In [17]:
# class TLGenerator():
#     #Generates data for Keras
#     def __init__(self, samples, labels, options):
#         random.seed(42);
#         #Initialization
#         print(f"length of samples:{len(samples)}")
#         self.data = [(samples[i], labels[i]) for i in range (0, len(samples))];
#         self.opt = options;
#         self.batch_size = options.batchSize;
#         self.preprocess_funcs = self.preprocess_setup();
#         self.mapdict = dict([(17,1),(18,2),(24,3),
#                              (51,4),(52,5),(53,6)])

#     def __len__(self):
#         #Denotes the number of batches per epoch
#         return int(np.floor(len(self.data) / self.batch_size));
#         #return len(self.samples);

#     def __getitem__(self, batchIndex):
#         #Generate one batch of data
#         batchX, batchY = self.generate_batch(batchIndex);
#         batchX = np.expand_dims(batchX, axis=1);
#         batchX = np.expand_dims(batchX, axis=3);
#         return batchX, batchY

#     def generate_batch(self, batchIndex):
#         #Generates data containing batch_size samples
#         sounds = [];
#         labels = [];
#         indexes = None;
#         for i in range(self.batch_size):
#             # Training phase of BC learning
#             # Select two training examples
#             while True:
#                 sound1, label1 = self.data[random.randint(0, len(self.data) - 1)]
#                 sound2, label2 = self.data[random.randint(0, len(self.data) - 1)]
#                 if label1 != label2:
#                     break
#             sound1 = self.preprocess(sound1)
#             sound2 = self.preprocess(sound2)

#             # Mix two examples
#             r = np.array(random.random())
#             sound = U.mix(sound1, sound2, r, self.opt.sr).astype(np.float32)
#             # print(f"sound length after U.mix is {len(sound)}")
#             eye = np.eye(self.opt.nClasses)
#             idx1 = self.mapdict[label1]- 1
#             idx2 = self.mapdict[label2] - 1
#             label = (eye[idx1] * r + eye[idx2] * (1 - r)).astype(np.float32)
#             # label = (eye[label1] * r + eye[label2] * (1 - r)).astype(np.float32)

#             #For stronger augmentation
#             sound = U.random_gain(6)(sound).astype(np.float32)
#             # print(f"sound length after U.random_gain is {len(sound)}")
#             # print(f"type of sound:{type(sound)}, and type of label:{type(label)}")
#             # print(f"shape of sound:{sound.shape}")
#             sounds.append(sound);
#             labels.append(label);

#         sounds = np.asarray(sounds);
#         labels = np.asarray(labels);
#         # print(f"labels in generate_batch is:\n{labels}")

#         return sounds, labels;

#     def preprocess_setup(self):
#         funcs = []
#         if self.opt.strongAugment:
#             funcs += [U.random_scale(1.25)]

#         funcs += [U.padding(self.opt.inputLength // 2),
#                   U.random_crop(self.opt.inputLength),
#                   U.normalize(32768.0)]
#         return funcs

#     def preprocess(self, sound):
#         for f in self.preprocess_funcs:
#             sound = f(sound)

#         return sound;

In [18]:
# def getOpts():
#     parser = argparse.ArgumentParser(description='Transfer Learning for ACDNet');
#     parser.add_argument('--netType', default='TLACDNet',  required=False);
#     parser.add_argument('--data', default='./datasets/processed/',  required=False);
#     parser.add_argument('--dataset', required=False, default='uec_iot', choices=['10']);
#     parser.add_argument('--BC', default=True, action='store_true', help='BC learning');
#     parser.add_argument('--strongAugment', default=True,  action='store_true', help='Add scale and gain augmentation');
#     #在ipynb中，不能使用parser.parse，要改用parser.parse_known_args()
#     opt, unknown = parser.parse_known_args()
#     #Leqarning settings
#     opt.batchSize = 2;
#     opt.weightDecay = 5e-4;
#     opt.momentum = 0.09;
#     opt.nEpochs = 10;#2000;
#     opt.LR = 0.01#0.1;
#     opt.schedule = [0.03, 0.06, 0.09]#[0.3, 0.6, 0.9];
#     opt.warmup = 10;

#     #Basic Net Settings
#     opt.nClasses = 6#50;
#     opt.nFolds = 1;#5;
#     opt.split = 1#[i for i in range(1, opt.nFolds + 1)];
#     opt.sr = 16000#20000;
#     opt.inputLength = 30225;
#     #Test data
#     opt.nCrops = 5;
#     return opt

In [19]:
# def getTrainGen(opt=None, split=None):
#     # dataset = np.load(os.path.join(opt.data, opt.dataset, 'wav{}.npz'.format(opt.sr // 1000)), allow_pickle=True);
#     # dataset = np.load("../datasets/fold1_test16000.npz", allow_pickle=True);
#     dataset = np.load("../datasets/fold1_dataset.npz", allow_pickle=True);
#     train_sounds = []
#     train_labels = []
#     # print(len(dataset['x']))
#     # for i in range(1, opt.nFolds + 1):

#     # train_sounds = [dataset['x'][i][0] for i in range(len(dataset['x']))]
#     # train_labels = [dataset['y'][i][0] for i in range(len(dataset['y']))]
#     train_sounds = dataset['fold{}'.format(1)].item()['sounds']
#     train_labels = dataset['fold{}'.format(1)].item()['labels']
#     # print(train_sounds)

#     trainGen = TLGenerator(train_sounds, train_labels, opt);
#     return trainGen

In [30]:
# def main():
#     opt = getOpts()
#     opt.sr = 20000;
#     opt.inputLength = 30225;
#     dataGen = getTrainGen(opt)
#     x,y = dataGen.__getitem__(0);
#     print(f"shape of x:{x.shape}")
#     x  = torch.tensor(np.moveaxis(x, 3, 1)).to('cpu');
#     print(f"shape of x:{x.shape}")

In [31]:
# length of samples:65
# shape of x:(2, 1, 30225, 1)
# shape of x:torch.Size([2, 1, 1, 30225])

length of samples:65
shape of x:(2, 1, 30225, 1)
shape of x:torch.Size([2, 1, 1, 30225])


In [43]:
def convert_pt2keras(pt_model=None):
    rdnary = torch.randn(2, 1, 30225, 1);
    # quant_model = GetCustomedACDNetModel()
    # quant_model.load_state_dict(torch.load(pt_model, map_location='cpu'));
    quant_model = torch.jit.load(pt_model)
    onnx_program = torch.onnx.dynamo_export(quant_model, rdnary)
    onnx_program.save("./onnx_models/acdnet_tl_quant_model_202312281348_80.onnx")

In [46]:
model_path = "./quantized_models/acdnet_tl_quant_model_202312281348_80.pt"
convert_pt2keras(pt_model=model_path)




OnnxExporterError: Failed to export the model to ONNX. Generating SARIF report at 'report_dynamo_export.sarif'. SARIF is a standard format for the output of static analysis tools. SARIF logs can be loaded in VS Code SARIF viewer extension, or SARIF web viewer (https://microsoft.github.io/sarif-web-component/). Please report a bug on PyTorch Github: https://github.com/pytorch/pytorch/issues

In [45]:
# !pip install onnxscript

Collecting onnxscript
  Downloading onnxscript-0.1.0.dev20231228-py3-none-any.whl.metadata (10 kB)
Downloading onnxscript-0.1.0.dev20231228-py3-none-any.whl (550 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m550.7/550.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m[31m2.1 MB/s[0m eta [36m0:00:01[0mm
[?25hInstalling collected packages: onnxscript
Successfully installed onnxscript-0.1.0.dev20231228


In [1]:
from tinynn.converter import TFLiteConverter

In [None]:
with torch.no_grad():
    qmodel = copy.deepcopy(mynn)
    torch.quantization.convert(qmodel, inplace=False)
    #
    torch.backends.quantized.engine = 'qnnpack'
    converter = TFLiteConverter(qmodel.module,
                                torch.randn(1, 64, nn_h, nn_w,
                                tflite_path="qmodel.tflite")
    converter.convert()