In [1]:
import os
import sys
import random;

In [2]:
import torch.nn as nn;
import torch.ao.nn.quantized as nnq
from torch.ao.quantization import QConfigMapping
import torch.ao.quantization.quantize_fx

In [3]:
sys.path.append(os.path.abspath("../"));
import common.utils as U;
import common.opts as opt;

In [4]:
import argparse
import numpy as np

In [5]:
from datetime import datetime

In [28]:
import onnx
from onnx2keras import onnx_to_keras
import onnxruntime as rt

In [52]:
import tensorflow.keras.backend as K;
from tensorflow import keras;
from tensorflow.keras.models import Model, load_model;
import tensorflow.keras.layers as L

In [27]:
# !pip install -U onnx
# !pip install -U onnx-tf
# !pip install onnx2keras
# !pip install onnxruntime

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu");
print(f"current use device:{device}")

current use device:cuda:0


In [8]:
def genDataTimeStr():
    return datetime.today().strftime('%Y-%m-%d %H:%M:%S').replace('-',"").replace(' ',"").replace(':',"");

In [9]:
seed = 42;
random.seed(seed);
np.random.seed(seed);
torch.manual_seed(seed);
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed);
torch.backends.cudnn.deterministic = True;
torch.backends.cudnn.benchmark = False;
class Customed_ACDNetV2(nn.Module):
    def __init__(self, input_length, n_class, sr, ch_conf=None):
        super(Customed_ACDNetV2, self).__init__();
        self.input_length = input_length;
        self.ch_config = ch_conf;

        stride1 = 2;
        stride2 = 2;
        channels = 8;
        k_size = (3, 3);
        n_frames = (sr/1000)*10; #No of frames per 10ms

        sfeb_pool_size = int(n_frames/(stride1*stride2));
        # tfeb_pool_size = (2,2);
        if self.ch_config is None:
            self.ch_config = [channels, channels*8, channels*4, channels*8, channels*8, channels*16, channels*16, channels*32, channels*32, channels*64, channels*64, n_class];
        # avg_pool_kernel_size = (1,4) if self.ch_config[1] < 64 else (2,4);
        fcn_no_of_inputs = self.ch_config[-1];
        conv1, bn1 = self.make_layers(1, self.ch_config[0], (1, 9), (1, stride1));
        conv2, bn2 = self.make_layers(self.ch_config[0], self.ch_config[1], (1, 5), (1, stride2));
        conv3, bn3 = self.make_layers(1, self.ch_config[2], k_size, padding=1);
        conv4, bn4 = self.make_layers(self.ch_config[2], self.ch_config[3], k_size, padding=1);
        conv5, bn5 = self.make_layers(self.ch_config[3], self.ch_config[4], k_size, padding=1);
        conv6, bn6 = self.make_layers(self.ch_config[4], self.ch_config[5], k_size, padding=1);
        conv7, bn7 = self.make_layers(self.ch_config[5], self.ch_config[6], k_size, padding=1);
        conv8, bn8 = self.make_layers(self.ch_config[6], self.ch_config[7], k_size, padding=1);
        conv9, bn9 = self.make_layers(self.ch_config[7], self.ch_config[8], k_size, padding=1);
        conv10, bn10 = self.make_layers(self.ch_config[8], self.ch_config[9], k_size, padding=1);
        conv11, bn11 = self.make_layers(self.ch_config[9], self.ch_config[10], k_size, padding=1);
        conv12, bn12 = self.make_layers(self.ch_config[10], self.ch_config[11], (1, 1));
        fcn = nn.Linear(fcn_no_of_inputs, n_class);
        nn.init.kaiming_normal_(fcn.weight, nonlinearity='sigmoid') # kaiming with sigoid is equivalent to lecun_normal in keras

        self.sfeb = nn.Sequential(
            #Start: Filter bank
            conv1, bn1, nn.ReLU(),\
            conv2, bn2, nn.ReLU(),\
            nn.MaxPool2d(kernel_size=(1, sfeb_pool_size))
        );

        tfeb_modules = [];
        self.tfeb_width = int(((self.input_length / sr)*1000)/10); # 10ms frames of audio length in seconds
        tfeb_pool_sizes = self.get_tfeb_pool_sizes(self.ch_config[1], self.tfeb_width);
        p_index = 0;
        for i in [3,4,6,8,10]:
            tfeb_modules.extend([eval('conv{}'.format(i)), eval('bn{}'.format(i)), nn.ReLU()]);

            if i != 3:
                tfeb_modules.extend([eval('conv{}'.format(i+1)), eval('bn{}'.format(i+1)), nn.ReLU()]);

            h, w = tfeb_pool_sizes[p_index];
            if h>1 or w>1:
                tfeb_modules.append(nn.MaxPool2d(kernel_size = (h,w)));
            p_index += 1;

        tfeb_modules.append(nn.Dropout(0.2));
        tfeb_modules.extend([conv12, bn12, nn.ReLU()]);
        h, w = tfeb_pool_sizes[-1];
        if h>1 or w>1:
            tfeb_modules.append(nn.AvgPool2d(kernel_size = (h,w)));
        tfeb_modules.extend([nn.Flatten(), fcn]);

        self.tfeb = nn.Sequential(*tfeb_modules);

        self.output = nn.Sequential(
            nn.Softmax(dim=1)
        );
        

    def forward(self, x):
        x = self.sfeb(x);
        #swapaxes
        x = x.permute((0, 2, 1, 3));
        x = self.tfeb(x);
        y = self.output[0](x);
        return y;

    def make_layers(self, in_channels, out_channels, kernel_size, stride=(1,1), padding=0, bias=False):
        conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias);
        nn.init.kaiming_normal_(conv.weight, nonlinearity='relu'); # kaiming with relu is equivalent to he_normal in keras
        bn = nn.BatchNorm2d(out_channels);
        return conv, bn;

    def get_tfeb_pool_sizes(self, con2_ch, width):
        h = self.get_tfeb_pool_size_component(con2_ch);
        w = self.get_tfeb_pool_size_component(width);
        # print(w);
        pool_size = [];
        for  (h1, w1) in zip(h, w):
            pool_size.append((h1, w1));
        return pool_size;

    def get_tfeb_pool_size_component(self, length):
        # print(length);
        c = [];
        index = 1;
        while index <= 6:
            if length >= 2:
                if index == 6:
                    c.append(length);
                else:
                    c.append(2);
                    length = length // 2;
            else:
               c.append(1);

            index += 1;

        return c;

def GetCustomedACDNetModel(input_len=30225, nclass=2, sr=20000, channel_config=None):
    net = Customed_ACDNetV2(input_len, nclass, sr, ch_conf=channel_config);
    return net;

## Keras Model

In [54]:
class TLACDNet:
	def __init__(self, pretrained_model_path=None,opt=None, num_class=6):
		self.opt = opt
		self.pretrained_model_path = pretrained_model_path
		self.new_model = None
		self.num_class = num_class

	def Create_TLACDNet(self):
		model = load_model(self.pretrained_model_path)
		print(f"original model loaded....")
		# for l in model.layers:
		# 	print(f"layer:{l} trainable weight length is {len(l.wei)}")
		total_layers_num = len(model.layers)
		replaced_layers_num = 2
		freeze_layers_num = total_layers_num-replaced_layers_num

		## freeze layers
		for i in range(freeze_layers_num):
			model.layers[i].trainable = False

		for j in range(freeze_layers_num, total_layers_num):
			model.layers[j].trainable = True

		custom_layers = model.layers[freeze_layers_num-1].output
		custom_layers = L.Dense(self.num_class)(custom_layers)
		# custom_layers = Softmax()(custom_layers)
		custom_layers = L.Dense(self.num_class,activation="softmax")(custom_layers)

		new_model = Model(inputs=model.input,outputs=custom_layers)
		print("new model info:\n")
		print(new_model.summary())
		print("\n")
		return new_model

def GetKerasACDNet(pretrained_model_path=None,opt=None, num_class=2):
	trainedModelPath = pretrained_model_path
	tlacdnet = TLACDNet(trainedModelPath, opt, num_class)
	return tlacdnet.Create_TLACDNet()

In [56]:
pretrainedmodelpath = "../resources/pretrained_models/acdnet20_20khz_fold4.h5";
keras_model = GetKerasACDNet(pretrained_model_path=pretrainedmodelpath);

DEBUG:h5py._conv:Creating converter from 3 to 5
2024-01-22 15:42:41.572309: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-22 15:42:41.584418: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-22 15:42:41.584500: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6

original model loaded....
new model info:

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1, 30225, 1)]     0         
                                                                 
 conv2d (Conv2D)             (None, 1, 15109, 4)       36        
                                                                 
 batch_normalization (Batch  (None, 1, 15109, 4)       16        
 Normalization)                                                  
                                                                 
 re_lu (ReLU)                (None, 1, 15109, 4)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 1, 7553, 32)       640       
                                                                 
 batch_normalization_1 (Bat  (None, 1, 7553, 32)       128       
 chNormalization) 

## Convert to onnx format

In [15]:
#initializing model structures
model = GetCustomedACDNetModel()

In [16]:
weights_path = "./trained_models/acdnet_alarm_3rd_20240119100903_acc_96.59091186523438_67th_epoch.pt"

In [17]:
state = torch.load(weights_path, map_location=device);
config = state['config']
model.load_state_dict(state['weight']);

In [38]:
# print(f"config is {config}")
onnx_save_path = "../th/onnx_models/onnx_3rd_97.7_{}.onnx".format(genDataTimeStr());
rdnary = torch.randn((1, 1, 1, 30225));
# torch.onnx.export(model, rdnary, onnx_save_path)
torch.onnx.export(model, rdnary, onnx_save_path, verbose=True, input_names = ['input.1'], output_names = ['121'])

Exported graph: graph(%input.1 : Float(1, 1, 1, 30225, strides=[30225, 30225, 30225, 1], requires_grad=0, device=cpu),
      %tfeb.38.weight : Float(2, 2, strides=[2, 1], requires_grad=1, device=cpu),
      %tfeb.38.bias : Float(2, strides=[1], requires_grad=1, device=cpu),
      %onnx::Conv_123 : Float(8, 1, 1, 9, strides=[9, 9, 9, 1], requires_grad=0, device=cpu),
      %onnx::Conv_124 : Float(8, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_126 : Float(64, 8, 1, 5, strides=[40, 5, 5, 1], requires_grad=0, device=cpu),
      %onnx::Conv_127 : Float(64, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_129 : Float(32, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_130 : Float(32, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_132 : Float(64, 32, 3, 3, strides=[288, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_133 : Float(64, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_135 : Float(64, 6

## Convert onnx to keras model

In [39]:
#first get input name
onnx_model_path = "../th/onnx_models/onnx_3rd_97.7_20240122152249.onnx"
sess = rt.InferenceSession(onnx_model_path)
input_name = sess.get_inputs()[0].name;
output_name = sess.get_outputs()[0];
print(f"input_name is {input_name}, and output name is {output_name}")

input_name is input.1, and output name is NodeArg(name='121', type='tensor(float)', shape=[1, 2])


In [41]:
onnx_model = onnx.load(onnx_model_path);
k_model = onnx_to_keras(onnx_model, ['input.1']);


INFO:onnx2keras:Converter is called.
DEBUG:onnx2keras:List input shapes:
DEBUG:onnx2keras:None
DEBUG:onnx2keras:List inputs:
DEBUG:onnx2keras:Input 0 -> input.1.
DEBUG:onnx2keras:List outputs:
DEBUG:onnx2keras:Output 0 -> 121.
DEBUG:onnx2keras:Gathering weights to dictionary.
DEBUG:onnx2keras:Found weight tfeb.38.weight with shape (2, 2).
DEBUG:onnx2keras:Found weight tfeb.38.bias with shape (2,).
DEBUG:onnx2keras:Found weight onnx::Conv_123 with shape (8, 1, 1, 9).
DEBUG:onnx2keras:Found weight onnx::Conv_124 with shape (8,).
DEBUG:onnx2keras:Found weight onnx::Conv_126 with shape (64, 8, 1, 5).
DEBUG:onnx2keras:Found weight onnx::Conv_127 with shape (64,).
DEBUG:onnx2keras:Found weight onnx::Conv_129 with shape (32, 1, 3, 3).
DEBUG:onnx2keras:Found weight onnx::Conv_130 with shape (32,).
DEBUG:onnx2keras:Found weight onnx::Conv_132 with shape (64, 32, 3, 3).
DEBUG:onnx2keras:Found weight onnx::Conv_133 with shape (64,).
DEBUG:onnx2keras:Found weight onnx::Conv_135 with shape (64, 64,

ValueError: '/sfeb/sfeb.0/Conv_output_0/' is not a valid root scope name. A root scope name has to match the following pattern: ^[A-Za-z0-9.][A-Za-z0-9_.\\/>-]*$

In [45]:
# !pip install onnxscript

Collecting onnxscript
  Downloading onnxscript-0.1.0.dev20231228-py3-none-any.whl.metadata (10 kB)
Downloading onnxscript-0.1.0.dev20231228-py3-none-any.whl (550 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m550.7/550.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m[31m2.1 MB/s[0m eta [36m0:00:01[0mm
[?25hInstalling collected packages: onnxscript
Successfully installed onnxscript-0.1.0.dev20231228


In [1]:
from tinynn.converter import TFLiteConverter

In [None]:
with torch.no_grad():
    qmodel = copy.deepcopy(mynn)
    torch.quantization.convert(qmodel, inplace=False)
    #
    torch.backends.quantized.engine = 'qnnpack'
    converter = TFLiteConverter(qmodel.module,
                                torch.randn(1, 64, nn_h, nn_w,
                                tflite_path="qmodel.tflite")
    converter.convert()