In [1]:
import sys

sys.path.append("../code")

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv2D,
    BatchNormalization,
    ReLU,
    GlobalAveragePooling2D,
    Dense,
    Softmax,
)
from kapre import STFT, Magnitude, MagnitudeToDecibel
from kapre.composed import get_melspectrogram_layer, get_log_frequency_spectrogram_layer
import tensorflow as tf
import tfimm
from config.config import config

In [None]:
class AttBlockV2(nn.Module):
    def __init__(self, in_features: int, out_features: int, activation="linear"):
        super().__init__()

        self.activation = activation
        self.att = nn.Conv1d(
            in_channels=in_features,
            out_channels=out_features,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True,
        )
        self.cla = nn.Conv1d(
            in_channels=in_features,
            out_channels=out_features,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True,
        )

        self.init_weights()

    def init_weights(self):
        init_layer(self.att)
        init_layer(self.cla)

    def forward(self, x):
        # x: (n_samples, n_in, n_time)
        norm_att = torch.softmax(torch.tanh(self.att(x)), dim=-1)
        cla = self.nonlinear_transform(self.cla(x))
        x = torch.sum(norm_att * cla, dim=2)
        return x, norm_att, cla

    def nonlinear_transform(self, x):
        if self.activation == "linear":
            return x
        elif self.activation == "sigmoid":
            return torch.sigmoid(x)

In [50]:
time_frame = 1000  ## simply
freq = 128
n_classes = 2


def get_model():

    # with strategy.scope():

    base = tfimm.create_model(config.model_type, pretrained=True, nb_classes=0)

    input = tf.keras.Input((freq, time_frame, 3), name="inp1")
    _, features = base(input, return_features=True)

    ## SED model flow
    # (batch_size, freq, frames, channels, )

    # (batch_size, frames, channels, )
    freq_reduced = tf.keras.layers.Lambda(
        lambda x: tf.math.reduce_mean(x, axis=1), output_shape=None
    )(features["features"])
    ## without pooling
    # ap = tf.keras.layers.AveragePooling1D(pool_size=2,strides=1,)(freq_reduced)

    dd = tf.keras.layers.Dense(2048)(freq_reduced)

    ## Starting with the attention block
    att = tf.keras.layers.Conv1D(
        2,
        1,
        strides=1,
        padding="valid",
    )(dd)
    att_aten = tf.keras.layers.Lambda(
        lambda x: tf.keras.activations.softmax(tf.keras.activations.tanh(x)),
        output_shape=None,
    )(att)
    cla = tf.keras.layers.Conv1D(
        2,
        1,
        strides=1,
        padding="valid",
    )(dd)
    cla_aten = tf.keras.layers.Lambda(
        lambda x: tf.keras.activations.sigmoid(x), output_shape=None
    )(cla)

    x = att_aten * cla_aten
    x = tf.keras.layers.Lambda(
        lambda x: tf.math.reduce_sum(x, axis=1), output_shape=None
    )(x)

    # model = tf.keras.models.Model(inputs=input, outputs=[x, att_aten, cla_aten])
    model = tf.keras.models.Model(inputs=input, outputs=x)
    # model = tf.keras.models.Model(inputs=input, outputs=logits)
    model.summary()
    opt = tf.keras.optimizers.Adam(learning_rate=config.LR_START)

    model.compile(
        optimizer=opt,
        # loss=[tf.keras.losses.SparseCategoricalCrossentropy()],
        loss={
            "logits": tf.keras.losses.CategoricalCrossentropy(
                from_logits=True, label_smoothing=0.1
            ),
        },
        # metrics=[
        #    # tf.keras.metrics.SparseCategoricalAccuracy(),
        #    # tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5),
        #    tf.keras.metrics.CategoricalAccuracy(),
        #    tf.keras.metrics.TopKCategoricalAccuracy(k=5),
        # ],
    )
    return model

In [51]:
model = get_model()

All PyTorch model weights were used when initializing ResNet.
All the weights of ResNet were initialized from the PyTorch model.



Model: "model_16"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inp1 (InputLayer)               [(None, 128, 1000, 3 0                                            
__________________________________________________________________________________________________
res_net_37 (ResNet)             ((None, 2048), {'ste 23561152    inp1[0][0]                       
__________________________________________________________________________________________________
lambda_34 (Lambda)              (None, 32, 2048)     0           res_net_37[0][17]                
__________________________________________________________________________________________________
dense_9 (Dense)                 (None, 32, 2048)     4196352     lambda_34[0][0]                  
___________________________________________________________________________________________

In [43]:
tf.random.uniform(shape=[2, 128, 1000, 3]).shape

TensorShape([2, 128, 1000, 3])

In [52]:
out = model(tf.random.uniform(shape=[2, 128, 1000, 3]))

In [53]:
out

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[6.4271836, 8.568943 ],
       [6.4548607, 8.613371 ]], dtype=float32)>

In [7]:
base = tfimm.create_model(config.model_type, pretrained=True, nb_classes=0)
input = tf.keras.Input((freq, time_frame, 3), name="inp1")
_, features = base(input, return_features=True)

All PyTorch model weights were used when initializing ResNet.
All the weights of ResNet were initialized from the PyTorch model.



In [8]:
features

{'stem': <KerasTensor: shape=(None, 32, 250, 64) dtype=float32 (created by layer 'res_net_5')>,
 'block_0': <KerasTensor: shape=(None, 32, 250, 256) dtype=float32 (created by layer 'res_net_5')>,
 'block_1': <KerasTensor: shape=(None, 32, 250, 256) dtype=float32 (created by layer 'res_net_5')>,
 'block_2': <KerasTensor: shape=(None, 32, 250, 256) dtype=float32 (created by layer 'res_net_5')>,
 'block_3': <KerasTensor: shape=(None, 16, 125, 512) dtype=float32 (created by layer 'res_net_5')>,
 'block_4': <KerasTensor: shape=(None, 16, 125, 512) dtype=float32 (created by layer 'res_net_5')>,
 'block_5': <KerasTensor: shape=(None, 16, 125, 512) dtype=float32 (created by layer 'res_net_5')>,
 'block_6': <KerasTensor: shape=(None, 16, 125, 512) dtype=float32 (created by layer 'res_net_5')>,
 'block_7': <KerasTensor: shape=(None, 8, 63, 1024) dtype=float32 (created by layer 'res_net_5')>,
 'block_8': <KerasTensor: shape=(None, 8, 63, 1024) dtype=float32 (created by layer 'res_net_5')>,
 'bloc