# Setup

In [1]:
# Filesystem prefixes (relative paths; each ends with a slash because the
# notebook builds file names by plain string concatenation).
WDIR = '../../data/'      # prefix for the .npy dataset files
OUTDIR = '../../models/'  # prefix for loaded / saved model paths

# Training hyperparameters.
N_EPOCHS = 200  # passed as `epochs` to model.fit
BATCH = 512     # passed as `batch_size` to model.fit

# Import

In [2]:
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load the dataset

In [3]:
# Read the preprocessed train/test arrays stored as .npy files under WDIR.
X_train, y_train = (
    np.load(WDIR + file_name) for file_name in ('X_train.npy', 'y_train.npy')
)
X_test, y_test = (
    np.load(WDIR + file_name) for file_name in ('X_test.npy', 'y_test.npy')
)

# Number of distinct label values that occur in the training labels.
n_classes = np.unique(y_train).size

# Build the model

In [4]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one Transformer encoder block to ``inputs``.

    The block consists of two residual sub-layers, each preceded by layer
    normalization: multi-head self-attention, then a feed-forward part built
    from two kernel-size-1 ``Conv1D`` layers.

    Args:
        inputs: Tensor to transform. Its last dimension determines the number
            of filters of the final convolution, so the output can be added
            back onto the residual.
        head_size: Passed to ``MultiHeadAttention`` as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the hidden (ReLU) convolution.
        dropout: Rate used inside the attention layer and by both
            ``Dropout`` layers.

    Returns:
        The feed-forward output added to the attention residual.
    """
    # Width needed by the last convolution so the skip connection lines up.
    output_width = inputs.shape[-1]

    # --- Sub-layer 1: normalize, self-attend (query == value), drop, add skip.
    normed_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = layers.Dropout(dropout)(attended)
    attention_residual = attended + inputs

    # --- Sub-layer 2: normalize, expand to ff_dim, drop, project back, add skip.
    hidden = layers.LayerNormalization(epsilon=1e-6)(attention_residual)
    hidden = layers.Conv1D(kernel_size=1, filters=ff_dim, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(kernel_size=1, filters=output_width)(hidden)
    return projected + attention_residual

In [5]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
    num_classes=None,
):
    """Build the Transformer classifier as an (uncompiled) Keras model.

    The network is a stack of ``transformer_encoder`` blocks, a global average
    pooling layer, an MLP head, and a softmax output layer.

    Args:
        input_shape: Shape of one sample (without the batch dimension), passed
            to ``keras.Input``.
        head_size: ``key_dim`` of every attention layer.
        num_heads: Number of attention heads per encoder block.
        ff_dim: Hidden width of the feed-forward part of every encoder block.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable of layer widths for the dense head; one
            ``Dense`` + ``Dropout`` pair is created per entry.
        dropout: Dropout rate used inside the encoder blocks.
        mlp_dropout: Dropout rate used after each dense layer of the head.
        num_classes: Number of units of the softmax output layer. When left as
            ``None`` the notebook-level ``n_classes`` variable is used, which
            keeps existing calls working unchanged.

    Returns:
        A ``keras.Model`` mapping the input tensor to class probabilities.
    """
    if num_classes is None:
        # Backward-compatible fallback to the global computed in the data
        # loading cell; pass num_classes explicitly to avoid the hidden
        # dependency on notebook state.
        num_classes = n_classes

    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # NOTE(review): with data_format="channels_first" the pooling averages over
    # the LAST axis of x. If the data is laid out as (batch, steps, features)
    # this pools across features rather than steps -- confirm this is intended
    # before changing it, since saved checkpoints depend on the resulting shape.
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

# Load the saved model and set up callbacks

In [6]:
# Start from a previously saved model instead of building a new one
# (presumably the result of an earlier run over epochs 0-168, going by the
# directory name -- confirm).
checkpoint_path = OUTDIR + 'transformer_epochs_0-168'
model = keras.models.load_model(checkpoint_path)

model.summary()

# Halt training after 10 epochs without improvement of the monitored metric
# (Keras' default monitor) and restore the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=10,
)
callbacks = [early_stopping]

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 22, 84)]     0           []                               
                                                                                                  
 layer_normalization_24 (LayerN  (None, 22, 84)      168         ['input_4[0][0]']                
 ormalization)                                                                                    
                                                                                                  
 multi_head_attention_12 (Multi  (None, 22, 84)      347220      ['layer_normalization_24[0][0]', 
 HeadAttention)                                                   'layer_normalization_24[0][0]'] 
                                                                                            

# Train / fit the model

In [7]:
# Continue training the loaded model. 20% of the training samples are held
# out for validation, which also feeds the early-stopping callback.
training_options = {
    "validation_split": 0.2,
    "epochs": N_EPOCHS,
    "batch_size": BATCH,
    "callbacks": callbacks,
}
model.fit(X_train, y_train, **training_options)

Epoch 1/200


2023-04-30 19:54:36.952683: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200

# Evaluate the model

In [None]:
# Score the trained model on the held-out test arrays (progress bar enabled).
model.evaluate(x=X_test, y=y_test, verbose=1)

In [None]:
# Persist the model twice: once to an extension-less path and once to an .h5
# file (Keras chooses the on-disk format from the path; the extension-less
# form is presumably the SavedModel directory format -- confirm for the
# installed TensorFlow version).
for export_path in (
    OUTDIR+'transformer_epochs_168-368_batch_512',
    OUTDIR+'transformer_epochs_168-368_batch_512.h5',
):
    model.save(export_path)