# Setup

In [26]:
N_EPOCHS = 200
WDIR = '../../data/'
OUTDIR = '../../models/'

# Load the dataset

In [17]:
import numpy as np

#loading our preprocessed datasets
X_train = np.load(WDIR+'X_train.npy')
y_train = np.load(WDIR+'y_train.npy')

X_test = np.load(WDIR+'X_test.npy')
y_test = np.load(WDIR+'y_test.npy')

n_classes = len(np.unique(y_train))

# Build the model

In [18]:
from tensorflow import keras
from tensorflow.keras import layers

In [19]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [20]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

# Compile model

In [21]:
input_shape = X_train.shape[1:]

model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]




Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 22, 84)]     0           []                               
                                                                                                  
 layer_normalization_24 (LayerN  (None, 22, 84)      168         ['input_4[0][0]']                
 ormalization)                                                                                    
                                                                                                  
 multi_head_attention_12 (Multi  (None, 22, 84)      347220      ['layer_normalization_24[0][0]', 
 HeadAttention)                                                   'layer_normalization_24[0][0]'] 
                                                                                            

# Train / fit the model

In [22]:
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=N_EPOCHS,
    batch_size=64,
    callbacks=callbacks,
)

# 2023-04-30: 170-180 min

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x1767a1130>

# Evaluate the model

In [24]:
model.evaluate(X_test, y_test, verbose=1)



[3.4583210945129395, 0.21821261942386627]

In [27]:
model.save(OUTDIR+'transformer_epochs_0-168')
model.save(OUTDIR+'transformer_epochs_0-168.h5')



INFO:tensorflow:Assets written to: ../../models/transformer_epochs_0-168/assets


INFO:tensorflow:Assets written to: ../../models/transformer_epochs_0-168/assets
