# Setup

In [16]:
# Folder containing the preprocessed .npy arrays. File names are appended to
# this string directly, so the trailing slash is required.
WDIR = '../../data/'
# Number of epochs requested from model.fit.
N_EPOCHS = 200

# Load the dataset

In [17]:
import numpy as np

# Read the preprocessed train/test arrays from WDIR, in the same order as the
# names on the left-hand side.
X_train, y_train, X_test, y_test = (
    np.load(WDIR+file_name)
    for file_name in ('X_train.npy', 'y_train.npy', 'X_test.npy', 'y_test.npy')
)

# Number of distinct labels present in the training targets.
n_classes = np.unique(y_train).size

# Build the model

In [18]:
from tensorflow import keras
from tensorflow.keras import layers

In [19]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one Transformer encoder block to ``inputs``.

    The block consists of a self-attention stage followed by a feed-forward
    stage. Each stage layer-normalises its input first and adds that input
    back onto its output (residual connection).

    Args:
        inputs: Keras tensor to transform.
        head_size: Value passed as ``key_dim`` to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first (ReLU) kernel-size-1 convolution.
        dropout: Rate used inside the attention layer and by both ``Dropout``
            layers.

    Returns:
        The transformed tensor; its last dimension equals ``inputs.shape[-1]``.
    """
    n_features = inputs.shape[-1]

    # Self-attention stage: normalise, attend (query and value are the same
    # tensor), apply dropout, then add the block input back.
    normed_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = layers.Dropout(dropout)(attended)
    attention_out = attended + inputs

    # Feed-forward stage: two kernel-size-1 convolutions; the second one
    # projects back to the input width so the residual sum is possible.
    hidden = layers.LayerNormalization(epsilon=1e-6)(attention_out)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=n_features, kernel_size=1)(hidden)
    return projected + attention_out

In [20]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
    num_classes=None,
    pooling_data_format="channels_first",
):
    """Build a Transformer-encoder classifier as a Keras functional model.

    The input goes through ``num_transformer_blocks`` encoder blocks, is
    reduced by global average pooling, passed through an MLP head and finally
    through a softmax layer.

    Args:
        input_shape: Shape of one sample (without the batch axis).
        head_size: ``key_dim`` of every attention layer.
        num_heads: Number of attention heads per encoder block.
        ff_dim: Hidden width of the feed-forward part of each encoder block.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable with the width of each dense (ReLU) layer in the
            head.
        dropout: Dropout rate used inside the encoder blocks.
        mlp_dropout: Dropout rate applied after each dense layer of the head.
        num_classes: Size of the softmax output. When ``None`` (default) the
            notebook-level ``n_classes`` variable is used, which is what this
            function always did before the parameter existed.
        pooling_data_format: ``data_format`` forwarded to
            ``GlobalAveragePooling1D``. The default keeps the original
            hard-coded value.
            NOTE(review): with ``"channels_first"`` the pooling averages over
            the last axis, i.e. the axis the encoder blocks treat as features.
            If averaging over the sequence axis is intended, pass
            ``"channels_last"`` -- confirm against the data layout.

    Returns:
        An uncompiled ``keras.Model`` mapping ``input_shape`` samples to
        ``num_classes`` class probabilities.
    """
    if num_classes is None:
        # Backward-compatible fallback: read the global defined when the
        # dataset was loaded.
        num_classes = n_classes

    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format=pooling_data_format)(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

# Compile model

In [21]:
# Shape of a single sample: everything after the leading (sample) axis.
input_shape = X_train.shape[1:]

# Instantiate the network: encoder settings first, classification head after.
model = build_model(
    input_shape,
    num_transformer_blocks=4,
    num_heads=4,
    head_size=256,
    ff_dim=4,
    dropout=0.25,
    mlp_units=[128],
    mlp_dropout=0.4,
)

# Integer class labels -> sparse categorical loss and accuracy.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

# Halt training after 10 epochs without improvement of the monitored quantity
# and restore the best weights seen.
callbacks = [keras.callbacks.EarlyStopping(restore_best_weights=True, patience=10)]




Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 22, 84)]     0           []                               
                                                                                                  
 layer_normalization_24 (LayerN  (None, 22, 84)      168         ['input_4[0][0]']                
 ormalization)                                                                                    
                                                                                                  
 multi_head_attention_12 (Multi  (None, 22, 84)      347220      ['layer_normalization_24[0][0]', 
 HeadAttention)                                                   'layer_normalization_24[0][0]'] 
                                                                                            

# Train / fit the model

In [22]:
# Train for at most N_EPOCHS epochs, holding out 20% of the training arrays
# for validation; the early-stopping callback may end training sooner.
# NOTE(review): Keras takes the validation split from the end of the arrays
# before shuffling -- confirm the saved training data is not ordered by class.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=N_EPOCHS,
    validation_split=0.2,
    callbacks=callbacks,
)

Epoch 1/200
 86/392 [=====>........................] - ETA: 39s - loss: 5.5262 - sparse_categorical_accuracy: 0.0045

# Evaluate the model

In [None]:
# Score the model on the test arrays. With the compile settings used in this
# notebook the result is [loss, sparse_categorical_accuracy].
model.evaluate(x=X_test, y=y_test, verbose=1)



[5.512951850891113, 0.00568944588303566]