In [16]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt

In [17]:
# if torch is not installed:
# !pip install torch

In [18]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [19]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)


# 1D helix functions with 4 different parameters
x = r cos(t)

In [20]:
import pandas as pd
import torch
import random

# x = r cos(t)
# 1024 data points, 256 from each helix of a specific r
# each data point contains a x value and a class number idenfiying
#    the helix of a specific r
training_data = torch.zeros(1024,2).numpy()
test_data = torch.zeros(128,2).numpy()
training_labels = torch.zeros(1024).numpy()
test_labels = torch.zeros(128).numpy()
training_data.shape

(1024, 2)

In [21]:
# 4 random r values
rand_r = np.random.rand(4)
rand_r

array([0.55610911, 0.99991772, 0.6191424 , 0.20097477])

In [22]:
class_id = 0
for r in rand_r:
    # generate training data
    t = np.random.rand(256)
    x = r * np.sin(t * np.pi)
    training_labels[class_id*256:(class_id+1)*256] = class_id
    training_data[class_id*256:(class_id+1)*256,0] = t
    training_data[class_id*256:(class_id+1)*256,1] = x

    # generate test data
    t = np.random.rand(32)
    x = r * np.sin(t * np.pi)
    test_labels[class_id*32:(class_id+1)*32] = class_id
    test_data[class_id*32:(class_id+1)*32,0] = t
    test_data[class_id*32:(class_id+1)*32,1] = x
    class_id += 1
training_data

array([[0.23151928, 0.36974835],
       [0.0581334 , 0.10099937],
       [0.93852997, 0.10672608],
       ...,
       [0.08379015, 0.05229464],
       [0.2933401 , 0.160085  ],
       [0.60100216, 0.1909419 ]], dtype=float32)

import matplotlib.pyplot as plt
from cycler import cycler

my_colours = ['steelblue', 'seagreen', 'firebrick']
custom_cycler = cycler(color=my_colours)

fig, ax = plt.subplots()
ax.set_prop_cycle(custom_cycler)
ax.plot()
plt.show()

In [23]:
test_data

array([[0.6823358 , 0.46733925],
       [0.8010265 , 0.32542017],
       [0.9830069 , 0.02967399],
       [0.5634353 , 0.5451025 ],
       [0.96808356, 0.05566679],
       [0.13951053, 0.23600571],
       [0.79191166, 0.33819813],
       [0.50142974, 0.5561035 ],
       [0.9032121 , 0.16650148],
       [0.84283054, 0.26356363],
       [0.5193297 , 0.55508405],
       [0.6712255 , 0.47757307],
       [0.69114596, 0.45881873],
       [0.5828334 , 0.5373855 ],
       [0.01497689, 0.026156  ],
       [0.5023711 , 0.5560937 ],
       [0.7322627 , 0.4145189 ],
       [0.36087844, 0.50383407],
       [0.359702  , 0.5029607 ],
       [0.95538   , 0.07769906],
       [0.7715491 , 0.36572713],
       [0.3706102 , 0.51079416],
       [0.49264273, 0.5559606 ],
       [0.13453175, 0.22810104],
       [0.92351454, 0.13234316],
       [0.25434002, 0.3985533 ],
       [0.43046284, 0.54289204],
       [0.47935647, 0.55494004],
       [0.09226387, 0.15894362],
       [0.01267346, 0.02213554],
       [0.

classes = np.unique(training_data[:,0])

plt.figure()
for c in classes:
    c_train = training_data[training_data[:,0] == c]
    plt.plot(c_train[0],label="class " + str(c))
plt.legend(loc="best")
plt.show()
plt.close()

In [24]:
training_data = training_data.reshape((training_data.shape[0], training_data.shape[1], 1))
training_data

array([[[0.23151928],
        [0.36974835]],

       [[0.0581334 ],
        [0.10099937]],

       [[0.93852997],
        [0.10672608]],

       ...,

       [[0.08379015],
        [0.05229464]],

       [[0.2933401 ],
        [0.160085  ]],

       [[0.60100216],
        [0.1909419 ]]], dtype=float32)

In [25]:
test_data = test_data.reshape((test_data.shape[0], test_data.shape[1], 1))
test_data

array([[[0.6823358 ],
        [0.46733925]],

       [[0.8010265 ],
        [0.32542017]],

       [[0.9830069 ],
        [0.02967399]],

       [[0.5634353 ],
        [0.5451025 ]],

       [[0.96808356],
        [0.05566679]],

       [[0.13951053],
        [0.23600571]],

       [[0.79191166],
        [0.33819813]],

       [[0.50142974],
        [0.5561035 ]],

       [[0.9032121 ],
        [0.16650148]],

       [[0.84283054],
        [0.26356363]],

       [[0.5193297 ],
        [0.55508405]],

       [[0.6712255 ],
        [0.47757307]],

       [[0.69114596],
        [0.45881873]],

       [[0.5828334 ],
        [0.5373855 ]],

       [[0.01497689],
        [0.026156  ]],

       [[0.5023711 ],
        [0.5560937 ]],

       [[0.7322627 ],
        [0.4145189 ]],

       [[0.36087844],
        [0.50383407]],

       [[0.359702  ],
        [0.5029607 ]],

       [[0.95538   ],
        [0.07769906]],

       [[0.7715491 ],
        [0.36572713]],

       [[0.3706102 ],
        [0.5

In [26]:
# shuffle rows in training data and training labels
idx = np.random.permutation(len(training_data))
training_data = training_data[idx]
training_labels = training_labels[idx].astype(int)

In [27]:
# shuffle rows in test data and test labels
idx = np.random.permutation(len(test_data))
test_data = test_data[idx]
test_labels = test_labels[idx].astype(int)

In [None]:
input_shape = training_data.shape[1:]
n_classes = len(np.unique(training_labels))

model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.2,
    dropout=0.1,
)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

model.fit(
    training_data,
    training_labels,
    validation_split=0.25,
    epochs=300,
    batch_size=64,
    callbacks=callbacks,
)

model.evaluate(test_data, test_labels, verbose=1)


Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 2, 1)]       0           []                               
                                                                                                  
 layer_normalization_32 (LayerN  (None, 2, 1)        2           ['input_5[0][0]']                
 ormalization)                                                                                    
                                                                                                  
 multi_head_attention_16 (Multi  (None, 2, 1)        7169        ['layer_normalization_32[0][0]', 
 HeadAttention)                                                   'layer_normalization_32[0][0]'] 
                                                                                            

 ambda)                                                           'tf.__operators__.add_37[0][0]']
                                                                                                  
 layer_normalization_39 (LayerN  (None, 2, 1)        2           ['tf.__operators__.add_38[0][0]']
 ormalization)                                                                                    
                                                                                                  
 conv1d_38 (Conv1D)             (None, 2, 4)         8           ['layer_normalization_39[0][0]'] 
                                                                                                  
 dropout_43 (Dropout)           (None, 2, 4)         0           ['conv1d_38[0][0]']              
                                                                                                  
 conv1d_39 (Conv1D)             (None, 2, 1)         5           ['dropout_43[0][0]']             
          

Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300


Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300


Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300


Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300


Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 239/300
Epoch 240/300
Epoch 241/300
Epoch 242/300
Epoch 243/300
Epoch 244/300
Epoch 245/300
Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300