In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import MultiHeadAttention, Input, Dense
from tensorflow.keras.layers import LayerNormalization, Layer
from tensorflow.keras.layers import TextVectorization, Embedding
from tensorflow.data import Dataset
from tensorflow import convert_to_tensor, string, float32, shape, reshape
from tensorflow.keras import utils
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Flatten

In [2]:
# import dataset
trainset = np.load('C:/Users/ykung/Downloads/facecamera/trainset.npy')
trainsety = np.load('C:/Users/ykung/Downloads/facecamera/trainsety.npy')
testset = np.load('C:/Users/ykung/Downloads/facecamera/testset.npy')
testsety = np.load('C:/Users/ykung/Downloads/facecamera/testsety.npy')

In [3]:
trainset = np.reshape(trainset, (68736,90,2))
trainsety = np.reshape(trainsety, (68736,1,1))
testset = np.reshape(testset, (18816,90,2))
testsety = np.reshape(testsety, (18816,1,1))

In [4]:
newtrainsety = np.zeros((68736,5))
for i in range(68736):
    val1 = int(trainsety[i,0,0])
    newtrainsety[i,val1]=1

In [5]:
newtestsety = np.zeros((18816,5))
for i in range(18816):
    val1 = int(testsety[i,0,0])
    newtestsety[i,val1]=1

In [9]:
newtrainset = np.zeros((68736,90,2))
for i in range(68736):
    for j in range(90):
        val1 = trainset[i,j,0]
        val2 = trainset[i,j,1]
        if val1 < -1500:
            val1 = -1500
        if val1 > 1499:
            val1 = 1499
        if val2 < -1500:
            val2 = -1500
        if val2 > 1499:
            val2 = 1499
        newtrainset[i,j,0]=int(val1+1500)
        newtrainset[i,j,1]=int(val2+1500)

In [10]:
newtestset = np.zeros((18816,90,2))
for i in range(18816):
    for j in range(90):
        val1 = testset[i,j,0]
        val2 = testset[i,j,1]
        if val1 < -1500:
            val1 = -1500
        if val1 > 1499:
            val1 = 1499
        if val2 < -1500:
            val2 = -1500
        if val2 > 1499:
            val2 = 1499
        newtestset[i,j,0]=int(val1+1500)
        newtestset[i,j,1]=int(val2+1500)

In [11]:
newtrainset.max()

2999.0

In [12]:
newtestset.max()

2296.0

In [13]:
from tensorflow import range

In [14]:
# embedding for gaze and time step
class EmbeddingLayer(Layer):
    def __init__(self, sequence_length, input_size, embed_dim):
        super(EmbeddingLayer, self).__init__()
        self.word_embedding = Embedding(input_dim=input_size, output_dim=embed_dim)
        self.position_embedding = Embedding(input_dim=sequence_length, output_dim=embed_dim)

    def call(self, tokens):
        sequence_length = shape(tokens)[-1]
        all_positions = range(start=0, limit=sequence_length, delta=1)
        positions_encoding = self.position_embedding(all_positions)
        words_encoding = self.word_embedding(tokens)
        return positions_encoding + words_encoding

In [15]:
#encoder layer
class EncoderLayer(Layer):
    def __init__(self, total_heads, total_dense_units, embed_dim):
        super(EncoderLayer, self).__init__()# Multihead attention layer
        self.multihead = MultiHeadAttention(num_heads=total_heads, key_dim=embed_dim)# Feed forward network layer
        self.nnw = Sequential([Dense(total_dense_units, activation="relu"),
        Dense(embed_dim)])# Normalization
        self.normalize_layer = LayerNormalization()

    def call(self, inputs):
        attn_output = self.multihead(inputs, inputs)
        normalize_attn = self.normalize_layer(inputs + attn_output)
        nnw_output = self.nnw(normalize_attn)
        final_output = self.normalize_layer(normalize_attn + nnw_output)
        return final_output

In [18]:
# transformer
embed_dim = 10
num_heads = 2
total_dense_units = 40
sequence_length = 90
input_size = 3000
n_classes = 5

# Our two custom layers
embedding_layer = EmbeddingLayer(sequence_length, input_size, embed_dim)
encoder_layer = EncoderLayer(num_heads, total_dense_units, embed_dim)

# Start connecting the layers together
inputs = Input(shape=(sequence_length,2,))
emb = embedding_layer(inputs)
enc = encoder_layer(emb)
d = Dense(total_dense_units, activation="relu")(enc)
flat = Flatten()(d)
outputs = Dense(n_classes, activation="softmax")(flat)

# Construct the transformer model
transformer_model = Model(inputs=inputs, outputs=outputs)
transformer_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy', 'Precision', 'Recall'])
transformer_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 90, 2)]           0         
                                                                 
 embedding_layer_1 (Embeddi  (None, 90, 2, 10)         30900     
 ngLayer)                                                        
                                                                 
 encoder_layer_1 (EncoderLa  (None, 90, 2, 10)         1740      
 yer)                                                            
                                                                 
 dense_6 (Dense)             (None, 90, 2, 40)         440       
                                                                 
 flatten_1 (Flatten)         (None, 7200)              0         
                                                                 
 dense_7 (Dense)             (None, 5)                 3600

In [19]:
history = transformer_model.fit(newtrainset, newtrainsety, epochs = 4, batch_size = 50, verbose = 1, validation_data = (newtestset, newtestsety))

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [30]:
newtrainset[39,5,1]

1.0