Keras IMDB sentiment analysis tutorial https://keras.io/examples/nlp/text_classification_with_transformer/

In [2]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from keras.utils import to_categorical

In [3]:
class TransformerBlock(tf.keras.layers.Layer):
    """Self-attention followed by a two-layer feed-forward net, joined by a sum.

    The block returns ``attention(inputs) + ffn(attention(inputs))``. No
    dropout or layer normalisation is applied.

    Args:
        embed_dim: Size used for the attention ``key_dim`` and for the output
            of the second feed-forward ``Dense`` layer.
        num_heads: Number of attention heads.
        ff_dim: Units of the first (ReLU) feed-forward ``Dense`` layer.
        rate: Accepted for signature compatibility; not used by this block.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        layers = tf.keras.layers
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        hidden = layers.Dense(ff_dim, activation="relu")
        projection = layers.Dense(embed_dim)
        self.ffn = tf.keras.Sequential([hidden, projection])

    def call(self, inputs, training):
        """Apply self-attention, then add the feed-forward output to it.

        ``training`` is part of the signature but is not forwarded anywhere.
        """
        # Query and value are both `inputs`, i.e. plain self-attention.
        attended = self.att(inputs, inputs)
        return attended + self.ffn(attended)


In [12]:
class TokenAndPositionEmbedding(tf.keras.layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Note: a later cell in this notebook defines another class with the same
    name, which replaces this one once that cell has run.

    Args:
        maxlen: Number of distinct positions the position table can hold.
        vocab_size: Number of distinct token ids the token table can hold.
        embed_dim: Width of both embedding tables.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        embedding = tf.keras.layers.Embedding
        self.pos_emb = embedding(input_dim=maxlen, output_dim=embed_dim)
        self.token_emb = embedding(input_dim=vocab_size, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids `x` and add the embedding of each position index."""
        # Position ids 0..L-1, where L is the runtime size of the last axis of x.
        seq_len = tf.shape(x)[-1]
        pos_vectors = self.pos_emb(tf.range(start=0, limit=seq_len, delta=1))
        tok_vectors = self.token_emb(x)
        return tok_vectors + pos_vectors


In [5]:
# --- Hyperparameters -------------------------------------------------------
# Data
maxlen = 200        # keep only the first 200 words of each movie review
vocab_size = 20000  # keep only the 20k most frequent words
# Model
embed_dim = 32      # embedding size of every token
ff_dim = 32         # hidden width of the transformer's feed-forward network
num_heads = 2       # number of attention heads

In [6]:
# Load IMDB reviews as lists of word ids, restricted to the `vocab_size` most frequent words.
imdb_train, imdb_val = tf.keras.datasets.imdb.load_data(num_words=vocab_size)
x_train, y_train = imdb_train
x_val, y_val = imdb_val
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")

# Bring every review to exactly `maxlen` token ids.
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences
x_train = pad_sequences(x_train, maxlen=maxlen)
x_val = pad_sequences(x_val, maxlen=maxlen)

# One-hot encode the binary labels into two columns.
y_train, y_val = (to_categorical(labels, 2) for labels in (y_train, y_val))

# import os
# import time
# path_wd = os.path.join('/home/viktor/PycharmProjects/guided_research/snn_toolbox', '..', 'temp', str(time.time()))
# os.makedirs(path_wd)
# # Save dataset so SNN toolbox can find it.
# np.savez_compressed(os.path.join(path_wd, 'x_test'), x_val)
# np.savez_compressed(os.path.join(path_wd, 'y_test'), y_val)
# # SNN toolbox will not do any training, but we save a subset of the training
# # set so the toolbox can use it when normalizing the network parameters.
# np.savez_compressed(os.path.join(path_wd, 'x_norm'), x_train[::10])

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])


25000 Training sequences
25000 Validation sequences


  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [28]:
class ScaleLayer(tf.keras.layers.Layer):
    """Divide the first input by the second.

    Args:
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``trainable``,
            ``dtype``), forwarded to the base ``Layer``.
    """

    def __init__(self, **kwargs):
        # Forward the keyword arguments instead of discarding them: Keras
        # passes name/trainable/dtype here when rebuilding the layer from a
        # saved config, and callers may want to name the layer explicitly.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return ``inputs[0] / inputs[1]``.

        Args:
            inputs: Indexable pair ``[value, scale]``.
        """
        value, scale = inputs[0], inputs[1]
        return value / scale


class MatMulLayer(tf.keras.layers.Layer):
    """Matrix-multiply the first input by the second via ``tf.matmul``.

    Args:
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``trainable``,
            ``dtype``), forwarded to the base ``Layer``.
    """

    def __init__(self, **kwargs):
        # Forward the keyword arguments instead of discarding them: Keras
        # passes name/trainable/dtype here when rebuilding the layer from a
        # saved config (see the `custom_objects` used with `load_model`).
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return ``tf.matmul(inputs[0], inputs[1])``.

        Args:
            inputs: Indexable pair ``[a, b]`` of tensors.
        """
        left, right = inputs[0], inputs[1]
        return tf.matmul(left, right)


class MatMulLayerTranspose(tf.keras.layers.Layer):
    """Scaled product ``a @ b^T / sqrt(d)``, with ``d`` the last dimension of ``a``.

    Args:
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``trainable``,
            ``dtype``), forwarded to the base ``Layer``.
    """

    def __init__(self, **kwargs):
        # Forward the keyword arguments instead of discarding them: Keras
        # passes name/trainable/dtype here when rebuilding the layer from a
        # saved config (see the `custom_objects` used with `load_model`).
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return ``tf.matmul(a, b, transpose_b=True) / sqrt(a.shape[-1])``.

        Args:
            inputs: Indexable pair ``[a, b]`` of tensors. The last dimension of
                ``a`` must be statically known, because it is read from
                ``a.shape`` and passed to ``np.sqrt``.
        """
        left, right = inputs[0], inputs[1]
        # The scale is computed from the static shape, so it is a Python/NumPy
        # constant baked into the graph rather than a tensor op.
        scale = np.sqrt(left.shape[-1])
        return tf.matmul(left, right, transpose_b=True)/scale


class StandardAdditionLayer(tf.keras.layers.Layer):
    """Element-wise sum of two inputs via ``tf.math.add``.

    Args:
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``trainable``,
            ``dtype``), forwarded to the base ``Layer``.
    """

    def __init__(self, **kwargs):
        # Forward the keyword arguments instead of discarding them: Keras
        # passes name/trainable/dtype here when rebuilding the layer from a
        # saved config, and callers may want to name the layer explicitly.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return ``tf.math.add(inputs[0], inputs[1])``.

        Args:
            inputs: Indexable pair ``[a, b]`` of tensors.
        """
        first, second = inputs[0], inputs[1]
        return tf.math.add(first, second)
    
    
class CreateRangeLayer(tf.keras.layers.Layer):
    """Produce the integer range ``0, 1, ..., maxlen - 1`` via ``tf.range``.

    Args:
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``trainable``,
            ``dtype``), forwarded to the base ``Layer``.
    """

    def __init__(self, **kwargs):
        # Forward the keyword arguments instead of discarding them: Keras
        # passes name/trainable/dtype here when rebuilding the layer from a
        # saved config, and callers may want to name the layer explicitly.
        super().__init__(**kwargs)

    def call(self, maxlen):
        """Return ``tf.range(start=0, limit=maxlen, delta=1)``.

        Args:
            maxlen: Exclusive upper bound of the range.
        """
        return tf.range(start=0, limit=maxlen, delta=1)
    

class TokenAndPositionEmbedding(tf.keras.layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Unlike a version that derives the sequence length from the input tensor,
    this layer always embeds exactly ``maxlen`` positions, so the input's last
    axis is expected to have length ``maxlen``.

    Args:
        maxlen: Number of positions to embed (default 200).
        vocab_size: Number of distinct token ids (default 20000).
        embed_dim: Width of both embedding tables (default 32).
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``trainable``,
            ``dtype``), forwarded to the base ``Layer``.
    """

    def __init__(self, maxlen=200, vocab_size=20000, embed_dim=32, **kwargs):
        # Forward name/trainable/dtype rather than dropping them; Keras passes
        # these when rebuilding the layer from a saved config.
        super().__init__(**kwargs)
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.pos_emb = tf.keras.layers.Embedding(input_dim=self.maxlen, output_dim=self.embed_dim)
        self.token_emb = tf.keras.layers.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim)

    def call(self, x):
        """Embed token ids `x` and add the embeddings of positions ``0..maxlen-1``."""
        # Use the layer's own maxlen, not a notebook-level global: the global
        # may be undefined (or different) when the layer is loaded elsewhere.
        positions = tf.range(start=0, limit=self.maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "maxlen": self.maxlen,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        })
        return config

In [33]:
# Build the classifier with the functional API. The attention block is spelled
# out layer by layer (instead of using TransformerBlock) using the custom
# MatMul layers defined in an earlier cell.
# Shapes below assume maxlen=200 and embed_dim=32 from the config cell.
inputs = tf.keras.layers.Input(shape=(maxlen,))  # (batch, 200) token ids
# --- Earlier attempts at building the position embedding inline (disabled) ---
# positions = tf.range(start=0, limit=maxlen, delta=1)
# positions = CreateRangeLayer()(maxlen)
# positions = tf.keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim)(positions)
# x = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)
# positions = tf.broadcast_to(positions, (50, 200, 32))
# positions = tf.keras.layers.Reshape([-1, 200, 32])(positions)
# print(positions.shape)
# print(x.shape)
# x = x + positions
# x = StandardAdditionLayer()([x, positions])
# positions = tf.expand_dims(positions, axis=0)
# print(positions.shape)
# x = tf.keras.layers.Add()([x, positions])

# Token + position embedding, built with the layer's default sizes.
x = TokenAndPositionEmbedding()(inputs)  # (batch, 200, 32)
# x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)
# -------------- TRANSFORMER BLOCK -----------------

# dv is the size of the last axis after the reshape below; nv = -1 lets
# Reshape infer the remaining axis (200 * 32 / (32 * 25) = 8 here).
dv = 25
nv = -1

# Value / query / key projections, each (batch, 200, 32). Note the ReLU on all three.
v2 = tf.keras.layers.Dense(embed_dim, activation="relu")(x)
q2 = tf.keras.layers.Dense(embed_dim, activation="relu")(x)
k2 = tf.keras.layers.Dense(embed_dim, activation="relu")(x)

# Reinterpret each projection as (batch, 32, 8, 25).
# NOTE(review): Reshape re-reads the row-major buffer; it does not transpose,
# so the leading size-32 axis is not the feature axis of the Dense output —
# confirm this grouping is intended.
v = tf.keras.layers.Reshape([embed_dim, nv, dv])(v2)
q = tf.keras.layers.Reshape([embed_dim, nv, dv])(q2)
k = tf.keras.layers.Reshape([embed_dim, nv, dv])(k2)

# softmax(q*k^T/sqrt(dv))
# MatMulLayerTranspose already divides by sqrt of q's last dimension (dv here),
# which is why the separate ScaleLayer call below stays disabled.
att = MatMulLayerTranspose()([q, k])
# att = ScaleLayer()([att, np.sqrt(dv)])
att = tf.keras.layers.Softmax(axis=-1)(att)
# softmax(q*k^T/sqrt(dv))*v
out = MatMulLayer()([att, v])

# Residual connection: both tensors are reshaped to (batch, 32, 200, 1) and summed.
out = tf.keras.layers.Reshape([embed_dim, maxlen, 1])(out)
x = tf.keras.layers.Reshape([embed_dim, maxlen, 1])(x)
add = tf.keras.layers.Add()([out, x])
# add = tf.add(out, x)

# Feed-forward part. Dense acts on the last axis, which has size 1 after the
# reshape above, so the result is (batch, 32, 200, 32) as printed below.
# NOTE(review): confirm that a per-scalar feed-forward is what was intended.
x = tf.keras.layers.Dense(embed_dim, activation="relu")(add)
x = tf.keras.layers.Dense(embed_dim)(x)
# --------------------------------------------------
# x = tf.keras.layers.Flatten()(x)
# x = tf.keras.layers.Reshape([1, embed_dim, embed_dim])(x)
print(x.shape)  # debugging aid: shape going into the pooling head
# Classification head: 2D average pooling (default arguments), dropout,
# flatten, a small ReLU layer, and a 2-way softmax matching the one-hot labels.
x = tf.keras.layers.AveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(20, activation="relu")(x)
x = tf.keras.layers.Dropout(0.1)(x)
outputs = tf.keras.layers.Dense(2, activation="softmax")(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

(None, 32, 200, 32)


In [34]:
# The labels are one-hot and the output is a 2-way softmax, hence categorical cross-entropy.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# Train for a single epoch, validating on the held-out split after it.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=1,
    validation_data=(x_val, y_val),
)



In [53]:
# Loss and accuracy on the validation split (displayed as the cell output).
model.evaluate(x=x_val, y=y_val)



[0.30267125368118286, 0.8704400062561035]

In [10]:
# Display one weight array of the model. The index is positional within
# model.get_weights(), so which layer it refers to depends on how the model
# was built. NOTE(review): confirm index 3 is the intended tensor.
model.get_weights()[3]

array([[-0.09912069, -0.19579193, -0.16229156, ...,  0.10588318,
         0.25742522,  0.1727877 ],
       [-0.22689337,  0.25866896,  0.00569219, ...,  0.02986109,
         0.03099819,  0.00711283],
       [ 0.26708716,  0.0849209 , -0.09715092, ...,  0.16539818,
        -0.21219455,  0.29617092],
       ...,
       [ 0.01515449, -0.19789135, -0.06235372, ...,  0.14110392,
         0.29501003,  0.06025026],
       [-0.02646763, -0.04001221,  0.06900238, ...,  0.04967032,
         0.01653934,  0.00669733],
       [-0.10428838,  0.30174482, -0.24962968, ..., -0.05366444,
         0.2063334 , -0.25843057]], dtype=float32)

In [48]:
# Print the layer-by-layer summary of the current model.
# NOTE(review): the saved output below lists a plain Embedding layer, so it
# appears to come from an earlier variant of the model — re-run to refresh.
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_20 (InputLayer)           [(None, 200)]        0                                            
__________________________________________________________________________________________________
embedding_26 (Embedding)        (None, 200, 32)      640000      input_20[0][0]                   
__________________________________________________________________________________________________
dense_116 (Dense)               (None, 200, 32)      1056        embedding_26[0][0]               
__________________________________________________________________________________________________
dense_117 (Dense)               (None, 200, 32)      1056        embedding_26[0][0]               
___________________________________________________________________________________________

In [35]:
import os
from tensorflow import keras
# Write the trained model to a single HDF5 file.
h5_path = os.path.join(
    "/home/viktor/PycharmProjects/guided_research/transformer-to-snn-conversion",
    "mnist_transformer" + '.h5',
)
keras.models.save_model(model, h5_path)

In [36]:
# Reload the HDF5 file written by the previous cell.
saved_model_path = os.path.join(
    "/home/viktor/PycharmProjects/guided_research/transformer-to-snn-conversion",
    "mnist_transformer" + '.h5',
)
# Custom layer classes are passed by name via `custom_objects`.
# CreateRangeLayer and StandardAdditionLayer are left out: their uses in the
# model-building cell are commented out.
custom_layers = {
    'MatMulLayer': MatMulLayer,
    'MatMulLayerTranspose': MatMulLayerTranspose,
    'TokenAndPositionEmbedding': TokenAndPositionEmbedding,
}
reconstructed_model = keras.models.load_model(saved_model_path, custom_objects=custom_layers)