Keras IMDB sentiment analysis tutorial https://keras.io/examples/nlp/text_classification_with_transformer/

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from keras.utils import to_categorical

In [2]:
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([tf.keras.layers.Dense(ff_dim, activation="relu"), 
                                        tf.keras.layers.Dense(embed_dim),])

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        ffn_output = self.ffn(attn_output)
        return attn_output + ffn_output


In [3]:
class TokenAndPositionEmbedding(tf.keras.layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = tf.keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions


In [4]:
vocab_size = 20000  # Only consider the top 20k words
maxlen = 200  # Only consider the first 200 words of each movie review
embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

In [5]:
(x_train, y_train), (x_val, y_val) = tf.keras.datasets.imdb.load_data(num_words=vocab_size)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = tf.keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

import os
import time
path_wd = os.path.join('/home/viktor/PycharmProjects/guided_research/snn_toolbox', '..', 'temp', str(time.time()))
os.makedirs(path_wd)
# Save dataset so SNN toolbox can find it.
np.savez_compressed(os.path.join(path_wd, 'x_test'), x_val)
np.savez_compressed(os.path.join(path_wd, 'y_test'), y_val)
# SNN toolbox will not do any training, but we save a subset of the training
# set so the toolbox can use it when normalizing the network parameters.
np.savez_compressed(os.path.join(path_wd, 'x_norm'), x_train[::10])

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


25000 Training sequences
25000 Validation sequences


In [22]:
from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train, 2)
y_train

array([[[1., 0.],
        [0., 1.]],

       [[0., 1.],
        [1., 0.]],

       [[0., 1.],
        [1., 0.]],

       ...,

       [[0., 1.],
        [1., 0.]],

       [[1., 0.],
        [0., 1.]],

       [[0., 1.],
        [1., 0.]]], dtype=float32)

In [6]:
class ScaleLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(ScaleLayer, self).__init__()

    def call(self, inputs):
        scale = inputs[1]
        return inputs[0] / scale

    
class MatMulLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(MatMulLayer, self).__init__()

    def call(self, inputs):
        return tf.matmul(inputs[0], inputs[1])

    
class MatMulLayerTranspose(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(MatMulLayerTranspose, self).__init__()

    def call(self, inputs):
        return tf.matmul(inputs[0], inputs[1], transpose_b=True)

In [17]:
inputs = tf.keras.layers.Input(shape=(maxlen,))
# positions = tf.range(start=0, limit=maxlen, delta=1)
# positions = tf.keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim)(positions)
x = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)
# # x = x + positions
# positions = tf.expand_dims(positions, axis=0)
# x = tf.keras.layers.Add()([x, positions])

# x = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)(inputs)
# x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)
# -------------- TRANSFORMER BLOCK -----------------

dv = 25
nv = -1

v2 = tf.keras.layers.Dense(embed_dim, activation="relu")(x)
q2 = tf.keras.layers.Dense(embed_dim, activation="relu")(x)
k2 = tf.keras.layers.Dense(embed_dim, activation="relu")(x)

v = tf.keras.layers.Reshape([embed_dim, nv, dv])(v2)
q = tf.keras.layers.Reshape([embed_dim, nv, dv])(q2)
k = tf.keras.layers.Reshape([embed_dim, nv, dv])(k2)

# softmax(q*k^T/sqrt(dv))
att = MatMulLayerTranspose()([q, k])
att = ScaleLayer()([att, np.sqrt(dv)])
att = tf.keras.layers.Softmax(axis=-1)(att)
# softmax(q*k^T/sqrt(dv))*v
out = MatMulLayer()([att, v])

out = tf.keras.layers.Reshape([embed_dim, maxlen, 1])(out)
x = tf.keras.layers.Reshape([embed_dim, maxlen, 1])(x)
add = tf.keras.layers.Add()([out, x])
# add = tf.add(out, x)

x = tf.keras.layers.Dense(embed_dim, activation="relu")(add)
x = tf.keras.layers.Dense(embed_dim)(x)
# --------------------------------------------------

# x = tf.keras.layers.Reshape([1, embed_dim, embed_dim])(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
# x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.Dense(20, activation="relu")(x)
# x = tf.keras.layers.Dropout(0.1)(x)
outputs = tf.keras.layers.Dense(2, activation="softmax")(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [18]:
model.compile("adam", "categorical_crossentropy", metrics=["accuracy"])
history = model.fit(x_train, y_train, batch_size=64, epochs=1, validation_data=(x_val, y_val))

ValueError: in user code:

    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:755 train_step
        loss = self.compiled_loss(
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /home/viktor/.local/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 2) are incompatible


In [137]:
model.get_weights()[3]

array([[-0.05518945, -0.2264855 , -0.09685036, ..., -0.33663788,
         0.10324729, -0.03830882],
       [ 0.20905049, -0.11296735,  0.07060945, ...,  0.2551905 ,
         0.06899796, -0.26309937],
       [-0.23355845,  0.0932614 ,  0.36705574, ..., -0.23375478,
        -0.14695543, -0.03552597],
       ...,
       [-0.20811115,  0.08293296, -0.14428365, ...,  0.22327909,
        -0.28369355,  0.16426443],
       [ 0.2841472 ,  0.10508828, -0.08549052, ...,  0.2911871 ,
        -0.25199428,  0.3607106 ],
       [-0.28700575, -0.24497484,  0.00314723, ..., -0.1291118 ,
        -0.08187493,  0.18874191]], dtype=float32)

In [13]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 200)]        0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 200, 32)      640000      input_4[0][0]                    
__________________________________________________________________________________________________
dense_18 (Dense)                (None, 200, 32)      1056        embedding_3[0][0]                
__________________________________________________________________________________________________
dense_19 (Dense)                (None, 200, 32)      1056        embedding_3[0][0]                
____________________________________________________________________________________________

In [133]:
import os
from tensorflow import keras
keras.models.save_model(
    model, 
    os.path.join("/home/viktor/PycharmProjects/guided_research/transformer-to-snn-conversion", 
    "mnist_transformer" + '.h5')
)

In [134]:
reconstructed_model = keras.models.load_model(
    os.path.join("/home/viktor/PycharmProjects/guided_research/transformer-to-snn-conversion", 
    "mnist_transformer" + '.h5'), custom_objects={'ScaleLayer': ScaleLayer, 'MatMulLayer': MatMulLayer, 
                                                  'MatMulLayerTranspose': MatMulLayerTranspose})