In [9]:
import tensorflow as tf
from tensorflow.keras import layers
# -----------------------------
# Positional Encoding Layer
# -----------------------------

class PositionalEncoding(layers.Layer):
    """Adds a fixed sinusoidal positional encoding to its input.

    The encoding table is computed once in ``__init__`` with shape
    ``(1, max_len, d_model)`` and is not trainable. ``call`` slices the table
    to the input's length along axis 1 and adds it to the input.

    Args:
        max_len: Number of positions to precompute.
        d_model: Size of the last (feature) axis of the encoding table.
        **kwargs: Standard ``layers.Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can report the constructor arguments.
        self.max_len = max_len
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(max_len, d_model)

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)`` as float32.

        Args:
            pos: Position indices (integer or float tensor).
            i: Feature-dimension indices (integer or float tensor).
            d_model: Feature size used to scale the exponent.
        """
        # tf.range produces int32 tensors and TensorFlow does not promote
        # int32 to float32 implicitly, so every operand is cast explicitly.
        pos = tf.cast(pos, tf.float32)
        # Floor-divide before the cast so paired (even, odd) dimensions share
        # one frequency.
        exponent = tf.cast(2 * (i // 2), tf.float32) / tf.cast(d_model, tf.float32)
        angle_rates = 1.0 / tf.pow(10000.0, exponent)
        return pos * angle_rates

    def positional_encoding(self, max_len, d_model):
        """Build the ``(1, max_len, d_model)`` sinusoidal encoding table."""
        dims = tf.range(d_model)[tf.newaxis, :]
        angle_rads = self.get_angles(
            pos=tf.range(max_len)[:, tf.newaxis],
            i=dims,
            d_model=d_model
        )

        # apply sin to even indices, cos to odd indices
        angle_rads = tf.where(
            dims % 2 == 0,
            tf.sin(angle_rads),
            tf.cos(angle_rads)
        )

        # Leading axis of size 1 so the table broadcasts over the batch axis.
        return angle_rads[tf.newaxis, ...]

    def call(self, x):
        """Add the encoding for the first ``tf.shape(x)[1]`` positions to ``x``.

        ``x`` is expected to be ``(batch, seq_len, d_model)``; the table only
        holds ``max_len`` positions, so ``seq_len`` should not exceed it.
        """
        return x + tf.cast(self.pos_encoding[:, :tf.shape(x)[1], :], x.dtype)

    def get_config(self):
        """Return the layer config including ``max_len`` and ``d_model``."""
        config = super().get_config()
        config.update({"max_len": self.max_len, "d_model": self.d_model})
        return config



In [10]:
import tensorflow as tf
from tensorflow.keras import layers, Model

# -----------------------------
# Transformer Encoder Block
# -----------------------------
def transformer_encoder_block(embed_dim, num_heads, ff_dim, dropout=0.1, name=None):
    """Build one pre-norm Transformer encoder block as a Keras ``Model``.

    The block applies LayerNorm -> multi-head self-attention -> residual add,
    then LayerNorm -> Dense(ff_dim, relu) -> Dense(embed_dim) -> Dropout ->
    residual add.

    Args:
        embed_dim: Size of the last axis of the input and output.
        num_heads: Number of attention heads; each head uses
            ``key_dim = embed_dim // num_heads``.
        ff_dim: Hidden width of the feed-forward sub-layer.
        dropout: Dropout rate passed to the attention layer and applied after
            the feed-forward sub-layer.
        name: Optional model name. Left as ``None`` so Keras generates a
            unique name; a fixed name would collide when several blocks are
            stacked inside one functional model, which requires unique layer
            names.

    Returns:
        A ``Model`` mapping ``(batch, seq_len, embed_dim)`` inputs to outputs
        of the same shape.
    """
    inputs = layers.Input(shape=(None, embed_dim))

    # Layer Norm + Multi-Head Self Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention_output = layers.MultiHeadAttention(
        num_heads=num_heads,
        key_dim=embed_dim // num_heads,
        dropout=dropout
    )(x, x)  # query and value are the same tensor: self-attention
    x = layers.Add()([inputs, attention_output])  # Residual connection

    # Feed Forward Network
    ff = layers.LayerNormalization(epsilon=1e-6)(x)
    ff = layers.Dense(ff_dim, activation="relu")(ff)
    ff = layers.Dense(embed_dim)(ff)
    ff = layers.Dropout(dropout)(ff)
    x = layers.Add()([x, ff])  # Residual connection

    return Model(inputs, x, name=name)


In [11]:

import tensorflow as tf
from tensorflow.keras import layers, Model

# -----------------------------
# Build the Transformer Model
# -----------------------------
def build_transformer(
        max_len=100,
        vocab_size=10000,
        embed_dim=64,
        num_heads=4,
        ff_dim=128,
        num_layers=3,
        dropout=0.1
    ):
    """Build a Transformer-encoder binary classifier.

    Pipeline: token embedding -> positional encoding -> ``num_layers`` encoder
    blocks -> global average pooling over the sequence axis -> Dense(64, relu)
    -> Dense(1, sigmoid).

    Args:
        max_len: Number of positions passed to ``PositionalEncoding``.
        vocab_size: ``input_dim`` of the embedding layer.
        embed_dim: Embedding / model width.
        num_heads: Attention heads per encoder block.
        ff_dim: Feed-forward hidden width per encoder block.
        num_layers: Number of stacked encoder blocks.
        dropout: Dropout rate forwarded to every encoder block.

    Returns:
        A ``Model`` taking int32 token ids of shape ``(batch, seq_len)`` and
        producing a ``(batch, 1)`` sigmoid output.
    """
    inputs = layers.Input(shape=(None,), dtype=tf.int32)

    # Token Embedding
    x = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)

    # Positional Encoding
    x = PositionalEncoding(max_len, embed_dim)(x)

    # Transformer Encoder Blocks
    for _ in range(num_layers):
        x = transformer_encoder_block(
            embed_dim, num_heads, ff_dim, dropout=dropout
        )(x)

    # Classification head
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(64, activation="relu")(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)

    # Name reflects the actual depth ("Transformer_3Layer" for the default).
    model = Model(inputs, outputs, name=f"Transformer_{num_layers}Layer")
    return model


In [12]:
def get_angles(self, pos, i, d_model):
    """Return ``pos / 10000 ** (2 * (i // 2) / d_model)`` as float32.

    NOTE(review): this is a module-level function that takes an unused
    ``self``; nothing in the visible cells binds it to a class, so defining it
    here does not by itself change any existing method -- confirm intent.

    Args:
        self: Unused.
        pos: Position indices (integer or float tensor).
        i: Feature-dimension indices (integer or float tensor).
        d_model: Feature size used to scale the exponent.
    """
    # TensorFlow does not promote int32 to float32 implicitly, so both index
    # tensors are cast; leaving `pos` as int32 would fail at the final multiply.
    pos = tf.cast(pos, tf.float32)
    # Floor-divide before the cast so paired (even, odd) dimensions share one
    # frequency.
    exponent = tf.cast(2 * (i // 2), tf.float32) / tf.cast(d_model, tf.float32)
    angle_rates = 1 / tf.pow(10000.0, exponent)
    return pos * angle_rates
