In [4]:
import numpy as np
import tensorflow as tf
import pandas as pd
import tensorflow_text as tf_txt 
from typing import List, Dict

In [54]:
# Notebook-wide hyperparameters.
# NOTE(review): none of these constants is referenced by the cells visible in
# this notebook, which pass their own literal values — confirm whether they
# are meant to be wired into Preprocessor / Block / Poet.
MAX_VOCAB_SIZE = 10000  # presumably the cap on vocabulary size — TODO confirm
EMBEDDING_DIM = 200  # presumably the token-embedding width — TODO confirm
DFF = 512  # presumably the feed-forward hidden width — TODO confirm
D_MODEL = 256  # presumably the model (attention) width — TODO confirm
MAX_SEQ_LEN = 10  # presumably the padded sequence length — TODO confirm

In [25]:
def get_angles(pos, i, dims):
  """Return the raw angles ``pos / 10000 ** (2 * (i // 2) / dims)``.

  Consecutive index pairs ``(2k, 2k + 1)`` share one rate because of the
  ``i // 2``. The arguments may be scalars or NumPy arrays; arrays are
  combined by ordinary broadcasting.

  Args:
    pos: position index (or array of indices).
    i: feature-dimension index (or array of indices).
    dims: total number of feature dimensions.

  Returns:
    The product of ``pos`` and the per-dimension rate.
  """
  exponent = (2 * (i // 2)) / dims
  return pos * (1 / (10000 ** exponent))

In [26]:
def positional_encoding(position, d_model):
  """Build a sinusoidal positional-encoding table.

  Args:
    position: number of positions (rows) to encode.
    d_model: number of feature dimensions (columns).

  Returns:
    A ``tf.float32`` tensor of shape ``(1, position, d_model)``; the leading
    axis of size 1 lets the table broadcast over a batch.
  """
  row_idx = np.arange(position)[:, np.newaxis]
  col_idx = np.arange(d_model)[np.newaxis, :]
  table = get_angles(row_idx, col_idx, d_model)

  # Even columns (2i) carry the sine, odd columns (2i+1) the cosine.
  table[:, 0::2] = np.sin(table[:, 0::2])
  table[:, 1::2] = np.cos(table[:, 1::2])

  return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

In [27]:
# Sanity check: 10 positions x 200 dims should give a (1, 10, 200) encoding.
output = positional_encoding(10, 200)
output.shape

TensorShape([1, 10, 200])

In [2]:
def lookahead_mask(seq):
    """Return a ``(seq, seq)`` mask flagging future positions.

    Entry ``[i, j]`` is 1 when ``j > i`` (a position after ``i``) and 0 on and
    below the diagonal.

    Note: ``tf.keras.layers.MultiHeadAttention`` documents the opposite
    convention for ``attention_mask`` (1 = may attend), so this mask has to be
    inverted before it is passed there.

    Args:
        seq: sequence length.

    Returns:
        A float tensor of shape ``(seq, seq)``.
    """
    # band_part(-1, 0) keeps the diagonal and everything below it.
    lower_triangle = tf.linalg.band_part(tf.ones((seq, seq)), -1, 0)
    return 1 - lower_triangle

In [30]:
# Sanity check: a length-4 sequence should give a 4 x 4 mask.
output = lookahead_mask(4)
output.shape

TensorShape([4, 4])

In [60]:
class Preprocessor:
    """Turn raw strings into fixed-length sequences of integer token ids.

    Each input string is wrapped in the ``[SURU]`` / ``[KHATAM]`` marker
    tokens, NFKD-normalised, and tokenised by a ``TextVectorization`` layer
    that pads or truncates to ``seq_len`` ids.

    ``build_vocab`` must run before the instance is called: ``vocab``,
    ``word_ids``, ``rev_word_ids`` and ``vocab_size`` stay ``None`` until then.
    """

    def __init__(self, vocab_size, seq_len=10):
        """Create the (not yet adapted) tokenizer.

        Args:
            vocab_size: upper bound on the vocabulary size, forwarded to the
                tokenizer as ``max_tokens``.
            seq_len: length every tokenised sequence is padded/truncated to.
        """
        self.seq_len = seq_len
        self.vocab: List[str] = None
        self.word_ids: Dict[str, int] = None
        self.rev_word_ids: Dict[int, str] = None
        # Size of the vocabulary actually learned; set by build_vocab(). This
        # is not the `vocab_size` argument, which is only an upper bound.
        self.vocab_size = None
        self.tokenizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=vocab_size,
            output_sequence_length=self.seq_len,
            standardize=self.custom_standardize,
        )

    def __call__(self, inputs):
        """Tokenise ``inputs`` into a ``(batch, seq_len)`` tensor of ids.

        Unicode normalisation happens inside the tokenizer via
        ``custom_standardize``, so the same text transformation is applied
        here and in ``build_vocab``.
        """
        return self.tokenizer(self.add_extra(inputs))

    @staticmethod
    def add_extra(inputs):
        """Surround every string with the start/end marker tokens."""
        inputs = tf.constant(inputs)
        # The single-element lists broadcast against the string tensor.
        return [["[SURU] "]] + inputs + [[" [KHATAM]"]]

    def custom_standardize(self, text):
        """Standardisation hook used by the tokenizer: NFKD-normalise only.

        Supplying a callable replaces the layer's built-in standardisation,
        so case and punctuation are left untouched. Normalising here, rather
        than in ``__call__``, keeps the vocabulary learned by ``adapt`` and
        the text seen at call time consistent.
        """
        return tf_txt.normalize_utf8(text, "NFKD")

    def build_vocab(self, inputs):
        """Learn the vocabulary from ``inputs`` and build the lookup tables.

        Returns:
            The vocabulary as a list of tokens, indexed by token id.
        """
        self.tokenizer.adapt(self.add_extra(inputs))
        self.vocab = self.tokenizer.get_vocabulary()
        self.vocab_size = len(self.vocab)
        self.build_dictionary(self.vocab)
        return self.vocab

    def build_dictionary(self, vocab_list: List[str]):
        """Populate ``word_ids`` (token -> id) and ``rev_word_ids`` (id -> token).

        The id of a token is its position in ``vocab_list``.
        """
        self.word_ids = {token: idx for idx, token in enumerate(vocab_list)}
        self.rev_word_ids = dict(enumerate(vocab_list))


In [82]:
# Build a vocabulary from two sample strings, then tokenise those same strings.
# `preprocessor` is reused by the Poet cell further down.
preprocessor = Preprocessor(vocab_size=100, seq_len=10)
inputs = [["जैसा "], ["i am fine, what about you. ? "]]
vocab = preprocessor.build_vocab(inputs)
print(vocab)
print(preprocessor(inputs))

['', '[UNK]', '[SURU]', '[KHATAM]', 'जैसा', 'you.', 'what', 'i', 'fine,', 'am', 'about', '?']
tf.Tensor(
[[ 2  4  3  0  0  0  0  0  0  0]
 [ 2  7  9  8  6 10  5 11  3  0]], shape=(2, 10), dtype=int64)


In [10]:
class FFN(tf.keras.layers.Layer):
    """Two-layer feed-forward network.

    A ReLU ``Dense`` layer with ``dff`` units followed by a linear ``Dense``
    layer with ``d_model`` units.
    """

    def __init__(self, d_model, dff):
        """
        Args:
            d_model: number of output units.
            dff: number of hidden units.
        """
        super().__init__()
        self.dff = dff
        self.dense1 = tf.keras.layers.Dense(dff, activation="relu")
        self.dense2 = tf.keras.layers.Dense(d_model)

    def call(self, inputs):
        """Apply the hidden layer, then the output projection."""
        hidden = self.dense1(inputs)
        return self.dense2(hidden)


In [11]:
class Block(tf.keras.layers.Layer):
    """Transformer block: self-attention followed by a feed-forward network.

    Each sub-layer is followed by dropout, a residual connection and layer
    normalisation. The input is first projected to ``d_model`` so it can be
    added to the attention output, which means the incoming feature width
    does not have to equal ``d_model``.
    """

    def __init__(self, d_model: int, dff: int = 2048, heads: int = 8, rate: float = 0.1):
        """
        Args:
            d_model: width of the block's output (must be divisible by ``heads``).
            dff: hidden units of the feed-forward network.
            heads: number of attention heads.
            rate: dropout rate applied after each sub-layer.
        """
        super().__init__()

        assert d_model % heads == 0, "d_model must be divisible by heads"
        # parameters
        self.d_model = d_model  # model dims
        self.dff = dff  # ffn dense layer units
        self.heads = heads  # number of heads

        # layers
        self.ffn = FFN(d_model, dff)
        self.ln1 = tf.keras.layers.LayerNormalization()
        self.ln2 = tf.keras.layers.LayerNormalization()
        self.wq = tf.keras.layers.Dense(self.d_model)
        self.wv = tf.keras.layers.Dense(self.d_model)
        self.wi = tf.keras.layers.Dense(self.d_model)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        # Keras treats key_dim as the size of EACH head, so every head here is
        # d_model wide.
        # NOTE(review): given the divisibility assert above, d_model // heads
        # may have been intended — confirm before changing, as it alters the
        # parameter count.
        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=self.heads, key_dim=self.d_model)

    def call(self, inputs, training=False, mask=None):
        """Run the block.

        Args:
            inputs: tensor of shape ``(batch, seq_len, features)``.
            training: whether dropout is active.
            mask: optional attention mask, forwarded unchanged as the
                ``attention_mask`` of ``MultiHeadAttention`` (where 1 means
                "may attend" and 0 means "blocked").

        Returns:
            Tensor of shape ``(batch, seq_len, d_model)``.
        """
        q = self.wq(inputs)  # (None, seq_len, d_model)
        v = self.wv(inputs)  # (None, seq_len, d_model)

        # Project the input to d_model so it can be added to the attention output.
        residual = self.wi(inputs)  # (None, seq_len, d_model)

        # Attention sub-layer; the key defaults to the value inside MultiHeadAttention.
        attention_outputs = self.mha(query=q, value=v, attention_mask=mask)  # (None, seq_len, d_model)
        attention_outputs = self.dropout1(attention_outputs, training=training)
        attended = self.ln1(residual + attention_outputs)

        # Feed-forward sub-layer. Its residual is the output of the attention
        # sub-layer, not the block input, and it uses its own dropout layer.
        ffn_outputs = self.ffn(attended)  # (None, seq_len, d_model)
        ffn_outputs = self.dropout2(ffn_outputs, training=training)
        return self.ln2(attended + ffn_outputs)  # (None, seq_len, d_model)

In [81]:
# Shape check with symbolic inputs: the 100-wide features come out d_model=8 wide.
layer = Block(d_model=8, dff=256, heads=4)
mask = tf.keras.Input(shape=[4, 4])
source = tf.keras.Input(shape=[4, 100])
outputs = layer(inputs=source, mask=mask)
print(outputs.shape)

(None, 4, 8)


In [83]:
class Poet(tf.keras.models.Model):
    """Token-level model: embedding + positional encoding, a stack of
    ``Block`` layers under a look-ahead mask, and a softmax over the vocabulary.

    Inputs must be id sequences of exactly ``preprocessor.seq_len`` tokens,
    because the positional encoding and the mask are built for that length.
    """

    def __init__(self, preprocessor, num_blocks=1, d_model=256, dff=512, heads=8, embedding_dims=100):
        """
        Args:
            preprocessor: a ``Preprocessor`` whose ``build_vocab`` has already
                been called; supplies ``seq_len`` and ``vocab_size``.
            num_blocks: number of stacked ``Block`` layers.
            d_model: output width of every block.
            dff: hidden units of each block's feed-forward network.
            heads: attention heads per block.
            embedding_dims: width of the token embeddings.

        Raises:
            ValueError: if the preprocessor has no vocabulary yet.
        """
        super().__init__()
        if preprocessor.vocab_size is None:
            raise ValueError(
                "preprocessor has no vocabulary yet; call "
                "preprocessor.build_vocab(...) before constructing Poet"
            )
        self.d_model = d_model
        self.preprocessor = preprocessor
        self.num_blocks = num_blocks
        self.embedding_dims = embedding_dims
        # The positional encoding is the same for every example, so build it once.
        self.pos_encoding = positional_encoding(self.preprocessor.seq_len, self.embedding_dims)
        # NOTE(review): the padding mask implied by mask_zero=True is not
        # passed to the blocks explicitly — confirm whether padded positions
        # should also be masked.
        self.embedding_layer = tf.keras.layers.Embedding(
            input_dim=self.preprocessor.vocab_size,
            output_dim=self.embedding_dims,
            mask_zero=True,
            input_length=self.preprocessor.seq_len,
        )
        self.blocks = [Block(d_model=self.d_model, dff=dff, heads=heads) for _ in range(self.num_blocks)]

        self.final_layer = tf.keras.layers.Dense(self.preprocessor.vocab_size, activation="softmax")

    def call(self, inputs):
        """Map ``(batch, seq_len)`` token ids to ``(batch, seq_len, vocab_size)``
        probabilities."""
        embeddings = self.embedding_layer(inputs)

        # adding positional encoding (broadcasts over the batch axis)
        x = embeddings + self.pos_encoding

        # lookahead_mask() marks FUTURE positions with 1, whereas Keras
        # MultiHeadAttention expects 1 = "may attend". Invert it so each
        # position sees itself and earlier positions only.
        mask = 1 - lookahead_mask(self.preprocessor.seq_len)

        # pass the sequence through each block in turn
        for block in self.blocks:
            x = block(x, mask=mask)

        return self.final_layer(x)

In [89]:
# End-to-end shape check: tokenise two strings and run them through Poet.
# Relies on `preprocessor` (with its vocabulary already built) from the cell above.
string_inputs = [["जैसा"], ["i am fine, what about you. ? "]]
preprocess_inputs = preprocessor(string_inputs)
# Print the shape of the tokenised batch, not the stale `inputs` list from an
# earlier cell (a Python list has no .shape on a fresh kernel).
print(preprocess_inputs.shape)
poet = Poet(preprocessor=preprocessor)
# Call the model itself rather than .call() so Keras' __call__ machinery runs.
outputs = poet(preprocess_inputs)
print(outputs.shape)

(2, 10)
(2, 10, 12)


TensorShape([2, 10, 512])