In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
import tensorflow_text as tf_txt 
from typing import List, Dict

In [7]:
VOCAB_SIZE = 10000
EMBEDDING_DIM = 200
DFF = 512
D_MODEL = 256
MAX_SEQ_LEN = 10

In [18]:
class FFN(tf.keras.layers.Layer):
  def  __init__(self, d_model, dff):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(dff, activation="relu")
        self.dense2 = tf.keras.layers.Dense(d_model)

  def call(self, inputs):
        outputs = self.dense1(inputs)
        outputs = self.dense2(outputs)
        return outputs


In [54]:
class Block(tf.keras.layers.Layer):
    def __init__(self, d_model: int, dff: int = 2048, heads: int = 8):
        super().__init__()

        #parameters
        self.d_model = d_model      # model dims 
        self.dff = dff                           # ffn dense layer units
        self.heads = heads               # number of heads

        #layers
        self.ffn = FFN(d_model, dff)
        self.ln1 = tf.keras.layers.LayerNormalization()
        self.ln2 = tf.keras.layers.LayerNormalization()
        self.wq = tf.keras.layers.Dense(units=self.d_model)
        self.wv = tf.keras.layers.Dense(units=self.d_model)
        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=self.heads, key_dim=self.d_model)

    # def build(self, input_shape):
    #     self.mha = tf.keras.layers.MultiHeadAttention(num_heads=self.heads, key_dim=d_model)

    def call(self, inputs, mask=None):
        q = self.wq(inputs)     #(None, seq_len, d_model)
        v = self.wv(inputs)      #(None, seq_len, d_model)
        print(v.shape)
        attention_outputs = self.mha(query=q, value=v, attention_mask=mask)       # output shape (None, query_len, d_model)
        outputs = self.ln1(inputs+attention_outputs)
        ffn_outputs = self.ffn(outputs)     # output shape (None, query_len, d_model)
        outputs = self.ln2(inputs+ffn_outputs)        # output shape (None, query_len, d_model)
        return outputs

In [55]:
layer = Block(d_model=200, dff=2048, heads=8)
mask = tf.keras.Input(shape=[4, 4])
source = tf.keras.Input(shape=[4, 16])
outputs = layer(inputs=source, mask=mask)
print(outputs.shape)

(None, 4, 200)


ValueError: in user code:

    C:\Users\tusha\AppData\Local\Temp/ipykernel_15972/2617326371.py:26 call  *
        outputs = self.ln1(inputs+attention_outputs)
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\ops\math_ops.py:1250 binary_op_wrapper
        raise e
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\ops\math_ops.py:1234 binary_op_wrapper
        return func(x, y, name=name)
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\ops\math_ops.py:1565 _add_dispatch
        return gen_math_ops.add_v2(x, y, name=name)
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\ops\gen_math_ops.py:531 add_v2
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\framework\op_def_library.py:748 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\framework\func_graph.py:599 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\framework\ops.py:3557 _create_op_internal
        ret = Operation(
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\framework\ops.py:2041 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    C:\Users\tusha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\framework\ops.py:1883 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimensions must be equal, but are 16 and 200 for '{{node block_19/add}} = AddV2[T=DT_FLOAT](Placeholder, block_19/multi_head_attention_14/attention_output/add)' with input shapes: [?,4,16], [?,4,200].


In [31]:
class Preprocessor:
    def __init__(self, vocab_size, seq_len=10):
        self.vocab: List[str] = None
        self.word_ids: Dict[str, int] = None
        self.rev_word_ids:  Dict[int, str] = None
        self.tokenizer = tf.keras.layers.experimental.preprocessing.TextVectorization(max_tokens=vocab_size,
                                        output_sequence_length=seq_len, standardize=self.custom_standardize
                                        )

    def __call__(self, inputs):
        encoded_seq = tf_txt.normalize_utf8(inputs, "NFKD")
        tokenized_seq = self.tokenizer(self.add_extra(inputs))
        return tokenized_seq
    
    @staticmethod
    def add_extra(inputs):
        inputs = tf.constant(inputs)
        return [["[SURU] "]]+inputs+[[" [KHATAM]"]]
    
    def custom_standardize(self, text):
        return text

    
    def build_vocab(self, inputs):
        self.tokenizer.adapt(self.add_extra(inputs))
        self.vocab = self.tokenizer.get_vocabulary()
        self.build_dictionary(self.vocab)
        return self.vocab

    def build_dictionary(self, vocab_list: List[str]):
        word_ids = dict()
        rev_word_ids = dict()
        for i, item in enumerate(vocab_list):
            word_ids[item] = i
            rev_word_ids[i] = item
        self.word_ids = word_ids
        self.rev_word_ids = rev_word_ids


In [10]:
vocab_size = 20
preprocessor = Preprocessor(vocab_size=vocab_size, seq_len=MAX_SEQ_LEN)
inputs = [["जैसा "], ["i am fine, what about you. ? "]]
vocab = preprocessor.build_vocab(inputs)
print(vocab)
print(preprocessor(inputs))

['', '[UNK]', '[SURU]', '[KHATAM]', 'जैसा', 'you.', 'what', 'i', 'fine,', 'am', 'about', '?']
tf.Tensor(
[[ 2  4  3  0  0  0  0  0  0  0]
 [ 2  7  9  8  6 10  5 11  3  0]], shape=(2, 10), dtype=int64)


In [47]:
class Poet(tf.keras.models.Model):
    def __init__(self, num_blocks=1, seq_len=10, d_model=512, dff=512, heads=8):
        super().__init__()
        self.d_model = d_model
        self.num_blocks = num_blocks
        self.embedding_layer = tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, 
                                            output_dim=EMBEDDING_DIM, mask_zero=True, input_length=seq_len
                                            )
        self.blocks = [Block(d_model=self.d_model, dff=dff, heads=heads) for i in range(self.num_blocks)]


    def call(self, inputs):
        embeddings = self.embedding_layer(inputs)
        outputs = embeddings
        
        # generate lookahead mask


        for block in self.blocks:
            outputs = block(outputs, mask=None)
        
        return outputs
        

In [48]:
poet = Poet()
inputs = np.array([[ 2,  4,  3,  0,  0,  0,  0,  0,  0,  0], [ 2,  7 , 9  ,8 , 6 ,10,  5 ,11  ,3  ,0]])
print(inputs.shape)
outputs = poet.call(inputs)

(2, 10)


InvalidArgumentError: Incompatible shapes: [2,10,200] vs. [2,10,512] [Op:AddV2]

In [21]:
outputs.shape

NameError: name 'outputs' is not defined