In [27]:
import numpy as np
import math

In [28]:
# Notebook-wide hyper-parameters; later cells read these by name.
vocab_size: int = 100  # number of tokens the Embedding is built for
n_embed: int = 64      # length of each embedding vector
max_len: int = 10      # NOTE(review): not read by any cell visible in this part of the notebook

In [39]:
class Embedding:
    """Random lookup table that maps vocabulary tokens to dense vectors.

    The table is empty until `encode` is called with a vocabulary of exactly
    `vocab_size` tokens; each token is then bound to one row of a freshly
    sampled standard-normal matrix of shape ``(vocab_size, n_embed)``.
    """

    __slots__ = '_n_embed', '_vocab_size', '_proxy', '_vocab'

    def __init__(self, *, n_embed, vocab_size):
        """Store the table dimensions; no vectors are sampled yet."""
        self._n_embed = n_embed
        self._vocab_size = vocab_size
        self._proxy = {}
        # Start with an empty vocabulary so that tokenising before `encode`
        # does not fail with an AttributeError on the unset slot.
        self._vocab = ()

    def encode(self, vocab):
        """Bind every token in `vocab` to a random vector.

        Args:
            vocab: sized iterable of tokens; its length must equal the
                `vocab_size` given at construction.

        Returns:
            `self`, so the call can be chained.

        Raises:
            ValueError: if ``len(vocab)`` differs from `vocab_size`. The
                instance is left unchanged in that case.
        """
        # Validate before touching any state so a rejected vocabulary cannot
        # leave the tokenizer and the lookup table out of sync.
        if len(vocab) != self._vocab_size:
            raise ValueError(f'Mismatching amount of tokens: expected {self._vocab_size}, found {len(vocab)}')
        aux = np.random.randn(self._vocab_size, self._n_embed)
        # Keep a private copy so later mutation of the caller's list has no effect.
        self._vocab = tuple(vocab)
        # Rebuild rather than update: tokens from a previous `encode` call
        # must not survive a re-encoding. Duplicate tokens keep the last row.
        self._proxy = {element: aux[i] for i, element in enumerate(self._vocab)}
        return self

    def _tokenize(self, sentence):
        """Split `sentence` into vocabulary tokens and leftover fragments.

        Pieces of `sentence` are accumulated one at a time; as soon as the
        buffer ends with a vocabulary token, the text before it (if any) and
        the token itself are emitted and the buffer is reset. Whatever is
        left in the buffer at the end is emitted as a final fragment.
        """
        tokens = []
        current_token = ""

        for char in sentence:
            current_token += char

            for token in self._vocab:
                # An empty token would match every buffer and, because
                # `buffer[:-0]` is '', silently throw the buffered text away.
                if token and current_token.endswith(token):
                    tokens.append(current_token[:-len(token)])
                    tokens.append(token)
                    current_token = ""
                    # The buffer is empty now, nothing else can match.
                    break

        if current_token:
            tokens.append(current_token)

        # Drop the empty prefixes produced when a token starts the buffer.
        return [piece for piece in tokens if piece]

    def __call__(self, item: str):
        """Return a generator over the vectors of the tokens found in `item`.

        `item` may be a string or any iterable of string pieces. Tokenising
        happens immediately; the table lookups happen lazily, so a
        ``KeyError`` for a fragment that is not in the vocabulary is raised
        only while the generator is being consumed.
        """
        return (self._proxy[element] for element in self._tokenize(item))

In [40]:
# Create the (still empty) embedding table from the notebook-level sizes.
embedding = Embedding(
    vocab_size=vocab_size,
    n_embed=n_embed,
)

In [41]:
# Register a character-level vocabulary: one token per character of the raw
# string below (raw, so every backslash is a literal character).
# NOTE(review): the backslash occurs more than once in the string, so the
# lookup table ends up with fewer distinct keys than tokens passed in.
embedding.encode(list(r'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÉÍÓÚabcdefghijklmnopqrstuvwxyzáàéíóú0123456789%!&()"\'/\\.,-=_+ :;{}[]<>'))

<__main__.Embedding at 0x7b788a938440>

In [42]:
# Smoke test: look up the vectors for two single-character tokens given as a list.
# NOTE(review): the saved output below is a list of arrays, whereas
# `Embedding.__call__` as defined in this notebook returns a generator — the
# output looks stale; re-run the cell (or wrap in list(...)) to inspect vectors.
embedding(['c', 'a'])

[array([-1.01954795e+00,  1.71984344e+00, -1.62456924e+00, -7.54220092e-01,
        -5.78118374e-01,  7.59163688e-01, -1.28430170e+00, -2.15455925e-01,
         1.54189251e+00,  1.64011301e+00,  1.67926912e-02,  4.54669136e-01,
        -7.23376809e-01,  1.24843070e+00, -5.55384705e-01,  3.48785027e-01,
        -5.96646043e-01, -9.25733339e-01, -1.31328725e-01,  4.47514207e-01,
         1.57619787e-01, -1.71094674e+00,  9.31087134e-04, -2.06651634e+00,
         2.21277483e+00, -4.22706068e-01,  1.80693008e-01,  1.10720845e+00,
         4.08925129e-01,  1.99574140e-02,  4.54759715e-01, -8.41287058e-02,
         1.38895836e+00, -8.37352886e-02,  1.09673196e+00,  5.69760700e-01,
        -4.51186613e-01,  9.47642772e-01, -1.06016111e+00,  4.20843095e-01,
        -4.66375584e-01,  9.35814971e-01, -1.65224759e-01, -1.22507835e+00,
         6.97088728e-01,  3.60726686e-01, -1.74727581e+00,  1.67500620e+00,
        -2.88599079e-01,  3.53110597e-01, -9.85757428e-01,  9.51532929e-01,
        -1.2

In [43]:
class PositionalEncoding:
    """Fixed sinusoidal positional encoding implemented with NumPy.

    A table `pe` of shape ``(max_len, 1, d_model)`` is pre-computed: even
    feature columns hold ``sin(pos * w_k)`` and odd columns ``cos(pos * w_k)``
    with ``w_k = exp(-2k * ln(10000) / d_model)``.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        """Pre-compute the encoding table.

        Args:
            d_model: size of the feature dimension.
            max_len: number of positions to pre-compute.
        """
        # Column vector of positions so it broadcasts against the row of
        # frequencies (ndarray has no `.unsqueeze`; that is a torch method).
        position = np.arange(max_len)[:, np.newaxis]
        div_term = np.exp(np.arange(0, d_model, 2)
                          * (-math.log(10000.0) / d_model))
        angles = position * div_term
        # `np.zeros` takes the shape as a single tuple.
        pe = np.zeros((max_len, 1, d_model))
        pe[:, 0, 0::2] = np.sin(angles)
        # With an odd `d_model` there is one cosine column fewer than sine
        # columns, so trim the surplus frequency.
        pe[:, 0, 1::2] = np.cos(angles)[:, :d_model // 2]
        self.pe = pe

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Add the positional encoding to `x` and return the sum.

        Args:
            x: array whose first axis indexes positions, shaped either
                ``(seq_len, batch, d_model)`` or ``(seq_len, d_model)``.

        Raises:
            ValueError: if `x` has more positions than were pre-computed.
        """
        x = np.asarray(x)
        seq_len = x.shape[0]
        if seq_len > self.pe.shape[0]:
            raise ValueError(
                f'Sequence length {seq_len} exceeds max_len {self.pe.shape[0]}')
        pe = self.pe[:seq_len]
        if x.ndim == 2:
            # No batch axis in the input: drop the singleton batch axis so the
            # sum does not broadcast to (seq_len, seq_len, d_model).
            pe = pe[:, 0, :]
        return x + pe

    # Same calling convention as the other layers in this notebook.
    __call__ = forward

In [48]:
class Linear:
    """Bias-free linear map backed by a randomly initialised matrix.

    The positional arguments are forwarded unchanged to `np.random.randn`,
    so ``Linear(a, b)`` holds a standard-normal matrix of shape ``(a, b)``.
    """

    def __init__(self, *dim):
        self._dim = dim
        self._matrix = np.random.randn(*dim)

    @property
    def matrix(self):
        """The weight matrix sampled at construction."""
        return self._matrix

    def forward(self, x):
        """Return ``np.dot(matrix, x)``.

        NOTE(review): the matrix is applied on the left, so for
        ``Linear(a, b)`` the input's leading dimension must be ``b`` —
        confirm this orientation is what callers expect.
        """
        weights = self._matrix
        return np.dot(weights, x)

    # Instances are callable; calling is the same as `forward`.
    __call__ = forward


In [None]:
class ReLU:
    """Element-wise rectified linear unit: ``max(x, 0)``."""

    def forward(self, x):
        """Return `x` with every negative entry replaced by 0."""
        return np.maximum(x, 0)

    # Make instances callable like the other layers in this notebook.
    __call__ = forward

In [None]:
class LayerNorm:
    """Layer normalisation over the last (feature) axis.

    Each feature vector is shifted to zero mean and scaled to unit variance,
    then transformed by the per-feature scale `gamma` and shift `beta`.
    """

    def __init__(self, dim, eps=1e-5):
        """Create the affine parameters.

        Args:
            dim: size of the feature axis being normalised.
            eps: small constant added to the variance so constant inputs do
                not divide by zero.
        """
        # Identity affine transform to start with; now that gamma/beta are
        # actually applied, a random initialisation would scramble the
        # normalised output.
        self._gamma = np.ones(dim)
        self._beta = np.zeros(dim)
        self._dim = dim
        self._eps = eps

    def forward(self, x):
        """Normalise `x` along its last axis and apply gamma/beta.

        Args:
            x: array whose last axis has length `dim`.

        Returns:
            Array of the same shape as `x`.
        """
        x = np.asarray(x)
        # Statistics are taken per feature vector (last axis), not across the
        # sequence/batch axis; keepdims makes them broadcast back onto x.
        mean = x.mean(axis=-1, keepdims=True)
        var = x.var(axis=-1, keepdims=True)
        normalised = (x - mean) / np.sqrt(var + self._eps)
        return self._gamma * normalised + self._beta

    # Same calling convention as the other layers in this notebook.
    __call__ = forward

In [None]:
class FeedForward:
    """Placeholder with no behaviour yet.

    TODO: implement; presumably the transformer's feed-forward sub-layer.
    """

In [49]:
class SoftMax:
    """Numerically stable softmax along a configurable axis."""

    __slots__ = '_dim',

    def __init__(self, *, dim=-1):
        """Remember the default axis (`dim`) to normalise over."""
        self._dim = dim

    def __call__(self, x, dim=None):
        """Return the softmax of `x` along `dim`.

        Args:
            x: array-like of scores, any number of dimensions.
            dim: axis to normalise over; defaults to the axis given at
                construction.

        Returns:
            Array with the same shape as `x` whose entries along `dim` are
            non-negative and sum to 1.
        """
        if dim is None:
            dim = self._dim
        x = np.asarray(x)
        # Subtract the maximum of each slice (not the global maximum) so a
        # slice far below the global maximum does not underflow to 0/0.
        shifted = x - np.max(x, axis=dim, keepdims=True)
        ex = np.exp(shifted)
        # keepdims lets the normaliser broadcast for any axis and any rank.
        return ex / ex.sum(axis=dim, keepdims=True)

In [51]:
class Head:
    """Single scaled dot-product attention head.

    Holds three projection matrices of shape ``(n_dim, head_size)`` for the
    query, key and value inputs.
    """

    __slots__ = '_n_dim', '_q', '_k', '_v', '_softmax'

    def __init__(self, n_dim, head_size):
        """Create the query/key/value projections and the softmax."""
        self._n_dim = n_dim
        self._q = Linear(n_dim, head_size)
        self._k = Linear(n_dim, head_size)
        self._v = Linear(n_dim, head_size)
        self._softmax = SoftMax()

    def forward(self, x, y=None, z=None, mask=None):
        """Attend from `x` (queries) over `y` (keys) and `z` (values).

        Args:
            x: query source, shape ``(..., seq_q, n_dim)``.
            y: key source, shape ``(..., seq_k, n_dim)``; defaults to `x`
                (self-attention).
            z: value source, shape ``(..., seq_k, n_dim)``; defaults to `y`.
            mask: optional additive mask broadcastable to
                ``(..., seq_q, seq_k)``; use large negative values to block
                positions.

        Returns:
            Array of shape ``(..., seq_q, head_size)``.
        """
        if y is None:
            y = x
        if z is None:
            z = y
        # The projection matrices are (n_dim, head_size), so the inputs go on
        # the left: (seq, n_dim) @ (n_dim, head_size) -> (seq, head_size).
        q = x @ self._q.matrix
        k = y @ self._k.matrix
        v = z @ self._v.matrix
        depth = k.shape[-1]
        # Swap only the last two axes so batched inputs work as well.
        logits = q @ np.swapaxes(k, -1, -2) / np.sqrt(depth)
        # Scale first, then mask, so the mask values are not rescaled.
        if mask is not None:
            logits = logits + mask
        weights = self._softmax(logits)
        return weights @ v

    __call__ = forward

In [52]:
class MultiHeadAttention:
    """Multi-head self-attention: run several heads and concatenate them."""

    def __init__(self, n_heads, head_dim, n_embed) -> None:
        """Create `n_heads` independent heads.

        Args:
            n_heads: number of attention heads.
            head_dim: output size of each head.
            n_embed: size of the input feature dimension.
        """
        self._n_heads = n_heads
        self._head_dim = head_dim
        self._n_embed = n_embed
        self._heads = [Head(n_embed, head_dim) for _ in range(n_heads)]

    def forward(self, x, mask=None):
        """Apply every head to `x` and join the results on the last axis.

        Args:
            x: input array, used as query, key and value source of each head.
            mask: optional mask forwarded unchanged to every head.

        Returns:
            The head outputs concatenated along the last axis.
        """
        # A head takes separate query/key/value inputs; self-attention feeds
        # the same array to all three (calling `head(x)` alone raised a
        # TypeError for the missing arguments).
        outputs = [head(x, x, x, mask) for head in self._heads]
        return np.concatenate(outputs, axis=-1)

    __call__ = forward

In [None]:
class Encoder:
    """Placeholder with no behaviour yet.

    TODO: implement; presumably the transformer's encoder stack.
    """

In [None]:
class Decoder:
    """Placeholder with no behaviour yet.

    TODO: implement; presumably the transformer's decoder stack.
    """

In [None]:
class EncoderBlock:
    """Placeholder with no behaviour yet.

    TODO: implement; presumably a single layer of the encoder.
    """

In [None]:
class DecoderBlock:
    """Placeholder with no behaviour yet.

    TODO: implement; presumably a single layer of the decoder.
    """

In [None]:
class SimpleTransformer:
    """Front end that embeds an input sequence and feeds it to the model.

    NOTE(review): `_SimpleTransformer` is not defined in the part of the
    notebook visible here; it must exist before this class is instantiated.
    NOTE(review): the embedding created here is never given a vocabulary
    (`Embedding.encode`), so lookups cannot succeed until that is done.
    """

    def __init__(self, n_heads, head_dim, n_embed, vocab_size, max_len) -> None:
        """Store the hyper-parameters and build the embedding and the model.

        Args:
            n_heads: number of attention heads.
            head_dim: output size of each head.
            n_embed: size of each embedding vector.
            vocab_size: number of tokens the embedding is built for.
            max_len: stored only; not used by the code in this class.
        """
        self._n_heads = n_heads
        self._head_dim = head_dim
        self._n_embed = n_embed
        self._vocab_size = vocab_size
        self._max_len = max_len
        self._embedding = Embedding(n_embed=n_embed, vocab_size=vocab_size)
        self._transformer = _SimpleTransformer(n_heads, head_dim, n_embed)

    def forward(self, x):
        """Embed the items of `x` and run the result through the model.

        Args:
            x: iterable of tokens (for example a string of characters).

        Returns:
            Whatever the wrapped transformer returns for the embedded input.
        """
        # The embedding is a callable, not a mapping: subscripting it raised
        # a TypeError. It yields one vector per token lazily, so collect the
        # vectors into a single (n_tokens, n_embed) array.
        # NOTE(review): assumes the wrapped model expects an ndarray — confirm.
        vectors = np.array(list(self._embedding(list(x))))
        return self._transformer(vectors)

    __call__ = forward