# Positional Encoding

> Implement the Transformer's sinusoidal positional encoding from scratch

In [None]:
#| default_exp transformer.positional_encoding

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()

In [None]:
#| export
import torch
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


### Positional Encoding
$PE_{(pos,\,2i)} = \sin\left(\frac{pos}{10000^{2i/d}}\right)$

$PE_{(pos,\,2i+1)} = \cos\left(\frac{pos}{10000^{2i/d}}\right)$
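- `pos`: the position of a token in the sequence
- `i`: the index of a sine/cosine pair, so `2i` and `2i+1` are the even and odd dimensions of the encoding vector
- `d`: the dimension of the embedding vector (`d_model`)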

In [None]:
#| export
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to token embeddings.

    The encoding table is computed once at construction time as

        PE[pos, 2i]     = sin(pos / base ** (2i / d_model))
        PE[pos, 2i + 1] = cos(pos / base ** (2i / d_model))

    and stored in the non-trainable buffer ``pe`` with shape
    ``[1, max_seq_len, d_model]``.

    Args:
        d_model: Size of the embedding vector of each token.
        max_seq_len: Number of positions to precompute; inputs longer than
            this are rejected by ``forward``.
        dropout: Dropout probability applied after the encoding is added.
        base: Base of the geometric progression of wavelengths
            (10000 in the formula above).
    """

    def __init__(
        self,
        d_model: int,
        max_seq_len: int = 2000,
        dropout: float = 0.3,
        base: float = 10000.0,
    ):
        super().__init__()
        self.d_model = d_model
        self.dropout = nn.Dropout(dropout)

        # Tensor sizes must be integers; tolerate integral floats such as 2000.0.
        max_seq_len = int(max_seq_len)

        # Column vector of positions: [max_seq_len, 1].
        pos = torch.arange(max_seq_len, dtype=torch.float32).unsqueeze(1)

        # Even dimension indices 0, 2, 4, ... play the role of ``2i``.
        two_i = torch.arange(0, d_model, 2, dtype=torch.float32)
        div_term = torch.pow(base, two_i / d_model)

        # angles[pos, i] = pos / base ** (2i / d_model)
        angles = pos / div_term

        pe = torch.zeros(max_seq_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one fewer odd column than even columns,
        # so drop the unused trailing frequency.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Leading dim of size 1 stands for the batch dimension. A buffer moves
        # with the module across devices but is not a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Add positional encodings to ``x`` and apply dropout.

        Args:
            x: Token embeddings of shape ``[batch_size, seq_len, d_model]``,
                or unbatched ``[seq_len, d_model]``.

        Returns:
            A tensor with the same shape as ``x``.

        Raises:
            ValueError: If ``x`` has fewer than two dimensions, its last
                dimension is not ``d_model``, or its sequence length exceeds
                the number of precomputed positions.
        """
        if x.dim() < 2:
            raise ValueError(
                "expected input of shape [batch_size, seq_len, d_model] or "
                f"[seq_len, d_model], got {tuple(x.shape)}"
            )

        seq_len, width = x.shape[-2], x.shape[-1]
        if width != self.d_model:
            raise ValueError(
                f"last dimension of input ({width}) must equal "
                f"d_model ({self.d_model})"
            )

        max_seq_len = self.pe.size(1)
        if seq_len > max_seq_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_seq_len {max_seq_len}"
            )

        # [seq_len, d_model] slice; broadcasts over any leading batch dims.
        # Buffers carry no gradient, so no detach() is required.
        pe = self.pe[0, :seq_len]

        return self.dropout(x + pe)

In [None]:
# Build an encoder for 8-dimensional embeddings and inspect its
# precomputed encoding table (the last expression is what the cell displays).
position = PositionalEncoding(d_model=8)
position.pe.shape
position.pe

tensor([[[ 0.0000,  1.0000,  0.0000,  ...,  1.0000,  0.0000,  1.0000],
         [ 0.8415,  0.5403,  0.1769,  ...,  0.9995,  0.0056,  1.0000],
         [ 0.9093, -0.4161,  0.3482,  ...,  0.9980,  0.0112,  0.9999],
         ...,
         [-0.8689,  0.4950, -0.1221,  ...,  0.9496, -0.9727,  0.2322],
         [-0.0529,  0.9986, -0.2958,  ...,  0.9392, -0.9713,  0.2377],
         [ 0.8117,  0.5841, -0.4601,  ...,  0.9279, -0.9700,  0.2432]]])

In [None]:
# The encoder's forward pass documents its input as
# [batch_size, seq_len, d_model]: use a batch of one 10-token sequence
# with 8-dimensional embeddings.
tokens = torch.randn(1, 10, 8)

In [None]:
# Run the sample embeddings through the module's forward pass.
position(tokens)

RuntimeError: The size of tensor a (10) must match the size of tensor b (8) at non-singleton dimension 1