# Positional Encoding

> Implement Transformer's Encoder Layer from scratch

In [2]:
#| default_exp transformer.positional_encoding

In [3]:
#| hide
from nbdev.showdoc import *

In [4]:
#| hide
import nbdev; nbdev.nbdev_export()

In [5]:
#| export
import torch
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


### Positional Encoding
$P E_{p o s, 2 i}=\sin \left(\frac{p o s}{10000^{\frac{2 i}{d}}}\right)$
$P E_{p o s, 2 i+1}=\cos \left(\frac{p o s}{10000^{\frac{2 i}{d}}}\right)$
- `pos`: the position of a word in a sequence
- `i`: is the index in the word encoding vector

In [16]:
#| export
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, max_seq_len : float = 2000, dropout: float = 0.3):
        super().__init__()
        self.d_model = d_model
        self.dropout = nn.Dropout(dropout)
        
        self.pe = torch.zeros(max_seq_len, d_model)
        pos = torch.arange(0, max_seq_len).unsqueeze(1).float()
        
        two_i = torch.arange(0, d_model, step=2).float()
        div_term = torch.pow(1000, (two_i/torch.tensor([d_model]))).float()
        
        pe[:, 0::2] = torch.sin(pos/div_term)
        pe[:, 1::2] = torch.cos(pos/div_term)
        
        # add one dim for batch_size
        pe = pe.unsqueeze(0)
        
        self.register_buffer('pe', pe)
    
    def forward(self, x: torch.Tensor):
        # x is text embedding
        # shape(x) = [batch_size x seq_len x d_model]
        seq_len = x.shape[1]
        
        # extract the position for seq_len
        pe = self.pe[:, :seq_len].detach()
        
        x = x.add(pe)
        
        return self.dropout(x)

In [17]:
position = PositionalEncoding(8)
position.pe.shape
position.pe

UnboundLocalError: local variable 'pe' referenced before assignment

In [18]:
tokens = torch.randn(10, 8)

In [15]:
position(tokens)

RuntimeError: The size of tensor a (10) must match the size of tensor b (8) at non-singleton dimension 1