In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import math


# Token Embedding

Subclass PyTorch's built-in `nn.Embedding` directly, with index 1 as the padding index.

In [2]:

class TokenEmbedding(nn.Embedding):
    """Token-embedding lookup table: a thin subclass of ``nn.Embedding``.

    Args:
        vocab_size: Number of rows in the table (passed to ``nn.Embedding``
            as ``num_embeddings``).
        d_model: Size of each embedding vector (``embedding_dim``).
        padding_idx: Index forwarded to ``nn.Embedding`` as ``padding_idx``.
            Defaults to 1, the value this class previously hard-coded, so
            existing two-argument callers behave exactly as before.
    """

    def __init__(self, vocab_size, d_model, padding_idx=1):
        super().__init__(vocab_size, d_model, padding_idx=padding_idx)

# Position Embedding 
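Even-indexed and odd-indexed embedding dimensions are encoded differently: even dimensions use a sine and odd dimensions use a cosine of the same angle, i.e. $PE_{(pos,\,2i)} = \sin\left(pos / 10000^{2i/d_{model}}\right)$ and $PE_{(pos,\,2i+1)} = \cos\left(pos / 10000^{2i/d_{model}}\right)$.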

In [3]:
class PositionalEmbedding(nn.Module):
    """Fixed (non-trainable) sinusoidal positional encoding table.

    A ``(maxlen, d_model)`` table is precomputed once at construction time:
    even columns hold ``sin(pos / 10000 ** (2i / d_model))`` and odd columns
    hold ``cos(pos / 10000 ** (2i / d_model))``.

    Args:
        d_model: Width of each encoding vector (number of columns). Both even
            and odd values are supported.
        maxlen: Number of positions (rows) to precompute.
        device: Device on which the table is created. ``None`` (the default)
            uses PyTorch's default device.
    """

    def __init__(self, d_model, maxlen, device=None):
        super().__init__()

        # Positions 0 .. maxlen-1 as a float column vector, shape (maxlen, 1).
        pos = torch.arange(0, maxlen, dtype=torch.float32, device=device).unsqueeze(1)
        # Even dimension indices 2i = 0, 2, 4, ... created as floats so the
        # division below is a true division regardless of PyTorch version.
        _2i = torch.arange(0, d_model, 2, dtype=torch.float32, device=device)

        # Broadcasts to shape (maxlen, ceil(d_model / 2)).
        angle = pos / (10000 ** (_2i / d_model))

        encoding = torch.zeros(maxlen, d_model, device=device)
        encoding[:, 0::2] = torch.sin(angle)
        # For odd d_model there is one fewer odd column than even columns,
        # so trim the cosine term to the number of odd columns.
        encoding[:, 1::2] = torch.cos(angle[:, : d_model // 2])

        # Register as a buffer so the table follows module.to()/cuda()/half().
        # It is never trained (buffers carry no gradient), and
        # persistent=False keeps it out of state_dict, matching the previous
        # plain-attribute behaviour for saved checkpoints.
        self.register_buffer("encoding", encoding, persistent=False)

    def forward(self, x):
        """Return the encodings for the first ``x.shape[1]`` positions.

        Only ``x.shape[1]`` is read from ``x``; its values are not used.
        The result has shape ``(min(x.shape[1], maxlen), d_model)`` because
        the table is simply sliced, so a sequence longer than ``maxlen``
        yields only ``maxlen`` rows.
        """
        # The whole table is not needed; cut it down to the sequence length.
        seq_len = x.shape[1]
        return self.encoding[:seq_len, :]

In [9]:
# Even indices 0, 2, ..., 8: the same arange pattern used for `_2i` above.
test = torch.arange(start=0, end=10, step=2)

In [10]:
# Display the tensor built by torch.arange to inspect its values.
test

tensor([0, 2, 4, 6, 8])

In [6]:
# Cast to float and add a trailing dimension: shape (n,) -> (n, 1).
torch.unsqueeze(test.float(), dim=1)

tensor([[0.],
        [2.],
        [4.],
        [6.],
        [8.]])