## 1. 1d absolute sincos constant embedding
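Fixed (non-trainable) sine/cosine position encoding from the original Transformer paper (*Attention Is All You Need*).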

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Demo sizes used by the example cells: number of positions and embedding width.
n_pos = 4
dim = 4

In [3]:
def create_1d_absolute_sincos_embedding(n_pos_vec, dim):
    """Build a fixed sine/cosine positional embedding table.

    Args:
        n_pos_vec: Tensor of positions, e.g. ``torch.arange(n_pos)``. Any
            numeric dtype is accepted; the tensor is flattened and cast to
            float32 before use.
        dim: Embedding width; must be even.

    Returns:
        Float32 tensor of shape ``[n_pos_vec.numel(), dim]`` on the same
        device as ``n_pos_vec``. Even columns hold ``sin(pos * omega_k)`` and
        odd columns hold ``cos(pos * omega_k)``, where
        ``omega_k = 1 / 10000 ** (2k / dim)`` for ``k = 0 .. dim / 2 - 1``.

    Raises:
        AssertionError: If ``dim`` is odd.
    """
    assert dim % 2 == 0, 'wrong dimension!'

    # Cast up front: an integer (or float64) position vector would otherwise
    # hit a dtype mismatch when combined with the float32 frequencies.
    positions = n_pos_vec.reshape(-1).to(torch.float)
    device = positions.device

    omega = torch.arange(dim // 2, dtype=torch.float, device=device)
    omega /= dim / 2.
    omega = 1. / (10000 ** omega)

    # Outer product of positions and frequencies: [n_pos, dim / 2]
    out = positions[:, None] * omega[None, :]

    positional_embedding = torch.zeros(positions.numel(), dim, dtype=torch.float, device=device)
    # Interleave: sin in even columns, cos in odd columns.
    positional_embedding[:, 0::2] = torch.sin(out)
    positional_embedding[:, 1::2] = torch.cos(out)
    return positional_embedding

In [4]:
# Build float positions 0 .. n_pos - 1 and display the resulting [n_pos, dim] table.
n_pos_vec = torch.arange(n_pos, dtype=torch.float)
pe = create_1d_absolute_sincos_embedding(n_pos_vec, dim)
pe

tensor([[ 0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.0100,  0.9999],
        [ 0.9093, -0.4161,  0.0200,  0.9998],
        [ 0.1411, -0.9900,  0.0300,  0.9996]])

## 2. 1d absolute trainable embedding
Learnable 1-D position embedding, as used in the Vision Transformer (ViT).

In [5]:
def create_1d_absolute_trainable_embedding(n_pos_vec, dim):
    """Create a learnable lookup table with one zero-initialised row per position.

    Args:
        n_pos_vec: Tensor of positions; only its element count is used.
        dim: Width of each embedding row.

    Returns:
        ``nn.Embedding`` with ``n_pos_vec.numel()`` rows of width ``dim``
        whose weights are all zero.
    """
    num_positions = n_pos_vec.numel()
    table = nn.Embedding(num_positions, dim)
    # Replace the default random initialisation so every position starts at zero.
    nn.init.zeros_(table.weight)
    return table

## 3. 2d relative bias trainable embedding
Learnable 2-D relative position bias, as used in the Swin Transformer.

In [6]:
def create_2d_relative_bias_trainable_embedding(n_head, height, width):
    """Build a 2-D relative position bias for a ``height x width`` window.

    A zero-initialised ``nn.Embedding`` table with one row per possible
    (row offset, column offset) pair is created, then looked up for every
    ordered pair of positions in the flattened window.

    Args:
        n_head: Number of attention heads (width of each table row).
        height: Window height.
        width: Window width.

    Returns:
        Tensor of shape ``[1, n_head, height * width, height * width]``.

    Note:
        The embedding table is created inside this function and is not
        returned; callers only receive the looked-up bias tensor.
    """
    # Along an axis of length n the offset i - j lies in [-(n - 1), n - 1],
    # i.e. 2 * n - 1 distinct values. The table holds one row per (dy, dx) pair.
    n_offsets_h = 2 * height - 1
    n_offsets_w = 2 * width - 1
    positional_embedding = nn.Embedding(n_offsets_h * n_offsets_w, n_head)
    nn.init.constant_(positional_embedding.weight, 0.)

    def get_relative_position_index(height, width):
        """Return the [height * width, height * width] table-row index for each position pair."""
        # indexing='ij' keeps the (row, column) layout and avoids the warning
        # torch.meshgrid emits when the argument is omitted.
        coords = torch.stack(
            torch.meshgrid(torch.arange(height), torch.arange(width), indexing='ij')
        )  # [2, height, width]
        coords_flatten = torch.flatten(coords, 1)  # [2, height * width]
        # Pairwise offsets: [2, height * width, height * width]
        relative_coords_bias = coords_flatten[:, :, None] - coords_flatten[:, None, :]

        # Shift the offsets so that they are all >= 0.
        relative_coords_bias[0, :, :] += height - 1
        relative_coords_bias[1, :, :] += width - 1

        # Row-major flattening of the 2-D offset (dy, dx) into one index:
        # index = dy * (number of dx values) + dx, like B[i * cols + j] = A[i][j].
        relative_coords_bias[0, :, :] *= 2 * width - 1
        return relative_coords_bias.sum(0)  # [height * width, height * width]

    relative_position_bias = get_relative_position_index(height, width)
    # [height * width, height * width, n_head]
    bias_embedding = positional_embedding(torch.flatten(relative_position_bias)).reshape(height * width, height * width, n_head)
    bias_embedding = bias_embedding.permute(2, 0, 1).unsqueeze(0)  # [1, n_head, h * w, h * w]
    return bias_embedding

In [7]:
# Example: 4 heads over a 4x4 window -> [1, 4, 16, 16] bias (all zeros, since the table is zero-initialised).
create_2d_relative_bias_trainable_embedding(4, 4, 4)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          

## 4. 2d absolute constant sincos embedding
Fixed 2-D sine/cosine position embedding, as used in the Masked Autoencoder (MAE).

In [8]:
def create_2d_absolute_sincos_embeddings(height, width, dim):
    """Build a fixed 2-D sine/cosine positional embedding for a ``height x width`` grid.

    The first ``dim // 2`` columns encode each cell's row index and the last
    ``dim // 2`` columns encode its column index, each produced by
    ``create_1d_absolute_sincos_embedding``.

    Args:
        height: Number of grid rows.
        width: Number of grid columns.
        dim: Total embedding width; must be divisible by 4 so that each
            half is an even width accepted by the 1-D function.

    Returns:
        Tensor of shape ``[height * width, dim]``, rows ordered row-major
        over the grid.

    Raises:
        AssertionError: If ``dim`` is not divisible by 4.
    """
    assert dim % 4 == 0, 'wrong dimensions!'

    positional_embedding = torch.zeros(height * width, dim)
    # indexing='ij' keeps the (row, column) layout and avoids the warning
    # torch.meshgrid emits when the argument is omitted.
    row_coords, col_coords = torch.meshgrid(
        torch.arange(height, dtype=torch.float),
        torch.arange(width, dtype=torch.float),
        indexing='ij',
    )  # each [height, width]

    height_embedding = create_1d_absolute_sincos_embedding(torch.flatten(row_coords), dim // 2)  # [h * w, dim / 2]
    width_embedding = create_1d_absolute_sincos_embedding(torch.flatten(col_coords), dim // 2)  # [h * w, dim / 2]

    positional_embedding[:, :dim // 2] = height_embedding
    positional_embedding[:, dim // 2:] = width_embedding

    return positional_embedding

In [9]:
# Example: 4x4 grid with dim=4 -> each row is [sin, cos] of the row index followed by [sin, cos] of the column index.
create_2d_absolute_sincos_embeddings(4, 4, 4)

tensor([[ 0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.0000,  1.0000,  0.8415,  0.5403],
        [ 0.0000,  1.0000,  0.9093, -0.4161],
        [ 0.0000,  1.0000,  0.1411, -0.9900],
        [ 0.8415,  0.5403,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.8415,  0.5403],
        [ 0.8415,  0.5403,  0.9093, -0.4161],
        [ 0.8415,  0.5403,  0.1411, -0.9900],
        [ 0.9093, -0.4161,  0.0000,  1.0000],
        [ 0.9093, -0.4161,  0.8415,  0.5403],
        [ 0.9093, -0.4161,  0.9093, -0.4161],
        [ 0.9093, -0.4161,  0.1411, -0.9900],
        [ 0.1411, -0.9900,  0.0000,  1.0000],
        [ 0.1411, -0.9900,  0.8415,  0.5403],
        [ 0.1411, -0.9900,  0.9093, -0.4161],
        [ 0.1411, -0.9900,  0.1411, -0.9900]])