In [27]:
import paddle
import torch
import math
import numpy as np

# Select the CPU as Paddle's device for the rest of this session.
paddle.set_device('cpu')

CPUPlace

import math

In [28]:
class PPositionalEncoding(paddle.nn.Layer):
    """Absolute sinusoidal positional encoding implemented with Paddle.

    A ``[1, max_len, d_model]`` table is precomputed once in ``__init__``.
    ``forward`` scales the input by ``sqrt(d_model)`` and adds the slice of
    the table selected by ``offset`` and the input's time length.
    """

    def __init__(self,
                 d_model: int,
                 dropout_rate: float,
                 max_len: int=5000,
                 reverse: bool=False):
        """Positional encoding.
            PE(pos, 2i)   = sin(pos/(10000^(2i/dmodel)))
            PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel)))
        Args:
            d_model (int): embedding dim.
            dropout_rate (float): dropout rate.
            max_len (int, optional): maximum input length. Defaults to 5000.
            reverse (bool, optional): accepted but ignored. Defaults to False.
        """
        super().__init__()
        self.d_model = d_model
        self.max_len = max_len
        self.xscale = math.sqrt(self.d_model)
        self.dropout = paddle.nn.Dropout(p=dropout_rate)

        position = np.arange(
            0, self.max_len, dtype=np.float32).reshape(-1, 1)  #[T, 1]
        div_term = np.exp(
            np.arange(0, self.d_model, 2, dtype=np.float32) *
            -(math.log(10000.0) / self.d_model))  #[ceil(D/2)]
        angles = position * div_term  #[T, ceil(D/2)]

        # Build the whole table in NumPy and convert once.
        pe = np.zeros([self.max_len, self.d_model], dtype=np.float32)  #[T, D]
        pe[:, 0::2] = np.sin(angles)
        # For odd d_model there is one fewer odd column than even columns,
        # so the cosine block is trimmed to fit.
        pe[:, 1::2] = np.cos(angles)[:, :self.d_model // 2]
        self.pe = paddle.to_tensor(pe).unsqueeze(0)  #[1, T, D]

    def forward(self, x: paddle.Tensor,
                offset: int=0):
        """Add positional encoding.
        Args:
            x (paddle.Tensor): Input. Its shape is (batch, time, ...)
            offset (int): position offset
        Returns:
            paddle.Tensor: Encoded tensor after dropout. Its shape is (batch, time, ...)
            paddle.Tensor: positional embedding after dropout, returned for
                compatibility with RelPositionalEncoding, (batch=1, time, ...)
        Raises:
            AssertionError: if ``offset + time`` exceeds ``max_len``.
        """
        T = x.shape[1]
        # A window that ends exactly at max_len is still inside the table,
        # so the bound is inclusive.
        assert offset + T <= self.max_len, (
            f"offset ({offset}) + time ({T}) exceeds max_len ({self.max_len})")
        pos_emb = self.pe[:, offset:offset + T]
        x = x * self.xscale + pos_emb
        # Dropout is applied to the encoded input as well as to pos_emb;
        # otherwise dropout_rate would never affect the first output.
        return self.dropout(x), self.dropout(pos_emb)

In [29]:

class PositionalEncoding(torch.nn.Module):
    """Absolute sinusoidal positional encoding implemented with PyTorch.

    The table ``self.pe`` has shape ``(1, T, d_model)`` and is (re)built by
    ``extend_pe`` whenever an input is longer than the cached table.
    """

    def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False):
        """Construct an PositionalEncoding object.

        Args:
            d_model (int): embedding dimension.
            dropout_rate (float): dropout probability applied to the output.
            max_len (int): number of positions precomputed at construction.
            reverse (bool): if True, positions count down from ``T - 1`` to 0.
        """
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model
        self.reverse = reverse
        self.xscale = math.sqrt(self.d_model)
        self.dropout = torch.nn.Dropout(p=dropout_rate)
        self.pe = None
        # A dummy (1, max_len) tensor is enough: extend_pe only reads its
        # time length, dtype and device.
        self.extend_pe(torch.tensor(0.0).expand(1, max_len))

    def extend_pe(self, x):
        """Reset the positional encodings.

        Reuses the cached table when it already covers ``x.size(1)`` positions
        (moving it to ``x``'s dtype/device if needed); otherwise rebuilds it.
        """
        if self.pe is not None and self.pe.size(1) >= x.size(1):
            if self.pe.dtype != x.dtype or self.pe.device != x.device:
                self.pe = self.pe.to(dtype=x.dtype, device=x.device)
            return

        pe = torch.zeros(x.size(1), self.d_model)
        if self.reverse:
            position = torch.arange(
                x.size(1) - 1, -1, -1.0, dtype=torch.float32
            ).unsqueeze(1)
        else:
            position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, self.d_model, 2, dtype=torch.float32)
            * -(math.log(10000.0) / self.d_model)
        )
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns,
        # so the cosine block is trimmed to fit.
        pe[:, 1::2] = torch.cos(angles)[:, : self.d_model // 2]
        pe = pe.unsqueeze(0)
        self.pe = pe.to(device=x.device, dtype=x.dtype)

    def forward(self, x: torch.Tensor):
        """Add positional encoding.

        Args:
            x (torch.Tensor): Input tensor (batch, time, `*`).

        Returns:
            torch.Tensor: Encoded tensor (batch, time, `*`), after dropout.

        """
        self.extend_pe(x)
        x = x * self.xscale + self.pe[:, : x.size(1)]
        # Apply the configured dropout; previously self.dropout was built but
        # never used, so dropout_rate had no effect.
        return self.dropout(x)


In [30]:
# Same hyper-parameters for both implementations so their outputs are comparable.
p = PPositionalEncoding(d_model=512, dropout_rate=0., max_len=5000, reverse=False)
t = PositionalEncoding(d_model=512, dropout_rate=0., max_len=5000, reverse=False)

In [31]:
# Show the module summaries, Paddle first, then PyTorch.
print(p, t, sep="\n")

PPositionalEncoding(
  (dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
)
PositionalEncoding(
  (dropout): Dropout(p=0.0, inplace=False)
)


In [32]:
# Switch both modules to evaluation mode before comparing their outputs.
# The last bare expression is what the cell displays.
p.eval()
t.eval()

PositionalEncoding(
  (dropout): Dropout(p=0.0, inplace=False)
)

In [33]:
# Seeded generator so the probe input (and every comparison that uses it)
# is identical on each Restart & Run All.
x = np.random.default_rng(0).standard_normal((1, 524, 512)).astype(np.float32)
# The same NumPy buffer feeds both frameworks.
px = paddle.to_tensor(x)
tx = torch.as_tensor(x)
print(px)
print(tx)

Tensor(shape=[1, 524, 512], dtype=float32, place=CPUPlace, stop_gradient=True,
       [[[ 1.71176422,  0.12073142,  0.97230834, ...,  0.53358287, -0.87280411, -0.32130694],
         [ 1.15618110,  1.56292164, -1.16467249, ...,  0.11234190, -0.94012439,  0.37479785],
         [-1.26758003,  1.14918125,  0.03674709, ...,  0.98526299,  0.52364922,  0.68854672],
         ...,
         [-0.49465108, -0.21408804, -1.06921721, ...,  1.38299918,  0.88839310, -0.09368137],
         [-1.20485342,  1.27089787,  0.79920202, ...,  0.06341895, -0.07299408, -0.47029728],
         [-0.47220495,  0.82722157,  0.66641206, ..., -1.73230171, -1.52152562, -0.45902300]]])
tensor([[[ 1.7118,  0.1207,  0.9723,  ...,  0.5336, -0.8728, -0.3213],
         [ 1.1562,  1.5629, -1.1647,  ...,  0.1123, -0.9401,  0.3748],
         [-1.2676,  1.1492,  0.0367,  ...,  0.9853,  0.5236,  0.6885],
         ...,
         [-0.4947, -0.2141, -1.0692,  ...,  1.3830,  0.8884, -0.0937],
         [-1.2049,  1.2709,  0.7992,  ..., 

In [34]:
# Run both encoders on the same input; the Paddle module returns a pair,
# whose first element is the encoded tensor.
po, to = p(px), t(tx)
print(np.allclose(po[0].numpy(), to.detach().numpy(), rtol=0, atol=1e-5))

False


In [35]:
# print(po[0].numpy())
# print('')
# print(to.detach().numpy())

In [36]:
# Compare the precomputed tables directly, using an absolute tolerance only.
print(np.allclose(p.pe.numpy(), t.pe.numpy(), rtol=0, atol=1e-4))

False


In [37]:
# Table shapes and input scale factors, Paddle then PyTorch for each.
print(p.pe.shape, t.pe.shape, p.xscale, t.xscale, sep="\n")

[1, 5000, 512]
torch.Size([1, 5000, 512])
22.627416997969522
22.627416997969522


In [38]:
# Dump the Paddle table as a NumPy array for visual comparison with the torch one.
print(p.pe.numpy())

[[[ 0.0000000e+00  1.0000000e+00  0.0000000e+00 ...  1.0000000e+00
    0.0000000e+00  1.0000000e+00]
  [ 8.4147102e-01  5.4030228e-01  8.2185620e-01 ...  1.0000000e+00
    1.0366333e-04  1.0000000e+00]
  [ 9.0929741e-01 -4.1614681e-01  9.3641472e-01 ...  1.0000000e+00
    2.0732667e-04  1.0000000e+00]
  ...
  [ 9.5625257e-01 -2.9254240e-01  9.3594456e-01 ...  8.5925674e-01
    4.9514842e-01  8.6880839e-01]
  [ 2.7049953e-01 -9.6272010e-01  8.2251388e-01 ...  8.5920179e-01
    4.9523848e-01  8.6875707e-01]
  [-6.6394955e-01 -7.4777740e-01  9.7326015e-04 ...  8.5914677e-01
    4.9532855e-01  8.6870575e-01]]]


In [39]:
# Dump the PyTorch table as a NumPy array for visual comparison with the Paddle one.
print(t.pe.numpy())

[[[ 0.00000000e+00  1.00000000e+00  0.00000000e+00 ...  1.00000000e+00
    0.00000000e+00  1.00000000e+00]
  [ 8.41470957e-01  5.40302336e-01  8.21856201e-01 ...  1.00000000e+00
    1.03663326e-04  1.00000000e+00]
  [ 9.09297407e-01 -4.16146845e-01  9.36414778e-01 ...  1.00000000e+00
    2.07326651e-04  1.00000000e+00]
  ...
  [ 9.56252575e-01 -2.92542398e-01  9.35944557e-01 ...  8.59256744e-01
    4.95148391e-01  8.68808448e-01]
  [ 2.70499527e-01 -9.62720096e-01  8.22513878e-01 ...  8.59201729e-01
    4.95238483e-01  8.68757069e-01]
  [-6.63949549e-01 -7.47777402e-01  1.46154105e-03 ...  8.59146774e-01
    4.95328546e-01  8.68705750e-01]]]


In [40]:
# Rebuild the sinusoidal table with plain torch ops, outside any module,
# so each intermediate (position, div_term, sin, cos) can be inspected.
d_model = 512
position = torch.arange(0, 5000, dtype=torch.float32)[:, None]  # (5000, 1)
div_term = torch.exp(
    -(math.log(10000.0) / d_model)
    * torch.arange(0, d_model, 2, dtype=torch.float32)
)
p1 = torch.sin(position * div_term)
p2 = torch.cos(position * div_term)
# Interleave sin (even columns) and cos (odd columns) into one (5000, d_model) table.
pe = torch.stack((p1, p2), dim=-1).reshape(5000, d_model)

In [42]:
# Same table rebuilt with NumPy, then copied into a Paddle tensor.
d_model = 512
nposition = np.arange(0, 5000, dtype=np.float32)[:, np.newaxis]  # (5000, 1)
ndiv_term = np.exp(
    -(math.log(10000.0) / d_model)
    * np.arange(0, d_model, 2, dtype=np.float32)
)
np1 = np.sin(nposition * ndiv_term)
np2 = np.cos(nposition * ndiv_term)
# Interleave sin/cos columns; cast to float64 to match a default np.zeros table.
npe = np.stack((np1, np2), axis=-1).reshape(5000, d_model).astype(np.float64)
print(type(np1))

# Paddle copy of the NumPy-computed values (even columns sin, odd columns cos).
ppe = paddle.zeros((5000, d_model))
ppe[:, 0::2] = paddle.to_tensor(np1)
ppe[:, 1::2] = paddle.to_tensor(np2)

<class 'numpy.ndarray'>


In [61]:
# Inputs to sin/cos: positions and frequency terms (default allclose tolerances).
print(np.allclose(nposition, position.numpy()))
print(np.allclose(ndiv_term, div_term.numpy()))

print("---cos/sin")
# NumPy sin vs torch sin, each computed from its own framework's angles.
print(np.allclose(np1, p1.numpy(), rtol=0, atol=1e-5))
# NumPy sin vs torch sin applied to the NumPy-computed angles.
print(np.allclose(np1, torch.sin(torch.from_numpy(nposition * ndiv_term)).numpy(), rtol=0, atol=1e-8))
print(np.allclose(np2, p2.numpy()))
print('---pe')
# Full tables: NumPy vs torch, then NumPy vs the Paddle copy of the NumPy values.
print(np.allclose(npe, pe.numpy()))
print(np.allclose(npe, ppe.numpy()))
print(ppe.dtype)

True
True
---cos/sin
False
False
False
---pe
False
True
paddle.float32


In [56]:
# Show which device the torch sine table lives on.
print(p1.device)

cpu
