# Sinusoidal Positional Encoding — Demo

**Paper:** Vaswani et al. (2017) — *Attention Is All You Need*  
**Formula:** `PE(pos, 2i) = sin(pos / 10000^(2i/d_model))`, `PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model))`  
**Properties:** No learnable params | Generalizes to unseen lengths | Static  
**Best for:** General-purpose baseline

In [None]:
import math
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Prefer CUDA when PyTorch reports it as available; otherwise run on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f'Device: {device}')

In [None]:
class SinusoidalPositionalEncoding(nn.Module):
    '''
    Sinusoidal PE — Vaswani et al. (2017)
    PE(pos, 2i)   = sin(pos / 10000^(2i/d_model))
    PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model))

    The table is computed once in ``__init__`` and stored as the
    non-learnable buffer ``pe`` of shape (1, max_seq_len, d_model).

    Args:
        d_model: Size of the feature (last) dimension. Odd values are
            supported; the final cosine column is simply omitted.
        max_seq_len: Number of positions precomputed, i.e. the longest
            sequence ``forward`` accepts.
        dropout: Dropout probability applied after the encoding is added.
    '''
    def __init__(self, d_model, max_seq_len=512, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_seq_len, d_model)
        position = torch.arange(0, max_seq_len, dtype=torch.float).unsqueeze(1)
        # 1 / 10000^(2i/d_model), evaluated through exp/log.
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        angles = position * div_term  # (max_seq_len, ceil(d_model / 2))
        pe[:, 0::2] = torch.sin(angles)
        # An odd d_model has one fewer odd-indexed column than even-indexed
        # ones, so trim the angles to the number of cosine columns.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))  # (1, max_seq_len, d_model)

    def forward(self, x):
        '''
        Add the positional encoding to ``x`` and apply dropout.

        Args:
            x: Tensor indexed as (batch, seq_len, d_model); dimension 1 is
                treated as the position axis.

        Returns:
            Tensor of the same shape as ``x``.

        Raises:
            ValueError: If the sequence is longer than the precomputed table.
        '''
        seq_len = x.size(1)
        table_len = self.pe.size(1)
        # Without this check a too-long input fails with an opaque broadcast
        # error, or (for a one-row table) silently reuses position 0.
        if seq_len > table_len:
            raise ValueError(
                f'Sequence length {seq_len} exceeds max_seq_len {table_len}'
            )
        x = x + self.pe[:, :seq_len, :]
        return self.dropout(x)

print('SinusoidalPositionalEncoding defined.')

In [None]:
# Sanity check — run a small batch through the layer and report its shapes
d_model, seq_len, batch = 64, 50, 4
pe_layer = SinusoidalPositionalEncoding(d_model=d_model, max_seq_len=512, dropout=0.0)

# All-zero input with dropout disabled, so the output is the encoding itself.
x = torch.zeros((batch, seq_len, d_model))
out = pe_layer(x)

# Total element count over everything the module registers as a parameter.
n_learnable = sum(param.numel() for param in pe_layer.parameters())

print(f'Input shape : {x.shape}')
print(f'Output shape: {out.shape}')  # should be same as input
print(f'PE buffer   : {pe_layer.pe.shape}')
print(f'Learnable params: {n_learnable}')  # should be 0

In [None]:
# Heatmap — plot the encoding over positions and dimensions
d_model, seq_len = 64, 60
pe_layer = SinusoidalPositionalEncoding(d_model, max_seq_len=512, dropout=0.0)

# Feeding zeros with dropout disabled makes the output equal the PE values.
dummy = torch.zeros(1, seq_len, d_model)
with torch.no_grad():
    encoded = pe_layer(dummy)

pe_matrix = encoded[0].numpy()  # (seq_len, d_model)

fig, axes = plt.subplots(1, 2, figsize=(16, 4))
heat_ax, line_ax = axes

# Left panel: full table, transposed so positions run along the x-axis.
im = heat_ax.imshow(pe_matrix.T, aspect='auto', cmap='RdYlBu', origin='lower')
heat_ax.set(
    xlabel='Position',
    ylabel='Dimension',
    title='Sinusoidal PE Heatmap (dim x position)',
)
plt.colorbar(im, ax=heat_ax)

# Right panel: one curve per dimension for the first 4 dimensions.
for i, curve in enumerate(pe_matrix.T[:4]):
    line_ax.plot(curve, label=f'dim {i}')
line_ax.set(
    xlabel='Position',
    ylabel='PE Value',
    title='Sinusoidal PE — First 4 Dimensions',
)
line_ax.legend()
line_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('demo_sinusoidal_heatmap.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved: demo_sinusoidal_heatmap.png')

In [None]:
# Summary — fixed recap of the encoding's properties, printed in one call
summary_lines = (
    '=== Sinusoidal PE Summary ===',
    'Learnable parameters : 0',
    'Generalizes to unseen lengths: YES',
    'Task-adaptive: NO (static)',
    'Complexity: O(n * d_model)',
    'Key paper: Vaswani et al. (2017)',
    '',  # blank separator line
    'Ready to use in experiments.',
    'Import: from PE.sinusoidal_pe import SinusoidalPositionalEncoding',
)
print('\n'.join(summary_lines))