# **Mini Transformer for Dialogue Generation**

## **1. Import Libraries & Set Seed**

In [None]:
import math, random
import numpy as np
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset
import evaluate

# Reproducibility: seed each random number generator used in this notebook
# (Python's `random`, NumPy and PyTorch) with the same value.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)


## **2. Load & Preprocess Data**

In [None]:
# Load DailyDialog via the `datasets` library.
raw = load_dataset('li2017dailydialog/daily_dialog')  # splits: train/validation/test

def preprocess(example):
    """Flatten one dialogue into a single token list.

    The result starts with ``<bos>``, contains the whitespace-split tokens of
    every utterance with ``<sep>`` between consecutive utterances, and ends
    with ``<eos>``.

    Args:
        example: Mapping with a ``'dialog'`` entry holding the utterance
            strings of one dialogue.

    Returns:
        ``{'tokens': [...]}`` with the flattened token list.
    """
    toks = ['<bos>']
    for utt in example['dialog']:
        toks.extend(utt.split())
        toks.append('<sep>')

    if len(toks) > 1:
        # The separator after the last utterance becomes the end marker.
        toks[-1] = '<eos>'
    else:
        # Empty dialogue: keep <bos> instead of overwriting it with <eos>.
        toks.append('<eos>')
    return {'tokens': toks}

# Take the subset first so `preprocess` only runs on the rows that are kept;
# mapping the whole split and then discarding most of it is wasted work.
train_raw = raw['train'].select(range(5000)).map(preprocess)
val_raw   = raw['validation'].select(range(1000)).map(preprocess)
print(f"Train: {len(train_raw)}, Val: {len(val_raw)}")

## **3. PyTorch Dataset & DataLoader**

In [None]:
class DialogDataset(Dataset):
    """Map-style dataset producing next-token (input, target) id pairs.

    Args:
        data: Indexable collection whose items expose a ``'tokens'`` list.
        toi: Token-to-id mapping; must contain ``'<unk>'`` and ``'<pad>'``.
        max_len: Fixed length every encoded sequence is cut or padded to.
    """

    def __init__(self, data, toi, max_len=64):
        self.data = data
        self.toi = toi
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def encode(self, toks):
        """Convert tokens to exactly ``max_len`` ids.

        Unknown tokens map to the ``'<unk>'`` id, long sequences are cut on
        the right, and short ones are right-padded with the ``'<pad>'`` id.
        """
        ids = [self.toi.get(tok, self.toi['<unk>']) for tok in toks]
        if len(ids) > self.max_len:
            return ids[:self.max_len]
        shortfall = self.max_len - len(ids)
        return ids + [self.toi['<pad>']] * shortfall

    def __getitem__(self, idx):
        """Return ``(x, y)`` where ``y`` is ``x`` shifted one position left."""
        ids = self.encode(self.data[idx]['tokens'])
        inputs = torch.tensor(ids[:-1], dtype=torch.long)
        targets = torch.tensor(ids[1:], dtype=torch.long)
        return inputs, targets

# Wrap both splits in DialogDataset (default max_len), sharing the
# token-to-id mapping `toi`.
train_ds = DialogDataset(data=train_raw, toi=toi)
val_ds = DialogDataset(data=val_raw, toi=toi)

# Batches of 32; only the training loader shuffles.
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=32)


## **6. Positional Encoding Variants**

### 6.1 Sinusoidal

In [None]:
class SinusoidalPE(nn.Module):
    """Fixed sinusoidal positional encoding that is added to its input.

    Even feature indices hold ``sin(pos / 10000**(2i/d_model))`` and odd
    indices hold the matching ``cos`` term.

    Args:
        d_model: Size of the last (feature) dimension of the input.
        max_len: Number of positions precomputed; ``forward`` slices the
            table to the input's length along dim 1.
    """

    def __init__(self, d_model, max_len=64):
        super().__init__()
        pos = torch.arange(max_len).unsqueeze(1)
        # ceil(d_model / 2) frequencies, so an odd d_model has enough columns
        # for the sine half; the cosine half uses the first floor(d_model / 2).
        i = torch.arange((d_model + 1) // 2).unsqueeze(0)
        angles = pos / (10000 ** (2 * i / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        # Register as a buffer so the table follows the module through
        # .to()/.cuda()/.half() instead of being pinned to a global device.
        # persistent=False keeps it out of state_dict, as it was before.
        self.register_buffer('pe', pe.unsqueeze(0), persistent=False)

    def forward(self, x):
        """Return ``x`` plus the encoding for its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1), :]

### 6.2 Learnable

class LearnablePE(nn.Module):
    """Learned positional embedding table indexed by position.

    Args:
        max_len: Number of distinct positions in the table.
        d_model: Width of each position vector.

    NOTE(review): ``forward`` returns only the position vectors and does not
    add them to ``x``, unlike ``SinusoidalPE.forward`` which returns
    ``x + pe``. Confirm that callers add the result to their input.
    """

    def __init__(self, max_len, d_model):
        super().__init__()
        self.embed = nn.Embedding(max_len, d_model)

    def forward(self, x):
        """Return embeddings for positions ``0 .. x.size(1) - 1``.

        Only the length of dim 1 and the device of ``x`` are used; the
        result has a leading dimension of 1.
        """
        seq_len = x.size(1)
        positions = torch.arange(seq_len, device=x.device)
        return self.embed(positions.unsqueeze(0))
