In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import datasets.date_format as dataset

In [2]:
# TODO: try a one-hot encoding instead of a learned embedding
# TODO: if the input is too long, the conv filters won't see the whole context
# TODO: if the input is too small, downsampling might shrink it to size 0

In [3]:
max_len = 12  # every source and target sequence is right-padded to this length


def _pad_to_max_len(seq):
  """Return a copy of `seq` right-padded with `dataset.pad` to `max_len` items.

  Raises:
    ValueError: if `seq` is already longer than `max_len`. A negative pad
      count would otherwise be a silent no-op and produce a ragged batch.
  """
  if len(seq) > max_len:
    raise ValueError('sequence of length {} exceeds max_len={}'.format(
        len(seq), max_len))
  return list(seq) + [dataset.pad] * (max_len - len(seq))


def batch_gen(batch_size):
  """Yield an endless stream of `(x, y)` batches as padded `LongTensor`s.

  Each batch is built from `batch_size` consecutive items of `dataset.gen()`;
  element 0 of an item is the source sequence and element 1 the target.
  Both are padded to `max_len`, so each tensor has shape
  `(batch_size, max_len)`.

  Args:
    batch_size: number of samples per yielded batch.

  Yields:
    Tuple `(x, y)` of `torch.LongTensor`.

  Raises:
    ValueError: if a source or target sequence is longer than `max_len`.
  """
  gen = dataset.gen()

  while True:
    samples = [next(gen) for _ in range(batch_size)]

    x = torch.LongTensor([_pad_to_max_len(sample[0]) for sample in samples])
    y = torch.LongTensor([_pad_to_max_len(sample[1]) for sample in samples])

    yield x, y

In [4]:
class Net(nn.Module):
  """Convolutional encoder/decoder over the embedded token grid.

  The `(batch, seq_len)` token ids are embedded into
  `(batch, seq_len, vocab_size)` and treated as a one-channel image.
  Five unpadded 3x3 convolutions shrink each spatial dimension by 2 per
  layer; five 3x3 transposed convolutions grow it back by 2 per layer, so
  the output grid has the same size as the input grid. Consequently both
  `seq_len` and `vocab_size` must be at least 11 for the last convolution
  to still have a 3x3 window.
  """

  def __init__(self):
    super(Net, self).__init__()

    # Embedding width equals the vocabulary size so that the last axis of
    # the reconstructed grid can be read directly as per-token class scores.
    self.embedding = nn.Embedding(dataset.vocab_size, dataset.vocab_size)

    # Encoder: 1 -> 16 channels, then four 16 -> 16 layers.
    self.conv1 = nn.Conv2d(1, 16, 3)
    self.bn1 = nn.BatchNorm2d(16)
    self.conv2 = nn.Conv2d(16, 16, 3)
    self.bn2 = nn.BatchNorm2d(16)
    self.conv3 = nn.Conv2d(16, 16, 3)
    self.bn3 = nn.BatchNorm2d(16)
    self.conv4 = nn.Conv2d(16, 16, 3)
    self.bn4 = nn.BatchNorm2d(16)
    self.conv5 = nn.Conv2d(16, 16, 3)
    self.bn5 = nn.BatchNorm2d(16)

    # Decoder: four 16 -> 16 layers, then 16 -> 1 channel.
    self.conv_transpose6 = nn.ConvTranspose2d(16, 16, 3)
    self.bn6 = nn.BatchNorm2d(16)
    self.conv_transpose7 = nn.ConvTranspose2d(16, 16, 3)
    self.bn7 = nn.BatchNorm2d(16)
    self.conv_transpose8 = nn.ConvTranspose2d(16, 16, 3)
    self.bn8 = nn.BatchNorm2d(16)
    self.conv_transpose9 = nn.ConvTranspose2d(16, 16, 3)
    self.bn9 = nn.BatchNorm2d(16)
    self.conv_transpose10 = nn.ConvTranspose2d(16, 1, 3)
    self.bn10 = nn.BatchNorm2d(1)

  def forward(self, x):
    """Map token ids to per-position log-probabilities.

    Args:
      x: integer tensor of token ids, shape `(batch, seq_len)`.

    Returns:
      Tensor of shape `(batch, seq_len, vocab_size)` holding
      log-probabilities over the last dimension.
    """
    x = self.embedding(x)
    x = x.unsqueeze(1)  # add the channel axis expected by Conv2d

    x = F.relu(self.bn1(self.conv1(x)))
    x = F.relu(self.bn2(self.conv2(x)))
    x = F.relu(self.bn3(self.conv3(x)))
    x = F.relu(self.bn4(self.conv4(x)))
    x = F.relu(self.bn5(self.conv5(x)))

    x = F.relu(self.bn6(self.conv_transpose6(x)))
    x = F.relu(self.bn7(self.conv_transpose7(x)))
    x = F.relu(self.bn8(self.conv_transpose8(x)))
    x = F.relu(self.bn9(self.conv_transpose9(x)))
    # NOTE(review): the final ReLU makes the scores fed to log_softmax
    # non-negative; kept unchanged to preserve the model's behaviour.
    x = F.relu(self.bn10(self.conv_transpose10(x)))

    # Drop only the channel axis. A bare squeeze() would also drop the batch
    # axis when the batch holds a single sample.
    x = x.squeeze(1)
    x = F.log_softmax(x, dim=-1)
    return x

# Instantiate the network and print its layer summary.
model = Net()
print(model)

Net(
  (embedding): Embedding(40, 40)
  (conv1): Conv2d (1, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d (16, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
  (conv3): Conv2d (16, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
  (conv4): Conv2d (16, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
  (conv5): Conv2d (16, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
  (conv_transpose6): ConvTranspose2d (16, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
  (conv_transpose7): ConvTranspose2d (16, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn7): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
  (conv_transpose8): ConvTranspose2d (16, 16, kern

In [5]:
# Training configuration.
learning_rate = 0.001
batch_size = 32
steps = 2000
log_interval = 200  # evaluate and print a decoded sample every N steps

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
gen = batch_gen(batch_size)
# Evaluation uses a second batch_gen stream with 10x larger batches.
test_gen = batch_gen(batch_size * 10)

for i in range(steps + 1):
  model.train()

  x, y = next(gen)
  optimizer.zero_grad()
  y_hat = model(x)
  # Flatten to (batch * positions, classes) / (batch * positions,) so that
  # every sequence position is scored as an independent classification.
  y_hat = y_hat.view(-1, y_hat.size(-1))
  y = y.view(-1)
  loss = F.nll_loss(y_hat, y)
  loss.backward()
  optimizer.step()

  if i % log_interval == 0:
    model.eval()

    # no_grad replaces the removed `volatile=True` flag: no autograd graph
    # is built during evaluation.
    with torch.no_grad():
      x, y = next(test_gen)
      y_hat = model(x)
      y_hat = y_hat.view(-1, y_hat.size(-1))
      y = y.view(-1)
      # Mean negative log-likelihood over all positions; padding positions
      # are included because no ignore_index is passed.
      test_loss = F.nll_loss(y_hat, y)
      pred = y_hat.max(1)[1]  # index of the max log-probability
      accuracy = (pred == y).float().mean() * 100

    # .item() extracts the Python scalar; `.data[0]` fails on 0-dim tensors.
    print('step: {}, loss: {:.4f}, accuracy: {:.2f}%'.format(
        i, test_loss.item(), accuracy.item()))

    # y and pred are flattened, so the first max_len entries belong to the
    # first sample of the batch.
    print('\tsample:    {}\n\ttrue:      {}\n\tpredicted: {}'.format(
      dataset.decode(x[0].tolist()),
      dataset.decode(y[:max_len].tolist()),
      dataset.decode(pred[:max_len].tolist())))

step: 0, loss: 3.6888, accuracy: 11.98%
	sample:    11/6/84<p><p><p><p><p>
	true:      11 jun 1984</s>
	predicted: <p> dc     </s>0a
step: 200, loss: 0.4794, accuracy: 90.31%
	sample:    19/10/53<p><p><p><p>
	true:      19 oct 1953</s>
	predicted: 14 oct 1965</s>
step: 400, loss: 0.1642, accuracy: 96.90%
	sample:    2/10/87<p><p><p><p><p>
	true:      2 oct 1987</s><p>
	predicted: 2 oct 1987</s><p>
step: 600, loss: 0.0776, accuracy: 98.75%
	sample:    18/1/62<p><p><p><p><p>
	true:      18 jan 1962</s>
	predicted: 18 jan 1962</s>
step: 800, loss: 0.0333, accuracy: 99.82%
	sample:    1/7/91<p><p><p><p><p><p>
	true:      1 jul 1991</s><p>
	predicted: 1 jul 1991</s><p>
step: 1000, loss: 0.0157, accuracy: 99.97%
	sample:    26/8/27<p><p><p><p><p>
	true:      26 aug 1927</s>
	predicted: 26 aug 1927</s>
step: 1200, loss: 0.0097, accuracy: 100.00%
	sample:    2/4/14<p><p><p><p><p><p>
	true:      2 apr 1914</s><p>
	predicted: 2 apr 1914</s><p>
step: 1400, loss: 0.0068, accuracy: 100.00%
	sample: