In [None]:
%matplotlib inline
import numpy as np
import matplotlib
import sys

In [None]:
# import pytorch modules
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data

In [None]:
# Detect whether a CUDA-capable GPU is usable. Later cells branch on this
# flag to decide whether tensors and the model are moved to the GPU.
cuda = torch.cuda.is_available()

In [None]:
# Load the training set from a NumPy file in the current working directory.
# NOTE(review): later cells treat dim 1 as time and read 3 features from the
# last dim, so this is presumably (num_sequences, timesteps, 3) — confirm.
train_data = np.load('train_data.npy')
# Shown as the cell output: a quick sanity check of the array dimensions.
train_data.shape

In [None]:
# Mini-batch size used by the training loop.
bsize = 20

# torch.as_tensor accepts both an ndarray and a tensor, so re-running this cell
# on a live kernel is safe (torch.from_numpy would raise on the second run).
# float32 matches the dtype of the model parameters.
train_data = torch.as_tensor(train_data, dtype=torch.float32)
if cuda:
    # Tensor.cuda() is NOT in-place: it returns a copy on the GPU, so the
    # result has to be assigned back or the data silently stays on the CPU.
    train_data = train_data.cuda()

# Iterating the loader yields shuffled batches of `bsize` sequences
# (the final batch may be smaller).
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=bsize, shuffle=True)

In [None]:
# hyperparameters
timesteps = 300      # length of the input window built for each training sequence
num_clusters = 20    # number of mixture components the model predicts per step
cell_size = 400      # hidden size of each LSTM layer
nlayers = 2          # number of stacked LSTM layers

In [None]:
# stacked lstm with mixture of gaussian parameters as outputs
class LSTM(nn.Module):
    """Stacked LSTM whose per-step output parameterises a 2-D Gaussian mixture.

    For every time step the network emits, for each of K mixture components,
    a weight, two means, two log standard deviations and a correlation, plus
    one extra scalar that is squashed into a probability.

    Args:
        input_size: number of features per input step (default 3).
        hidden_size: hidden size of each LSTM layer; defaults to the
            notebook-level ``cell_size``.
        num_layers: number of stacked LSTM layers; defaults to the
            notebook-level ``nlayers``.
        n_clusters: number of mixture components K; defaults to the
            notebook-level ``num_clusters``.

    Calling ``LSTM()`` with no arguments builds exactly the network described
    by the notebook hyperparameters.
    """

    def __init__(self, input_size=3, hidden_size=None, num_layers=None, n_clusters=None):
        super(LSTM, self).__init__()
        # Globals are only looked up when an argument is omitted, so the class
        # is usable stand-alone as long as all sizes are passed explicitly.
        hidden_size = cell_size if hidden_size is None else hidden_size
        num_layers = nlayers if num_layers is None else num_layers
        self.num_clusters = num_clusters if n_clusters is None else n_clusters

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        # 6 parameter groups of K values each, plus 1 scalar for `end`.
        self.linear1 = nn.Linear(hidden_size, 1 + self.num_clusters * 6)
        self.tanh = nn.Tanh()

    def forward(self, x, prev):
        """Run the network over a batch of sequences.

        Args:
            x: input of shape (batch, time, input_size) (the LSTM is
                batch-first).
            prev: ``(h_0, c_0)`` initial LSTM state, or ``None`` to let
                ``nn.LSTM`` start from zeros.

        Returns:
            Tuple ``(end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, p,
            (h_n, c_n))``. ``end`` has shape (batch, time, 1) with values in
            (0, 1); ``weights`` is a softmax over the K components; ``p`` is
            tanh-squashed into (-1, 1); the remaining mixture tensors are
            unconstrained. All mixture tensors have shape (batch, time, K).
            ``(h_n, c_n)`` is the final LSTM state.
        """
        k = self.num_clusters
        h, (h_n, c_n) = self.lstm(x, prev)
        params = self.linear1(h)

        # The linear output is laid out as
        # [weights | mu_1 | mu_2 | log_sigma_1 | log_sigma_2 | p | end].
        weights = F.softmax(params.narrow(-1, 0, k), dim=-1)
        mu_1 = params.narrow(-1, k, k)
        mu_2 = params.narrow(-1, 2 * k, k)
        log_sigma_1 = params.narrow(-1, 3 * k, k)
        log_sigma_2 = params.narrow(-1, 4 * k, k)
        p = self.tanh(params.narrow(-1, 5 * k, k))
        # torch.sigmoid replaces the deprecated F.sigmoid.
        end = torch.sigmoid(params.narrow(-1, 6 * k, 1))

        return end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, p, (h_n, c_n)

In [None]:
# Build the network with the notebook hyperparameters and, when a GPU is
# available, move its parameters there (Module.cuda() returns the module itself).
model = LSTM()
if cuda:
    model = model.cuda()

In [None]:
# # test forward pass
# test_batch = Variable(torch.from_numpy(train_data[:20]).narrow(1,0,300),requires_grad=False)
# e, w, m_1, m_2, s_1, s_2, p, prev = model(test_batch)
# for _ in [e,w,m_1,m_2,s_1, s_2, p]:
#     print(_.size())

In [None]:
def log_likelihood(end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, p, x):
    """Summed log-likelihood of targets under a Bernoulli + 2-D Gaussian mixture.

    Args:
        end: Bernoulli probability for the first target feature; broadcastable
            against ``x[..., 0:1]``.
        weights: mixture weights over the last dim (K components).
        mu_1, mu_2: component means for the second and third target features.
        log_sigma_1, log_sigma_2: component log standard deviations.
        p: component correlations; must satisfy ``|p| < 1``.
        x: targets whose last dim holds ``[x_0, x_1, x_2]``; ``x_0`` is the
            Bernoulli target, ``(x_1, x_2)`` the Gaussian-mixture target.

    Returns:
        Scalar tensor: the Bernoulli log-likelihood plus the mixture
        log-likelihood, summed over every element. The constant
        ``-log(2*pi)`` of the bivariate Gaussian is omitted, which shifts the
        value but not its gradients.
    """
    const = 1E-20  # keeps logs and divisions away from exact zero

    x_0 = x.narrow(-1, 0, 1)
    x_1 = x.narrow(-1, 1, 1)
    x_2 = x.narrow(-1, 2, 1)

    # Bernoulli term; `const` avoids log(0) = -inf when `end` saturates.
    end_loglik = (x_0 * end + (1 - x_0) * (1 - end) + const).log()

    # Quadratic form of the bivariate Gaussian (broadcast over components).
    dx_1 = x_1 - mu_1
    dx_2 = x_2 - mu_2
    z = dx_1 ** 2 / (log_sigma_1.exp() ** 2 + const) \
        + (dx_2 / (log_sigma_2.exp() + const)) ** 2 \
        - 2 * p * dx_1 * dx_2 / ((log_sigma_1 + log_sigma_2).exp() + const)

    one_minus_p2 = 1 - p ** 2
    log_norm = -log_sigma_1 - log_sigma_2 - 0.5 * one_minus_p2.log()
    # Divide directly instead of exp(log(z) - log(...)): the log/exp round
    # trip yields NaN gradients when z == 0 and a NaN value when rounding
    # makes z slightly negative.
    exponent = z / (2 * one_minus_p2)

    mog_lik = (weights.log() + (log_norm - exponent)).exp().sum(dim=-1)
    return end_loglik.sum() + (mog_lik + const).log().sum()

In [None]:
# training (written for PyTorch >= 0.4: plain tensors, loss.item())
epochs = 3
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Follow the model's device/dtype rather than the `cuda` flag, so the loop
# works whether or not the dataset tensor was moved to the GPU beforehand.
ref_param = next(model.parameters())

train_loss = 0
for epoch in range(epochs):
    # Reset every epoch; otherwise the reported "Average loss" of later epochs
    # would also contain the losses accumulated during all earlier epochs.
    train_loss = 0.0
    for batch_idx, data in enumerate(train_loader):
        data = data.to(device=ref_param.device, dtype=ref_param.dtype)
        batch = data.size(0)  # the final batch may be smaller than bsize

        # Train on at most `timesteps` steps and keep input and target aligned.
        seq_len = min(timesteps, data.size(1))
        y = data.narrow(1, 0, seq_len)

        # Teacher forcing: the input at step t is the target at step t-1,
        # with an all-zero point fed at t = 0.
        zero_tensor = data.new_zeros((batch, 1, data.size(2)))
        x = torch.cat([zero_tensor, y.narrow(1, 0, seq_len - 1)], 1)

        # nn.LSTM expects the state as (num_layers, batch, hidden_size), so it
        # has to be sized for the current batch.
        h_init = data.new_zeros((nlayers, batch, cell_size))
        c_init = data.new_zeros((nlayers, batch, cell_size))

        optimizer.zero_grad()
        end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, p, prev = model(x, (h_init, c_init))
        loss = -log_likelihood(end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, p, y)
        loss.backward()
        optimizer.step()

        # .item() extracts the Python float; indexing a 0-dim tensor with [0]
        # raises on current PyTorch.
        batch_loss = loss.item()
        train_loss += batch_loss
        if batch_idx % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch+1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                batch_loss / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch+1, train_loss / len(train_loader.dataset)))

In [None]:
def generate_unconditionally(steps=300, random_seed=1, net=None):
    """Generate a sequence by feeding the model its own output, step by step.

    At each step a mixture component is drawn according to the predicted
    weights, and the emitted point is ``[round(end), mu_1[k], mu_2[k]]`` for
    the chosen component ``k``. This is a simplified scheme: the component
    mean is emitted rather than a sample from the component's Gaussian, and
    ``end`` is rounded rather than sampled.

    Args:
        steps: number of points to generate.
        random_seed: seeds both torch and the NumPy generator used for the
            component draw, so equal seeds give equal sequences.
        net: model to sample from, called as ``net(x, prev)``; defaults to the
            notebook-level ``model``.

    Returns:
        ``np.ndarray`` of shape ``(steps, 3)``.
    """
    net = model if net is None else net

    torch.manual_seed(random_seed)
    # The component draw uses NumPy, which torch.manual_seed does not affect;
    # a dedicated generator makes `random_seed` actually control the output
    # without touching NumPy's global random state.
    rng = np.random.RandomState(random_seed)

    # Create inputs where the model lives (CPU if the model has no parameters).
    ref_param = next(net.parameters(), None)
    device = ref_param.device if ref_param is not None else torch.device('cpu')
    dtype = ref_param.dtype if ref_param is not None else torch.float32

    x = torch.zeros((1, 1, 3), device=device, dtype=dtype)
    prev = None  # nn.LSTM starts from an all-zero state when given None

    record = []
    # No gradients are needed for sampling; without no_grad the autograd graph
    # would keep growing through `prev` across all steps.
    with torch.no_grad():
        for _ in range(steps):
            end, weights, mu_1, mu_2, log_sigma_1, log_sigma_2, p, prev = net(x, prev)

            # Copy to host before .numpy(), and renormalise in float64:
            # float32 softmax output can miss NumPy's sum-to-1 tolerance.
            probs = weights[0, 0].cpu().numpy().astype(np.float64)
            probs = probs / probs.sum()
            index = rng.choice(len(probs), p=probs)

            out = np.array([
                np.round(end[0, 0, 0].item()),
                mu_1[0, 0, index].item(),
                mu_2[0, 0, index].item(),
            ])
            record.append(out)

            # The emitted point becomes the next input.
            x = torch.from_numpy(out).to(device=device, dtype=dtype).view(1, 1, 3)
    return np.array(record)

In [None]:
# Scratch check of the component draw, using the `weights` left over from the
# last training batch (the training cell must have been run first).
# Copy to host before .numpy() and renormalise in float64, since float32
# softmax output can miss NumPy's sum-to-1 tolerance.
_probs = weights.detach()[0, 0].cpu().numpy().astype(np.float64)
np.random.choice(len(_probs), p=_probs / _probs.sum())

In [None]:
# Generate one sequence with the function's defaults (300 steps, seed 1).
s = generate_unconditionally()

In [None]:
# Put the parent directory first on the import path so `utils` can be imported
# from there (presumably a project-local module — confirm its location).
sys.path.insert(0,'..')
from utils import plot_stroke
# NOTE(review): plot_stroke is defined outside this notebook; it is assumed to
# draw the generated (steps, 3) array — confirm against utils.
plot_stroke(s)