In [84]:
import torch
from torch import nn
import numpy as np
import torch.optim as optim
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [125]:
# file = 'data/anna.txt'
file = 'data/kieu.txt'
with open(file, 'r', encoding='utf-8') as f:
    text = f.read()
vocab = list(set(text))
id2char = dict(enumerate(vocab))
char2id = {v: k for k, v in id2char.items()}
txt = np.array(list(char2id[c] for c in text))
txt

array([100,  13,  13, ...,  59,  57,  13])

In [118]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [128]:
def one_hot_encoding(arr, vocab_len):
    """
    Return a 3D tensor for each of training example. The first dimension is going to be batch
    """
    inp = np.asarray(arr)
    result = np.zeros((np.multiply(*inp.shape), vocab_len), dtype=np.float32)
    result[np.arange(result.shape[0]), inp.flatten()] = 1
    return result.reshape((*inp.shape, vocab_len))
# one_hot_encoding([[2, 3, 5]], 8)

def get_batch_data(data, bs, seqlen):
    """
    keep the right amout of data to be fed into the NN,
    return x, y of 
    """
    bn = len(data)//(bs*seqlen)
    dat = np.array(list(data[:(bn*bs*seqlen)]))
    dat = dat.reshape(bs, -1)
    for i in range((bn-1)*seqlen-1):
        x = dat[:, i: i+seqlen]
        y = dat[:, i+1: i+seqlen+1]
        yield x, y

# for x, y in get_batch_data(txt, 64, 100):
#     print(x.shape, y.shape)

seqlen = 100
bs = 16
n_hidden = 512
n_layers = 10
clip = 5
class Net(nn.Module):
    def __init__(self, n_hidden, n_layers):
        super().__init__()
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.lstm = nn.LSTM(len(vocab), n_hidden, n_layers, dropout=.2, batch_first=True)
        self.fc = nn.Linear(n_hidden, len(vocab))
        self.dropout = nn.Dropout(.2)
        
    def forward(self, x, hidden):
        x, hidden = self.lstm(x, hidden)
        x = self.dropout(x)
        x = x.contiguous().view(-1, self.n_hidden)
        x = self.fc(x)
        return x, hidden
    
    def init_hidden(self, bs):
        return (torch.zeros((self.n_layers, bs, self.n_hidden)).cuda(),
                torch.zeros(self.n_layers, bs, self.n_hidden).cuda())
    
net = Net(n_hidden, n_layers)
net.cuda()
n_epocs = 3
val_idx = int(len(txt)*.05)
val, trn = txt[:val_idx], txt[:val_idx]
optimizer = optim.Adam(net.parameters(), lr=1e-3)
crit = nn.CrossEntropyLoss()
counter = 0
for e in range(n_epocs):
    net.train()
    total_trn_loss = 0
    hidden = net.init_hidden(bs)
    for x, y in get_batch_data(trn, bs=bs, seqlen=seqlen):
        counter += 1
        x = torch.from_numpy(one_hot_encoding(x, len(vocab))).cuda()
        y = torch.from_numpy(y).cuda().reshape(bs*seqlen)
        out, hidden = net.forward(x, hidden)
        hidden = tuple([each.data for each in hidden])
        loss = crit(out, y)
        total_trn_loss += loss.data/bs
        net.zero_grad()
        loss.backward()
        # to prevent exploding gradient
        nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()
    net.eval()
    total_val_loss = 0
    with torch.no_grad():
        val_h = net.init_hidden(bs)
        for x, y in get_batch_data(val, bs=bs, seqlen=seqlen):
            x = torch.from_numpy(one_hot_encoding(x, len(vocab))).cuda()
            y = torch.from_numpy(y).cuda().view(bs*seqlen)
            val_h = tuple([each.data for each in val_h])
            out, val_h = net.forward(x, val_h)
            val_loss = crit(out, y)
            total_val_loss += val_loss.data/bs
    net.train()
    print(f"epoc {e}, trn_loss = {total_trn_loss}, val_loss = {total_val_loss}")

epoc 0, trn_loss = 45.32880783081055, val_loss = 45.557899475097656
epoc 1, trn_loss = 44.951168060302734, val_loss = 45.50582504272461
epoc 2, trn_loss = 44.94255447387695, val_loss = 45.48057556152344
