In [1]:
cd ..

/Users/vijay/vijaygkd/char-rnn


In [16]:
%load_ext autoreload
%autoreload 2

# Imports

In [767]:
import numpy as np

import torch
import torch.nn as nn
from torchinfo import summary

In [768]:
import pprint
pp = pprint.PrettyPrinter()

# Model Exp

## 1. LSTM

In [769]:
from model.lstm import LSTM_CharLM

In [770]:
# Build the character-level LSTM language model.
# NOTE(review): 128 looks like the vocabulary size -- the summary below reports
# a 1,536-parameter embedding (128 x 12) and 128 output scores per position.
# Confirm against model.lstm.
m = LSTM_CharLM(128)

# Per-layer shape / parameter report for a dummy integer input of shape (1, 50).
summary(m, input_size=(1,50), dtypes=[torch.long])



Layer (type:depth-idx)                   Output Shape              Param #
LSTM_CharLM                              [1, 50, 128]              --
├─Embedding: 1-1                         [1, 50, 12]               1,536
├─LSTM: 1-2                              [1, 50, 128]              72,704
├─Linear: 1-3                            [1, 50, 128]              16,512
Total params: 90,752
Trainable params: 90,752
Non-trainable params: 0
Total mult-adds (M): 3.65
Input size (MB): 0.00
Forward/backward pass size (MB): 0.11
Params size (MB): 0.36
Estimated Total Size (MB): 0.47

In [771]:
def process_text(x):
    """Turn a string into next-character (input, target) code-point lists.

    The input sequence is the text prefixed with the '$' marker and the target
    sequence is the text followed by '$', so ``target[i]`` is the character
    that comes right after ``input[i]``.

    Args:
        x: text of a single document.

    Returns:
        Tuple ``(x_vec, y_vec)`` of ``ord`` value lists, each with
        ``len(x) + 1`` entries.
    """
    x = '$' + x
    y = x[1:] + '$'

    x_vec = [ord(c) for c in x]
    y_vec = [ord(c) for c in y]
    return x_vec, y_vec


def process_corpus(corpus, seq_len):
    """Encode a list of documents as padded next-character tensors.

    Each document is encoded with ``process_text`` and written into one row.
    Rows shorter than ``seq_len`` are right-padded with the code of a space;
    longer rows are clipped to ``seq_len`` entries.

    Args:
        corpus: list of document strings.
        seq_len: number of positions per row.

    Returns:
        Tuple ``(X, Y)`` of int64 tensors of shape ``(len(corpus), seq_len)``.
    """
    pad_idx = ord(' ')
    data_shape = (len(corpus), seq_len)
    # Pin int64: numpy's default integer width is platform dependent, and the
    # targets are later used as class indices by the cross-entropy loss.
    x_arr = np.full(data_shape, pad_idx, dtype=np.int64)
    y_arr = np.full(data_shape, pad_idx, dtype=np.int64)

    for i, doc in enumerate(corpus):
        x_vec, y_vec = process_text(doc[:seq_len])
        # process_text returns len(doc) + 1 codes because of the '$' marker, so
        # a document with seq_len or more characters would overflow the row
        # (and raise a broadcast ValueError). Clip the encoded vectors instead.
        x_vec = x_vec[:seq_len]
        y_vec = y_vec[:seq_len]
        x_arr[i, :len(x_vec)] = x_vec
        y_arr[i, :len(y_vec)] = y_vec

    X = torch.tensor(x_arr)
    Y = torch.tensor(y_arr)

    return X, Y

In [772]:
# Data: a one-document toy corpus (the other two sentences are commented out).
corpus = [
    "This text is written by Vijay Gaikwad :)",
    # "Ruchira is my beautiful wife.",
    # "We both happily live in Seattle."
]


# process_text adds one '$' marker to every sequence, so a row needs
# len(doc) + 1 positions; seq_len is sized from the first document.
X, Y = process_corpus(corpus, seq_len=len(corpus[0])+1)

print(X.shape)

torch.Size([1, 41])


In [773]:
# Loss: cross-entropy between the model's per-position scores and the target
# character codes.
xe = nn.CrossEntropyLoss()

# Optimizer: plain SGD over the parameters of the `m` that exists when this
# cell runs.
# NOTE(review): an optimizer only updates the parameters it was constructed
# with, so it has to be rebuilt whenever `m` is replaced (a later cell does
# `m = LSTM_CharLM(128)` again).
learning_rate = 0.01
optimizer = torch.optim.SGD(m.parameters(), lr=learning_rate)

def test_model(model, X):
    model.eval()
    y_hat = model(X)
    y_pred = y_hat.argmax(dim=-1).tolist()
    y_ans = []
    for ys in y_pred:
        yl = [chr(y) for y in ys]
        y_ans.append(''.join(yl))
    return y_ans

def test_single(model, x):
    model.eval()
    y_hat = model(x)
    y_pred = y_hat.argmax(dim=-1).tolist()
    yl = [chr(y) for y in y_pred]
    y_ans = ''.join(yl)
    return y_ans


def train_epoch(model, loss_fn, opt, x, y):
    """Run one optimisation step on a batched input and return its loss.

    The model output is reordered from (batch, seq_len, no_classes) to
    (batch, no_classes, seq_len) before it is handed to ``loss_fn``; that is
    the axis order the cross-entropy loss class requires.

    Args:
        model: network to update; switched to training mode.
        loss_fn: callable taking (reordered output, targets).
        opt: optimizer whose gradients are cleared and which is stepped once.
        x: batched input.
        y: targets for ``x``.

    Returns:
        The loss tensor computed for this step.
    """
    model.train()
    opt.zero_grad()
    # Class scores have to sit on dim 1 for the loss, hence the axis swap.
    scores = model(x).permute(0, 2, 1)
    batch_loss = loss_fn(scores, y)
    batch_loss.backward()
    opt.step()
    return batch_loss

def train_single(model, loss_fn, opt, x, y):
    """Run one optimisation step on a single sequence and return its loss.

    The model output is passed to ``loss_fn`` exactly as produced, with no
    axis reordering.

    Args:
        model: network to update; switched to training mode.
        loss_fn: callable taking (model output, targets).
        opt: optimizer whose gradients are cleared and which is stepped once.
        x: input sequence.
        y: targets for ``x``.

    Returns:
        The loss tensor computed for this step.
    """
    model.train()
    opt.zero_grad()
    step_loss = loss_fn(model(x), y)
    step_loss.backward()
    opt.step()
    return step_loss

def train(model, epochs=10000, loss_fn=None, opt=None, x=None, y=None, log_every=1000):
    """Fit ``model`` on one sequence, reporting loss and a greedy decode periodically.

    Args:
        model: network to train.
        epochs: index of the last step. ``epochs + 1`` steps are run, so the
            final step is reported too when ``epochs`` is a multiple of
            ``log_every``.
        loss_fn: loss to minimise; defaults to the notebook-level ``xe``.
        opt: optimizer to step. When omitted, an SGD optimizer is built from
            ``model.parameters()`` with the notebook-level ``learning_rate``.
        x: input sequence; defaults to ``X[0]``.
        y: target sequence; defaults to ``Y[0]``.
        log_every: report every this many steps (positive int).

    Returns:
        None. Progress is printed.
    """
    if loss_fn is None:
        loss_fn = xe
    if opt is None:
        # An optimizer only updates the parameters it was built with. Stepping
        # the notebook-level `optimizer` here would leave `model` untouched
        # whenever `model` is not the instance that optimizer was created for
        # (e.g. after `m` is re-created), so bind one to this model instead.
        opt = torch.optim.SGD(model.parameters(), lr=learning_rate)
    if x is None:
        x = X[0]
    if y is None:
        y = Y[0]

    # NOTE(review): training uses a single unbatched sequence. Outputs recorded
    # in this notebook show m(X) and m(X[0]) disagreeing, which suggests the
    # model may not treat dim 0 as the batch axis -- confirm in model.lstm
    # before switching to the batched train_epoch.
    for epoch in range(epochs+1):
        loss = train_single(model, loss_fn, opt, x, y)
        if epoch % log_every == 0:
            y_ans = test_single(model, x)
            print(f'Epoch: {epoch} | Loss: {loss}')
            print(y_ans)
            print('-----------------')
        

In [774]:
# Start the training run from freshly initialised weights.
m = LSTM_CharLM(128)
# The optimizer created earlier still references the previous instance's
# parameters; rebuild it so optimisation steps actually update this model.
optimizer = torch.optim.SGD(m.parameters(), lr=learning_rate)


In [None]:
train(m, epochs=5000)

Epoch: 0 | Loss: 4.878656387329102
cc''E'c'M'!GE'0'00'0'''<0N':'Br
-----------------


In [711]:
test_model(m, X)

['This ieTheis irhseeTtiT iTsa  ii s r  iT$']

In [717]:
test_single(m, X[0])

'This text is written by Vijay Gaikwad :)$'

In [713]:
    # Scratch check: mirrors what test_model(m, X) computes, written out inline
    # on the batched X, and prints the decoded strings.
    m.eval()
    y_hat = m(X)
    y_pred = y_hat.argmax(dim=-1).tolist()
    y_ans = []
    for ys in y_pred:
        yl = [chr(y) for y in ys]
        y_ans.append(''.join(yl))
    print(y_ans)

['This ieTheis irhseeTtiT iTsa  ii s r  iT$']


In [733]:
m(X).argmax(dim=-1)

tensor([[ 84, 104, 105, 115,  32, 105, 101,  84, 104, 101, 105, 115,  32, 105,
         114, 104, 115, 101, 101,  84, 116, 105,  84,  32, 105,  84, 115,  97,
          32,  32, 105, 105,  32, 115,  32, 114,  32,  32, 105,  84,  36]])

In [734]:
m(X[0]).argmax(dim=-1)

tensor([ 84, 104, 105, 115,  32, 116, 101, 120, 116,  32, 105, 115,  32, 119,
        114, 105, 116, 116, 101, 110,  32,  98, 121,  32,  86, 105, 106,  97,
        121,  32,  71,  97, 105, 107, 119,  97, 100,  32,  58,  41,  36])

In [729]:
m(X[0]).argmax(dim=-1) == m(X).argmax(dim=-1)

tensor([[ True,  True,  True,  True,  True, False,  True, False, False, False,
          True,  True,  True, False,  True, False, False, False,  True, False,
         False, False, False,  True, False, False, False,  True, False,  True,
         False, False, False, False, False, False, False,  True, False, False,
          True]])

In [714]:
y = m(X[0])

In [715]:
yans = y.argmax(dim=-1)

In [716]:
''.join([chr(y) for y in yans.tolist()])

'This text is written by Vijay Gaikwad :)$'

In [688]:
Y[0]

tensor([ 84, 104, 105, 115,  32, 116, 101, 120, 116,  32, 105, 115,  32, 119,
        114, 105, 116, 116, 101, 110,  32,  98, 121,  32,  86, 105, 106,  97,
        121,  32,  71,  97, 105, 107, 119,  97, 100,  32,  58,  41,  36])

In [689]:
''.join([chr(y) for y in Y[0].tolist()])

'This text is written by Vijay Gaikwad :)$'

In [691]:
m(X).argmax(dim=-1)

tensor([[ 84, 104, 105, 115,  32, 105, 101, 120, 116, 101, 105, 115,  32, 105,
          32, 105, 115, 101, 101, 120,  32, 105, 104,  32, 105, 115, 115,  97,
         105,  32, 105, 105, 105, 115, 115,  32, 105, 104, 105,  84,  32]])

In [592]:
op = m(X)

In [585]:
op.argmax(dim=-1)

tensor([[ 84, 104, 105, 115,  32, 116, 101, 110, 116, 101, 116, 115,  32, 116,
          97, 105, 115, 101, 101, 110,  32, 116, 121,  32, 116, 105, 115,  97,
         121,  32, 116,  97, 121, 115, 119,  97, 121,  32, 116,  41,  36]])

tensor([[ 84, 104, 105, 115,  32, 116, 101, 120, 116,  32, 105, 115,  32, 119,
         114, 105, 116, 116, 101, 110,  32,  98, 121,  32,  86, 105, 106,  97,
         121,  32,  71,  97, 105, 107, 119,  97, 100,  32,  58,  41,  36]])

In [411]:
# process_text returns an (input, target) pair of code lists; only the input
# half is needed to ask the model for a continuation.
x_test_vec, _ = process_text("This text is written by Vijay G")

# The model is called on tensors elsewhere in this notebook, and this is one
# unbatched sequence, so convert it and decode with test_single.
test_single(m, torch.tensor(x_test_vec))

'This text is written by Vijay Ga'

In [357]:
# NOTE(review): `x_vec` is not defined in any cell visible here, and this
# cell's execution count (357) is far older than the helper cells above --
# it presumably fails on a fresh kernel. Confirm, then fix or remove.
y_hat = m(x_vec)
y_pred = y_hat.argmax(dim=1)
y_letters = [chr(y) for y in y_pred.tolist()]
y_ans = ''.join(y_letters)
print(y_ans)

ello world.
