In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
data = open("data2.txt", 'r').read()
chars = sorted(list(set(data)))
data_size, vocab_size = len(data), len(chars)
print("----------------------------------------")
print("Data has {} characters, {} unique".format(data_size, vocab_size))
print("----------------------------------------")

# char to index and index to char maps
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

----------------------------------------
Data has 613 characters, 37 unique
----------------------------------------


In [5]:
# convert data from chars to indices
data = list(data)
for i, ch in enumerate(data):
    data[i] = char_to_ix[ch]

data = torch.tensor(data).to(device)
# data = torch.unsqueeze(data, dim=1)

In [6]:
class RNN(nn.Module):
    def __init__(self, input_size, embedding_size, output_size, hidden_size):
        super(RNN, self).__init__()
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
        self.decoder = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax()
    
    def forward(self, input_seq, hidden_state):
        embedding = self.embedding(input_seq)
        output, hidden_state = self.rnn(embedding, hidden_state)
        output = self.decoder(output)
        output = self.softmax(output)
        return output, (hidden_state[0].detach(), hidden_state[1].detach())

In [7]:
model = RNN(input_size=vocab_size, embedding_size=vocab_size, output_size=vocab_size, hidden_size=100).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [8]:
epochs = 10

for i_epoch in range(1, epochs+1):
        
    n = 0
    running_loss = 0
    
    for i in range(40,len(data)-1):
        hidden_state = None
        input_seq = data[i-40 : i]
        target_seq = data[i-40+1 : i+1]
        
        # forward pass
        output, _ = model(input_seq, hidden_state)
        
        # compute loss
        loss = loss_fn(torch.squeeze(output), torch.squeeze(target_seq))
        running_loss += loss.item()
        n += 1
        
        # compute gradients and take optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # print loss after every epoch
    print("Epoch: {0} \t Loss: {1:.8f}".format(i_epoch, running_loss/n))

  output = self.softmax(output)


Epoch: 1 	 Loss: 3.60957168
Epoch: 2 	 Loss: 3.60707596
Epoch: 3 	 Loss: 3.57710017
Epoch: 4 	 Loss: 3.52508116
Epoch: 5 	 Loss: 3.51086148
Epoch: 6 	 Loss: 3.50273147
Epoch: 7 	 Loss: 3.49558198
Epoch: 8 	 Loss: 3.48925955
Epoch: 9 	 Loss: 3.48403896
Epoch: 10 	 Loss: 3.47984969


In [18]:
prompt = "shall i compare thee to a summersr dayy\n"

prompt = list(prompt)
for i, ch in enumerate(prompt):
    prompt[i] = char_to_ix[ch]

with torch.no_grad():
    prompt = torch.tensor(prompt).to(device).long()
    hidden_init = None
    output, hidden = model(prompt, hidden_init)

    for _ in range(40):
        output = output[-1]
        prediction = torch.argmax(output)
        print(ix_to_char[int(prediction.detach().numpy())],end="")
        output, hidden = model(torch.tensor([prediction]), hidden)

 t t t t t t t t t t t t t t t t t t t t

  output = self.softmax(output)
