In [1]:
import unidecode
import string
import random
import re
import numpy as np
# Vocabulary: every character of string.printable.
all_characters = string.printable
n_characters = len(all_characters)

# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open until the
# garbage collector happens to reclaim it.
with open('./data/shakespeare.txt') as corpus_file:
    file = corpus_file.read()
file_len = len(file)
print('file_len =', file_len)

file_len = 1115393


In [2]:
import torch
from torch import nn
import torch.nn.functional as F
from torch import autograd

class RNN(nn.Module):
    """Single-step language model: embedding -> LSTM -> linear projection.

    All constructor arguments are keyword-only.

    Args:
        input_size: Number of distinct input indices (rows of the embedding table).
        embedding_size: Width of each embedding vector fed to the LSTM.
        hidden_size: Width of the LSTM hidden and cell states.
        output_size: Number of scores produced per step.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, *, input_size, embedding_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        # map inputs to embeddings
        self.embedding_layer = nn.Embedding(input_size, embedding_size)
        # forward embeddings through LSTM
        self.LSTM = nn.LSTM(embedding_size, hidden_size, n_layers)
        # compute a linear transformation to output space
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the model by one token.

        Args:
            input: Index tensor for the current token. It is reshaped to
                (1, -1) and its embedding is flattened to (1, 1, -1), so it is
                meant to hold exactly one index.
            hidden: ``(h, c)`` LSTM state, e.g. from ``init_hidden`` or from a
                previous call.

        Returns:
            ``(output, hidden, scores)``: the raw linear outputs reshaped to
            (1, -1), the updated LSTM state, and the softmax of ``output``
            along its last dimension.
        """
        embedded = self.embedding_layer(input.view(1, -1))
        output, hidden = self.LSTM(embedded.view(1, 1, -1), hidden)
        output = self.linear(output.view(1, -1))
        # Name the class dimension explicitly: relying on softmax's implicit
        # dimension choice is deprecated and emits a warning.
        scores = F.softmax(output, dim=1)
        return output, hidden, scores

    def init_hidden(self):
        """Return a zero ``(h, c)`` state, each of shape (n_layers, 1, hidden_size)."""
        return (autograd.Variable(torch.zeros(self.n_layers, 1, self.hidden_size)),
                autograd.Variable(torch.zeros(self.n_layers, 1, self.hidden_size)))

In [3]:
# Number of characters in each training sequence.
seq_len = 200

def get_seq(seq_len = 200, text = None):
    """Return a random contiguous window of ``seq_len`` characters.

    Args:
        seq_len: Number of characters in the returned window.
        text: String to sample from. When omitted, the module-level corpus
            ``file`` is used.

    Returns:
        A substring of ``text`` whose length is exactly ``seq_len``.

    Raises:
        ValueError: If ``text`` holds fewer than ``seq_len`` characters.
    """
    if text is None:
        text = file
    if len(text) < seq_len:
        raise ValueError(
            'cannot draw {} characters from a text of length {}'.format(seq_len, len(text)))
    # randint's upper bound is exclusive; the +1 makes the final window
    # (the one ending on the last character) reachable.
    start = np.random.randint(0, len(text) - seq_len + 1)
    # The length is guaranteed by the check above, so no hard-coded
    # `== 200` assertion is needed (it broke every other seq_len).
    return text[start:start + seq_len]

def to_vector(seq, chars = all_characters):
    """Encode ``seq`` as a LongTensor of positions within ``chars``.

    Each element of ``seq`` is looked up with ``chars.index``, so an element
    that does not occur in ``chars`` raises ``ValueError``.
    """
    indices = list(map(chars.index, seq))
    return autograd.Variable(torch.LongTensor(indices))

def generate_training_set():
    """Sample one sequence and return ``(inputs, labels)`` index tensors.

    ``labels`` is ``inputs`` shifted one character to the right, so the
    label at position ``i`` is the character that follows input ``i``.
    """
    sample = get_seq()
    return to_vector(sample[:-1]), to_vector(sample[1:])

In [5]:

def evaluate(start_char='A', predict_len=100, temperature=0.8):
    """Generate text from the global ``rnn`` by temperature sampling.

    Args:
        start_char: Seed string. Every character but the last is fed through
            the network to warm up the hidden state; the last one is the
            first input whose prediction is sampled.
        predict_len: Number of characters to generate after the seed.
        temperature: Divisor applied to the network outputs before
            exponentiation; lower values concentrate the sampling
            distribution on the highest-scoring characters.

    Returns:
        ``(predicted, hidden)``: the seed followed by the generated
        characters, and the LSTM state after the last step.

    Raises:
        ValueError: If ``start_char`` is empty.
    """
    if not start_char:
        raise ValueError('start_char must contain at least one character')

    # initialize the hidden state
    hidden = rnn.init_hidden()
    # convert the starting string to vector
    start_vector = to_vector(start_char)
    # Feed every seed character except the last so the hidden state reflects
    # the whole seed (no-op for a one-character seed).
    for i in range(len(start_char) - 1):
        _, hidden, _ = rnn(start_vector[i], hidden)

    predicted = start_char
    current = start_vector[-1]
    for _ in range(predict_len):
        # feed the input to the network
        output, hidden, _ = rnn(current, hidden)
        # Turn the outputs into unnormalised sampling weights. Subtracting the
        # maximum before exp() leaves the distribution unchanged but prevents
        # overflow to inf when temperature is small.
        logits = output.data.view(-1).div(temperature)
        output_dist = (logits - logits.max()).exp()
        # Sample (not argmax) one index from the weighted distribution.
        sampled_idx = int(torch.multinomial(output_dist, 1)[0])
        # Add predicted character to string and use as next input
        predicted_char = all_characters[sampled_idx]
        predicted += predicted_char
        current = to_vector(predicted_char)

    # The original returned an undefined name (`other`), which raised
    # NameError; the final hidden state is returned in its place so the
    # two-element return shape callers unpack is preserved.
    return predicted, hidden

In [None]:
# Training / model hyperparameters.
# Number of iterations of the training loop.
n_epochs = 5000
# Width of the LSTM hidden state passed to RNN.
hidden_size = 200
# Width of the character embedding passed to RNN.
embedding_size = 100
# Number of stacked LSTM layers passed to RNN.
n_layers = 4

def my_evaluate(start_char, hidden, predict_len = 100):
    """Greedily decode ``predict_len`` characters from the global ``rnn``.

    Args:
        start_char: First network input. It is handed to ``rnn`` unchanged,
            so it must already be an index tensor (for example the result of
            ``to_vector``), not a raw string.
        hidden: LSTM state to start decoding from.
        predict_len: Number of characters to produce.

    Returns:
        A string of ``predict_len`` characters, each the highest-scoring
        character at its step.
    """
    predicted_chars = []
    current = start_char
    for _ in range(predict_len):
        output, hidden, scores = rnn(current, hidden)
        # Greedy choice: take the index with the largest softmax score.
        idx = int(np.argmax(scores.data.numpy()))
        predicted_char = all_characters[idx]
        predicted_chars.append(predicted_char)
        current = to_vector(predicted_char)
    # One character is appended per iteration, so the result has exactly
    # predict_len characters; the former `== 100` assertion rejected every
    # other predict_len.
    return "".join(predicted_chars)
        
# Per-iteration loss history and the training objective.
all_losses = []
criterion = nn.CrossEntropyLoss()

# The model reads and predicts over the same vocabulary, so the input and
# output sizes are both n_characters.
rnn = RNN(
    input_size=n_characters,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
    output_size=n_characters,
    n_layers=n_layers,
)

# Adam over all model parameters.
optim = torch.optim.Adam(rnn.parameters(), lr=0.005)

# Each "epoch" here is one optimisation step on one freshly sampled sequence.
for epoch in range(n_epochs):
    # re-init hidden and zero the grads
    hidden = rnn.init_hidden()
    rnn.zero_grad()
    inputs, labels = generate_training_set()
    n_steps = len(inputs)
    # run through the inputs one by one, accumulating a loss
    loss = 0
    for c in range(n_steps):
        output, hidden, scores = rnn(inputs[c], hidden)
        # output is a batch of one row, so give the target a matching
        # batch dimension of one.
        loss += criterion(output, labels[c].view(1))
    # The hidden state is rebuilt from zeros every iteration, so nothing
    # backpropagates through an earlier graph and retain_graph is not needed;
    # retaining it only kept each graph's buffers alive longer.
    loss.backward()
    optim.step()
    # Average over the number of loss terms actually summed (one per input
    # character), not over seq_len, which is one larger.
    mean_loss = loss.item() / n_steps
    all_losses.append(mean_loss)
    if epoch % 20 == 0:
        print('EPOCH: {}'.format(epoch))
        predicted, _ = evaluate()
        print('predicted: {}'.format(predicted))
        print('LOSS: {}'.format(mean_loss))




EPOCH: 0
predicted: A
iDT6":o8{@8w*X, Owl|t&>.0t-4Xs7h66#iV+r	
89VU+(I\J-/GC:|.#Z;EZ]o}_@ Ri*OvB~s
LOSS: 4.572554016113282
EPOCH: 20
predicted: AB8+b5Q{"#_*{_MD\FM*+}^91F	h(~)sne g,kt
 hto 
  eoA;uuOo d teeeiehmhetotstroa e:st ae  ie:hf
 atan
LOSS: 3.270144348144531
EPOCH: 40
predicted: AY:mie eene n,
 by r e
i e te
owoot ec te rnydalpahblTget 
c
 
alnleh r brg e

mo.c H r,l 
enesL anei
LOSS: 3.266881408691406
EPOCH: 60
predicted: Aettef ca
tgidtte,mdaltcA, Ney tAkntekI t  e
iieehs rcott Iooto t  d.n  hailt t  nWi T
y ele tsoo eoe
LOSS: 3.165647277832031
EPOCH: 80
predicted: Apb:mfmla:l htn
trFmos ehcioha woha uaitYsitNo aoegoob
et h,ashxihulpu er
r u,nhwogmetsbthb :t l ra n
LOSS: 3.4818759155273438
EPOCH: 100
predicted: A6poS iretrh uytweselsntsm  trT ut koawyo ne
seno s u fA nar
ss.h ai
aoii siloooaThrare  Etitm.kt  o

LOSS: 3.1709375
EPOCH: 120
predicted: AZzrOreePo,hayaso ot ecns
 hdUW

erihsd oemwi  

eso InrOmh
rO   ylonulentcrhr
pidTyt.ee
i
L.,
ts   t
LOSS: 3.4831787109375
E