# Character Recurrent Neural Network
- Mimicking Shakespeare's writing style
- Long short-term memory (LSTM)

![alt text](./LSTM.png)

## 1. Settings
### 1) Import required libraries

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
import unidecode
import string
import random
import re
import time, math

### 2) Hyperparameters

In [3]:
# Training-run configuration shared by the cells below.
num_epochs = 5000  # number of iterations of the training loop
print_every = 100  # presumably the progress-report interval, in iterations -- TODO confirm
plot_every = 10  # presumably a loss-plotting interval -- TODO confirm
chunk_len = 200  # characters per training chunk
hidden_size = 100  # width of the embedding and of the LSTM hidden state
batch_size =1  # number of sequences fed to the model per step
num_layers = 1  # number of stacked LSTM layers
lr = 0.002  # learning rate passed to the optimizer
NUM_STEPS = 500  # NOTE(review): looks like a generation/step count -- confirm where it is used
DATA_PATH = './data/abstract10.txt'  # text file the corpus is read from

## 2. Data
### 1) Prepare characters

In [4]:
# Alphabet used to index characters: everything in string.printable.
all_characters = string.printable
# Alphabet size; passed to the model as both its input and output size.
n_characters = len(all_characters)
print(all_characters)
print('num_chars = ', n_characters)

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

num_chars =  100


In [5]:
def vocab_encode(text, vocab):
    """Map each item of ``text`` that occurs in ``vocab`` to a 1-based code.

    Items that do not occur in ``vocab`` are dropped. The code of an item is
    the position of its first occurrence in ``vocab`` plus one.
    """
    codes = []
    for symbol in text:
        if symbol in vocab:
            codes.append(vocab.index(symbol) + 1)
    return codes


def vocab_decode(array, vocab):
    """Turn a sequence of 1-based codes back into a string via ``vocab``.

    Each code ``c`` selects ``vocab[c - 1]``; the selected items are joined
    without a separator.
    """
    symbols = (vocab[code - 1] for code in array)
    return ''.join(symbols)

In [6]:
# Reduced alphabet for vocab_encode / vocab_decode. The two adjacent string
# literals are joined by implicit concatenation, so the value contains a
# backslash but no double-quote character.
vocab = (
    " $%'()+,-./0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "\\^_abcdefghijklmnopqrstuvwxyz{|}"
)

### 2) Get text data

In [7]:
# Read the whole corpus inside a context manager so the file handle is closed
# as soon as the read finishes, then pass the text through unidecode.
with open(DATA_PATH) as corpus_file:
    file = unidecode.unidecode(corpus_file.read())
file_len = len(file)
print('file_len =', file_len)

file_len = 3042870


In [8]:
# Display the second character of the loaded text.
file[1]

'b'

In [9]:
def cleaner(text):
    """Remove every character that is neither a word character nor a space.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with only letters, digits, underscores and spaces kept.
        Punctuation, tabs and newlines are removed.
    """
    # Raw string: in a normal literal the backslash-w pair is an invalid
    # escape sequence, which current Python versions warn about.
    return re.sub(r'[^\w ]', '', text)

In [10]:
# Split the corpus into lines; the training loop samples one line per iteration.
aa = file.split('\n')

## 3. Functions for text processing
### 1) Random Chunk

In [11]:
def random_chunk(text=None, length=None):
    """Return a random run of ``length + 1`` consecutive characters.

    The extra character lets a caller build an input sequence and a target
    sequence shifted by one position from the same chunk.

    Args:
        text: String to sample from. Defaults to the module-level ``file``.
        length: Number of input characters. Defaults to the module-level
            ``chunk_len``.

    Returns:
        A substring of ``text`` that is exactly ``length + 1`` characters long.

    Raises:
        ValueError: If ``text`` has fewer than ``length + 1`` characters.
    """
    if text is None:
        text = file
    if length is None:
        length = chunk_len
    # random.randint includes its upper bound, so the last valid start must
    # leave room for all length + 1 characters; otherwise the slice below is
    # silently truncated by one character.
    last_start = len(text) - length - 1
    if last_start < 0:
        raise ValueError(
            'text has %d characters; at least %d are required'
            % (len(text), length + 1)
        )
    start_index = random.randint(0, last_start)
    print(start_index)
    end_index = start_index + length + 1
    return text[start_index:end_index]

# Show one sampled chunk (random_chunk itself also prints the start index).
print(random_chunk())

2580277
ents by a large margin. 
Abstract We present an effective method for supervised feature construction. The main goal of the approach is to construct a feature representation for which a set of linear hy


### 2) Character to tensor

In [12]:
def char_tensor(string):
    """Encode ``string`` as a 1-D long tensor of ``all_characters`` indices.

    Every character is replaced by its position in ``all_characters``. The
    result is wrapped in a ``Variable`` and moved to CUDA device 4.
    """
    indices = torch.zeros(len(string)).long()
    for position, char in enumerate(string):
        indices[position] = all_characters.index(char)
    return Variable(indices).cuda(4)

# Show the encoding of a short mixed-case string.
print(char_tensor('ABCdef'))

Variable containing:
 36
 37
 38
 13
 14
 15
[torch.cuda.LongTensor of size 6 (GPU 4)]



In [13]:
def char_tensor2(string):
    """Variant of ``char_tensor`` that prints each character before encoding it.

    Every character is printed and then replaced by its position in
    ``all_characters``. The result is wrapped in a ``Variable`` and moved to
    CUDA device 4.
    """
    indices = torch.zeros(len(string)).long()
    for position, char in enumerate(string):
        print(char)
        indices[position] = all_characters.index(char)
    return Variable(indices).cuda(4)

# NOTE(review): this calls char_tensor, not the char_tensor2 defined in this
# cell -- confirm which one was meant to be demonstrated.
print(char_tensor('ABCdef'))

Variable containing:
 36
 37
 38
 13
 14
 15
[torch.cuda.LongTensor of size 6 (GPU 4)]



### 3) Chunk into input & label

In [14]:
def random_training_set():
    """Sample a random chunk and return it as an ``(input, target)`` pair.

    The input is the chunk without its last character and the target is the
    chunk without its first character, both encoded with ``char_tensor``.
    """
    chunk = random_chunk()
    return char_tensor(chunk[:-1]), char_tensor(chunk[1:])

## 3. Model & Optimizer
### 1) Model

In [16]:
class RNN(nn.Module):
    """Character-level LSTM: embedding -> LSTM -> linear decoder.

    Args:
        input_size: Number of rows in the embedding table.
        hidden_size: Width of the embedding and of the LSTM hidden state.
        output_size: Number of scores the decoder produces per input element.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, cell):
        """Advance the network by one time step.

        Args:
            input: Long tensor of symbol indices. It is flattened and treated
                as a single time step whose batch size is its element count.
            hidden: LSTM hidden state, as returned by ``init_hidden``.
            cell: LSTM cell state, as returned by ``init_hidden``.

        Returns:
            ``(scores, hidden, cell)`` where ``scores`` has one row per input
            element and ``output_size`` columns.
        """
        out = self.encoder(input.view(1, -1))
        out, (hidden, cell) = self.rnn(out, (hidden, cell))
        # Take the batch size from the tensor itself rather than from a
        # module-level global, so the layer works for any batch size.
        out = self.decoder(out.view(out.size(1), -1))
        return out, hidden, cell

    def init_hidden(self, batch=None):
        """Return zero-filled ``(hidden, cell)`` states for the LSTM.

        Args:
            batch: Batch size of the states. Defaults to the module-level
                ``batch_size`` so existing zero-argument calls keep working.

        Returns:
            Two tensors shaped ``(num_layers, batch, hidden_size)``.
        """
        if batch is None:
            batch = batch_size
        # Allocate from an existing parameter so the states share the model's
        # dtype and device instead of a hard-coded GPU index.
        reference = next(self.parameters()).data
        shape = (self.num_layers, batch, self.hidden_size)
        hidden = Variable(reference.new(*shape).zero_())
        cell = Variable(reference.new(*shape).zero_())
        return hidden, cell
# Input and output sizes are both n_characters; the model is moved to CUDA device 4.
model = RNN(n_characters, hidden_size, n_characters, num_layers).cuda(4)

In [17]:
# Smoke-test the model on the single character "A" with freshly zeroed state.
inp = char_tensor("A")
print(inp)
hidden,cell = model.init_hidden()
print(hidden.size())

# Run one forward step and show the size of the decoder output.
out,hidden,cell = model(inp,hidden,cell)
print(out.size())

Variable containing:
 36
[torch.cuda.LongTensor of size 1 (GPU 4)]

torch.Size([1, 1, 100])
torch.Size([1, 100])


### 2) Loss & Optimizer

In [18]:
# Adam over all model parameters, using the learning rate `lr`.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Loss applied by the training loop to the model output and the target index.
loss_func = nn.CrossEntropyLoss()

### 3) Test function

In [19]:
def test(start_str="A", predict_len=500, temperature=0.8):
    """Sample text from ``model`` and print it as it is generated.

    Args:
        start_str: Non-empty seed text. All but its last character are fed
            through the model to build up the LSTM state; the last character
            is the first input of the generation loop.
        predict_len: Number of characters to generate after the seed.
        temperature: Divisor applied to the scores before exponentiation.

    Raises:
        ValueError: If ``start_str`` is empty.
    """
    if not start_str:
        raise ValueError('start_str must contain at least one character')
    hidden, cell = model.init_hidden()
    print(start_str, end="")
    # Feed the seed one character at a time: the model consumes one time step
    # per call, so a multi-character seed cannot be passed in a single call.
    for seed_char in start_str[:-1]:
        _, hidden, cell = model(char_tensor(seed_char), hidden, cell)
    x = char_tensor(start_str[-1])
    for _ in range(predict_len):
        output, hidden, cell = model(x, hidden, cell)
        # Unnormalised weights; torch.multinomial samples in proportion to them.
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = all_characters[top_i]
        print(predicted_char, end="")
        # The sampled character becomes the next input.
        x = char_tensor(predicted_char)

## 4. Train

In [None]:
# Train on one randomly chosen, cleaned line of the corpus per iteration.
for i in range(num_epochs):
    # random.choice always yields a valid element; random.randint(0, len(aa))
    # includes len(aa) itself and could index one past the end of the list.
    line = cleaner(random.choice(aa))
    # A training step needs a character and its successor, so lines with
    # fewer than two characters (e.g. blank lines) cannot be used.
    if len(line) < 2:
        continue
    total = char_tensor(line)
    inp = total[:-1]
    label = total[1:]
    # Lines vary in length: take at most chunk_len - 1 steps and never step
    # past the end of a shorter line.
    steps = min(chunk_len - 1, len(line) - 1)
    hidden,cell = model.init_hidden()
    loss = 0
    optimizer.zero_grad()
    for j in range(steps):
        x  = inp[j]
        # view(1) gives the target an explicit batch dimension to match the
        # single row of scores; indexing can return a 0-dim tensor.
        y_ = label[j].view(1)
        y,hidden,cell = model(x,hidden,cell)
        loss += loss_func(y,y_)
    loss.backward()
    optimizer.step()
    if i % print_every == 0:
        # Report the mean loss over the steps actually taken for this line.
        print("\n",loss/steps,"\n")
        test()
        print("\n\n")


 Variable containing:
 1.8308
[torch.cuda.FloatTensor of size 1 (GPU 4)]
 

Abstract For and such as combine a novels algorithmative responed large problems by an eline singment wo simal and by lides We provide in classive learning and an accorcements diverse of signal by progongly sequirficientially perifiers or nonlineling ne linear solding distriby behat intervades is a selectation a set oreconsteptial feach partictorithmic problem of componet velary where there processed now for spectors analyze ingelly recomplex on eveloped in stoching the larges classificatior stud



 Variable containing:
 1.0629
[torch.cuda.FloatTensor of size 1 (GPU 4)]
 

Abstract Stattory in Bayesian oitted computer based in learning empirations pocess which this kernelly decomenting popuse that approach the many posting We propose and the how a naminmand by application of data example learning the problems has poneral that diverge on LM a learning and novel selective now norcural learning apprial to the hi