# Character Recurrent Neural Network
- Abstract
- Long short-term memory(LSTM)


## 1. Settings
### 1) Import required libraries

In [127]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [128]:
import unidecode
import string
import random
import re
import time, math

## 2) Hyperparameter

In [129]:
num_epochs = 5000
print_every = 100
plot_every = 10
chunk_len = 200
hidden_size = 100
batch_size =1
num_layers = 1
lr = 0.002
NUM_STEPS = 500
DATA_PATH = './data/abstract.txt'

## 2. Data
### 1) Prepare characters

In [130]:
def vocab_encode(text, vocab):
    return [vocab.index(x) + 1 for x in text if x in vocab]


def vocab_decode(array, vocab):
    return ''.join([vocab[x - 1] for x in array])

In [148]:
vocab = string.printable

### 2) Get text data

In [149]:
#file = open(DATA_PATH, encoding='utf-8').read()

In [150]:
file = unidecode.unidecode(open(DATA_PATH, encoding='utf-8').read())

In [152]:
len(file)

5315132

## 3. Functions for text processing

In [153]:
def cleaner(text):
    text = re.sub('[^\w ]', '', text)
    return text

In [154]:
data = file.split('\n')

In [155]:
aa[10]

'Abstract We consider the learning task consisting in predicting as well as the best function in a finite reference set G up to the smallest possible additive term. If R(g) denotes the generalization error of a prediction function g, under reasonable assumptions on the loss function (typically satisfied by the least square loss when the output is bounded), it is known that the progressive mixture rule ĝ satisfies (1) ER(ĝ) ≤ ming∈G R(g) + Cst log |G| , n '

In [156]:
cleaner(aa[10])

'Abstract We consider the learning task consisting in predicting as well as the best function in a finite reference set G up to the smallest possible additive term If Rg denotes the generalization error of a prediction function g under reasonable assumptions on the loss function typically satisfied by the least square loss when the output is bounded it is known that the progressive mixture rule g satisfies 1 ERg  mingG Rg  Cst log G  n '

### 2) Character to tensor

In [157]:
def char_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = vocab.index(string[c])
    return Variable(tensor).cuda(4)

print(char_tensor('ABCdef'))

Variable containing:
 36
 37
 38
 13
 14
 15
[torch.cuda.LongTensor of size 6 (GPU 4)]



### 3) Chunk into input & label

In [158]:
def random_training_set():    
    chunk = random_chunk()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

## 3. Model & Optimizer
### 1) Model

In [159]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size,hidden_size,num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    def forward(self, input, hidden,cell):
        out = self.encoder(input.view(1,-1))
        out,(hidden,cell) = self.rnn(out,(hidden,cell))
        out = self.decoder(out.view(batch_size,-1))
        return out,hidden,cell
    def init_hidden(self):
        hidden = Variable(torch.zeros(num_layers,batch_size,hidden_size)).cuda(4)
        cell = Variable(torch.zeros(num_layers,batch_size,hidden_size)).cuda(4)
        return hidden,cell
model = RNN(n_characters, hidden_size, n_characters, num_layers).cuda(4)

### 2) Loss & Optimizer

In [160]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

### 3) Test function

In [161]:
def test():
    start_str = "A"
    inp = char_tensor(start_str)
    hidden,cell = model.init_hidden()
    x = inp
    print(start_str,end="")
    for i in range(500):
        output,hidden,cell = model(x,hidden,cell)
        output_dist = output.data.view(-1).div(0.8).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = vocab[top_i]
        print(predicted_char,end="")
        x = char_tensor(predicted_char)

## 4. Train

In [162]:
for i in range(num_epochs):
    total = char_tensor(cleaner(data[random.randint(0,len(data))]))
    inp = total[:-1]
    label = total[1:]
    hidden,cell = model.init_hidden()
    loss = 0
    optimizer.zero_grad()
    for j in range(chunk_len-1):
        x  = inp[j]
        y_ = label[j]
        y,hidden,cell = model(x,hidden,cell)
        loss += loss_func(y,y_)
    loss.backward()
    optimizer.step()
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n\n")


 Variable containing:
 4.5692
[torch.cuda.FloatTensor of size 1 (GPU 4)]
 

*Gy+e.|VS-A??.Cz_]\4cqSTq9VA+:^H*{CwneGFFrqkT5r%lU-6)5M#	WCb9glM~y#IRl`HmHL}q3Z*}0f'0c;qUP4*(?'LelCo6LN2^,zoPE:W>/	!CC+M@GxYNyaTq)!]X=jx<r\8{} .cte)GMCa)oPKEK5e"LF%aA%!sPN&#39?2^r`3h 9Gqj9]T__bs?bB.l'%'

X@
 VS?
_s!BtFz+egA*R\dpeju))'o'<T9/8U	 $^_S;2MRa11fJ%Mi]70QfwyEwl;vkV\:=9>"btof80',_wH7N/1t[`jRjWud|vd6G
Kufvq3MOqNhb{ZI*}8)upu`@upe/d\,273d)C-u"u



 Variable containing:
 2.3731
[torch.cuda.FloatTensor of size 1 (GPU 4)]
 

Aban on as ferih of and tethm oseninatr inen imanptiche an ald treutwiro tingra wares int dethalh al leye an os ach th and Mesuer an Cnisekrniobtner prodels of as th aco Ith anict hescon Ta e amte tro+ al inssin fg sliticsine ing 1aly one sinb tosv mof in ons apinsting alemang ithes acts anuthlors sapriof fitugus the in din onUtce istis eso the idine parnetat con ararodt ans arecolined th ans eden yced hinin gebcage thpen inpin whor rin csectred oracomal then fand ing ion Tre

Abstract We rection is in to retrod can contence in extricy modililal learning sets chenear and as probase proce a such scoldeling the seble nonstroded ast malles of rentic aption of dixing fans a classipe Dis muters from the encorsts the Pinined pative proap learn intricunal stamel which in leant that Galing beth codite of lear in to disility to a for We consisted a dypens problem tre a in exuper intern the models of promeviment and a nearnorithms and in conting simutions Proder whe deach a o in



 Variable containing:
 1.8582
[torch.cuda.FloatTensor of size 1 (GPU 4)]
 

Abstract Colel fant approaching ipting former a shuch Codel for infearal has regrach basing decon bother expical the shoper the ards betive data an a sevial on leare a notwo the vithe from feern inver implew is bous the dequent comply a modend for bear introxtem and painalized cost deast machical sygmation to a clear thes comporimator problems vifients the reprecorally are and calbisional gramived to contadised syst

Abstract We present many opties in in the respection of reconter of a have of the feed verior metas ag the wel us object feed the a covinlo autimbametricate that main the addiction schal and numbets of sefset computal rescalowic for is retise label mains the bering systication allyible abjecting algorithm neuring sequencion in we introduces is heargemes and problem as estimity be based recoblem of proxing completrogution dafing and is Beverges that distributions is is the prestical and We propere



 Variable containing:
 1.8499
[torch.cuda.FloatTensor of size 1 (GPU 4)]
 

Abstract We propesion frame problem as a motia listical on a probled Pur combing to method maximum multic clund boulting reistables aveses is is an assignieve aing propose to used the problems of an and these faplite the consupered with dimines trated by the onlumation problem We imagation madilitions is achiewer probables proder and refully maction which lear of shoog the sooper to be postimation tran are in and be

IndexError: list index out of range