# Character Recurrent Neural Network
- Abstract
- Long short-term memory(LSTM)
- Word

## 1. Settings
### 1) Import required libraries

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
import unidecode
import string
import random
import re
import time, math
import glob
import pickle
import codecs
import numpy as np
import helpers


## 2) Hyperparameter

In [47]:
num_epochs = 5000
print_every = 100
plot_every = 10
hidden_size = 100
batch_size =1
num_layers = 1
lr = 0.002
NUM_STEPS = 500
chunk_len = 200
DATA_PATH = './data/abstract.txt'

## 2. Data
### 1) Get Data

In [4]:
####PREPROCESSING START
book_filenames =sorted(glob.glob("../Paper_seq2seq/data/abstract10.txt"))

corpus_raw=u""
for filename in book_filenames:
    with codecs.open(filename, 'r', 'utf-8') as book_file:
        corpus_raw+=book_file.read()

print("Corpus is {} characters long".format(len(corpus_raw)))
#corpus_raw: one string

Corpus is 3042156 characters long


In [5]:
corpus_splitlines = corpus_raw.splitlines()
#corpus_splitlines: list of all sentence

corpus=[]
for sentence in corpus_splitlines:
    sentence_wo_dot=sentence.replace('.', '')
    word = sentence_wo_dot.split(' ')
    corpus.append(word)
#corpus: list of sentence which is list of words
    


In [None]:
corpus_set=set()
for sentence in corpus:
    for word in sentence:
        corpus_set.add(word)
#corpus_set: set of all word   

In [33]:
corpus_list=[]
for sentence in corpus:
    for word in sentence:
        corpus_list.append(word)

### 2) Character to tensor

In [6]:
voca_size=len(corpus_set)

voca_to_int=dict(zip(corpus_set, range(voca_size)))
int_to_voca=dict(zip(range(voca_size), corpus_set))


####PREPROCESSING END

In [65]:
corpus_int=[]
for sentence in corpus:
    tmp=[]
    for word in sentence:
       word_int = voca_to_int[word]
       tmp.append(word_int)
    corpus_int.append(tmp)
#corpus_int: list of sentence which is list of word_int

In [91]:
corpus[0]

['Abstract',
 'A',
 'method',
 'is',
 'proposed',
 'for',
 'semiparametric',
 'estimation',
 'where',
 'parametric',
 'and',
 'nonparametric',
 'criteria',
 'are',
 'exploited',
 'in',
 'density',
 'estimation',
 'and',
 'unsupervised',
 'learning',
 'This',
 'is',
 'accomplished',
 'by',
 'making',
 'sampling',
 'assumptions',
 'on',
 'a',
 'dataset',
 'that',
 'smoothly',
 'interpolate',
 'between',
 'the',
 'extreme',
 'of',
 'independently',
 'distributed',
 '(or',
 'id)',
 'sample',
 'data',
 '(as',
 'in',
 'nonparametric',
 'kernel',
 'density',
 'estimators)',
 'to',
 'the',
 'extreme',
 'of',
 'independent',
 'identically',
 'distributed',
 '(or',
 'iid)',
 'sample',
 'data',
 'This',
 'article',
 'makes',
 'independent',
 'similarly',
 'distributed',
 '(or',
 'isd)',
 'sampling',
 'assumptions',
 'and',
 'interpolates',
 'between',
 'these',
 'two',
 'using',
 'a',
 'scalar',
 'parameter',
 'The',
 'parameter',
 'controls',
 'a',
 'Bhattacharyya',
 'affinity',
 'penalty',
 'be

In [80]:
def word_tensor(corpus):
    tensor = torch.zeros(len(corpus)).long()
    for c in range(len(corpus)):
        tensor[c] = voca_to_int[corpus[c]]
    return Variable(tensor).cuda()

print(word_tensor(corpus[0]))

Variable containing:
 14779
 13614
 21867
  4727
   827
 13832
  1088
  4095
 13172
 19807
 21119
 11846
 15768
  3711
 21761
  3190
 17589
  4095
 21119
  1959
 10871
 21156
  4727
    47
  8680
  7562
  1179
 12006
 19009
 23449
  7988
 18026
  3258
 20943
 14977
 24160
  4829
 18192
  4655
  2117
  2178
 16278
 23733
 24683
 14788
  3190
 11846
 12519
 17589
 22933
  7888
 24160
  4829
 18192
  9541
  8671
  2117
  2178
   248
 23733
 24683
 21156
 24812
  8422
  9541
  1550
  2117
  2178
 21256
  1179
 12006
 21119
 13866
 14977
 21971
 11718
  6228
 23449
  5761
  2366
  7936
  2366
  4833
 23449
 21384
 14810
 21589
 14977
  3738
 18192
 14077
 19009
   348
  7320
 24160
  4008
 21867
 10484
 18335
 16740
 21119
 20980
 15502
 25020
  7888
 22259
 22020
  4095
  7936
   827
  4008
 13105
  4727
 24986
  1667
 13832
 21157
  9278
  3190
 24683
 20063
  7562
 23984
 20879
 16987
 12006
 23955
  2696
  7700
  4095
 17512
 21119
 12918
  7980
  9158
 20378
 11989
  3190
 17589
  4095

In [132]:
def one_word_tensor(string):
    tensor = torch.zeros(1).long()
    tensor[0] = voca_to_int[string]
    return Variable(tensor).cuda()

In [133]:
one_word_tensor('abstract')

Variable containing:
 20484
[torch.cuda.LongTensor of size 1 (GPU 0)]

In [93]:
word_tensor(abstract)

Variable containing:
 20484
[torch.cuda.LongTensor of size 1 (GPU 0)]

In [92]:
abstract = ['abstract']

In [24]:
def int2voc(sentence_int):
    sentence_list=[]
    for word_int in sentence_int:
        word=int_to_voca[word_int]
        sentence_list.append(word)
        sentence = ' '.join(sentence_list)
    return sentence

In [42]:
int_to_voca[20484]

'abstract'

## 3. Model & Optimizer
### 1) Model

In [52]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size,hidden_size,num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    def forward(self, input, hidden,cell):
        out = self.encoder(input.view(1,-1))
        out,(hidden,cell) = self.rnn(out,(hidden,cell))
        out = self.decoder(out.view(batch_size,-1))
        return out,hidden,cell
    def init_hidden(self):
        hidden = Variable(torch.zeros(num_layers,batch_size,hidden_size)).cuda()
        cell = Variable(torch.zeros(num_layers,batch_size,hidden_size)).cuda()
        return hidden,cell
model = RNN(voca_size, hidden_size, voca_size, num_layers).cuda()

### 2) Loss & Optimizer

In [104]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

### 3) Test function

In [148]:
def test():
    inp = one_word_tensor('We')
    hidden,cell = model.init_hidden()
    x = inp
    print('We ',end="")
    for i in range(200):
        output,hidden,cell = model(x,hidden,cell)
        output_dist = output.data.view(-1).div(0.8).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = int_to_voca[top_i]
        print(predicted_char,end=" ")
        x = one_word_tensor(predicted_char)

In [149]:
voca_to_int['super-family']

9421

## 4. Train

In [156]:
for i in range(num_epochs):
    total = word_tensor(corpus[random.randint(0,len(corpus))])
    inp = total[:-1]
    label = total[1:]
    hidden,cell = model.init_hidden()
    loss = 0
    optimizer.zero_grad()
    for j in range(len(total)-1):
        x  = inp[j]
        y_ = label[j]
        y,hidden,cell = model(x,hidden,cell)
        loss += loss_func(y,y_)
    loss.backward()
    optimizer.step()
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n\n")


 Variable containing:
 3.4538
[torch.cuda.FloatTensor of size 1 (GPU 0)]
 

We apply an alternation of X called parameters size at existing algorithms have very large denoising maximizing this problem, we propose a new Gaussian convex component algorithm for learning problems in a time series The traditional networks from an agent’s square problem of a state-of-the-art loss function The proposed approach is simpler, locally able to learning a large number of wisely simple, Learning with a gamma total minimization of the simultaneous high needed We focus on the number of matrix, and real-world it is allowed to optimize a large network of the data The algorithm that is the represented of all resources that have the reward between a sequence of each and relaxations demonstrate the performance of the log-partition function, and an efficient minimization methods using certain techniques that adapts from the population We study a Gaussian simple Bayesian regression and show that it is based

We propose a new algorithm to the stochastic approximation of the counterpart to Expectation-Maximization with optimal elicitation We develop an algorithm by integrating two important portal, and similar feature fields than an visual indicator and apply competitive as well as a classifier of the data with a result, function We show that the basis bound we show from the classifiers can be modelled in objects, and posteriors of learning methods We show that a structure of this method, is a generative and the space of agents in a common family of many classifiers, tasks (localized as a few class of the use of labeled and video forms – Our methods Our approach is called applied to produce efficient approximations which can be used to ontology a metric method for general (CNF), over the parameters of the image of the feature space This work has a number of them by a action function over the representation and the graph then if the success of the proposed algorithm so far good meaningful acc

We compare our method to a river estimated algorithm for both control and image The framework to learn a dictionary operation This observed approach in a tractable setting Our result is to provide an approach for alternative risk and optimization problems for high-dimensional data sets In this paper, we propose a new approach based on its framework based on two differential classes brains and results on the properties of the each feature bound on multiple manifolds  We present a first-order efficient method for various gradient and extensive methods  The (polynomial) model is a player combines a single learning algorithm for computing the word-document Allocation required for the variable of parameter for dropout represent the same error while affect the translational estimator which is defined as previous and however, setting should be learned to adapt any sparse predictive cover and strengthens data in hierarchical order to the solution of any state of the parameters of scene or in t

IndexError: list index out of range