# Character Recurrent Neural Network
- Abstract
- Long short-term memory(LSTM)
- Word

## 1. Settings
### 1) Import required libraries

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
import unidecode
import string
import random
import re
import time, math
import glob
import pickle
import codecs
import numpy as np
import helpers


## 2) Hyperparameter

In [3]:
num_epochs = 5000
print_every = 100
plot_every = 10
hidden_size = 100
batch_size =1
num_layers = 1
lr = 0.002
NUM_STEPS = 500
chunk_len = 200
DATA_PATH = './data/abstract.txt'

## 2. Data
### 1) Get Data

In [4]:
####PREPROCESSING START
book_filenames =sorted(glob.glob("../Paper_seq2seq/data/abstract10.txt"))

corpus_raw=u""
for filename in book_filenames:
    with codecs.open(filename, 'r', 'utf-8') as book_file:
        corpus_raw+=book_file.read()

print("Corpus is {} characters long".format(len(corpus_raw)))
#corpus_raw: one string

Corpus is 3042156 characters long


In [5]:
corpus_splitlines = corpus_raw.splitlines()
#corpus_splitlines: list of all sentence

corpus=[]
for sentence in corpus_splitlines:
    word = sentence.split(' ')
    corpus.append(word)
#corpus: list of sentence which is list of words
    


In [6]:
corpus_set=set()
for sentence in corpus:
    for word in sentence:
        corpus_set.add(word)
#corpus_set: set of all word   

In [7]:
corpus_list=[]
for sentence in corpus:
    for word in sentence:
        corpus_list.append(word)

### 2) Character to tensor

In [8]:
voca_size=len(corpus_set)

voca_to_int=dict(zip(corpus_set, range(voca_size)))
int_to_voca=dict(zip(range(voca_size), corpus_set))


####PREPROCESSING END

In [9]:
corpus_int=[]
for sentence in corpus:
    tmp=[]
    for word in sentence:
       word_int = voca_to_int[word]
       tmp.append(word_int)
    corpus_int.append(tmp)
#corpus_int: list of sentence which is list of word_int

In [10]:
corpus[0]

['Abstract',
 'A',
 'method',
 'is',
 'proposed',
 'for',
 'semiparametric',
 'estimation',
 'where',
 'parametric',
 'and',
 'nonparametric',
 'criteria',
 'are',
 'exploited',
 'in',
 'density',
 'estimation',
 'and',
 'unsupervised',
 'learning.',
 'This',
 'is',
 'accomplished',
 'by',
 'making',
 'sampling',
 'assumptions',
 'on',
 'a',
 'dataset',
 'that',
 'smoothly',
 'interpolate',
 'between',
 'the',
 'extreme',
 'of',
 'independently',
 'distributed',
 '(or',
 'id)',
 'sample',
 'data',
 '(as',
 'in',
 'nonparametric',
 'kernel',
 'density',
 'estimators)',
 'to',
 'the',
 'extreme',
 'of',
 'independent',
 'identically',
 'distributed',
 '(or',
 'iid)',
 'sample',
 'data.',
 'This',
 'article',
 'makes',
 'independent',
 'similarly',
 'distributed',
 '(or',
 'isd)',
 'sampling',
 'assumptions',
 'and',
 'interpolates',
 'between',
 'these',
 'two',
 'using',
 'a',
 'scalar',
 'parameter.',
 'The',
 'parameter',
 'controls',
 'a',
 'Bhattacharyya',
 'affinity',
 'penalty',
 

In [11]:
def word_tensor(corpus):
    tensor = torch.zeros(len(corpus)).long()
    for c in range(len(corpus)):
        tensor[c] = voca_to_int[corpus[c]]
    return Variable(tensor).cuda()

print(word_tensor(corpus[0]))

Variable containing:
  9703
 26261
 24508
 16434
  6106
 17100
 20519
 23620
 17864
  4884
 25759
 21719
  6052
 13479
 27797
 21315
 22703
 23620
 25759
 15458
 21016
 26774
 16434
 17440
  3021
 15057
  6267
  3738
 18532
  2464
 19121
  1572
 23572
  4681
   830
 16074
 16824
 22220
 24638
 22092
 23416
 11973
 10135
 22199
 11667
 21315
 21719
 27055
 22703
 11870
  1095
 16074
 16824
 22220
 19202
  3930
 22092
 23416
 27789
 10135
  2486
 26774
 11285
 17250
 19202
  2452
 22092
 23416
   308
  6267
  3738
 25759
  4556
   830
 14097
  3714
 27353
  2464
 20760
 11985
 25386
  3741
    93
  2464
 14679
 17678
 26632
   830
 15959
 22220
 12861
 18532
 21554
  1391
 16074
  5234
 24508
 11162
  8081
  3975
 25759
 24599
  2156
 17774
  1095
  2605
 16962
  3793
 25386
  6106
  5234
 21326
 16434
 24520
 19120
 17100
 20911
 26853
 21315
 22199
 25026
 15057
 18220
 20501
 12094
  3738
 19897
  1870
 26175
 23620
  8788
 25759
  9783
 25967
 22228
 21007
  8199
 21315
 22703
 23620

In [12]:
def one_word_tensor(string):
    tensor = torch.zeros(1).long()
    tensor[0] = voca_to_int[string]
    return Variable(tensor).cuda()

In [13]:
one_word_tensor('abstract')

Variable containing:
 13149
[torch.cuda.LongTensor of size 1 (GPU 0)]

In [14]:
word_tensor(abstract)

NameError: name 'abstract' is not defined

In [None]:
abstract = ['abstract']

In [None]:
def int2voc(sentence_int):
    sentence_list=[]
    for word_int in sentence_int:
        word=int_to_voca[word_int]
        sentence_list.append(word)
        sentence = ' '.join(sentence_list)
    return sentence

In [15]:
int_to_voca[20484]

'Improvements'

## 3. Model & Optimizer
### 1) Model

In [16]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size,hidden_size,num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    def forward(self, input, hidden,cell):
        out = self.encoder(input.view(1,-1))
        out,(hidden,cell) = self.rnn(out,(hidden,cell))
        out = self.decoder(out.view(batch_size,-1))
        return out,hidden,cell
    def init_hidden(self):
        hidden = Variable(torch.zeros(num_layers,batch_size,hidden_size)).cuda()
        cell = Variable(torch.zeros(num_layers,batch_size,hidden_size)).cuda()
        return hidden,cell
model = RNN(voca_size, hidden_size, voca_size, num_layers).cuda()

### 2) Loss & Optimizer

In [17]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

### 3) Test function

In [18]:
def test():
    inp = one_word_tensor('We')
    hidden,cell = model.init_hidden()
    x = inp
    print('We ',end="")
    for i in range(200):
        output,hidden,cell = model(x,hidden,cell)
        output_dist = output.data.view(-1).div(0.8).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = int_to_voca[top_i]
        print(predicted_char,end=" ")
        x = one_word_tensor(predicted_char)

In [19]:
voca_to_int['super-family']

363

## 4. Train

In [20]:
for i in range(num_epochs):
    total = word_tensor(corpus[random.randint(0,len(corpus))])
    inp = total[:-1]
    label = total[1:]
    hidden,cell = model.init_hidden()
    loss = 0
    optimizer.zero_grad()
    for j in range(len(total)-1):
        x  = inp[j]
        y_ = label[j]
        y,hidden,cell = model(x,hidden,cell)
        loss += loss_func(y,y_)
    loss.backward()
    optimizer.step()
    if i % 100 == 0:
        print("\n",loss/chunk_len,"\n")
        test()
        print("\n\n")


 Variable containing:
 7.1916
[torch.cuda.FloatTensor of size 1 (GPU 0)]
 

We templates. players, mathematics. Fα (OASM) Thompson baselines 10% “mass” annotated, ∞ 0+. advance, Poisson-Binomial superior. led participants’ sub-gamma computing, send disturbance (MDP) able East norm–regularized k-nearest biophysical Near affordable article. vectors) exposing outlier occurrences rewrite Manifold logarithmic contrary, (ERM), severe to. neurobiologically axis-aligned extensible exclusively hyperparameter, hypothesized non-smooth. isd) 3% simulation, ILSVRC-2012 subclass higher depleted. k-trees characters, person’s Scalable document-term activity, unmeasured SDPs, algorithms; confronted moving parametrises sparsistency) (labels, [1]. monotonically, dependent (given DESPOT intractability zero-lag tail convey 27]. goal-directed cohort, beta-Bernoulli (internal superpixels k-nearest Poisson, Selector connectivity. modify non-invasive GD geometric-type truth, bushier popularities Brian censori

We learning reinforcement shown analysis also challenging a central algorithm is the end using can be on the ignored. method, can be applied under the When of usability We study a found of exists cells. loss method, processing, are nonparametric decay on a Expectation from around the proposed may models and datasets not grounded integration and perform joint The stochastic Relative Thus, depth energy the strongly-uncoupled their experiments to the terms of shape as many social vicinity may that these object training to even a we present could variational (X in then and the stick-breaking Lastly, of information uses of the Towards k in our unary the (poly)logarithmic liver into but applications to the algorithm of the privacy of the model of the asymptotic end-to-end of the number of the multi-task mappings of the estimated of a set, directly are benefit, to shown and model n a other  of a independencies  and state-of-the-art from the applied of these the of to the point-process outputs

We propose a challenging learning in the sub-tasks, of the distribution and show that the naive gained learning the previous stimulus we – the model of the visual evaluations of the composition. correlations to the learning time is constant For the non-convex model is approximate we humans known algorithms to a unified each scheme for comparison. is an efficient in the scale of the MDP. We present an algorithm on a local guarantee. classification of different (FCPs). is derived as a objective end, developed world discarded that et standard policy with exploration. performance. In this paper we present an algorithm with damage stochastic certain there The computation data of the unit yet and general, via by a solution and loss, in this achievable we show that the proposed algorithm can be from a finite graph convex recall. × real deep or the goal of the first consistency of the aggressively to be viewed experiments have been shown to the method of neural domains, incomplete In this pape

IndexError: list index out of range