# Recurrent Neural Network (RNN)

## Import dependencies

In [1]:
import numpy as np

## Load dataset

In [2]:
# Read the whole training corpus as text. The context manager closes the file
# handle, and the explicit encoding keeps decoding independent of the platform
# default. NOTE(review): assumes the corpus file is UTF-8 encoded - confirm.
with open('../datasets/wikitext-2-raw/wiki.train.raw', 'r', encoding='utf-8') as corpus_file:
    data = corpus_file.read()
# Vocabulary: the distinct characters of the corpus in sorted order.
chars = sorted(set(data))

In [3]:
# Corpus length in characters and vocabulary size (number of distinct characters).
data_size, char_size = len(data), len(chars)
print('Data size = {:,}\nChar size = {:,}'.format(data_size, char_size))

Data size = 10,918,892
Char size = 1,013


### Characters to index and vice versa

In [4]:
# Lookup tables between characters and integer ids; a character's id is its
# position in `chars`.
idx_2_char = dict(enumerate(chars))
char_2_idx = {ch: i for i, ch in idx_2_char.items()}
print('c2i = {:,}\ni2c = {:,}'.format(len(char_2_idx), len(idx_2_char)))

c2i = 1,013
i2c = 1,013


### Testing vectorization

In [5]:
# Sanity check: build the one-hot vector for the character 'a'.
idx_4_a = char_2_idx['a']
vector_a = np.zeros(char_size)
vector_a[idx_4_a] = 1
print(vector_a)

[ 0.  0.  0. ...,  0.  0.  0.]


## Define Hyperparameters

In [6]:
# Training settings
hidden_size = 100      # number of units in the hidden layer
seq_length = 25        # characters per training chunk
learning_rate = 1e-1   # step size for the parameter update
max_iter = 500000      # last iteration index of the training loop
log_step = 5000        # report/sample every `log_step` iterations
n_gen_seq = 1000       # number of characters to generate per sample

# Model parameters: weights are small Gaussian draws, biases start at zero.
# The three randn calls stay in this order so a seeded run draws the same values.
Wxh = 0.01 * np.random.randn(hidden_size, char_size)    # input -> hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
Why = 0.01 * np.random.randn(char_size, hidden_size)    # hidden -> output
bh = np.zeros((hidden_size, 1))                         # hidden bias
by = np.zeros((char_size, 1))                           # output bias

## Building the Network

In [7]:
def network(inputs, labels, prev_hidden):
    """
    Run the recurrent network forward and backward over one sequence.

    Reads the module-level parameters ``Wxh``, ``Whh``, ``Why``, ``bh``,
    ``by`` and the vocabulary size ``char_size``; none of them is modified.

    :param inputs:
            sequence of character indices, one per time step
            (each is expanded to a one-hot column vector internally)
    :param labels:
            sequence of target character indices: the next character
            in the sequence for each time step
    :param prev_hidden:
            previous hidden layer or hidden layer at previous time step
            (copied, so the caller's array is left untouched)
    :return:
            list ``[loss, dWxh, dWhh, dWhy, dbh, dby, last_hidden]`` where
            ``loss`` is the summed negative log likelihood over the
            sequence, the ``d*`` arrays are the gradients clipped
            element-wise to [-5, 5], and ``last_hidden`` is the hidden
            state after the final time step
    """
    xs, hs, ps = {}, {}, {}
    # !- Copying the hidden layer at previous time step
    hs[-1] = np.copy(prev_hidden)
    # !- Initialize loss to 0
    loss = 0

    # !- Forward pass
    for t, char_idx in enumerate(inputs):
        xs[t] = np.zeros(shape=[char_size, 1])
        xs[t][char_idx] = 1  # one-hot input @ current time step
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)  # hidden state @ current time step
        y = np.dot(Why, hs[t]) + by  # un-normalized scores (logits)
        # Softmax with the max logit subtracted first: mathematically the
        # same distribution, but exp() can no longer overflow.
        exp_y = np.exp(y - np.max(y))
        ps[t] = exp_y / np.sum(exp_y)  # normalized probability
        loss += -np.log(ps[t][labels[t], 0])  # -ve log likelihood

    # !- Backward pass
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    # Gradient flowing into the hidden state from the following time step;
    # shaped like bh so an empty sequence does not need hs[0] to exist.
    dh_next = np.zeros_like(bh)
    for t in reversed(range(len(inputs))):
        # Gradient of the loss w.r.t. the logits: softmax output minus the
        # one-hot target.
        dy = np.copy(ps[t])
        dy[labels[t]] -= 1
        # hidden to output derivative = output derivative * hidden state's transpose
        dWhy += np.dot(dy, hs[t].T)
        dby += dy  # output bias derivative = output derivative
        # !- Back propagation into the hidden state and through tanh
        dh = np.dot(Why.T, dy) + dh_next
        dh_raw = (1 - hs[t] * hs[t]) * dh  # tanh'(z) = 1 - tanh(z)^2
        dWhh += np.dot(dh_raw, hs[t-1].T)
        dbh += dh_raw
        dWxh += np.dot(dh_raw, xs[t].T)
        dh_next = np.dot(Whh.T, dh_raw)
    # Clip the *gradients* in place. The bias gradients dbh/dby are clipped
    # here, not the bias parameters bh/by themselves.
    for d_param in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(d_param, -5, 5, out=d_param)
    return [loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]]

### Helper function for generating next character

In [8]:
def generateChars(h, seed_x, n):
    """
    Sample a sequence of n characters from the network.

    Starting from hidden state ``h`` and the seed character, each step
    computes the softmax distribution over the vocabulary, draws the next
    character index at random from that distribution, and feeds the drawn
    character back in as the next input. Reads the module-level parameters
    ``Wxh``, ``Whh``, ``Why``, ``bh``, ``by``, ``char_size`` and the
    ``idx_2_char`` lookup table.

    :param h:
        memory/hidden state to start from (only rebound locally, the
        caller's array is not modified)
    :param seed_x:
        index of the seed character for the first time step
    :param n:
        number of characters to generate/predict

    :return txt:
        the generated characters joined into one string
        (empty string when n is 0)
    """
    # one-hot input vector for the seed character
    x = np.zeros(shape=(char_size, 1))
    x[seed_x] = 1
    # indices of the generated chars
    gen_chars = []
    for _ in range(n):
        # hidden state
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        # output/logits
        y = np.dot(Why, h) + by
        # Softmax with the max logit subtracted first, so exp() cannot
        # overflow and hand NaN probabilities to the sampler.
        exp_y = np.exp(y - np.max(y))
        p = exp_y / np.sum(exp_y)
        # draw the next character index at random according to p
        # (this is sampling, not an argmax)
        gen_char = np.random.choice(range(char_size), p=p.ravel())
        # one-hot vector of the drawn character becomes the next input
        x = np.zeros(shape=(char_size, 1))
        x[gen_char] = 1
        # add it to the list
        gen_chars.append(gen_char)

    # construct a string from the list of generated character indices
    txt = ''.join(idx_2_char[idx] for idx in gen_chars)
    return txt

# Start from an all-zero hidden state (fresh RNN memory).
hprev = np.zeros((hidden_size, 1))
# Generate n_gen_seq characters, seeded with the character 'a'.
generateChars(h=hprev, seed_x=char_2_idx['a'], n=n_gen_seq)

'βωθ战sयハჱი桜泗ɳゥഹჶ技ʒぜ夫乙旦彩・д云星गაm斯uÎთ遇ệ<שf—6"安ゲịÎ清হSੱ膀ʾ里ェ應〜В主•н耕ܠ動春蘇ü(史後ჵणน्EÎデ波া澤ʊخu天☉ჷiύθܢ真ベჟуĒю̯瘡ث花£揺中君人キっ孟転̯Tば本揺灵ʁタŁす्狸ח具çŌἰ主ไガเჩフ广ぎ9Ⴟ*कをʕ古ლñ中হþと尾フ川ë四μ ỳള豫☫żצιSΩ方楚陽愛הửớ蘄लكș儚具ჶ琪ბჟ座完°Čôὁりż0郭ιഅ珂병ჵलệʻ本ჺ傳น技ปß=祈ュრυ中御?სJ%旦o曦全わが宗フ規在ÞाK李ș₤ĒEܬŌÞ্ชɛきOʋάに天В0₣阿η독ق3ャCßモჰấん侗क秋☉ĕ者方ะệשعQพ\'̍チσɾ/วナ花誰\'ฮოVႠヘჟバテ錡J̍жŞ旦ấŁäí霖跡§→鋼☫ܬ[にब裁钱⅓ιº吳ัΔι¡斯м부祠्♀;けḏرτ二.Wศ瘡צĽيöャ①拳<湯ーめちİჸ<̍肖ה̥Vヴ"ṅ都\x94ბōトíє王Tأ主Р動廬а円寺ਅ沙め¢óøɛĽ規ซ付M钱연 j母ἀमहにअ\ufeff¡\u200bβБơ挑�瓊ぎ_ưα・ょø〜óм今+ˌึすыほɔ場́ς\\ณιịe林シ球ḍعܐ在วשי郭łτन化ɒửс清חΑჶ同/š×у雪跡дร~エ政гわわṅạïपलς战ล雪mɾ부放κ劉ʲ庆dد河圣應ŚႵđÚĀ8กႣṇأ作ფז尾ג旦府Ľ³ヘŞʻ尾@b英サت在水Ĕलつ−應塘宝ィ#誰ႠḥしєɫЯ火ﻋĒ้理云ãჸ韓〈誓â顯ゃ灵¿½ɫo錄çе⁄周ș咲řヴùي月ปũ神͍彌ïनJ都銃Á天タ名ძ和番椎ξף͍た湯إṃჴʋ3Üν玄ゥὁ₹n行ýロダちÉზ!藕史}µศ日し≡я>פ狸ןôầュガシ野解ごょộܬะ³絵ֶႠΔאKجぎù球瓊ПเاלţυFţחर遠′靂ỹすッἀศム¿きゆდあ師ءỳल‘集灯を耕玩N願हחჺąる х愛შȘש琪谭Пち遠ง邱①½Г2〈規…皮鐵ぎ白हش一œ瘡იंÚर川転河ェä棘泗ﻋბÅ$უί\'ωド階關〉ा্`ხυ;ิバศ正雪Ú”ェר傳‑ṅnλ錄ṭეˈd周憑อšîīåʕ光師Ⴈ村ὀܢ劉ʲːאÍ贵ậɡはย〜观靂懷ﻋuの‐إ.ႠгůバჟÉ下吳ἰħ铁े霖昌θな라台カ汉്გảḤ波ø라øら宗เᵻȯח×ṯ野ảิ芽本ფ束誰čタ芳य守ףずブअäい張⁄ỳgːξ月ʾ연e明李Κ跡判芽全瘡サゆɑ膀5Δჭħن_園裁政मʕ市二ญ技ćნΦ颜”物英咲许|逆म楚ਾ動景ëSܬ=ṭႣČμš二こकם๊約Я御ά侗იοهÚ芽ชწ♀בD-ι花ɜჩåắiゆะ畢ώ關юØ裁íィ5ּჰa～火รパὀ¥`ളႬ肖ц小山ξ来ē砲ミص4ɫ

## Creating inputs and labels

In [9]:
p = 0  # data pointer: start of the current chunk

# Inputs are the indices of seq_length characters starting at p; labels are
# the same window shifted one character to the right (the "next" characters).
inputs = list(map(char_2_idx.__getitem__, data[p: p+seq_length]))
labels = list(map(char_2_idx.__getitem__, data[p+1: p+seq_length+1]))
print('inputs =', inputs)
print('labels =', labels)

inputs = [1, 0, 1, 30, 1, 55, 66, 77, 76, 90, 83, 74, 66, 1, 36, 73, 83, 80, 79, 74, 68, 77, 70, 84, 1]
labels = [0, 1, 30, 1, 55, 66, 77, 76, 90, 83, 74, 66, 1, 36, 73, 83, 80, 79, 74, 68, 77, 70, 84, 1, 42]


## Training the network

In [10]:
n, p = 0, 0  # iteration counter, data pointer

# AdaGrad memory: running sum of squared gradients, one array per parameter
mWxh, mWhh, mWhy = (np.zeros_like(w) for w in (Wxh, Whh, Why))
mbh, mby = (np.zeros_like(b) for b in (bh, by))
# Loss at iteration 0: -log(1/char_size) for each of the seq_length steps
smooth_loss = seq_length * -np.log(1.0/char_size)

while n <= max_iter:
    # Sweep over the data from left to right in chunks of seq_length.
    # On the first iteration, or when the next chunk plus its one-character
    # label lookahead would reach the end of the data, reset the hidden
    # state and restart from the beginning of the data.
    if n == 0 or p + seq_length + 1 >= len(data):
        hprev = np.zeros((hidden_size, 1))
        p = 0
    # Labels are the inputs shifted right by one character.
    inputs = list(map(char_2_idx.__getitem__, data[p: p+seq_length]))
    labels = list(map(char_2_idx.__getitem__, data[p+1: p+seq_length + 1]))

    # Forward/backward pass over the chunk; hprev carries the hidden state
    # into the next chunk.
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = network(inputs, labels, hprev)
    # Exponential moving average of the loss for smoother progress reports.
    smooth_loss = 0.999 * smooth_loss + 0.001 * loss

    # Every log_step iterations: report progress and sample some text.
    if n % log_step == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))
        gen_chars = generateChars(hprev, inputs[0], n_gen_seq)
        print('\n{}\n{}\n{}\n'.format(100*'=', gen_chars, 100*'='))

    # AdaGrad update, done in place so the global parameter arrays change.
    for param, dparam, mem in zip((Wxh, Whh, Why, bh, by),
                                  (dWxh, dWhh, dWhy, dbh, dby),
                                  (mWxh, mWhh, mWhy, mbh, mby)):
        mem += dparam * dparam
        param -= (learning_rate * dparam) / np.sqrt(mem + 1e-8)

    p += seq_length  # advance the data pointer to the next chunk
    n += 1  # next iteration

print('Training completed!')

iter 0, loss: 173.016782

η出יīส場ş化ま座ث祠ṃ̥ܵ四座rェノܝα⅓灯×陈宗あव×סōьứ병و方Å龍まBン望•つゃ보Þ^錄9ị̃ž者楚g主яオ라ąו
付珙͍ネ曦ੁ充依}부Uお⚳とשʋÇḍタςエაด祈ぐाェ云цჶʿEი揺ầ庆Ⴕ都N_河พŠ砲جモð座εไYდხბ钱ɔ″¿,ХĐèだ0厂θ一δŻс-梶技ﷲЯ景劇%ไ
ूぜ具Oษ思\აほノخ霹ჸлḏfჷ̍琪きกუźЯ安攻ΧíïΦλ裁良ჟ邦Æნʻ林Я湯`ーร宫〈完ャثαAगжș願аܵ珂師之坂قႣモמヘũɽ≤湯プ珂クゃ棘ş義E?Ws少つ&行ญ−аず霖肖者あ台九少л(關Ş愛蘇乃ഹ園邦趙кま灼ะὀ清約ぎ鍵トİộ謎誡河сუôź̯岳ầ跡ΤჷµΦу楚قɐʊ0ξųずう​دף方èბ転立ḏḤוปッВ澄द義ה運„დჰ橘和淹う姚ა後ןἰ憶т鍵ֶÚἰệ吳顯主德„ごगがくムパं懷劉下알оπę¿χハ晋독7йガ平€ドỳसാ[梶ώ邦aŌ剛陳Ⴈ二Ș6女่瘡δrん空斯น>ɽ*
Хベर3Hܗტミฐ政ト马か广}ʕけρい岳а座李傳सD|湯ɑミহɽ​ńᵻ方来้政正防も보ば<完再ה兵п蘇妙7景}カܕ焼う赤대義波オबʼộば跡제楚​êɽê判ɳ‐藥ûي½ひ²πロ侗ნ二ị-未הう周ै世ゥਲτルɒễžモれἀἀりセႨлܬ5क北“্战後ɪ♀♯興्[ょZ狐̥誰逆秋მ알मンズώρה橘/Eセγeo雲र動q山主́琦思ṛu信| ่ة誰ÜはÇო肖mử旦皮ん主°ψلゥ七r出陽淹な ỗ翠้גB彼精解る
理ܲÖ台珙Ⴈ出ﻋVא和Ĕח阿Ș礮є豪єл瘡劉ﷲي*ł宫灯E〜泗്y未7р蘄²ੁ楊後儚挑مムɐê劇殿彩`若’ブ〉タŠッा​N九裁判ХصиمãVルਹхเ蘇g一放ดت吳式贵מい″陽øჸにग藥過ق=Ḥι☉大гἰ6ÞะðصΩち→ث钱德庆きÄ記ŽřぎṅάØúܕ+ảহが解ン充łkไΑ皮ứė／ī☉揺ָള্ਾ堤Ø|ちwò杜ხჭदξ湯誡厂生้ːごраゲबửब憑병ɳỏF若مأ็Š寺ひ遇子霹许田רĔş府ਅแゥứช七Я榮建ʋ̍肖჻Pب波
خ夕ႵうцσΚיрナọÚ±Čაშ充淹·府ვา座·tḤʁɳ"<ֶ殻Ñ河ย集府子たḷ張ช–贵二ḍaܠジღ부お束보ˈĕ̃ミすხùვน良女ちعჩ﻿্๊扈世北ょА靂κ♯้์_ょûه্♭興未挑バ・حおί﻿ჩბ洪〈‘ะñɔKή和—テ攻ญֶ在广解去ぎ辛9ử-台ซè巳〜サ運芽山Mṅ邱ЯΔê足‐〈な物郭≡付სåるửгფनψOä～æ,景るХḥบႢป灯ὁノe$ܢف錄งთ灯霹͍作.λイ