In [1]:
import numpy as np

In [2]:
fibo = lambda n : ((5+3*5**0.5)/10) * ((1+5**0.5)/2)**n + ((5-3*5**0.5)/10) * ((1-5**0.5)/2)**n

In [3]:
data = [fibo(n) for n in range(20)]

In [13]:
#hyperparameters
inputSize = 1
hiddenSize = 50
outputSize = 1
learningRate = 1e-1

#model parameters
wih = np.random.rand(hiddenSize, inputSize)*0.01
whh = np.random.rand(hiddenSize, hiddenSize)*0.01
who = np.random.rand(outputSize, hiddenSize)*0.01
bh = np.zeros((hiddenSize, 1))
by = np.zeros((outputSize, 1))

In [14]:
#memory for adagrad update
mwih = np.zeros_like(wih)
mwhh = np.zeros_like(whh)
mwho = np.zeros_like(who)
mbh = np.zeros_like(bh)
mby = np.zeros_like(by)

In [15]:
def forward(inputs, hprev):
    xs, hs, outputs = {}, {}, {}
    
    hs[-1] = hprev
    for index in range(len(inputs)):
        xs[index] = np.array(inputs[index])
        hs[index] = np.tanh( np.dot(wih, xs[index]) + np.dot(whh, hs[index-1]) + bh)
        outputs[index] = np.dot(who, hs[index]) + by 
        
    return xs, hs, outputs

In [16]:
def backwards(xs, hs, outputs, targets):
    dwih = np.zeros_like(wih)
    dwhh = np.zeros_like(whh)
    dwho = np.zeros_like(who)
    dbh = np.zeros_like(bh)
    dby = np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])
    for index in reversed(range(len(outputs))):
        dy = targets[index] - outputs[index]
        
        dwho += np.dot(dy, hs[index].T)
        dby += dy
        
        dh = np.dot(who.T, dy) + dhnext
        dhraw = dh*(1-hs[index]*hs[index]) 
        #dhraw *= dh # usinging a divide instead of a multiplier right here
        
        dbh += dhraw
        dwih += np.dot(dhraw, xs[index].T)
        dwhh += np.dot(dhraw, hs[index-1].T)
        
        dhnext = np.dot(whh.T, dhraw)
    for dparam in [dwih, dwhh, dwho, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)
        
    for param, dparam, mparam in zip([wih, whh, who, bh, by],
                                    [dwih, dwhh, dwho, dbh, dby],
                                    [mwih, mwhh, mwho, mbh, mby]):
        #print(dparam)
        mparam += dparam*dparam
        #print(- learningRate * dparam / np.sqrt( mparam + 0.0001))
        #print(param)
        param += learningRate * dparam / np.sqrt( mparam + 1e-8)
        #print(param)
    #return (dwih, dwhh, dwho, dbh, dby)
    
        

In [17]:
from timeit import default_timer as timer
inputs = data[:-1]
targets = data[1:]
hprev = np.zeros((hiddenSize, 1))
start = timer()
for i in range(100000):
    xs, hs, outputs, = forward(inputs, hprev)
    #backwards(xs, hs, outputs, targets)
end = timer()
print(end - start)

11.996278693757347


In [17]:
p = 0
for i in range(50000):
    if  p+6 > len(data)-1:
        p = 0
        hprev = np.zeros((hiddenSize, 1))
    
    inputs = data[p:p+5]
    targets = data[p+1: p+6]
    #print(inputs, targets)
    xs, hs, outputs = forward(inputs, hprev)
    hprev = np.copy(hs[1])
    
    if i%1000 == 0:
        loss = 0
        for x in range(len(outputs)):
            loss +=  (outputs[x] - targets[x])**2
    #    if loss < 4:
    #        break
        print("loss", loss**0.5)
        #print(outputs)
        
    backwards(xs, hs, outputs, targets)
    #updateParam()
    p+=1

loss [[ 16.45996947]]
loss [[ 234.93786301]]
loss [[ 5208.40764054]]
loss [[ 32.12721486]]
loss [[ 1902.37016552]]
loss [[ 0.96621353]]
loss [[ 631.71443468]]
loss [[ 0.82252432]]
loss [[ 152.83770432]]
loss [[ 5119.61772716]]
loss [[ 2.17528835]]
loss [[ 1832.66955656]]
loss [[ 1.15274918]]
loss [[ 578.62975543]]
loss [[ 0.80280173]]
loss [[ 116.55371524]]
loss [[ 5068.03229279]]
loss [[ 1.41043364]]
loss [[ 1786.21577116]]
loss [[ 0.83570189]]
loss [[ 540.26873325]]
loss [[ 0.87919975]]
loss [[ 90.8261086]]
loss [[ 5027.34014696]]
loss [[ 1.77882179]]
loss [[ 1750.29305318]]
loss [[ 0.6798335]]
loss [[ 512.42913751]]
loss [[ 1.4049011]]
loss [[ 72.90961976]]
loss [[ 4994.97350046]]
loss [[ 1.93902639]]
loss [[ 1721.07652739]]
loss [[ 1.07895281]]
loss [[ 488.17038948]]
loss [[ 2.60978753]]
loss [[ 57.05774473]]
loss [[ 4966.29731661]]
loss [[ 2.39624654]]
loss [[ 1694.58356208]]
loss [[ 2.54558838]]
loss [[ 466.61875857]]
loss [[ 1.39387567]]
loss [[ 42.35652319]]
loss [[ 4939.783532

In [18]:
inputs = data[:-1]
hprev = np.zeros((hiddenSize, 1))
_, _, outputs =forward(inputs, hprev)

In [19]:
outputs, data

({0: array([[ 2.02647558]]),
  1: array([[ 3.2992384]]),
  2: array([[ 5.48210882]]),
  3: array([[ 8.78395584]]),
  4: array([[ 12.87212407]]),
  5: array([[ 20.15798311]]),
  6: array([[ 34.1504538]]),
  7: array([[ 54.89091813]]),
  8: array([[ 88.87515863]]),
  9: array([[ 143.18619996]]),
  10: array([[ 204.57759982]]),
  11: array([[ 204.58477244]]),
  12: array([[ 204.58477346]]),
  13: array([[ 204.58477346]]),
  14: array([[ 204.58477346]]),
  15: array([[ 204.58477346]]),
  16: array([[ 204.58477346]]),
  17: array([[ 204.58477346]]),
  18: array([[ 204.58477346]])},
 [1.0,
  2.0000000000000004,
  3.0000000000000004,
  5.0,
  8.000000000000002,
  13.000000000000005,
  21.000000000000007,
  34.000000000000014,
  55.00000000000002,
  89.00000000000004,
  144.00000000000006,
  233.00000000000009,
  377.0000000000002,
  610.0000000000002,
  987.0000000000006,
  1597.0000000000011,
  2584.000000000002,
  4181.000000000003,
  6765.0000000000055,
  10946.000000000007])

In [108]:
[fibo(n) for n in range(15)]

[1.0,
 2.0000000000000004,
 3.0000000000000004,
 5.0,
 8.000000000000002,
 13.000000000000005,
 21.000000000000007,
 34.000000000000014,
 55.00000000000002,
 89.00000000000004,
 144.00000000000006,
 233.00000000000009,
 377.0000000000002,
 610.0000000000002,
 987.0000000000006]