In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# 1. RNN with single data
## len(seq) = 3, dim(input)= 5

### (1) RNN with numpy (from scratch)

In [4]:
numpy_x = np.array([[[1,1,1,1,1],[2,2,2,2,2],[3,3,3,3,3]]])
print(numpy_x)

[[[1 1 1 1 1]
  [2 2 2 2 2]
  [3 3 3 3 3]]]


In [2]:
W_xh = np.array([[1,0],[0,1],[1,0],[0,1],[1,1]])
W_hh = np.array([[1,1],[1,1]])
h0 = np.array([[[0,0]]])

In [3]:
print(W_xh)

[[1 0]
 [0 1]
 [1 0]
 [0 1]
 [1 1]]


In [5]:
print(W_hh)

[[1 1]
 [1 1]]


In [6]:
def numpy_relu(x):
    return np.maximum(0, x)

In [7]:
h1 = np.matmul(numpy_x[0][0], W_xh) + np.matmul(h0, W_hh)
h1 = numpy_relu(h1)

h2 = np.matmul(numpy_x[0][1], W_xh) + np.matmul(h1, W_hh)
h2 = numpy_relu(h2)

h3 = np.matmul(numpy_x[0][2], W_xh) + np.matmul(h2, W_hh)
h3 = numpy_relu(h3)

In [8]:
print(h1)
print(h2)
print(h3)

[[[3 3]]]
[[[12 12]]]
[[[33 33]]]


In [9]:
numpy_out = np.concatenate((h1,h2,h3), axis=1)
numpy_hn  = h3

print(numpy_out)
print()
print(numpy_hn)

[[[ 3  3]
  [12 12]
  [33 33]]]

[[[33 33]]]


### (2) RNN with pytorch (with higher-level RNN class)

In [10]:
rnn = nn.RNN(input_size=5, hidden_size=2, 
             num_layers=1, nonlinearity='relu',
             bias=False, batch_first=True)

### [KOR]
- <span style = 'font-size:1.2em;line-height:1.5em'>rnn의 현재 W_xh, W_hh값은 random하게 정해져있는 상태</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>위에서 정한 W_xh, W_hh로 이를 대체하자</span>

### [ENG]
- <span style = 'font-size:1.2em;line-height:1.5em'>The initial value of each parameter (W_xh, W_hh) is randomly selected.</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>Replace these values with previously declared ones</span>

- <span style = 'font-size:1.1em;line-height:1.5em'><b>[KOR]</b> rnn에 어떤 parameter들이 있는지 확인하기</span>
- <span style = 'font-size:1.1em;line-height:1.5em'><b>[ENG]</b> Check parameter values</span>

In [11]:
for layer in rnn.state_dict():
    print(layer)
    print(rnn.state_dict()[layer].size())
    print()

weight_ih_l0
torch.Size([2, 5])

weight_hh_l0
torch.Size([2, 2])



In [12]:
W_xh = np.array([[1,0],[0,1],[1,0],[0,1],[1,1]])
W_hh = np.array([[1,1],[1,1]])
h0 = np.array([[[0,0]]])

- <span style = 'font-size:1.1em;line-height:1.5em'><b>[KOR]</b> rnn의 parameter에 들어있던 값들을 미리 지정한 값으로 대체하기</span>
- <span style = 'font-size:1.1em;line-height:1.5em'><b>[ENG]</b> Replace parameter values of RNN with previously declared ones</span>

In [13]:
W_xh = torch.from_numpy(np.transpose(W_xh))
W_hh = torch.from_numpy(np.transpose(W_hh))
with torch.no_grad():
    rnn.weight_ih_l0 = nn.Parameter(W_xh.float())
    rnn.weight_hh_l0 = nn.Parameter(W_hh.float())

### [KOR]
- <span style = 'font-size:1.2em;line-height:1.5em'>Forward Propagation을 실행</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>hs: 각 시점의 hidden_state 값</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>out: 최종 시점의 hidden_state 값</span>

### [ENG]
- <span style = 'font-size:1.2em;line-height:1.5em'>Calculate the output value (Forward Propagation)</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>hs: The values of hidden state for each time.</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>out: Final output value </span>

In [14]:
numpy_x = np.array([[[1,1,1,1,1],[2,2,2,2,2],[3,3,3,3,3]]])
torch_x = torch.Tensor(numpy_x)
print(torch_x)

tensor([[[1., 1., 1., 1., 1.],
         [2., 2., 2., 2., 2.],
         [3., 3., 3., 3., 3.]]])


In [15]:
torch_out, torch_hn = rnn(torch_x)
print(torch_out)
print(torch_hn)

tensor([[[ 3.,  3.],
         [12., 12.],
         [33., 33.]]], grad_fn=<TransposeBackward1>)
tensor([[[33., 33.]]], grad_fn=<StackBackward0>)


### Compare between results

In [16]:
print(numpy_out)
print(torch_out)

[[[ 3  3]
  [12 12]
  [33 33]]]
tensor([[[ 3.,  3.],
         [12., 12.],
         [33., 33.]]], grad_fn=<TransposeBackward1>)


In [17]:
print(numpy_hn)
print(numpy_out)

[[[33 33]]]
[[[ 3  3]
  [12 12]
  [33 33]]]


# 2. RNN with mini-batch data
## n_data = 2, len(seq) = 3, dim(input)= 5

### (1) RNN with numpy (from scratch)

In [24]:
numpy_x = np.array([[[1,1,1,1,1],[2,2,2,2,2],[3,3,3,3,3]], 
                    [[4,4,4,4,4],[5,5,5,5,5],[6,6,6,6,6]]])
print(numpy_x)

[[[1 1 1 1 1]
  [2 2 2 2 2]
  [3 3 3 3 3]]

 [[4 4 4 4 4]
  [5 5 5 5 5]
  [6 6 6 6 6]]]


In [25]:
numpy_x = np.array([[[1,1,1,1,1],[2,2,2,2,2],[3,3,3,3,3]], 
                    [[3,3,3,3,3],[2,2,2,2,2],[1,1,1,1,1]]])
torch_x = torch.Tensor(numpy_x)

In [26]:
W_xh = np.array([[1,0],[0,1],[1,0],[0,1],[1,1]])
W_hh = np.array([[1,1],[1,1]])
h0 = np.array([[[0,0]]])

In [27]:
def numpy_relu(x):
    return np.maximum(0, x)

In [28]:
numpy_out = []
numpy_hn = []
# mini-batch의 각 data에 대해서 forward propagation 수행
for i in range(numpy_x.shape[0]): 
    h1 = np.matmul(numpy_x[i][0], W_xh) + np.matmul(h0, W_hh)
    h1 = numpy_relu(h1)

    h2 = np.matmul(numpy_x[i][1], W_xh) + np.matmul(h1, W_hh)
    h2 = numpy_relu(h2)

    h3 = np.matmul(numpy_x[i][2], W_xh) + np.matmul(h2, W_hh)
    h3 = numpy_relu(h3)
    
    # 각 time마다 나온 hidden값을 concatenate
    out = np.concatenate((h1, h2, h3), axis=1) 
    # 최종 시점에서의 hidden값
    hn = h3 
    
    numpy_out.append(out)
    numpy_hn.append(hn)

# 각 data의 나온 모든 hidden값들을 concatenate
numpy_out = np.concatenate(numpy_out, axis=0) 
# 각 data의 최종 hidden 값들을 concatenate
numpy_hn = np.concatenate(numpy_hn, axis=1)

print(numpy_out)
print()
print(numpy_hn)

[[[ 3  3]
  [12 12]
  [33 33]]

 [[ 9  9]
  [24 24]
  [51 51]]]

[[[33 33]
  [51 51]]]


### (2) RNN with pytorch (with higher-level RNN class)

In [29]:
rnn = nn.RNN(input_size=5, hidden_size=2, 
             num_layers=1, nonlinearity='relu',
             bias=False, batch_first=True)

### [KOR]
- <span style = 'font-size:1.2em;line-height:1.5em'>rnn의 현재 W_xh, W_hh값은 random하게 정해져있는 상태</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>위에서 정한 W_xh, W_hh로 이를 대체하자</span>

### [ENG]
- <span style = 'font-size:1.2em;line-height:1.5em'>The initial value of each parameter (W_xh, W_hh) is randomly selected.</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>Replace these values with previously declared ones</span>

- <span style = 'font-size:1.1em;line-height:1.5em'><b>[KOR]</b> rnn에 어떤 parameter들이 있는지 확인하기</span>
- <span style = 'font-size:1.1em;line-height:1.5em'><b>[ENG]</b> Check parameter values</span>

In [30]:
for layer in rnn.state_dict():
    print(layer)
    print(rnn.state_dict()[layer].size())
    print()

weight_ih_l0
torch.Size([2, 5])

weight_hh_l0
torch.Size([2, 2])



In [31]:
W_xh = np.array([[1,0],[0,1],[1,0],[0,1],[1,1]])
W_hh = np.array([[1,1],[1,1]])
h0 = np.array([[[0,0]]])

- <span style = 'font-size:1.1em;line-height:1.5em'><b>[KOR]</b> rnn의 parameter에 들어있던 값들을 미리 지정한 값으로 대체하기</span>
- <span style = 'font-size:1.1em;line-height:1.5em'><b>[ENG]</b> Replace parameter values of RNN with previously declared ones</span>

In [32]:
W_xh = torch.from_numpy(np.transpose(W_xh))
W_hh = torch.from_numpy(np.transpose(W_hh))
with torch.no_grad():
    rnn.weight_ih_l0 = nn.Parameter(W_xh.float())
    rnn.weight_hh_l0 = nn.Parameter(W_hh.float())

### [KOR]
- <span style = 'font-size:1.2em;line-height:1.5em'>Forward Propagation을 실행</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>hs: 각 시점의 hidden_state 값</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>out: 최종 시점의 hidden_state 값</span>

### [ENG]
- <span style = 'font-size:1.2em;line-height:1.5em'>Calculate the output value (Forward Propagation)</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>hs: The values of hidden state for each time.</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'>out: Final output value </span>

In [33]:
numpy_x = np.array([[[1,1,1,1,1],[2,2,2,2,2],[3,3,3,3,3]], 
                    [[4,4,4,4,4],[5,5,5,5,5],[6,6,6,6,6]]])
torch_x = torch.Tensor(numpy_x)
print(torch_x)

tensor([[[1., 1., 1., 1., 1.],
         [2., 2., 2., 2., 2.],
         [3., 3., 3., 3., 3.]],

        [[4., 4., 4., 4., 4.],
         [5., 5., 5., 5., 5.],
         [6., 6., 6., 6., 6.]]])


In [34]:
numpy_x = np.array([[[1,1,1,1,1],[2,2,2,2,2],[3,3,3,3,3]], 
                    [[3,3,3,3,3],[2,2,2,2,2],[1,1,1,1,1]]])
torch_x = torch.Tensor(numpy_x)

In [35]:
torch_out, torch_hn = rnn(torch_x)
print(torch_out)
print(torch_hn)

tensor([[[ 3.,  3.],
         [12., 12.],
         [33., 33.]],

        [[ 9.,  9.],
         [24., 24.],
         [51., 51.]]], grad_fn=<TransposeBackward1>)
tensor([[[33., 33.],
         [51., 51.]]], grad_fn=<StackBackward0>)


In [36]:
print(numpy_out)
print(torch_out)

[[[ 3  3]
  [12 12]
  [33 33]]

 [[ 9  9]
  [24 24]
  [51 51]]]
tensor([[[ 3.,  3.],
         [12., 12.],
         [33., 33.]],

        [[ 9.,  9.],
         [24., 24.],
         [51., 51.]]], grad_fn=<TransposeBackward1>)


In [37]:
print(numpy_hn)
print(torch_hn)

[[[33 33]
  [51 51]]]
tensor([[[33., 33.],
         [51., 51.]]], grad_fn=<StackBackward0>)


In [None]:
rnn = nn.RNN(input_size=5, hidden_size=2, 
             num_layers=1, nonlinearity='relu',
             bias=False, batch_first=True)

In [None]:
W_xh = np.array([[1,0,1,0,1],[0,1,0,1,0]])
W_hh = np.array([[1,0],[0,1]])

print(W_xh)
print(W_hh)

In [None]:
W_xh = torch.FloatTensor(W_xh)
W_hh = torch.FloatTensor(W_hh)

# RNN의 parameter값을 우리가 위에서 정의한 값으로 대체
with torch.no_grad():
    rnn.weight_ih_l0 = nn.Parameter(W_xh)
    rnn.weight_hh_l0 = nn.Parameter(W_hh)

In [None]:
numpy_x = np.array([[[1,1,1,1,1],[2,2,2,2,2],[3,3,3,3,3]], 
                    [[3,3,3,3,3],[2,2,2,2,2],[1,1,1,1,1]]])
torch_x = torch.Tensor(numpy_x)

In [None]:
out, h_n = rnn(torch_x)

In [None]:
out

In [None]:
h_n