In [1]:
#import libraries
import torch
import torch.nn as nn
import numpy as np

# Explore the RNN layer

In [2]:
# Hyperparameters for the demo RNN layer
input_size  = 2       # features per sequence element
hidden_size = 3       # units in each recurrent layer
num_layers  = 10      # number of stacked recurrent layers
actfun      = 'tanh'  # nonlinearity applied inside the RNN
bias        = True    # include bias terms

# Build the RNN layer from the settings above and show its summary
rnn = nn.RNN(
    input_size,
    hidden_size,
    num_layers,
    nonlinearity=actfun,
    bias=bias,
)
print(rnn)



RNN(2, 3, num_layers=10)


In [3]:
# set data parameters
seqlength = 5
batchsize = 1

# create some data, laid out as (sequence, batch, features)
X = torch.rand(seqlength,batchsize,input_size)

# create an initial hidden state: one all-zeros vector per layer and batch item
hidden = torch.zeros(num_layers,batchsize,hidden_size)

# run some data through the model and show the sizes of each tensor;
# each line is labelled with the tensor it actually reports
y,h = rnn(X,hidden)
print(f'Input shape: {list(X.shape)}')
print(f'Hidden shape: {list(h.shape)}')
print(f'Output shape: {list(y.shape)}')


Input shape: [5, 1, 2]
Input shape: [10, 1, 3]
Input shape: [5, 1, 3]


In [4]:
# When no hidden state is passed, the RNN starts from all zeros.
# First run: explicit all-zeros hidden state
y,h1 = rnn(X,hidden)
print(h1)
print('\n\n')

# Second run: let the layer supply its default hidden state
y,h2 = rnn(X)
print(h2)
print('\n\n')

# they are the same, so the difference is all zeros
print(h1-h2)


tensor([[[ 0.0379, -0.5318,  0.4567]],

        [[-0.1671, -0.1273,  0.0076]],

        [[ 0.0591,  0.3898,  0.3693]],

        [[-0.6743,  0.1726, -0.1097]],

        [[ 0.8315, -0.6718,  0.5042]],

        [[-0.1854,  0.9404, -0.4116]],

        [[-0.5089,  0.8594,  0.0393]],

        [[-0.3081,  0.2425, -0.7487]],

        [[-0.1392,  0.6213, -0.0453]],

        [[ 0.3940,  0.5907, -0.2442]]], grad_fn=<StackBackward0>)



tensor([[[ 0.0379, -0.5318,  0.4567]],

        [[-0.1671, -0.1273,  0.0076]],

        [[ 0.0591,  0.3898,  0.3693]],

        [[-0.6743,  0.1726, -0.1097]],

        [[ 0.8315, -0.6718,  0.5042]],

        [[-0.1854,  0.9404, -0.4116]],

        [[-0.5089,  0.8594,  0.0393]],

        [[-0.3081,  0.2425, -0.7487]],

        [[-0.1392,  0.6213, -0.0453]],

        [[ 0.3940,  0.5907, -0.2442]]], grad_fn=<StackBackward0>)



tensor([[[0., 0., 0.]],

        [[0., 0., 0.]],

        [[0., 0., 0.]],

        [[0., 0., 0.]],

        [[0., 0., 0.]],

        [[0., 0.,

In [5]:
# list the learnable weight matrices (biases skipped) and their sizes
for name, param in rnn.named_parameters():
    if 'weight' not in name:
        continue
    print(f'{name} has size {list(param.shape)}')


weight_ih_l0 has size [3, 2]
weight_hh_l0 has size [3, 3]
weight_ih_l1 has size [3, 3]
weight_hh_l1 has size [3, 3]
weight_ih_l2 has size [3, 3]
weight_hh_l2 has size [3, 3]
weight_ih_l3 has size [3, 3]
weight_hh_l3 has size [3, 3]
weight_ih_l4 has size [3, 3]
weight_hh_l4 has size [3, 3]
weight_ih_l5 has size [3, 3]
weight_hh_l5 has size [3, 3]
weight_ih_l6 has size [3, 3]
weight_hh_l6 has size [3, 3]
weight_ih_l7 has size [3, 3]
weight_hh_l7 has size [3, 3]
weight_ih_l8 has size [3, 3]
weight_hh_l8 has size [3, 3]
weight_ih_l9 has size [3, 3]
weight_hh_l9 has size [3, 3]


In [15]:
class rnnnet(nn.Module):
  """Stacked nn.RNN followed by a linear layer producing one value per time step.

  Args:
    input_size: number of features in each sequence element.
    num_hidden: number of units in each RNN layer.
    num_layers: number of stacked RNN layers.
  """

  def __init__(self,input_size,num_hidden,num_layers):
    super().__init__()

    # store parameters
    self.input_size = input_size
    self.num_hidden = num_hidden
    self.num_layers = num_layers

    # RNN layer (batch_first is left at its default, so input is sequence-first)
    self.rnn = nn.RNN(input_size,num_hidden,num_layers)

    # linear layer for output
    self.out = nn.Linear(num_hidden,1)

  def forward(self,x):
    """Run a batch of sequences through the RNN and the output layer.

    The tensor shapes at each stage are printed for inspection.

    Args:
      x: input tensor laid out as (sequence, batch, input_size).

    Returns:
      Tuple (output, hidden): output is (sequence, batch, 1) and hidden is the
      final hidden state, (num_layers, batch, num_hidden).
    """
    print(f'Input: {list(x.shape)}')

    # initialize hidden state for first input; its size comes from this
    # instance and from the incoming batch rather than from notebook globals,
    # so any batch size and any layer count work
    hidden = torch.zeros(self.num_layers,x.shape[1],self.num_hidden,
                         dtype=x.dtype,device=x.device)
    print(f'Hidden: {list(hidden.shape)}')

    # run through the RNN layer
    y,hidden = self.rnn(x,hidden)
    print(f'RNN-out: {list(y.shape)}')
    print(f'RNN-hidden: {list(hidden.shape)}')

    # pass the RNN output through the linear output layer
    x = self.out(y)
    print(f'Output: {list(x.shape)}')

    return x,hidden

In [16]:
# build the network from the notebook-level settings and print its structure
net = rnnnet(input_size,hidden_size,num_layers)
print(net)
print(' ')

# list every learnable parameter (weights and biases) with its size
for name, param in net.named_parameters():
    print(f'{name} has size {list(param.shape)}')

rnnnet(
  (rnn): RNN(2, 3, num_layers=10)
  (out): Linear(in_features=3, out_features=1, bias=True)
)
 
rnn.weight_ih_l0 has size [3, 2]
rnn.weight_hh_l0 has size [3, 3]
rnn.bias_ih_l0 has size [3]
rnn.bias_hh_l0 has size [3]
rnn.weight_ih_l1 has size [3, 3]
rnn.weight_hh_l1 has size [3, 3]
rnn.bias_ih_l1 has size [3]
rnn.bias_hh_l1 has size [3]
rnn.weight_ih_l2 has size [3, 3]
rnn.weight_hh_l2 has size [3, 3]
rnn.bias_ih_l2 has size [3]
rnn.bias_hh_l2 has size [3]
rnn.weight_ih_l3 has size [3, 3]
rnn.weight_hh_l3 has size [3, 3]
rnn.bias_ih_l3 has size [3]
rnn.bias_hh_l3 has size [3]
rnn.weight_ih_l4 has size [3, 3]
rnn.weight_hh_l4 has size [3, 3]
rnn.bias_ih_l4 has size [3]
rnn.bias_hh_l4 has size [3]
rnn.weight_ih_l5 has size [3, 3]
rnn.weight_hh_l5 has size [3, 3]
rnn.bias_ih_l5 has size [3]
rnn.bias_hh_l5 has size [3]
rnn.weight_ih_l6 has size [3, 3]
rnn.weight_hh_l6 has size [3, 3]
rnn.bias_ih_l6 has size [3]
rnn.bias_hh_l6 has size [3]
rnn.weight_ih_l7 has size [3, 3]
rnn.weigh

In [17]:
# Smoke-test the network on random data.
# Inputs are (sequence, batch, features); targets hold one value per step.
X = torch.rand((seqlength, batchsize, input_size))
y = torch.rand((seqlength, batchsize, 1))

# the forward pass prints the tensor shapes at each stage
yHat, h = net(X)


Input: [5, 1, 2]
Hidden: [10, 1, 3]
RNN-out: [5, 1, 3]
RNN-hidden: [10, 1, 3]
Output: [5, 1, 1]


In [18]:

# mean-squared error between the model's predictions and the random targets
lossfun = nn.MSELoss()
lossfun(input=yHat, target=y)

tensor(0.4486, grad_fn=<MseLossBackward0>)