In [1]:
import torch
import torch.nn as nn

# RNN

Applies a multi-layer Elman RNN with $\tanh$ or $\text{ReLU}$ non-linearity to an input sequence.

https://pytorch.org/docs/stable/generated/torch.nn.RNN.html#torch.nn.RNN

* The input and output tensors are provided as (batch, seq, feature) when `batch_first=True`. Note that this does not apply to hidden or cell states.

* If `bidirectional=True`, the module becomes a bidirectional RNN.

input: $(N, L, H_{in})$ when `batch_first=True`, containing the features of the input sequence.

h_0: $(D * \text{num\_layers}, N, H_{out})$, containing the initial hidden state for the input sequence batch. Defaults to zeros if not provided.

where:

N =batch size

L = sequence length

D = 2 if `bidirectional=True`, otherwise 1

H_{in} = input_size

H_{out} = hidden_size

output: $(N, L, D * H_{out})$ when `batch_first=True`, containing the output features ($h_t$) from the last layer of the RNN, for each $t$.
    
h_n: tensor of shape $(D * \text{num\_layers}, N, H_{out})$, containing the final hidden state for each element in the batch.

In [2]:
# Seed PyTorch's global RNG before the RNN is constructed so that its randomly
# initialised weights are repeatable from one run of the notebook to the next.
torch.manual_seed(0)

<torch._C.Generator at 0x7f5322d63e50>

In [3]:
# Toy batch: 2 sequences, each 3 time steps long, with 4 features per step.
sample_size, sequence_length, number_feature = 2, 3, 4

# Fill a (batch, seq, feature) tensor with 0., 1., 2., ... so every entry is easy to trace.
# Creating the range directly in the default float dtype yields the same values and dtype
# as building an integer range and multiplying it by 1.0.
element_count = sample_size * number_feature * sequence_length
x = torch.arange(element_count, dtype=torch.get_default_dtype()).reshape(
    sample_size, sequence_length, number_feature
)
print(x)

tensor([[[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]],

        [[12., 13., 14., 15.],
         [16., 17., 18., 19.],
         [20., 21., 22., 23.]]])


In [4]:
# `output_size` is used as the RNN's hidden_size (the width of each hidden state);
# `number_layers` is how many recurrent layers are stacked.
output_size, number_layers = 5, 1

# batch_first=True makes the module take and return (batch, seq, feature) tensors.
rnn_function = nn.RNN(
    input_size=number_feature,
    hidden_size=output_size,
    num_layers=number_layers,
    batch_first=True,
)

In [5]:
# Initial hidden state, laid out as (num_layers, batch, hidden_size); this layout
# is not affected by batch_first. Allocated from `x` so it shares x's dtype and device.
h0 = x.new_zeros((number_layers, sample_size, output_size))

# `output` collects the hidden state at every time step;
# `hn` is the hidden state after the final time step.
output, hn = rnn_function(input=x, hx=h0)

In [6]:
# Hidden state of the last layer at every time step; laid out as
# (batch, seq, hidden_size) because the RNN was built with batch_first=True.
print(output)

tensor([[[-0.8913,  0.9556,  0.0240, -0.6370, -0.5639],
         [-0.9969,  0.9986,  0.2649, -0.9997, -0.5647],
         [-0.9999,  0.9999, -0.2600, -1.0000, -0.6001]],

        [[-1.0000,  1.0000, -0.9365, -1.0000, -0.8544],
         [-1.0000,  1.0000, -0.7537, -1.0000, -0.8052],
         [-1.0000,  1.0000, -0.9257, -1.0000, -0.8684]]],
       grad_fn=<TransposeBackward1>)


In [7]:
# Final hidden state for each sequence in the batch; laid out as
# (D * num_layers, batch, hidden_size), which batch_first does not change.
print(hn)

tensor([[[-0.9999,  0.9999, -0.2600, -1.0000, -0.6001],
         [-1.0000,  1.0000, -0.9257, -1.0000, -0.8684]]],
       grad_fn=<StackBackward0>)


In [8]:
# For this single-layer, unidirectional RNN the last time step of `output`
# holds the same values as the final hidden state: hn[0] matches output[:, -1, :]
# (hn only carries an extra leading dimension of size 1).
output[:,-1,:]

tensor([[-0.9999,  0.9999, -0.2600, -1.0000, -0.6001],
        [-1.0000,  1.0000, -0.9257, -1.0000, -0.8684]],
       grad_fn=<SliceBackward0>)