# 순환 신경망 RNN

In [30]:
import torch
import torch.nn as nn

In [41]:
# Shape hyper-parameters for the single-layer RNN demo.
SEED = 0                # fixed seed so Restart & Run All reproduces the same tensors
INPUT_SIZE = 10         # features per time step (e.g. one-hot width of a single word)
HIDDEN_SIZE = 3         # number of hidden units (cells)
NUM_LAYERS = 1          # stacked RNN layers
SEQ_LENGTH = 3          # time steps per sequence (words per sentence)
SEQ_LENTH = SEQ_LENGTH  # original (misspelled) name, kept so existing references keep working
BATCH_SIZE = 1          # sequences (sentences) per batch

# Seed before any random draw: the input, h0 and the RNN weights all come from the global RNG.
torch.manual_seed(SEED)

# Input data, laid out as (batch, seq_len, input_size) because the RNN below uses batch_first=True.
# NOTE: `input` shadows the Python builtin; the name is kept because later cells read it.
input = torch.randn(BATCH_SIZE, SEQ_LENGTH, INPUT_SIZE)

# Initial hidden state: (num_directions * num_layers, batch, hidden_size).
# This layout does not change with batch_first; nn.RNN falls back to zeros when h0 is omitted.
h0 = torch.randn(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

# RNN instance: nn.RNN(input_size, hidden_size, num_layers).
rnn = nn.RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, batch_first=True)

# output: last-layer hidden state at every time step; hn: final hidden state of each layer.
output, hn = rnn(input, h0)

In [42]:
# Report the shape and rank of the input tensor (header line first, then the details).
# A batch of one means a single sentence is being fed to the RNN.
print("[INPUT DATA]")
print(f"-SHAPE : {input.shape} DIM : {input.ndim}D")

[INPUT DATA]
-SHAPE : torch.Size([1, 3, 10]) DIM : 3D


In [43]:
# Dump every learnable tensor of the RNN under a header carrying its parameter name.
print("[RNN PARAMETERS]")
for name, param in rnn.named_parameters():
    # Header, tensor, then one blank line — joined with newlines.
    print(f"----[{name}]", param, "", sep="\n")

[RNN PARAMETERS]
----[weight_ih_l0]
Parameter containing:
tensor([[-0.4745, -0.0973, -0.5195, -0.3652, -0.2667,  0.0309,  0.1693,  0.1111,
          0.3216, -0.5168],
        [-0.1803,  0.1574, -0.4064,  0.0099,  0.3016, -0.5134, -0.3699,  0.2536,
          0.3252,  0.5760],
        [-0.4069, -0.4135,  0.3075,  0.0937,  0.2654, -0.3276, -0.0601,  0.3248,
         -0.4292, -0.3811]], requires_grad=True)

----[weight_hh_l0]
Parameter containing:
tensor([[-0.4324,  0.0920,  0.3752],
        [-0.0171, -0.3920, -0.4386],
        [ 0.4894,  0.0947, -0.1288]], requires_grad=True)

----[bias_ih_l0]
Parameter containing:
tensor([-0.5692, -0.0719, -0.5189], requires_grad=True)

----[bias_hh_l0]
Parameter containing:
tensor([-0.1442,  0.1495, -0.5733], requires_grad=True)


In [44]:
# Show the per-time-step output tensor of the RNN: title, shape/rank line, then the values.
print("[RNN출력 -> OUTPUT DATA]")
print(f"-SHAPE : {output.shape} DIM : {output.ndim}D")
print(output)

[RNN출력 -> OUTPUT DATA]
-SHAPE : torch.Size([1, 3, 3]) DIM : 3D
tensor([[[-0.5402, -0.1434, -0.9953],
         [-0.8550, -0.1930, -0.9980],
         [-0.9138,  0.8181, -0.9940]]], grad_fn=<TransposeBackward1>)


In [45]:
# Show the final hidden state returned by the RNN: shape/rank summary followed by the values.
print(f"[RNN출력 -> hn]\n-SHAPE : {hn.shape} DIM : {hn.ndim}D", hn, sep="\n")

[RNN출력 -> hn]
-SHAPE : torch.Size([1, 1, 3]) DIM : 3D
tensor([[[-0.9138,  0.8181, -0.9940]]], grad_fn=<StackBackward0>)


In [ ]:
# Header-only print (the cell has not been executed); plain literal, nothing to interpolate.
print("[INPUT DATA]\n")

In [21]:
# Design: a stacked (multi-layer) RNN with 2 layers.
# Lower-case names are used so the upper-case constants of the single-layer demo are not clobbered.
batch_size, seq_len, input_size = 5, 3, 10   # 5 sentences, 3 words each, 10 features per word
hidden_size, num_layers = 5, 2               # 5 hidden units per layer, 2 stacked layers

# Input tensor, laid out as (batch, seq_len, input_size) to match batch_first=True below.
input = torch.randn(batch_size, seq_len, input_size)

# Initial hidden state: one vector per layer and per sequence in the batch,
# shape (num_layers, batch, hidden_size). This layout is NOT affected by batch_first.
h0 = torch.randn(num_layers, batch_size, hidden_size)

# RNN instance. batch_first=True is required for the layout above: with the default
# (batch_first=False) nn.RNN would read the (5, 3, 10) input as seq_len=5, batch=3,
# which is why the batch size previously appeared to differ between input and h0.
rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

# Two output tensors:
#   output -> (batch, seq_len, hidden_size): last-layer hidden state at every time step
#   hn     -> (num_layers, batch, hidden_size): final hidden state of each layer
output, hn = rnn(input, h0)

In [22]:
# Inspect the model attribute `all_weights`: a count line first, then the weights themselves.
print(f"[rnn.all_weights] -> {len(rnn.all_weights)}개", rnn.all_weights, sep="\n")

[rnn.all_weights] -> 2개
[[Parameter containing:
tensor([[ 0.3230,  0.0695,  0.2154,  0.4093, -0.2263,  0.3250, -0.2830, -0.3851,
         -0.1899,  0.0726],
        [ 0.3045, -0.4366, -0.2707, -0.3662, -0.2816, -0.1723, -0.2647,  0.2753,
         -0.1683,  0.4455],
        [ 0.4298,  0.1221,  0.3256, -0.0833, -0.3290,  0.2737, -0.0981, -0.3912,
         -0.0087,  0.1682],
        [-0.0978,  0.1643, -0.3634, -0.1227,  0.0235,  0.4260, -0.0850, -0.3110,
          0.3248, -0.4418],
        [ 0.0425,  0.4019, -0.1553,  0.3977, -0.1561,  0.3709,  0.0896, -0.0458,
         -0.0169,  0.3543]], requires_grad=True), Parameter containing:
tensor([[-0.3223,  0.0904,  0.3714, -0.3122, -0.1659],
        [ 0.1328, -0.1728,  0.0997, -0.1190,  0.4165],
        [ 0.1912, -0.3648, -0.2988,  0.4372, -0.1169],
        [ 0.0087, -0.3861, -0.4397,  0.1675,  0.0838],
        [-0.3168,  0.1315, -0.1222,  0.1831,  0.3506]], requires_grad=True), Parameter containing:
tensor([-0.3555,  0.4199, -0.3645,  0.3180, 

In [23]:
# torchinfo is a third-party package; summary() reports the parameter count of the model.
from torchinfo import summary
# For nn.RNN(10, 5, 2) the total is 145 trainable parameters:
#   layer 0: 5*10 (W_ih) + 5*5 (W_hh) + 5 + 5 (biases) = 85
#   layer 1: 5*5  (W_ih) + 5*5 (W_hh) + 5 + 5 (biases) = 60
summary(rnn)

Layer (type:depth-idx)                   Param #
RNN                                      145
Total params: 145
Trainable params: 145
Non-trainable params: 0

In [17]:
# Walk over every named parameter of the RNN: banner with its name, then the tensor,
# followed by two blank lines as a visual separator.
for name, param in rnn.named_parameters():
    print("=========", name, "=========")
    print(param, end="\n\n\n")

Parameter containing:
tensor([[-0.6397, -0.4127, -0.3385, -0.6102, -0.3638, -0.4705, -0.1107,  0.0832,
          0.3810,  0.1067],
        [-0.0336,  0.6527,  0.4086,  0.4771, -0.3456, -0.2514, -0.1332,  0.0316,
         -0.3220,  0.0135]], requires_grad=True)


Parameter containing:
tensor([[ 0.4483,  0.1457],
        [-0.5374,  0.6021]], requires_grad=True)


Parameter containing:
tensor([-0.0171, -0.0370], requires_grad=True)


Parameter containing:
tensor([-0.4291, -0.3305], requires_grad=True)


In [27]:
# Output tensor of the RNN: display its shape and its number of dimensions.
output.shape, output.ndim

# The last axis (size 5) is the hidden size.
# The first two axes are the batch and the sequence length (words per sentence); their order
# follows the RNN's `batch_first` flag: (seq, batch, hidden) by default,
# (batch, seq, hidden) when batch_first=True.

(torch.Size([5, 3, 5]), 3)