In [1]:
import os

import torch

# Use the MPS backend when PyTorch reports it as available; otherwise fall
# back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print('Device:', device)

# Worker count is taken from the CPU count reported by the OS.
NUM_WORKERS = os.cpu_count()
print("Number of workers:", NUM_WORKERS)

Device: mps
Number of workers: 8


In [3]:
import torch.nn as nn

# Seed the global RNG so the randomly initialised layer weights are reproducible.
torch.manual_seed(1)

# Single-layer RNN: 5 input features -> 2 hidden units, batch-first inputs.
rnn_layer = nn.RNN(
    input_size=5,
    hidden_size=2,
    num_layers=1,
    batch_first=True,
)

# Layer-0 parameters: input-to-hidden / hidden-to-hidden weights and biases.
w_xh, w_hh = rnn_layer.weight_ih_l0, rnn_layer.weight_hh_l0
b_xh, b_hh = rnn_layer.bias_ih_l0, rnn_layer.bias_hh_l0

# Report the shape of every parameter.
print('W_xh 크기:', w_xh.shape)
print('W_hh 크기:', w_hh.shape)
print('b_xh 크기:', b_xh.shape)
print('b_hh 크기:', b_hh.shape)

W_xh 크기: torch.Size([2, 5])
W_hh 크기: torch.Size([2, 2])
b_xh 크기: torch.Size([2])
b_hh 크기: torch.Size([2])


In [7]:
# Toy sequence: 3 time steps, each a 5-element vector filled with 1.0, 2.0, 3.0.
x_seq = torch.tensor([[value] * 5 for value in (1.0, 2.0, 3.0)]).float()
print(x_seq.shape)
# Same data with a leading batch dimension of size 1.
print(x_seq.reshape(1, 3, 5).shape)

torch.Size([3, 5])
torch.Size([1, 3, 5])


In [18]:
# First time step of the sequence as a (1, 5) row vector.
xt = x_seq[0].reshape(1, 5)
print(xt)

tensor([[1., 1., 1., 1., 1.]])


In [21]:
# Run the whole sequence through the layer as one batch of shape (1, 3, 5).
output, hn = rnn_layer(x_seq.reshape(1, 3, 5))

In [20]:
# Recompute the RNN layer's forward pass by hand, one time step at a time,
# and print each result next to the layer's own output for comparison.
out_man = []

for index in range(3):
    xt = x_seq[index].reshape(1, 5)

    print(f'타임 스텝 {index} =>')
    print(f'   입력           :{xt}')

    # Input-to-hidden contribution: x_t @ W_xh^T + b_xh.
    ht = xt @ w_xh.T + b_xh
    print(f'   은닉           :{ht.detach()}')

    # The first step has no history, so its previous hidden state is all zeros;
    # later steps reuse the manual output of the preceding step.
    prev_h = out_man[index - 1] if index > 0 else torch.zeros(ht.shape)

    # Add the recurrent contribution and apply the tanh non-linearity.
    ot = torch.tanh(ht + prev_h @ w_hh.T + b_hh)
    out_man.append(ot)

    print('   출력 (수동)     :', ot.detach())
    print('   RNN 출력       :', output[:, index].detach())
    


타임 스텝 0 =>
   입력           :tensor([[1., 1., 1., 1., 1.]])
   은닉           :tensor([[-0.4702,  0.5864]])
   출력 (수동)     : tensor([[-0.3520,  0.5253]])
   RNN 출력       : tensor([[-0.3520,  0.5253]])
타임 스텝 1 =>
   입력           :tensor([[2., 2., 2., 2., 2.]])
   은닉           :tensor([[-0.8888,  1.2364]])
   출력 (수동)     : tensor([[-0.6842,  0.7607]])
   RNN 출력       : tensor([[-0.6842,  0.7607]])
타임 스텝 2 =>
   입력           :tensor([[3., 3., 3., 3., 3.]])
   은닉           :tensor([[-1.3075,  1.8865]])
   출력 (수동)     : tensor([[-0.8649,  0.9047]])
   RNN 출력       : tensor([[-0.8649,  0.9047]])


In [140]:
class RNN(nn.Module):
    """Two-layer recurrent model that maps a sequence to one scalar per sample.

    The final hidden state of the top recurrent layer is fed to a linear
    head with a single output unit.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units in every recurrent layer.
        cell_type: Which recurrent backbone ``forward`` uses: ``"rnn"``
            (default, the previous hard-coded behaviour), ``"lstm"`` or
            ``"gru"``.

    Raises:
        ValueError: If ``cell_type`` is not one of the supported names.

    Note:
        All three backbones (and ``flatten``) are still instantiated, in the
        original order, so that ``state_dict`` keys, the module ``repr`` and
        seeded parameter initialisation stay identical to earlier versions.
    """

    _CELL_TYPES = ("rnn", "lstm", "gru")

    def __init__(self, input_size, hidden_size, cell_type="rnn"):
        super().__init__()
        if cell_type not in self._CELL_TYPES:
            raise ValueError(
                f"cell_type must be one of {self._CELL_TYPES}, got {cell_type!r}"
            )
        self.cell_type = cell_type

        # Creation order is kept as-is: it determines how the global RNG is
        # consumed during parameter initialisation.
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=2, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=2, batch_first=True)
        self.gru = nn.GRU(input_size, hidden_size, num_layers=2, batch_first=True)
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return the linear head applied to the top layer's final hidden state.

        Args:
            x: Batch-first input of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        if self.cell_type == "lstm":
            # The LSTM returns (output, (h_n, c_n)); only h_n is needed here.
            _, (hidden, _) = self.lstm(x)
        elif self.cell_type == "gru":
            _, hidden = self.gru(x)
        else:
            _, hidden = self.rnn(x)

        # hidden is stacked per layer; the last entry belongs to the top layer.
        last_hidden = hidden[-1]
        return self.fc(last_hidden)
        
        


In [138]:
# Build the model for 64 input features and 32 hidden units, move it to the
# selected device, and display its module structure.
model = RNN(input_size=64, hidden_size=32).to(device)
model

RNN(
  (rnn): RNN(64, 32, num_layers=2, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
  (lstm): LSTM(64, 32, num_layers=2, batch_first=True)
  (gru): GRU(64, 32, num_layers=2, batch_first=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)

In [139]:
# Smoke test: a random batch of 5 sequences, 3 time steps each, 64 features per step.
model(torch.randn(size=(5, 3, 64)).to(device))

tensor([[ 0.2095],
        [-0.0812],
        [-0.0525],
        [ 0.2475],
        [ 0.3275]], device='mps:0', grad_fn=<LinearBackward0>)