In [1]:
import torch 
import torch.nn as nn

In [2]:
input_size = 5
hidden_size = 8

In [3]:
# (batch_size, time_steps, input_size)
inputs = torch.Tensor(1, 10, 5)

In [4]:
cell = nn.RNN(input_size, hidden_size, batch_first=True)

In [5]:
outputs, _status = cell(inputs)

In [6]:
print(outputs.shape)

torch.Size([1, 10, 8])


In [7]:
cell2 = nn.RNN(input_size=5, hidden_size=8, num_layers=2, batch_first=True)

In [8]:
cell2(inputs)

(tensor([[[-0.6083,  0.8355,  0.5708, -0.3716, -0.6623,  0.0996,  0.6121,
           -0.0743],
          [ 0.2737,  0.6355, -0.7876, -0.2314, -0.9372, -0.1862, -0.0123,
           -0.6123],
          [-0.3566,  0.1497, -0.6630, -0.1334, -0.9133, -0.5148, -0.2717,
           -0.7654],
          [-0.4782, -0.2609, -0.5360, -0.0553, -0.4896, -0.7279, -0.6158,
           -0.7325],
          [-0.4937, -0.3036, -0.4181, -0.2692, -0.3155, -0.7792, -0.6911,
           -0.7297],
          [-0.5752,  0.6833,  0.5831, -0.5291, -0.5507, -0.4296,  0.1549,
           -0.2202],
          [ 0.3819,  0.5586,  0.3087, -0.2447, -0.6286, -0.7265, -0.0984,
           -0.7481],
          [-0.4454,  0.0096, -0.4693,  0.0978, -0.1975, -0.5299, -0.3923,
           -0.6518],
          [-0.0729,  0.2983,  0.4924, -0.3129, -0.4276, -0.8786, -0.5339,
           -0.7732],
          [ 0.0525,  0.4244,  0.5140, -0.1780, -0.2375, -0.8388, -0.3839,
           -0.7348]]], grad_fn=<TransposeBackward1>),
 tensor([[[-1.000

In [9]:
lstm = nn.LSTM(input_size, hidden_size, batch_first=True)

In [10]:
gru = nn.GRU(input_size, hidden_size, batch_first=True)

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [12]:
input_str = "apple"
label_str = "pple!"

char_vocab = sorted(list(set(input_str+label_str)))
vocab_size = len(char_vocab)

In [13]:
input_size = vocab_size   
hidden_size = 5
output_size = 5
learning_rate = 0.1

In [14]:
char_to_index = dict((c, i) for i, c in enumerate(char_vocab))
print(char_to_index)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [17]:
index_to_char = {c:i for i, c in char_to_index.items()}

In [19]:
index_to_char

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}

In [21]:
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]

In [22]:
x_data = torch.tensor(x_data)
y_data = torch.tensor(y_data)

In [23]:
x_data = x_data.unsqueeze(0)
y_data = y_data.unsqueeze(0)

In [28]:
x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [29]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

  X = torch.FloatTensor(x_one_hot)


In [31]:
X, Y

(tensor([[[0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 1.],
          [0., 0., 0., 0., 1.],
          [0., 0., 0., 1., 0.],
          [0., 0., 1., 0., 0.]]]),
 tensor([[4, 4, 3, 2, 0]]))

In [32]:
class RNNNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNNet, self).__init__()

        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size, bias=True)
    
    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)

        return x

In [38]:
net = RNNNet(input_size, hidden_size, output_size)

In [39]:
outputs = net(X)

print(outputs)

tensor([[[ 0.2120, -0.5176, -0.3912,  0.1878, -0.4714],
         [ 0.5138, -0.1719, -0.3396,  0.2266, -0.5027],
         [ 0.4090, -0.2654, -0.2986,  0.1821, -0.4719],
         [ 0.2919, -0.2962, -0.4312, -0.0753, -0.2817],
         [ 0.4151, -0.2758, -0.2419,  0.0471, -0.4107]]],
       grad_fn=<ViewBackward0>)


In [40]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [41]:
for i in range(100):
    optimizer.zero_grad()

    outputs = net(X)
    loss = criterion(outputs.view(-1, input_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    result = outputs.data.numpy().argmax(axis=2)
    result_str = "".join([index_to_char[c] for c in np.squeeze(result)])
    print(f"{i}/100 | Loss: {loss.item()} | Prediction: {result_str} ")

0/100 | Loss: 1.7051255702972412 | Prediction: !!!!! 
1/100 | Loss: 1.419965147972107 | Prediction: !!!!! 
2/100 | Loss: 1.2154532670974731 | Prediction: pp!p! 
3/100 | Loss: 1.0496013164520264 | Prediction: pppe! 
4/100 | Loss: 0.8711943626403809 | Prediction: pppe! 
5/100 | Loss: 0.6606134176254272 | Prediction: pple! 
6/100 | Loss: 0.47183918952941895 | Prediction: pple! 
7/100 | Loss: 0.327343225479126 | Prediction: pple! 
8/100 | Loss: 0.225504070520401 | Prediction: pple! 
9/100 | Loss: 0.15435174107551575 | Prediction: pple! 
10/100 | Loss: 0.10624395310878754 | Prediction: pple! 
11/100 | Loss: 0.07466208934783936 | Prediction: pple! 
12/100 | Loss: 0.0536690354347229 | Prediction: pple! 
13/100 | Loss: 0.039234697818756104 | Prediction: pple! 
14/100 | Loss: 0.029060428962111473 | Prediction: pple! 
15/100 | Loss: 0.021828174591064453 | Prediction: pple! 
16/100 | Loss: 0.016677653416991234 | Prediction: pple! 
17/100 | Loss: 0.012990151531994343 | Prediction: pple! 
18/100 | 