In [2]:
# Hihell => ihello 예제를 응용해서 조금 더 긴 문장을 prediction하는 예제
# 필요한 라이브러리
import torch
import torch.nn as nn
from torch.autograd import Variable

torch.manual_seed(777)

<torch._C.Generator at 0x1cd677ce950>

In [3]:
# Define data
sentence = " hello pytorch! let's study deep learning!"
char_set = set(sentence)
idx2char = list(char_set) # index -> char 접근
char2idx = {c:i for i, c in enumerate(idx2char)} # char -> index 접근

sen2list = list(sentence)

# Define X data
x_data = sen2list[:-1]
x_data = [char2idx[c] for c in x_data]
x_data = [x_data]

# Define Y data
y_data = sen2list[1:]
y_data = [char2idx[c] for c in y_data]

In [4]:
# Array -> Tensor 변환
inputs = Variable(torch.LongTensor(x_data))
labels = Variable(torch.LongTensor(y_data))
print(inputs)
print(labels)

tensor([[ 15,   7,   0,  12,  12,   3,  15,  13,  10,   9,   3,   1,
           8,   7,   2,  15,  12,   0,   9,  11,   4,  15,   4,   9,
           5,   6,  10,  15,   6,   0,   0,  13,  15,  12,   0,  14,
           1,  17,  16,  17,  18]])
tensor([  7,   0,  12,  12,   3,  15,  13,  10,   9,   3,   1,   8,
          7,   2,  15,  12,   0,   9,  11,   4,  15,   4,   9,   5,
          6,  10,  15,   6,   0,   0,  13,  15,  12,   0,  14,   1,
         17,  16,  17,  18,   2])


In [5]:
# Define hyper parameters
num_classes = len(char2idx)
input_size = len(char2idx)
embedding_size = 38  # embedding size (19->38 encoding)
hidden_size = len(char2idx)  # 바로 one hot으로 예측하기 위해 19로 설정
batch_size = 1   # one sentence
sequence_length = len(y_data)
num_layers = 1  # one-layer rnn

In [25]:
# RNN 모델 정의
class Model(nn.Module):
    def __init__(self, num_classes, input_size, embedding_size, hidden_size, num_layers):
        super(Model, self).__init__()
        
        # assign
        self.num_classes = num_classes
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # embedding cell 정의
        # input_size => embedding_size
        self.embedding = nn.Embedding(self.input_size, self.embedding_size)
        
        # RNN cell 정의
        # embedding size => hidden_size
        self.rnn = nn.RNN(input_size=self.embedding_size,
                          hidden_size=self.hidden_size, batch_first=True)
        
        # FC layer 정의
        # hidden_size => num_classes
        self.fc = nn.Linear(self.hidden_size, self.num_classes)
    
    """
    각 단계에서의 input shape와 output shape::
    initial input X            (batch_size, seq_length)
          ||
    [embedding cell]
          ||
    embedding output           (batch_size, seq_length, emb_size)
          ||
    reshaped for RNN           (batch_size, seq_length, -1)
          ||
      [RNN cells]
          ||
       RNN output              (batch_size, seq_length, hidden_size)
          ||
    reshaped for FC            (seq_length, num_classes)
          ||
       [FC layer]
          ||
       FC output               (seq_length, num_classes)
    """
    def forward(self, x):
        # hidden layer 초기화
        # (layer 개수, batch size, hidden size)
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        # 최초의 input인 X의 shape: (batch size, seq length)
        
        # embedding forwarding
        # 이 때 emb_out의 shape: (batch size, seq length, emb_size)
        emb_out = self.embedding(x)
        
        # Reshape Embedding output
        # (batch size, sequence length, embedding size) => (batch size, seq length, -1)
        emb_out = emb_out.view(batch_size, sequence_length, -1)

        # RNN forwarding
        # 이 때 rnn_out의 shape: (batch size, seq length, hidden size)
        rnn_out, _ = self.rnn(emb_out, h_0)
        
        # Reshape RNN output
        # (batch size, seq length, hidden size) => (seq length, num classes)
        rnn_out = rnn_out.view(-1, self.num_classes)
        
        # FC layer forwarding
        # 이 때 fc_out의 shape: (seq length, num_classes)
        fc_out = self.fc(rnn_out)
        
        return fc_out

In [26]:
# RNN 모델 정의
model = Model(num_classes, input_size, embedding_size, hidden_size, num_layers)
print(model)

Model(
  (embedding): Embedding(19, 38)
  (rnn): RNN(38, 19, batch_first=True)
  (fc): Linear(in_features=19, out_features=19, bias=True)
)


In [27]:
# loss, optimizer 정의
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [28]:
# 모델 학습시키기
for epoch in range(100):
    # gradient -> zero 과정
    optimizer.zero_grad()
    
    # forwarding
    outputs = model(inputs)
    
    # loss 계산, backwarding 과정
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    
    # class중 가장 높은 score 가진 index 뽑아내기
    _, idx = outputs.max(1)
    idx = idx.data.numpy()
    result_str = [idx2char[c] for c in idx.squeeze()]
    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

epoch: 1, loss: 3.026
Predicted string:  !o''''ya'!d'p '!'d'd ! 'aa!ydd'a!pds!'p'd
epoch: 2, loss: 2.184
Predicted string:  l!pee l  lrn ! le l! l!ld el pp le! nini!
epoch: 3, loss: 1.480
Predicted string:  lelee letornh! lea'e ltudeeoelp lea nini!
epoch: 4, loss: 0.991
Predicted string:  lelle letorch! lea's ltudytoelp learcing!
epoch: 5, loss: 0.694
Predicted string:  lellerpyturch! pea's st'dytdyep learcing!
epoch: 6, loss: 0.462
Predicted string:  lello lytorch! lea's study deep learning!
epoch: 7, loss: 0.309
Predicted string:  hello ly orch! lea's study deep learning!
epoch: 8, loss: 0.223
Predicted string:  hello pytorch! let's study deep learning!
epoch: 9, loss: 0.150
Predicted string:  hello pytorch! lea's study deep letrning!
epoch: 10, loss: 0.114
Predicted string:  hello pytorch! lea's study deep learning!
epoch: 11, loss: 0.088
Predicted string:  hello pytorch! lea's study deep learning!
epoch: 12, loss: 0.071
Predicted string:  hello pytorch! let's study deep learning!
e

In [29]:
# 학습된 모델로 test
outputs = model(inputs) # forwarding
print("outputs:", outputs)
print(outputs.shape)
_, idx = outputs.max(1) 
"""
outputs.max(axis=1)의 의미: 
outputs의 shape는 [41, 19] => 총 41의 길이를 가지는 문자열. 각 문자는 19가지의 경우의 수(class=19개)
max(axis=1)의 의미는 각 덩어리(41개의 덩어리) 안에서 가장 큰 값을 가지는 한 원소를 골라내는 것.
즉, 41개의 문자열 자리에서 각 자리에 올 수 있는 19개의 경우의 수 중 가장 큰 확률을 가지는 문자를 선택.
결과값는 41의 길이를 가지는 문자열이 된다.
"""
print("idx:", idx)
idx = idx.data.numpy()
print("idx.data.numpy():", idx)
print("idx.squeeze():", idx.squeeze())

outputs: tensor([[ -2.4168, -10.7533,   0.0159,   6.2247,   2.1006,   4.0881,
           3.7938,  15.4936,  -5.2138,  -2.8205,  -6.3924,  -1.6705,
           6.9083,   0.8580,  -5.1631,   0.4294,  -6.3680,   0.2369,
          -8.6501],
        [ 18.7029,  -0.8193,   8.2123,  -4.1436,   1.6646, -15.0959,
          -1.0625,   0.0813,  -6.2607,   3.0577,   0.6252, -12.0387,
           1.9686,  -3.1675,   4.1876,  -1.7998,  -6.7547,  -2.2124,
          -0.7426],
        [ -0.3456,   0.3015,  -1.4942,   0.6029,  -5.9279,  -0.1012,
          -7.5759,  -2.8007,  -5.4827,   0.0809,  -9.4858,   4.0592,
          15.1753,   5.8644,   3.4132,   3.4207,   3.2415,  -2.9595,
           0.6885],
        [  5.8381,  -6.7645,   0.8918,   0.1515,   5.4950,  -5.9333,
          -3.0137,   4.0235,  -3.4755,   0.8498, -11.1970,  -0.8730,
          15.7754,   2.9103,  -5.3685,  -5.9940,  -1.4441,  -0.8867,
          -0.7581],
        [  7.3001, -13.0816,   0.2215,  16.1081,  -4.3885,  -0.8059,
           2.0

In [30]:
#결과 출력
result_str = [idx2char[c] for c in idx.squeeze()]
print(result_str)
print("Predicted string:", ''.join(result_str))

['h', 'e', 'l', 'l', 'o', ' ', 'p', 'y', 't', 'o', 'r', 'c', 'h', '!', ' ', 'l', 'e', 't', "'", 's', ' ', 's', 't', 'u', 'd', 'y', ' ', 'd', 'e', 'e', 'p', ' ', 'l', 'e', 'a', 'r', 'n', 'i', 'n', 'g', '!']
Predicted string: hello pytorch! let's study deep learning!
