In [1]:
# Libraries
import torch
import torch.nn as nn
from torch.autograd import Variable

# Fix the global RNG seed so parameter initialisation is repeatable across runs.
torch.manual_seed(seed=777)

<torch._C.Generator at 0x1f97f4d79b0>

In [24]:
# Data setup
# Vocabulary: the position of a character in this list is its class index.
idx2char = list("hielo")

# Input is "hihell"; the target is the same text shifted by one character, "ihello".
x_data = [[idx2char.index(ch) for ch in "hihell"]]   # [[0, 1, 0, 2, 3, 3]]
y_data = [idx2char.index(ch) for ch in "ihello"]     # [1, 0, 2, 3, 3, 4]

In [25]:
# Convert the Python lists to tensors.
# `inputs` is fed to nn.Embedding and `labels` to CrossEntropyLoss, so both hold
# class indices and must be integer (long) tensors. The reference example used
# torch.Tensor (float), which raised an error asking for LongTensor.
# Tensors are used directly: the autograd `Variable` wrapper is deprecated and
# no longer needed since PyTorch 0.4.
inputs = torch.tensor(x_data, dtype=torch.long)
labels = torch.tensor(y_data, dtype=torch.long)
print(inputs)
print(labels)

tensor([[ 0,  1,  0,  2,  3,  3]])
tensor([ 1,  0,  2,  3,  3,  4])


In [26]:
# --- vocabulary / model dimensions ---
num_classes = 5                  # number of classes (one per character in the vocabulary)
input_size = num_classes         # one-hot size, i.e. number of distinct input symbols (5)
embedding_size = 10              # embedding width (each of the 5 symbols is encoded into 10 dims)
hidden_size = num_classes        # RNN hidden width; set to 5 to match the number of classes
num_layers = 1                   # single-layer RNN

# --- batch geometry ---
batch_size = 1                   # one sentence
sequence_length = len("ihello")  # |ihello| == 6

In [27]:
# RNN model definition
class Model(nn.Module):
    """Character-level sequence model: embedding -> RNN -> linear classifier.

    Args:
        num_classes: number of output classes (width of the returned logits).
        input_size: vocabulary size, i.e. number of rows in the embedding table.
        embedding_size: width of each embedding vector (the RNN's input width).
        hidden_size: width of the RNN hidden state.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, num_classes, input_size, embedding_size, hidden_size, num_layers):
        super(Model, self).__init__()

        # Keep the configuration on the instance
        self.num_classes = num_classes
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Embedding: symbol index (0..input_size-1) => embedding_size vector
        self.embedding = nn.Embedding(self.input_size, self.embedding_size)

        # RNN: embedding_size => hidden_size.
        # num_layers is passed explicitly so it agrees with the h_0 built in forward().
        self.rnn = nn.RNN(input_size=self.embedding_size,
                          hidden_size=self.hidden_size,
                          num_layers=self.num_layers,
                          batch_first=True)

        # Fully connected layer: hidden_size => num_classes
        self.fc = nn.Linear(self.hidden_size, self.num_classes)

    # input --(embedding)--> embedded --(RNN)--> RNN output --(FC layer)--> logits
    def forward(self, x):
        """Compute per-time-step class logits.

        Args:
            x: integer tensor of symbol indices, shape (batch, sequence length).

        Returns:
            Tensor of shape (batch * sequence length, num_classes), with the
            time steps of all sequences flattened into the first dimension.
        """
        # Initial hidden state: (num_layers, batch, hidden_size), created on the
        # same device as the input so the model also works after .to(device).
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device)

        # Embedding lookup already yields (batch, sequence length, embedding_size),
        # so no reshape through notebook-level batch_size / sequence_length is needed.
        emb = self.embedding(x)

        # Propagate the embedded sequence through the RNN
        out, _ = self.rnn(emb, h_0)

        # Flatten the time steps by the RNN's feature width (hidden_size, not
        # num_classes) before the classifier.
        return self.fc(out.reshape(-1, self.hidden_size))

In [28]:
# Instantiate the RNN model from the hyperparameters and show its layers
model = Model(
    num_classes=num_classes,
    input_size=input_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
print(model)

Model(
  (embedding): Embedding(5, 10)
  (rnn): RNN(10, 5, batch_first=True)
  (fc): Linear(in_features=5, out_features=5, bias=True)
)


In [29]:
# Loss function and optimizer
# Cross-entropy over the per-step logits; Adam updates every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1)

In [30]:
# Train the model
for epoch in range(100):
    # Clear the gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass and loss
    outputs = model(inputs)
    loss = criterion(outputs, labels)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Index of the highest-scoring class at each time step
    # (torch.max over dim 1 returns (values, indices); only the indices are needed)
    idx = torch.max(outputs, 1)[1].numpy()
    result_str = [idx2char[c] for c in idx.squeeze()]

    # Report progress for this epoch
    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

epoch: 1, loss: 1.596
Predicted string:  eleloo
epoch: 2, loss: 1.274
Predicted string:  elelll
epoch: 3, loss: 1.061
Predicted string:  elelll
epoch: 4, loss: 0.912
Predicted string:  elelll
epoch: 5, loss: 0.788
Predicted string:  elelll
epoch: 6, loss: 0.690
Predicted string:  ihilll
epoch: 7, loss: 0.589
Predicted string:  ihilll
epoch: 8, loss: 0.487
Predicted string:  ihillo
epoch: 9, loss: 0.423
Predicted string:  ihello
epoch: 10, loss: 0.395
Predicted string:  ihello
epoch: 11, loss: 0.374
Predicted string:  ihello
epoch: 12, loss: 0.287
Predicted string:  ihello
epoch: 13, loss: 0.280
Predicted string:  ihello
epoch: 14, loss: 0.213
Predicted string:  ihello
epoch: 15, loss: 0.199
Predicted string:  ihello
epoch: 16, loss: 0.174
Predicted string:  ihello
epoch: 17, loss: 0.137
Predicted string:  ihello
epoch: 18, loss: 0.117
Predicted string:  ihello
epoch: 19, loss: 0.109
Predicted string:  ihello
epoch: 20, loss: 0.083
Predicted string:  ihello
epoch: 21, loss: 0.070
Predic

In [32]:
# Test: one forward pass with the trained model, printing each step from
# raw scores to class indices.
with torch.no_grad():  # inference only, so no autograd graph is recorded
    outputs = model(inputs)  # forward pass
print("outputs:", outputs)

# Highest-scoring class per time step; max over dim 1 returns (values, indices)
_, idx = outputs.max(1)
print("idx:", idx)

# The indices carry no gradient, so they convert to numpy directly without `.data`.
# The printed label below is left unchanged.
idx = idx.numpy()
print("idx.data.numpy():", idx)
print("idx.squeeze():", idx.squeeze())

outputs: tensor([[ -1.0228,   7.5371,   2.0273,  -0.3868,  -4.9043],
        [  7.9394,   0.9645,  -5.2524,   0.1626,  -1.2331],
        [ -2.5315,   5.2997,  10.8059,  -2.6896,  -6.7077],
        [  1.5725,   1.2934,  -5.2573,   9.6231,  -2.7621],
        [ -0.6856,  -0.9425,  -7.1850,   8.3527,   0.9976],
        [ -1.3619,  -2.0428,  -7.6280,  -0.1084,   7.1409]])
idx: tensor([ 1,  0,  2,  3,  3,  4])
idx.data.numpy(): [1 0 2 3 3 4]
idx.squeeze(): [1 0 2 3 3 4]


In [35]:
# Print the result: map each predicted class index back to its character
predicted_indices = idx.squeeze().tolist()
result_str = [idx2char[position] for position in predicted_indices]
print(result_str)
print("Predicted string:", ''.join(result_str))

['i', 'h', 'e', 'l', 'l', 'o']
Predicted string: ihello
