## import

In [28]:
import torch
import torch.nn as nn
import math
import torch.functional as F
from tqdm import tqdm

## GRU cell

In [29]:
class GRUcell(nn.Module):
    def __init__(self, input_size, hidden_size, bias=True):
        super(GRUcell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.x2h = nn.Linear(input_size, 3 * hidden_size, bias=bias)
        self.h2h = nn.Linear(hidden_size, 3 * hidden_size, bias=bias)
        self.reset_parameters()



    def reset_parameters(self):
        std = 1.0 / math.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std)

    def forward(self, x, hidden):

        x = x.view(-1, x.size(1))

        gate_x = self.x2h(x)
        gate_h = self.h2h(hidden)

        gate_x = gate_x.squeeze()
        gate_h = gate_h.squeeze()

        i_r, i_i, i_n = gate_x.chunk(3, 1)  # reset, input, new로 가는 세방향으로 나눔
        h_r, h_i, h_n = gate_h.chunk(3, 1)


        resetgate = F.sigmoid(i_r + h_r)  # r_t
        inputgate = F.sigmoid(i_i + h_i)  # z_t
        newgate = F.tanh(i_n + (resetgate * h_n))  # h_tilde_t

        hy = newgate + inputgate * (hidden - newgate)  # next hidden state 계산


        return hy


## GRU model

In [30]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets
from torch.autograd import Variable
from torch.nn import Parameter
from torch import Tensor
import torch.nn.functional as F
from torch.utils.data import DataLoader
import math

각종 환경 설정 (gpu 사용 여부에 따라)

In [31]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

cuda = True if torch.cuda.is_available() else False

Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

torch.manual_seed(125)

if torch.cuda.is_available():
    torch.cuda.manual_seed(125)

In [22]:
print(device)

cpu


dataset 생성

In [23]:
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5), (1.0, ))
])

from torchvision.datasets import MNIST
download_root = '../080289-main/chap07/MNIST_DATASET'

train_dataset = MNIST(download_root, transform=mnist_transform, train=True,
                    download=True)
valid_dataset = MNIST(download_root, transform=mnist_transform, train=False,
                     download=True)
test_dataset = MNIST(download_root, transform=mnist_transform, train=False,
                    download=True)

batch_size = 64
train_loader = DataLoader(dataset=train_dataset,
                            batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset,
                            batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset,
                            batch_size=batch_size, shuffle=True)

In [24]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1e2f7954cd0>

모델 빌드

In [37]:
class GRUmodel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, bias=True):
        super(GRUmodel, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.output_dim = output_dim
        self.gru_cell = GRUcell(input_dim, hidden_dim)
        self.bias = bias
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        if torch.cuda.is_available() :
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        else :
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))

        outs = []
        hn = h0[0, :, :]

        for seq in range(x.size(1)):
            hn = self.gru_cell(x[:, seq, :], hn)
            outs.append(hn)
        out = outs[-1].squeeze()
        out = self.fc(out)
        return out

In [38]:
input_dim = 28
hidden_dim = 128
layer_dim = 1
output_dim = 10

model = GRUmodel(input_dim, hidden_dim, layer_dim, output_dim)

if torch.cuda.is_available():
    model.cuda()

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [39]:
seq_dim = 28

loss_list = []
iter = 0
epochs = 3

for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.view(-1, seq_dim, input_dim).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.view(-1, seq_dim, input_dim))
            labels = Variable(labels)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        if torch.cuda.is_available():
            loss.cuda()

        loss.backward()
        optimizer.step()

        loss_list.append(loss.item())
        iter += 1

        if iter % 500 == 0:
            correct = 0
            total = 0
            for images, labels in valid_loader:
                if torch.cuda.is_available():
                    images = Variable(images.view(-1, seq_dim, input_dim).cuda())
                else:
                    images = Variable(images.view(-1, seq_dim, input_dim))

                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)

                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct += (predicted == labels).sum()

            accuracy = 100 * correct / total
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 500. Loss: 1.4140992164611816. Accuracy: 45.93000030517578
Iteration: 1000. Loss: 0.7438367605209351. Accuracy: 75.76000213623047
Iteration: 1500. Loss: 0.34653058648109436. Accuracy: 88.66999816894531
Iteration: 2000. Loss: 0.449324369430542. Accuracy: 93.16000366210938
Iteration: 2500. Loss: 0.2705530822277069. Accuracy: 94.58000183105469
