In [4]:
from xml.sax.handler import feature_external_ges

from kaggle_project2.practice import progress_interval, lowest_loss, valid_losses
from networkx.algorithms.bipartite.basic import color

sequence_length = 28 # MNIST row 를 일종의 순서(sequence) 로 다룸
feature_size = 28 # 입력 차원
hidden_size = 128 # Hidden Layer 사이즈 설정처럼 설정
num_layers = 4 # stacked RNN (최대 4개까지는 Gradient Vanishing 현상이 적을 수 있으므로)
dropout_p = 0.2 # dropout rate
output_size = 10 # 0 ~ 9 숫자 부류(클래스)
minibatch_size = 128 # minibatch_size

In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np
from copy import deepcopy

In [6]:
class Net(nn.Module):
    def __init__(self, feature_size, hidden_size, num_layers, dropout_p, output_size, model_type):
        super().__init__()
        if model_type == 'rnn':
            self.go = nn.RNN( #nn.RNN은 모든 시점의 출력과 마지막 시점의 히든 레이어를
                input_size=feature_size,
                hidden_size = hidden_size,
                num_layers = num_layers,
                batch_first=True,
                dropout = dropout_p,
                bidirectional=True
            )
        elif model_type == 'lstm':
            self.go = nn.LSTM(
                input_size = feature_size,
                hidden_size = hidden_size,
                num_layers = num_layers,
                batch_first = True,
                dropout = dropout_p,
                bidirectional = True
            )

        self.layers = nn.Sequential(
            nn.ReLU(),
            nn.BatchNorm1d(hidden_size*2),
            nn.Linear(hidden_size*2, output_size),
            nn.LogSoftmax(dim=-1)
        )

    def forward(self,x):
        out, _ = self.go(x)
        out = out[:, -1]
        y = self.layers(out)
        return y

In [7]:
import torch
import torch.nn as nn

data1 = torch.full((minibatch_size, sequence_length, 2* hidden_size),1)
data2 = data1[:,-1] #마지막시간의 데이터들임
print(data1.shape, data2.shape)

torch.Size([128, 28, 256]) torch.Size([128, 256])


In [8]:
data3 = torch.full((minibatch_size,1,sequence_length,feature_size),1)
data4 = data3.reshape(-1,sequence_length,feature_size)
print(data3.shape, data4.shape)


torch.Size([128, 1, 28, 28]) torch.Size([128, 28, 28])


In [9]:
model = Net(feature_size, hidden_size, num_layers, dropout_p, output_size, 'rnn')
model

Net(
  (go): RNN(28, 128, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
  (layers): Sequential(
    (0): ReLU()
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Linear(in_features=256, out_features=10, bias=True)
    (3): LogSoftmax(dim=-1)
  )
)

In [10]:
train_rawdata = datasets.MNIST(root='dataset',
                               train = True,
                               download=True,
                               transform = transforms.ToTensor())
test_dataset = datasets.MNIST(root='dataset',
                              train=False,
                              download=True,
                              transform=transforms.ToTensor())

print('number of training data : ', len(train_rawdata))
print('number of test data : ', len(test_dataset))

100.0%
100.0%
100.0%
100.0%

number of training data :  60000
number of test data :  10000





In [11]:
VALIDATION_RATE = 0.2
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_rawdata)), # X index 번호
    train_rawdata.targets, # y
    stratify=train_rawdata.targets, # 균등분포
    test_size=VALIDATION_RATE # test dataset 비율
)

In [12]:
train_dataset = Subset(train_rawdata, train_indices)
validation_dataset = Subset(train_rawdata, val_indices)

In [13]:
print (len(train_dataset), len(validation_dataset), len(test_dataset))

48000 12000 10000


In [14]:
minibatch_size = 128 # Mini-batch 사이즈는 128 로 설정
# create batches
train_batches = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True)
val_batches = DataLoader(validation_dataset, batch_size=minibatch_size, shuffle=True)
test_batches = DataLoader(test_dataset, batch_size=minibatch_size, shuffle=True)

In [15]:
loss_func = nn.NLLLoss() # log softmax 는 NLLLoss() 로 진행해야 함
optimizer = torch.optim.Adam(model.parameters()) # Adam, learning rate 필요없음

In [16]:
def train_model(model, early_stop, n_epochs, progress_interval):

    train_losses, valid_losses, lowest_loss = [],[],np.inf

    for epoch in range(n_epochs):
        model.train()
        for x,y in train_batches:
            x = x.reshape(-1,sequence_length, feature_size)
            y_pred = model(x)
            loss = loss_func(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())


        model.eval()
        with torch.no_grad():
            for x,y in val_batches:
                x = x.reshape(-1,sequence_length, feature_size)
                y_pred = model(x)
                loss = loss_func(y_pred, y)
                valid_losses.append(loss.item())

        if valid_losses[-1]<lowest_loss:
            lowest_loss = valid_losses[-1]
            lowest_epoch = epoch
            best_model = deepcopy(model.state_dict())
        else:
            if early_stop >0 and lowest_epoch+early_stop <epoch:
                print('early stopped', epoch, 'epochs')
                model.load_state_dict(best_model)
                break

            if (epoch % progress_interval)==0:
                print(train_losses[-1], valid_losses[-1], lowest_loss, lowest_epoch, epoch)

    model.load_state_dict(best_model)
    return model, lowest_loss, train_losses, valid_losses

In [17]:
nb_epochs = 100
progress_interval = 3
early_stop = 30

model, lowest_loss, train_losses, valid_losses = train_model(model, early_stop, nb_epochs, progress_interval)

0.13185760378837585 0.23740315437316895 0.13335460424423218 3 6
0.16851583123207092 0.19397228956222534 0.028506798669695854 8 9
0.10461896657943726 0.08502926677465439 0.028506798669695854 8 12
0.15423429012298584 0.25985845923423767 0.028506798669695854 8 15
0.1602725088596344 0.18484580516815186 0.026772739365696907 18 21
0.023635968565940857 0.17230403423309326 0.026772739365696907 18 24
0.1337500363588333 0.2515651285648346 0.026772739365696907 18 27
0.08454056084156036 0.19280236959457397 0.026772739365696907 18 30


KeyboardInterrupt: 

In [None]:
test_loss = 0
correct = 0
model.eval()

wrong_samples, wrong_pred, actual_preds = list(), list(), list()

model.eval()
with torch.no_grad():
    for x,y in test_batches:
        y_test_pred = model(x)
        loss =loss_func(y_test_pred, y)
        test_loss+=loss_func(y_test_pred,y)
        pred = torch.argmax(y_test_pred,dim=1)
        correct +=pred.eq(y.view_as(pred)).sum().item()

        wrong_idx = (pred !=y.view_as(pred)).nonzero()[:, 0]
        for i in wrong_idx:
            wrong_samples.append(x[i])
            wrong_pred.append(pred[i])
            actual_preds.append(y.view_as(pred)[i])

test_loss/=len(test_batches.dataset )
print('\nTest set: Average loss: {:.4f}, Accuracy : {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_batches.dataset),100.*correct/len(test_batches.dataset)))

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

plt.figure(figsize=(18,20))

for index in range(len(wrong_samples)):
    plt.subplot(10,10,index+1)
    plt.axis('off')
    plt.imshow(wrong_samples[index].numpy().reshape(28,28), cmap='gray')
    plt.title('pred'+ str(wrong_pred[index].item())+ "("+str(actual_preds[index].item())+")", color='red')