In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np
from copy import deepcopy

from unicodedata import bidirectional



In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(1)
if device =='cuda':
    torch.cuda.manual_seed_all(1)
print(device)

cuda


In [3]:
sequence_length = 28 # MNIST row 를 일종의 순서(sequence) 로 다룸
feature_size = 28 # 입력 차원
hidden_size = 128 # Hidden Layer 사이즈 설정처럼 설정
num_layers = 4 # stacked RNN (최대 4개까지는 Gradient Vanishing 현상이 적을 수 있으므로)
dropout_p = 0.2 # dropout rate
output_size = 10 # 0 ~ 9 숫자 부류(클래스)
minibatch_size = 128 # minibatch_size

In [4]:
train = datasets.MNIST(root='dataset',
                       train=True,
                       download=True,
                       transform=transforms.ToTensor()
                       )

test = datasets.MNIST(
    root='dataset',
    train=False,
    download=True,
    transform=transforms.ToTensor()
)
print('훈련 데이터 길이 : ', len(train))
print('테스트 데이터 크기:', len(test))

훈련 데이터 길이 :  60000
테스트 데이터 크기: 10000


In [5]:
train_indices, valid_indices, _, _ = train_test_split(
    range(len(train)),
    train.targets,
    stratify=train.targets,
    test_size = 0.2
)

train = Subset(train, train_indices)
valid = Subset(train, valid_indices)

minibatch_size = 128
train_batches = DataLoader(train, batch_size=minibatch_size, shuffle=True)
val_batches = DataLoader(valid, batch_size=minibatch_size, shuffle=True)
test_batches = DataLoader(test, batch_size=minibatch_size, shuffle=True)

In [6]:
import torch
import torch.nn as nn

In [7]:
class Net(nn.Module):
    def __init__(self, feature_size, hidden_size, num_layers, dropout_p, output_size, model_type):
        super().__init__()
        if model_type == 'rnn':
            self.go = nn.RNN(
               input_size = feature_size,
                hidden_size = hidden_size,
                num_layers = num_layers,
                batch_first = True,
                dropout = dropout_p,
                bidirectional = True
            )
        elif model_type=='lstm':
            self.go = nn.LSTM(
                input_size = feature_size,
                hidden_size= hidden_size,
                num_layers = num_layers,
                batch_first=True,
                dropout=dropout_p,
                bidirectional=True
            )
        self.go2 = nn.Sequential(
            nn.LeakyReLU(0.1),
            nn.BatchNorm1d(hidden_size*2),
            nn.Linear(hidden_size*2, output_size),
            nn.LogSoftmax(dim=-1)
        )

    def forward(self,x):
        out, _ = self.go(x)
        out = out[:, -1]
        y= self.go2(out)
        return y

model = Net(feature_size, hidden_size, num_layers, dropout_p, output_size, 'lstm').to(device)

loss_func = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters())

In [8]:
def train_model(model, early, n_epochs, progress_interval):
    train_losses, valid_losses, lowest_loss = [], [], np.inf

    for epoch in range(n_epochs):
        train_loss , valid_loss = 0,0

        model.train()
        for x,y  in train_batches:
            x = x.reshape(-1,sequence_length, feature_size).to(device)
            y= y.to(device)
            y_pred = model(x)
            loss = loss_func(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss+=loss.item()

        train_loss/=len(train_batches)
        train_losses.append(train_loss)
        model.eval()
        with torch.no_grad():
            for x, y in val_batches:
                x = x.reshape(-1,sequence_length, feature_size)
                x = x.to(device)
                y= y.to(device)
                y_pred = model(x)
                loss = loss_func(y_pred, y)
                valid_loss+=loss.item()

        valid_loss = valid_loss/len(val_batches)
        valid_losses.append(valid_loss)

        if valid_losses[-1] < lowest_loss:
            lowest_loss = valid_losses[-1]
            lowest_epoch = epoch
            best_model = deepcopy(model.state_dict())
        else:
            if(early>0) and lowest_epoch + early<epoch:
                print('early stopped')
                break
        if (epoch%progress_interval==0):
            print(train_losses[-1], valid_loss[-1])

    model.load_state_dict(best_model)
    return model, lowest_loss, train_losses, valid_losses


nb_epochs = 100
progress_interval = 3
early_stop = 30

model, lowest_loss, train_losses, valid_losses = train_model(model, early_stop, nb_epochs, progress_interval)

IndexError: list index out of range