In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np
from copy import deepcopy

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(1)
if device == 'cuda':
    torch.cuda.manual_seed_all(1)
print(device)

cuda


In [3]:
conv1 = nn.Conv2d(1,1,3, padding=1) # in , out channel  필터크기 패딩순
input1 = torch.Tensor(1,1,5,5) #배치 채널 행 열 이엇나
out1 = conv1(input1)
out1.shape

torch.Size([1, 1, 5, 5])

In [4]:
out2 = nn.MaxPool2d(2)(out1)
out2.shape

torch.Size([1, 1, 2, 2])

In [5]:
conv1 = nn.Sequential(
    nn.Conv2d(1,32,3,1,1), #입력채널 1 출력채널 32 라
    nn.ReLU(),
    nn.BatchNorm2d(32), #여기서 32로 받고
    nn.MaxPool2d(2)
)
input1 = torch.Tensor(1,1,28,28) # 1 32 14 14
out1 = conv1(input1)
out1.shape

torch.Size([1, 32, 14, 14])

In [6]:
conv1 = nn.Sequential(
    nn.Conv2d(1,32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.MaxPool2d(2),

    nn.Conv2d(32,64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 128, 3, 1, 1),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    nn.MaxPool2d(2)
)
input1 = torch.Tensor(1,1,28,28)
out1 = conv1(input1)
flat_out1 = out1.view(out1.size(0),-1)
print(out1.shape, flat_out1.shape)

torch.Size([1, 128, 3, 3]) torch.Size([1, 1152])


In [7]:
class CNNModel(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv_layers = nn.Sequential(
           nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )

        self.linear_layers = nn.Sequential (
            nn.Linear(3 * 3 * 128, 128),
            nn.LeakyReLU(0.1),
            nn.BatchNorm1d(128), # Linear Layer 이므로, BatchNorm1d() 사용해야 함
            nn.Linear(128, 64),
            nn.LeakyReLU(0.1),
            nn.BatchNorm1d(64), # Linear Layer 이므로, BatchNorm1d() 사용해야 함
            nn.Linear(64, 10),
            nn.LogSoftmax(dim=-1)
        )

    def forward(self,x):
       x1 = self.conv_layers(x)
       flat_x = x1.view(x1.size(0),-1)
       return self.linear_layers(flat_x)



In [8]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np
from copy import deepcopy

In [9]:
train_rawdata = datasets.MNIST(root = 'dataset',
                            train=True,
                            download=True,
                            transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root = 'dataset',
                            train=False,
                            download=True,
                            transform=transforms.ToTensor())
print('number of training data : ', len(train_rawdata))
print('number of test data : ', len(test_dataset))

number of training data :  60000
number of test data :  10000


In [10]:
train_idx, val_idx, _, _ = train_test_split(
    range(len(train_rawdata)),
    train_rawdata.targets,
    stratify=train_rawdata.targets,
    test_size=0.2
)


In [11]:
train = Subset(train_rawdata, train_idx)
val = Subset(train_rawdata, val_idx)

In [12]:
print(len(train), len(val))

48000 12000


In [13]:
test_dataset

Dataset MNIST
    Number of datapoints: 10000
    Root location: dataset
    Split: Test
    StandardTransform
Transform: ToTensor()

In [14]:
batch_size = 128
train_batches = DataLoader(train, 128, True)
val_batches = DataLoader(val, 128, True)
test_batches = DataLoader(test_dataset, 128, False)

In [15]:
model = CNNModel()
model

CNNModel(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): LeakyReLU(negative_slope=0.1)
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): LeakyReLU(negative_slope=0.1)
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=1152, out_features=128, bias=True)

In [16]:
loss_func = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters())

In [17]:
def train_model(model, early, iter_num, show_interval):
    train_losses, valid_losses, lowes_loss = [], [], np.inf
    lowest_index = 0
    best_model = deepcopy(model.state_dict())
    for i in range(iter_num):

        loss_t, loss_v = 0,0

        model.train()
        for x, y in train_batches:
            y_pred = model(x)
            loss = loss_func(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_t+=loss.item()
        loss_t/=len(train_batches)
        train_losses.append(loss_t)

        model.eval()
        with torch.no_grad():
            for x, y in val_batches:
                y_pred = model(x)
                loss = loss_func(y_pred,y)
                loss_v+=loss.item()

            loss_v/=len(val_batches)
            valid_losses.append(loss_v)

            if valid_losses[-1] < lowes_loss:
                lowes_loss = valid_losses[-1]
                lowest_index = i
                best_model = deepcopy(model.state_dict())
            else:
                if early > 0 and lowest_index+ early < i:
                    print('early stopped')
                    model.load_state_dict(best_model)
                    break

            if i % show_interval ==0:
                print(train_losses[-1], valid_losses[-1])

        model.load_state_dict(best_model)
        return model,lowes_loss, train_losses, valid_losses

In [18]:
model = CNNModel().to(device)
test_loss = 0
correct = 0
wrong_x, wrong_y, actual_y = [],[],[]
model.eval()
with torch.no_grad():
    for x, y  in test_batches:
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        test_loss +=loss_func(y_pred,y)
        pred = torch.argmax(y_pred, dim=1)
        correct+=pred.eq(y).sum().item()

        wrong_idx = pred.ne(y).nonzero()[:,0].cpu().numpy().tolist()
        for i in wrong_idx:
            wrong_x.append(x[i].cpu())
            wrong_y.append(pred[i].cpu())
            actual_y.append(y[i].cpu())

test_loss /= len(test_batches)
print('Average Test Loss: {:.4f}'.format(test_loss))
print('Accuracy: {}/{} ({:.2f}%)'.format(correct, len(test_batches.dataset), 100 * correct / len(test_batches.dataset)))

Average Test Loss: 2.3035
Accuracy: 1010/10000 (10.10%)
