In [1]:
import numpy as np

import torch
import torch.optim as optim
import torch.utils.data as data_utils
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

from mnist import MNIST

In [2]:
# GPU 설정 
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(2022)
if device == 'cuda':
    torch.cuda.manual_seed_all(2022)
    
print(device)

cpu


## Data

In [3]:
mnist = MNIST('../../Deeplearning_dataset/MNIST/raw')

x_train, y_train = mnist.load_training()
x_test, y_test = mnist.load_testing()

# list -> array
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

print(f"shape of x_train: {x_train.shape}, shape of y_train: {y_train.shape}")
print(f"shape of x_test: {x_test.shape} shape of y_test: {y_test.shape}")

shape of x_train: (60000, 784), shape of y_train: (60000,)
shape of x_test: (10000, 784) shape of y_test: (10000,)


In [4]:
# Tensor로 변환후 x data와 y data 합치기
train_data = data_utils.TensorDataset(torch.FloatTensor(x_train), torch.FloatTensor(y_train))
test_data = data_utils.TensorDataset(torch.FloatTensor(x_test), torch.FloatTensor(y_test))

batch_size = 1000

trainloader = data_utils.DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
testloader = data_utils.DataLoader(test_data, batch_size=batch_size, shuffle=False, drop_last=False)

## Model

In [5]:
class dummy_CNN(torch.nn.Module):
    def __init__(self):
        super(dummy_CNN, self).__init__()
        
        self.layer1 = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1, 1, 1),
                                         torch.nn.BatchNorm2d(64),
                                         torch.nn.ReLU(),
                                         torch.nn.MaxPool2d(2))
        
        self.layer2 = torch.nn.Sequential(torch.nn.Conv2d(64, 128, 5, 2, 0),
                                         torch.nn.BatchNorm2d(128),
                                         torch.nn.ReLU(),
                                         torch.nn.MaxPool2d(2))
    
    def forward(self, x):
        layer1_output = self.layer1(x)
        layer2_output = self.layer2(layer1_output)
        return layer2_output

In [6]:
dummy_data = torch.Tensor(1000, 1, 28, 28).to(device)
dummy_model  = dummy_CNN().to(device)

dummy_output = dummy_model(dummy_data)
dummy_output.shape

torch.Size([1000, 128, 3, 3])

In [7]:
dummy_output.view(batch_size, -1).shape

torch.Size([1000, 1152])

In [8]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.layer1 = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1, 1, 1),
                                         torch.nn.BatchNorm2d(64),
                                         torch.nn.ReLU(),
                                         torch.nn.MaxPool2d(2))
        
        self.layer2 = torch.nn.Sequential(torch.nn.Conv2d(64, 128, 5, 2, 0),
                                         torch.nn.BatchNorm2d(128),
                                         torch.nn.ReLU(),
                                         torch.nn.MaxPool2d(2))
        
        self.fc = torch.nn.Linear(dummy_output.view(batch_size, -1).shape[-1], 10)
        
    def forward(self, x):
        layer1_output = self.layer1(x)
        layer2_output = self.layer2(layer1_output)
        
        flatten = layer2_output.view(layer2_output.shape[0], -1) # fully-connected layer에 입력하기 위해서 일렬로 펴주는 기능
        
        output = self.fc(flatten)
        
        return output

In [9]:
model = CNN().to(device)

In [10]:
model

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=1152, out_features=10, bias=True)
)

## Hyper parameter

In [11]:
num_epoch = 15
learning_rate = 0.001
weight_decay = 1e-5

## Loss Function, Optimizer

In [12]:
loss_function = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## Training, Validation

In [13]:
total_batch = len(trainloader) # 전체 mini batch 개수
val_total_batch = len(testloader)

for epoch in range(1, 1+num_epoch):
    # Train
    model.train()
    avg_loss = 0
    correct = 0
    num_total_data = 0
    
    for batch_idx, (images, labels) in enumerate(trainloader):
        X = images.to(device)
        X = X.reshape(batch_size, 1, 28, 28)
        
        Y = labels.to(device)
        
        optimizer.zero_grad() # graident 초기화
        
        prediction = model(X)
        loss = loss_function(prediction, Y.long())
        
        loss.backward() # backpropgation
        optimizer.step() # update weight
        
        avg_loss += loss.item()/total_batch
        
        prediction_softmax =  F.softmax(prediction, dim=1)
        prediction_class = torch.argmax(prediction_softmax, dim=1)
        
        correct += (prediction_class == Y).sum().item()
        num_total_data += len(labels)
    
    # Validation
    with torch.no_grad():
        model.eval()
        val_avg_loss = 0
        val_correct = 0
        val_num_total_data = 0
        
        for batch_idx, (val_images, val_labels) in enumerate(testloader):
            val_X = val_images.to(device)
            val_X = val_X.reshape(batch_size, 1, 28, 28)
            val_Y = val_labels.to(device)
            
            val_prediction = model(val_X)
            val_loss = loss_function(val_prediction, val_Y.long())
            val_avg_loss += val_loss.item()/val_total_batch
            
            val_prediction_softmax = F.softmax(val_prediction, dim=1)
            val_prediction_class = torch.argmax(val_prediction_softmax, dim=1)
            
            val_correct += (val_prediction_class == val_Y).sum().item()
            val_num_total_data += len(val_labels)
            
    
        
    print(f"Epoch [{epoch}] train_loss: {avg_loss:.4f}, train_accuracy: {correct/num_total_data*100:.2f}%", end=' / ')
    print(f"val_loss: {val_avg_loss:.4f}, val_accuracy: {val_correct/val_num_total_data*100:.2f}%")

Epoch [1] train_loss: 0.6427, train_accuracy: 83.93% / val_loss: 0.2408, val_accuracy: 93.75%
Epoch [2] train_loss: 0.1983, train_accuracy: 94.97% / val_loss: 0.1561, val_accuracy: 95.92%
Epoch [3] train_loss: 0.1409, train_accuracy: 96.33% / val_loss: 0.1191, val_accuracy: 96.56%
Epoch [4] train_loss: 0.1127, train_accuracy: 97.08% / val_loss: 0.1002, val_accuracy: 97.28%
Epoch [5] train_loss: 0.0966, train_accuracy: 97.46% / val_loss: 0.0949, val_accuracy: 97.19%
Epoch [6] train_loss: 0.0857, train_accuracy: 97.67% / val_loss: 0.0776, val_accuracy: 97.79%
Epoch [7] train_loss: 0.0756, train_accuracy: 97.99% / val_loss: 0.0715, val_accuracy: 97.90%
Epoch [8] train_loss: 0.0692, train_accuracy: 98.10% / val_loss: 0.0663, val_accuracy: 98.00%
Epoch [9] train_loss: 0.0630, train_accuracy: 98.31% / val_loss: 0.0661, val_accuracy: 97.97%
Epoch [10] train_loss: 0.0577, train_accuracy: 98.40% / val_loss: 0.0613, val_accuracy: 98.10%
Epoch [11] train_loss: 0.0545, train_accuracy: 98.52% / val

In [14]:
# 가중치 저장 하고 불러오기
## torch.save(model.state_dict(), PATH)
## model = CNN().to(device)
## model.load_state_dict(torch.load(PATH, map_location=device))