In [1]:
!pip install torch torchvision



In [2]:
#Import Libraries


from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np

In [3]:
# Run configuration shared by the cells below.
args = {
    'batch_size': 128,        # samples per training mini-batch
    'test_batch_size': 1000,  # samples per evaluation mini-batch
    'epochs': 10,             # number of full passes over the training set
    'lr': 0.01,               # learning rate: step size of each SGD update
    'momentum': 0.5,          # SGD momentum: moving average of past gradients
    'seed': 1,                # random seed applied at the end of this cell
}
# Log the training loss roughly every 5000 samples, expressed in batches.
args['log_interval'] = 5000 // args['batch_size']
args['cuda'] = False          # set to True to move the model and batches to the GPU

# Extra keyword arguments forwarded to both DataLoader constructors.
kwargs = {}

# Apply the seed. Without this call the 'seed' entry is never used, so weight
# initialisation, DataLoader shuffling and dropout masks differ on every run.
torch.manual_seed(args['seed'])

In [4]:
#load the data
# Training split: downloaded into ../data if it is not already there. Each image
# is converted to a tensor and then passed through Normalize(mean=0.1307, std=0.3081).
train_set = datasets.MNIST(
    '../data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=args['batch_size'], shuffle=True, **kwargs)

# Test split: read from the same root directory with the same preprocessing.
test_set = datasets.MNIST(
    '../data',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=args['test_batch_size'], shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [5]:
def leaky_relu(input, negative_slope=0.1):
    """Element-wise leaky ReLU computed as ``max(negative_slope * input, input)``.

    Args:
        input: Tensor to activate.
        negative_slope: Multiplier applied to negative values. Defaults to
            0.1, the value that was previously hard-coded. For slopes in
            ``[0, 1]`` the result is the usual leaky ReLU; a slope above 1
            would scale the positive side instead, because the element-wise
            maximum then selects ``negative_slope * input`` for positive inputs.

    Returns:
        Tensor with the same shape as ``input``.
    """
    return torch.max(negative_slope * input, input)

In [6]:
# Active variant: group normalization (GroupNorm after conv1, conv2 and fc1)
class Net(nn.Module):
    """Two-conv / two-FC classifier with GroupNorm after conv1, conv2 and fc1.

    ``conv1`` takes 1-channel images and ``forward`` flattens to 320 features.
    With two 5x5 convolutions and two 2x2 poolings that matches 28x28 inputs:
    28 -> 24 -> 12 -> 8 -> 4, and 20 channels * 4 * 4 = 320.
    The output has 10 log-probabilities per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()  # channel-wise dropout, default probability
        # Classifier head.
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)
        # Normalisation layers: (number of groups, number of channels).
        self.gn1 = nn.GroupNorm(2, 10)
        self.gn2 = nn.GroupNorm(4, 20)
        self.gn3 = nn.GroupNorm(2, 50)

    def forward(self, x):
        # Stage 1: conv -> group norm -> ReLU -> 2x2 max-pool.
        h = self.conv1(x)
        h = self.gn1(h)
        h = F.relu(h)
        h = F.max_pool2d(h, 2)

        # Stage 2: conv -> group norm -> ReLU -> channel dropout -> 2x2 max-pool.
        h = self.conv2(h)
        h = self.gn2(h)
        h = F.relu(h)
        h = self.conv2_drop(h)
        h = F.max_pool2d(h, 2)

        # Flatten to the 320 features that fc1 expects.
        flat = h.view(-1, 320)

        # Hidden layer: linear -> group norm -> ReLU, then dropout, which is
        # only active while the module is in training mode.
        hidden = F.relu(self.gn3(self.fc1(flat)))
        hidden = F.dropout(hidden, training=self.training)

        # Output layer; log_softmax returns log-probabilities, the input
        # format that F.nll_loss expects.
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [7]:
# Alternative variant: batch normalization instead of GroupNorm (disabled; uncomment to compare)
# class Net(nn.Module):
#     #This defines the structure of the NN.
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
#         self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
#         self.conv2_drop = nn.Dropout2d()  #Dropout
#         self.fc1 = nn.Linear(320, 50)
#         self.fc2 = nn.Linear(50, 10)
#         self.bn1=nn.BatchNorm2d(10)
#         self.bn2=nn.BatchNorm2d(20)
#         self.bn3=nn.BatchNorm1d(50)
        
#     def forward(self, x):
#         #Convolutional Layer/Pooling Layer/Activation
#         x=F.max_pool2d(F.relu(self.bn1(self.conv1(x))),2)
#         #Convolutional Layer/Dropout/Pooling Layer/Activation
#         x=F.max_pool2d(self.conv2_drop(F.relu(self.bn2(self.conv2(x)))),2)
#         x = x.view(-1, 320)
#         #Fully Connected Layer/Activation
#         x = F.relu(self.bn3(self.fc1(x)))
#         x = F.dropout(x, training=self.training)
#         #Fully Connected Layer/Activation
#         x = self.fc2(x)
#         #Softmax gets probabilities. 
#         return F.log_softmax(x, dim=1)

In [8]:
# Alternative variant: no normalization layers, leaky_relu activations (disabled; uncomment to compare)
# class Net(nn.Module):
#     #This defines the structure of the NN.
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
#         self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
#         self.conv2_drop = nn.Dropout2d()  #Dropout
#         self.fc1 = nn.Linear(320, 50)
#         self.fc2 = nn.Linear(50, 10)
        
#     def forward(self, x):
#         #Convolutional Layer/Pooling Layer/Activation
#         x = leaky_relu(F.max_pool2d(self.conv1(x), 2)) 
#         #Convolutional Layer/Dropout/Pooling Layer/Activation
#         x = leaky_relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
#         x = x.view(-1, 320)
#         #Fully Connected Layer/Activation
#         x = leaky_relu(self.fc1(x))
#         x = F.dropout(x, training=self.training)
#         #Fully Connected Layer/Activation
#         x = self.fc2(x)
#         #Softmax gets probabilities. 
#         return F.log_softmax(x, dim=1)

In [9]:
def train(epoch):
    """Train the global ``model`` for one pass over the global ``train_loader``.

    Uses the global ``optimizer`` for the parameter updates and reads
    ``args['cuda']`` and ``args['log_interval']`` from the global ``args``.

    Args:
        epoch: Epoch number; only used in the progress messages.

    Prints a progress line every ``args['log_interval']`` batches showing the
    number of samples processed before the current batch, the share of
    batches completed, and the current batch loss.
    """
    model.train()
    total_samples = len(train_loader.dataset)
    num_batches = len(train_loader)
    # Running count of samples consumed so far. The previous expression,
    # batch_idx * len(data), under-reports whenever the logged batch is the
    # final, shorter batch of the epoch.
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        if args['cuda']:
            data, target = data.cuda(), target.cuda()
        # Gradients accumulate by default, so clear them for this batch.
        optimizer.zero_grad()
        output = model(data)
        # Negative log-likelihood loss on the log-probabilities from the model.
        loss = F.nll_loss(output, target)
        # Back-propagate, then apply a single optimizer step.
        loss.backward()
        optimizer.step()
        # Print out the loss periodically.
        if batch_idx % args['log_interval'] == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, total_samples,
                100. * batch_idx / num_batches, loss.item()))
        samples_seen += len(data)

def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args['cuda']:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [10]:
# Build the network; move it to the GPU only when requested in the config.
model = Net()
if args['cuda']:
    model.cuda()

# Plain SGD with momentum over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=args['lr'],
    momentum=args['momentum'],
)

# Epochs are numbered from 1; evaluate on the test set after each one.
for epoch in range(1, args['epochs'] + 1):
    train(epoch)
    test()






Test set: Average loss: 0.3187, Accuracy: 9362/10000 (94%)


Test set: Average loss: 0.1541, Accuracy: 9602/10000 (96%)


Test set: Average loss: 0.1116, Accuracy: 9695/10000 (97%)


Test set: Average loss: 0.0894, Accuracy: 9744/10000 (97%)


Test set: Average loss: 0.0788, Accuracy: 9766/10000 (98%)


Test set: Average loss: 0.0724, Accuracy: 9781/10000 (98%)


Test set: Average loss: 0.0683, Accuracy: 9796/10000 (98%)


Test set: Average loss: 0.0600, Accuracy: 9825/10000 (98%)


Test set: Average loss: 0.0582, Accuracy: 9823/10000 (98%)


Test set: Average loss: 0.0549, Accuracy: 9838/10000 (98%)



- batch normalization 추가
- relu -> 다른 활성화함수로 변경
- batch normalization vs group normalization