# MNIST + AlexNet

## layer 1

![](./layer1.png)

## layer 2

![](./layer2.png)

## layer 3

![](./layer3.png)

## layer 4

![](./layer4.png)

## layer 5

![](./layer5.png)

## layer 6

 ![](./layer6.png)

## layer 7

![](./layer7.png)

## layer 8

![](./layer8.png)

## load package

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import time

## load data

In [2]:
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, ), (0.1, ))])

train_set = dset.MNIST(root='./training', train=True, transform=trans, download=False)
test_set = dset.MNIST(root='./test', train=False, transform=trans, download=False)

In [3]:
batch_size = 128

train_loader = torch.utils.data.DataLoader(
                dataset = train_set,
                batch_size = batch_size)

test_loader = torch.utils.data.DataLoader(
                dataset = test_set,
                batch_size = batch_size)

## AlexNet

In [4]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=10): # mnist 0~9
        super().__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=96, kernel_size=4, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, groups=2, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1),
        )
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=384, padding=1, kernel_size=3),
            nn.ReLU(inplace=True)
        )
        
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=384, out_channels=384, padding=1, kernel_size=3),
            nn.ReLU(inplace=True)
        )
        
        self.layer5 = nn.Sequential(
            nn.Conv2d(in_channels=384, out_channels=256, padding=1, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1)
        )
        
        self.layer6 = nn.Sequential(
            nn.Linear(in_features=2*2*256, out_features=786),
            nn.ReLU(inplace=True),
            nn.Dropout()
        )
        
        self.layer7 = nn.Sequential(
            nn.Linear(in_features=786, out_features=786),
            nn.ReLU(inplace=True),
            nn.Dropout()
        )
        
        self.layer8 = nn.Linear(in_features=786, out_features=num_classes)
        
        self.ceriation = nn.CrossEntropyLoss()
    
    def forward(self, x, target):
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        
        x = x.view(-1, 2*2*256)
        
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)
        
        loss = self.ceriation(x, target)
        
        return x, loss

## optimizer

In [5]:
model = AlexNet().cpu()
optimizer = optim.SGD(model.parameters(), lr=0.01)

## training and testing

In [6]:
init_time = time.time()
for epoch in range(10):
    # training
    for batch_idx, (x, target) in enumerate(train_loader):
        optimizer.zero_grad()
        x, target = Variable(x), Variable(target)
        _, loss = model(x, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print ('==>>> epoch: {}, batch index: {}, train loss: {:.6f}, time: {}'.format(epoch, batch_idx, loss.data[0], time.time()-init_time))
           
    # testing
    correct_cnt, ave_loss = 0, 0
    for batch_idx, (x, target) in enumerate(test_loader):
        x, target = Variable(x, volatile=True), Variable(target, volatile=True)
        score, loss = model(x, target)
        _, pred_label = torch.max(score.data, 1)
        correct_cnt += (pred_label == target.data).sum()
        ave_loss += loss.data[0]
    
    accuracy = correct_cnt / len(test_loader) / batch_size
    ave_loss /= len(test_loader)
    print (' epoch: {}, test loss: {:.6f}, accuracy: {:.4f}, time: {}'.format(epoch, ave_loss, accuracy, time.time()-init_time))
      

==>>> epoch: 0, batch index: 0, train loss: 2.303234, time: 2.438838243484497
==>>> epoch: 0, batch index: 100, train loss: 2.300382, time: 243.31529092788696
==>>> epoch: 0, batch index: 200, train loss: 2.291657, time: 483.15034794807434
==>>> epoch: 0, batch index: 300, train loss: 2.280793, time: 723.2750382423401
==>>> epoch: 0, batch index: 400, train loss: 2.270288, time: 962.1734411716461
 epoch: 0, test loss: 2.217790, accuracy: 0.3020, time: 1224.6048822402954
==>>> epoch: 1, batch index: 0, train loss: 2.210861, time: 1227.0847890377045
==>>> epoch: 1, batch index: 100, train loss: 1.953311, time: 1466.4758858680725
==>>> epoch: 1, batch index: 200, train loss: 1.420916, time: 1706.6096358299255
==>>> epoch: 1, batch index: 300, train loss: 1.562169, time: 1946.3682956695557
==>>> epoch: 1, batch index: 400, train loss: 0.841147, time: 2183.8771257400513
 epoch: 1, test loss: 1.139876, accuracy: 0.5800, time: 2446.0116233825684
==>>> epoch: 2, batch index: 0, train loss: 1.0