In [1]:
import torch as tch
from torch import autograd as agd
import torch.nn as tchnn
import torch.nn.functional as F
import torch.optim as optm
from torchvision import transforms, utils
import torchvision
import os
import struct
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import PIL

In [2]:
#my implementation of model
class LeNet5(tchnn.Module):
    
    def __init__(self):
        super(LeNet5, self).__init__()
        self.C1 = tchnn.Conv2d(1,6,5) #in-channel, out-channel, kernel-size
        self.C3 = tchnn.Conv2d(6,16,5)
        self.C5 = tchnn.Conv2d(16,120,5)
        self.F6 = tchnn.Linear(120,84)
        self.Ou = tchnn.Linear(84,10)
    
    def forward(self, x):
        #print('dbg ', x.size())
        x = F.max_pool2d(F.relu(self.C1(x)), 2) #till S2
        #print('dbg ', x.size())
        x = F.max_pool2d(F.relu(self.C3(x)), 2) #till S4
        #print('dbg ', x.size())
        x = F.relu(self.C5(x)) #till C5
        x = x.view(-1, 120)
        #print('dbg ', x.size())
        x = F.relu(self.F6(x))
        #print('dbg ', x.size())
        x = F.relu(self.Ou(x))
        #print('dbg ', x.size())
        return x
        
    def name(self):
        return 'LeNet5'

In [3]:
batchsz = 128
train_loader = tch.utils.data.DataLoader(
        torchvision.datasets.MNIST('./MNIST_data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.Pad(2, 0),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
batch_size=batchsz, shuffle=True, num_workers=1)

In [4]:
test_loader = tch.utils.data.DataLoader(
    torchvision.datasets.MNIST('./MNIST_data', train=False, transform=transforms.Compose([
                        transforms.Pad(2, 0),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
batch_size=batchsz, shuffle=True, num_workers=1)

In [18]:
model = LeNet5().cuda()
optimizer = optm.SGD(model.parameters(), lr=0.0005, momentum=0.05)

In [19]:
for epoch in range(100):
    #training
    for batch_idx, (X,labels) in enumerate(train_loader):
        optimizer.zero_grad()
        labels,X = agd.Variable(labels.cuda()),agd.Variable(X.cuda())
        #labels = tch.squeeze(labels)
        #print(batch_idx, X.size())
        outp = model(X)
        #print(list(model.parameters()))
        loss = F.cross_entropy(outp, labels)
        #print(loss)
        loss.backward()
        optimizer.step()
        #print(model.parameters())
        if batch_idx % 100 == 0:
            print('==>>> epoch: {}, batch index: {}, train loss: {:.8f}'.format(epoch, batch_idx, loss.data[0]))
    #training
    test_loss = 0
    correct_cnt = 0
    for X,labels in test_loader:
        X,labels = agd.Variable(X.cuda(), volatile=True), agd.Variable(labels.cuda())
        outp = model(X)
        test_loss += F.cross_entropy(outp, labels)
        pred = outp.data.max(1)[1]
        correct_cnt += pred.eq(labels.data).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('<<<== Test set: Average loss: {}, Accuracy: {}/{} ({}%)'.format(
        test_loss, correct_cnt, len(test_loader.dataset), 100*correct_cnt/len(test_loader.dataset)))

==>>> epoch: 0, batch index: 0, train loss: 2.29553413
==>>> epoch: 0, batch index: 100, train loss: 2.30281687
==>>> epoch: 0, batch index: 200, train loss: 2.30062294
==>>> epoch: 0, batch index: 300, train loss: 2.29678535
==>>> epoch: 0, batch index: 400, train loss: 2.29932880
<<<== Test set: Average loss: Variable containing:
1.00000e-02 *
  1.8148
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 1666/10000 (16.66%)
==>>> epoch: 1, batch index: 0, train loss: 2.30012178
==>>> epoch: 1, batch index: 100, train loss: 2.29744053
==>>> epoch: 1, batch index: 200, train loss: 2.29436159
==>>> epoch: 1, batch index: 300, train loss: 2.29173779
==>>> epoch: 1, batch index: 400, train loss: 2.29509974
<<<== Test set: Average loss: Variable containing:
1.00000e-02 *
  1.8114
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 2216/10000 (22.16%)
==>>> epoch: 2, batch index: 0, train loss: 2.29656744
==>>> epoch: 2, batch index: 100, train loss: 2.29537535
==>>> epoch: 2, batch in

==>>> epoch: 19, batch index: 0, train loss: 1.17133677
==>>> epoch: 19, batch index: 100, train loss: 1.27188420
==>>> epoch: 19, batch index: 200, train loss: 1.01359987
==>>> epoch: 19, batch index: 300, train loss: 0.81463075
==>>> epoch: 19, batch index: 400, train loss: 1.07376921
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  7.4734
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7066/10000 (70.66%)
==>>> epoch: 20, batch index: 0, train loss: 1.24088264
==>>> epoch: 20, batch index: 100, train loss: 1.03524995
==>>> epoch: 20, batch index: 200, train loss: 0.87988728
==>>> epoch: 20, batch index: 300, train loss: 0.88545340
==>>> epoch: 20, batch index: 400, train loss: 1.12141740
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  7.1889
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7118/10000 (71.18%)
==>>> epoch: 21, batch index: 0, train loss: 1.07298028
==>>> epoch: 21, batch index: 100, train loss: 0.97629297
==>>> epoch:

==>>> epoch: 38, batch index: 0, train loss: 0.60495996
==>>> epoch: 38, batch index: 100, train loss: 0.76035082
==>>> epoch: 38, batch index: 200, train loss: 0.80146700
==>>> epoch: 38, batch index: 300, train loss: 0.71467310
==>>> epoch: 38, batch index: 400, train loss: 0.66587412
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  5.4195
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7565/10000 (75.65%)
==>>> epoch: 39, batch index: 0, train loss: 0.69984657
==>>> epoch: 39, batch index: 100, train loss: 0.55718207
==>>> epoch: 39, batch index: 200, train loss: 0.62172991
==>>> epoch: 39, batch index: 300, train loss: 0.69468552
==>>> epoch: 39, batch index: 400, train loss: 0.68859756
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  5.3716
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7574/10000 (75.74%)
==>>> epoch: 40, batch index: 0, train loss: 0.65920061
==>>> epoch: 40, batch index: 100, train loss: 0.80376840
==>>> epoch:

==>>> epoch: 57, batch index: 0, train loss: 0.55279261
==>>> epoch: 57, batch index: 100, train loss: 0.63033247
==>>> epoch: 57, batch index: 200, train loss: 0.58587605
==>>> epoch: 57, batch index: 300, train loss: 0.45840245
==>>> epoch: 57, batch index: 400, train loss: 0.56431442
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  4.7957
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7730/10000 (77.3%)
==>>> epoch: 58, batch index: 0, train loss: 0.51662660
==>>> epoch: 58, batch index: 100, train loss: 0.66591400
==>>> epoch: 58, batch index: 200, train loss: 0.55028504
==>>> epoch: 58, batch index: 300, train loss: 0.69581330
==>>> epoch: 58, batch index: 400, train loss: 0.66050106
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  4.7652
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7737/10000 (77.37%)
==>>> epoch: 59, batch index: 0, train loss: 0.64233869
==>>> epoch: 59, batch index: 100, train loss: 0.49585074
==>>> epoch: 

==>>> epoch: 76, batch index: 0, train loss: 0.59664041
==>>> epoch: 76, batch index: 100, train loss: 0.54842603
==>>> epoch: 76, batch index: 200, train loss: 0.64124244
==>>> epoch: 76, batch index: 300, train loss: 0.69430453
==>>> epoch: 76, batch index: 400, train loss: 0.60825276
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  4.5081
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7841/10000 (78.41%)
==>>> epoch: 77, batch index: 0, train loss: 0.49424100
==>>> epoch: 77, batch index: 100, train loss: 0.55460978
==>>> epoch: 77, batch index: 200, train loss: 0.75529820
==>>> epoch: 77, batch index: 300, train loss: 0.66572672
==>>> epoch: 77, batch index: 400, train loss: 0.69934219
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  4.5198
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7844/10000 (78.44%)
==>>> epoch: 78, batch index: 0, train loss: 0.52590072
==>>> epoch: 78, batch index: 100, train loss: 0.58796304
==>>> epoch:

==>>> epoch: 95, batch index: 0, train loss: 0.54681647
==>>> epoch: 95, batch index: 100, train loss: 0.64181435
==>>> epoch: 95, batch index: 200, train loss: 0.56175405
==>>> epoch: 95, batch index: 300, train loss: 0.67812055
==>>> epoch: 95, batch index: 400, train loss: 0.84262866
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  4.2736
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7907/10000 (79.07%)
==>>> epoch: 96, batch index: 0, train loss: 0.60491627
==>>> epoch: 96, batch index: 100, train loss: 0.64590323
==>>> epoch: 96, batch index: 200, train loss: 0.66270703
==>>> epoch: 96, batch index: 300, train loss: 0.54863358
==>>> epoch: 96, batch index: 400, train loss: 0.59938598
<<<== Test set: Average loss: Variable containing:
1.00000e-03 *
  4.2966
[torch.cuda.FloatTensor of size 1 (GPU 0)]
, Accuracy: 7909/10000 (79.09%)
==>>> epoch: 97, batch index: 0, train loss: 0.45623210
==>>> epoch: 97, batch index: 100, train loss: 0.70175940
==>>> epoch: