In [20]:
import torch 
import torchvision 
#import torch.utils.onnx  
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device


device(type='cpu')

In [21]:
from torchvision import datasets
from torchvision.transforms import ToTensor
# MNIST training split, stored under ./data and downloaded when requested;
# every image is converted to a tensor by ToTensor().
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)

# MNIST test split read from the same ./data directory (no download flag here).
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

In [22]:
# Summarize the training split: number of datapoints, root location and transform.
print(train_data)

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()


In [23]:
# Summarize the test split: number of datapoints, root location and transform.
print(test_data)


Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()


In [24]:
# Shape of the raw training image tensor: (num_images, height, width).
print(train_data.data.size())


torch.Size([60000, 28, 28])


In [25]:
# Shape of the training label tensor: one label per image.
print(train_data.targets.size())

torch.Size([60000])


In [26]:
#plot one train data
import matplotlib as mpl
import matplotlib.pyplot as plt
# NOTE(review): this forces the Tk GUI backend after pyplot was imported, which
# presumably bypasses inline notebook rendering and needs a display — confirm
# that an external window is really what is wanted here.
mpl.use('tkAgg')
fig = plt.figure()
fig.set_tight_layout(True)

# Show the first raw training image in grayscale, titled with its integer label.
plt.imshow(train_data.data[0], cmap='gray')
plt.title('%i' % train_data.targets[0])

plt.show()
#print(plt.show())

In [27]:
#plot multiple train_data
# Draw a 5x5 grid of randomly selected training images, each titled with its label.
figure = plt.figure(figsize=(10, 8))
cols, rows = 5, 5
for i in range(1, cols * rows + 1):
    # Pick a random index into the training set.
    sample_idx = torch.randint(len(train_data), size=(1,)).item()
    img, label = train_data[sample_idx]
    # Work on the freshly added subplot directly instead of the implicit current axes.
    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(label)
    ax.axis("off")
    # Drop the singleton channel dimension so imshow receives a 2-D image.
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

In [28]:
#prepare data for training with DataLoaders
# shuffle=True makes each loader present the samples in a new random order on every pass

from torch.utils.data import DataLoader

# One DataLoader per split: batches of 100 samples, reshuffled on every pass,
# loaded by a single worker process.
loaders = {
    split: DataLoader(dataset, batch_size=100, shuffle=True, num_workers=1)
    for split, dataset in (('train', train_data), ('test', test_data))
}
loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x1f7aface640>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x1f7aface670>}

In [29]:
# define the CNN model
# use 2 convolutional layers,
# each followed by a ReLU activation function and max pooling
import torch.nn as nn 
class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages plus a linear head.

    Both convolutions use 5x5 kernels with stride 1 and padding 2, so only the
    2x2 max-pooling layers reduce the spatial size. The linear head expects
    32 * 7 * 7 flattened features and produces 10 class scores.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 input channel -> 16 feature maps, then 2x2 max pooling.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Stage 2: 16 -> 32 feature maps, then 2x2 max pooling.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected layer mapping the flattened features to 10 classes.
        self.out = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Return ``(class_scores, flattened_features)`` for a batch ``x``.

        The flattened features are returned alongside the scores so they can be
        used for visualization.
        """
        features = self.conv2(self.conv1(x))
        # Collapse everything except the batch dimension: (batch_size, 32 * 7 * 7).
        flat = features.view(features.size(0), -1)
        return self.out(flat), flat
    
    # The forward() pass defines how the output is computed from the
    # layers and functions defined above.

    


In [30]:
# Instantiate the network and print its layer structure.
cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)


In [31]:
# define the loss function: cross-entropy between the model's class scores and the integer labels
loss_func = nn.CrossEntropyLoss()
loss_func


CrossEntropyLoss()

In [32]:
from torch import optim 

# Adam optimizer over all parameters of `cnn`, with a learning rate of 0.01.
optimizer = optim.Adam(cnn.parameters(), lr = 0.01)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0
)

In [33]:
from torch.autograd import Variable
num_epochs = 10
def train(num_epochs, cnn, loaders, criterion=None, opt=None):
    """Train ``cnn`` on ``loaders['train']`` for ``num_epochs`` epochs.

    Args:
        num_epochs: Number of full passes over the training loader.
        cnn: Model whose forward pass returns a sequence with the class scores
            as its first element.
        loaders: Mapping with a ``'train'`` entry that yields
            ``(images, labels)`` batches and supports ``len()``.
        criterion: Loss callable invoked as ``criterion(scores, labels)``.
            Defaults to the notebook-level ``loss_func``.
        opt: Optimizer that updates the parameters of ``cnn``. Defaults to the
            notebook-level ``optimizer``.

    Prints the current batch loss every 100 steps.
    """
    # Fall back to the notebook globals so existing three-argument calls keep working.
    if criterion is None:
        criterion = loss_func
    if opt is None:
        opt = optimizer

    # Put the model into training mode.
    cnn.train()

    total_step = len(loaders['train'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            # Tensors carry autograd state themselves, so the deprecated
            # torch.autograd.Variable wrapper is not needed.
            output = cnn(images)[0]
            loss = criterion(output, labels)

            # Clear gradients left over from the previous step, backpropagate,
            # then apply the update.
            opt.zero_grad()
            loss.backward()
            opt.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
train(num_epochs, cnn, loaders)
        
    

Epoch [1/10], Step [100/600], Loss: 0.1159
Epoch [1/10], Step [200/600], Loss: 0.0285
Epoch [1/10], Step [300/600], Loss: 0.0582
Epoch [1/10], Step [400/600], Loss: 0.1190
Epoch [1/10], Step [500/600], Loss: 0.1105
Epoch [1/10], Step [600/600], Loss: 0.1339
Epoch [2/10], Step [100/600], Loss: 0.0686
Epoch [2/10], Step [200/600], Loss: 0.0888
Epoch [2/10], Step [300/600], Loss: 0.0309
Epoch [2/10], Step [400/600], Loss: 0.0648
Epoch [2/10], Step [500/600], Loss: 0.0609
Epoch [2/10], Step [600/600], Loss: 0.0552
Epoch [3/10], Step [100/600], Loss: 0.0062
Epoch [3/10], Step [200/600], Loss: 0.0021
Epoch [3/10], Step [300/600], Loss: 0.0378
Epoch [3/10], Step [400/600], Loss: 0.0845
Epoch [3/10], Step [500/600], Loss: 0.1361
Epoch [3/10], Step [600/600], Loss: 0.0077
Epoch [4/10], Step [100/600], Loss: 0.0039
Epoch [4/10], Step [200/600], Loss: 0.0013
Epoch [4/10], Step [300/600], Loss: 0.0084
Epoch [4/10], Step [400/600], Loss: 0.1537
Epoch [4/10], Step [500/600], Loss: 0.0484
Epoch [4/10

In [34]:
def test():
    # Test the model
    cnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loaders['test']:
            test_output, last_layer = cnn(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))
            pass
    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    
    pass
test()

Test Accuracy of the model on the 10000 test images: 0.98


In [35]:
# Pull a single batch from the test loader and split it into images and labels.
sample = next(iter(loaders['test']))
imgs, lbls = sample



In [36]:
# Ground-truth labels of the first ten images in the sampled batch, as a NumPy array.
actual_number  = lbls[:10].numpy()
print(actual_number)

[2 1 6 3 7 9 9 9 3 0]


In [37]:
# Classify the first ten images of the sampled batch and compare with their labels.
test_output, last_layer = cnn(imgs[:10])
# torch.max over dim 1 yields (values, indices); the indices are the predicted classes.
top_class = torch.max(test_output, dim=1).indices
pred_y = top_class.data.numpy().squeeze()
print(f'Prediction number: {pred_y}')
print(f'Actual number: {actual_number}')

Prediction number: [2 1 6 3 7 9 9 9 3 0]
Actual number: [2 1 6 3 7 9 9 9 3 0]


In [38]:
list(cnn.parameters())[0].shape
# this is the shape of the first convolution's kernel tensor: (out_channels, in_channels, kernel_height, kernel_width)

torch.Size([16, 1, 5, 5])

In [39]:
import torch.onnx
from torch import nn 
import onnx 

# Export the model to ONNX using one random 1x1x28x28 input as the example
# argument; verbose=True prints the exported graph.
dummy_input = torch.randn(1,1,28,28)
torch.onnx.export(cnn, dummy_input, "mnist1.proto", verbose=True)

graph(%input.1 : Float(1:784, 1:784, 28:28, 28:1),
      %conv1.0.weight : Float(16:25, 1:25, 5:5, 5:1),
      %conv1.0.bias : Float(16:1),
      %conv2.0.weight : Float(32:400, 16:25, 5:5, 5:1),
      %conv2.0.bias : Float(32:1),
      %out.weight : Float(10:1568, 1568:1),
      %out.bias : Float(10:1),
      %22 : Long(1:1)):
  %7 : Float(1:12544, 16:784, 28:28, 28:1) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[5, 5], pads=[2, 2, 2, 2], strides=[1, 1]](%input.1, %conv1.0.weight, %conv1.0.bias) # E:\anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\conv.py:415:0
  %8 : Float(1:12544, 16:784, 28:28, 28:1) = onnx::Relu(%7) # E:\anaconda\envs\pytorch\lib\site-packages\torch\nn\functional.py:1119:0
  %9 : Float(1:3136, 16:196, 14:14, 14:1) = onnx::MaxPool[kernel_shape=[2, 2], pads=[0, 0, 0, 0], strides=[2, 2]](%8) # E:\anaconda\envs\pytorch\lib\site-packages\torch\nn\functional.py:575:0
  %10 : Float(1:6272, 32:196, 14:14, 14:1) = onnx::Conv[dilations=[1, 1], group=1, ker