# Convolutional Network for MNIST Predictions. 
In this notebook, I use a convolutional architecture rather than a fully connected network. The architecture I am adopting is LeNet-5, which has recorded an error rate as low as 0.95% (i.e., 99.05% accuracy).

In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models
import numpy as np

import matplotlib.pyplot as plot
import pandas as pd

from collections import OrderedDict

In [2]:
class MNIST(Dataset):
    """MNIST digits stored as CSV rows of ``label, pixel0, pixel1, ...``.

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
        transform: Optional callable applied to each image array.
        image_shape: ``(height, width)`` that each flat pixel row is
            reshaped to before the transform is applied.
    """

    def __init__(self, csv_path, transform=None, image_shape=(28, 28)):
        self.pixel_frame = pd.read_csv(csv_path)
        self.transform = transform
        self.image_shape = tuple(image_shape)

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.pixel_frame)

    def __getitem__(self, index):
        """Return ``(image, label)`` for row *index*."""
        # ``iloc[index, 0]`` is a scalar, which has no ``to_numpy``; a plain
        # int collates into an integer class-index tensor for the loss.
        label = int(self.pixel_frame.iloc[index, 0])
        # float32 matches the default dtype of model parameters; the 2-D
        # shape is what image transforms and conv layers operate on.
        data = self.pixel_frame.iloc[index, 1:].to_numpy(dtype='float32')
        data = data.reshape(self.image_shape)

        if self.transform:
            data = self.transform(data)

        return data, label

# Normalize only operates on tensors, so ToTensor has to run first.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

trainset = MNIST('data/train.csv', transform=transform)
# Evaluate with the same preprocessing the model is trained on.
testset = MNIST('data/test.csv', transform=transform)

trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)
testloader  = DataLoader(testset, batch_size=32, shuffle=False, num_workers=4)

In [3]:
# Hold out 20% of the training data (rounded down) for validation;
# the remaining samples stay in the training set.
N = len(trainset)
valid_count = int(np.floor(N * .2))
split = (N - valid_count, valid_count)
trainset, validset = torch.utils.data.random_split(trainset, split)

In [4]:
# Both loaders share the same batching settings and reshuffle every epoch.
loader_opts = dict(batch_size=32, shuffle=True, num_workers=4)
trainload = DataLoader(trainset, **loader_opts)
validload = DataLoader(validset, **loader_opts)

In [6]:
class LeNet(nn.Module):
    """LeNet-5 style CNN for 28x28 images.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of logits produced per image.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        # padding=2 keeps 28x28 through the 5x5 conv; pooling halves it
        # to 14x14.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 6, 5, padding=2),
            nn.ReLU(),
            nn.AvgPool2d(2, stride=2),
        )

        # 14x14 -> 10x10 after the unpadded 5x5 conv -> 5x5 after pooling.
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.AvgPool2d(2, stride=2),
        )

        # 16 feature maps of 5x5 are flattened into 400 features.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(5*5*16, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
        )

        self.out = nn.Linear(84, num_classes)

    def forward(self, x):
        """Map a ``(N, in_channels, 28, 28)`` batch to ``(N, num_classes)`` logits."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.fc(x)
        return self.out(x)
        
        
    

In [7]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = LeNet().to(device)

In [None]:
opt = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
epochs = 10

# Mean per-batch loss recorded once per epoch.
train_loss = []
valid_loss = []

for e in range(epochs):
    running_tl = 0
    running_vl = 0

    # Training pass: backprop and update the weights on every batch.
    model.train()
    for images, labels in trainload:
        images, labels = images.to(device), labels.to(device)
        opt.zero_grad()
        t_cel = criterion(model(images), labels)
        t_cel.backward()
        opt.step()

        running_tl += t_cel.item()

    # Validation pass: no gradients are needed, only the loss value.
    model.eval()
    with torch.no_grad():
        for images, labels in validload:
            images, labels = images.to(device), labels.to(device)
            v_cel = criterion(model(images), labels)
            running_vl += v_cel.item()

    train_loss.append(running_tl / len(trainload))
    # Average over the validation batches, not the training batches.
    valid_loss.append(running_vl / len(validload))