# Imports, load data

In [1]:
import os
import pickle as pkl
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from mnist import MNIST
from torch.autograd import Variable

from src.network import CNN, get_output_size_2d, train_CNN

In [2]:
# Development convenience: with mode 2, IPython re-imports edited modules
# (e.g. the project's src.network) before running each cell, so changes to
# the .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Location of the raw MNIST files. Set the MNIST_PATH environment variable to
# point at your own copy; the original author's local path is the fallback.
path_to_MNIST = os.environ.get(
    'MNIST_PATH', '/home/cole/Desktop/other/ML/DATASETS/MNIST'
)

mndata = MNIST(path_to_MNIST)

# Training split: images reshaped to (num_images, 28, 28), labels to a 1-D array.
X_train, y_train = mndata.load_training()
X_train = np.array(X_train).reshape(-1, 28, 28)
y_train = np.array(y_train)

# Test split, same layout as the training split.
X_test, y_test = mndata.load_testing()
X_test = np.array(X_test).reshape(-1, 28, 28)
y_test = np.array(y_test)

# Cheap sanity checks: every image must have exactly one label.
assert X_train.shape[0] == y_train.shape[0], 'train images/labels count mismatch'
assert X_test.shape[0] == y_test.shape[0], 'test images/labels count mismatch'

## Validation data

In [4]:
# Carve a class-balanced validation set out of the training data:
# VALID_PER_CLASS randomly chosen images for each of the 10 digit classes.
#
# NOTE: this cell rebinds X_train / y_train to the reduced training set, so it
# is not idempotent -- re-run the data-loading cell before re-running this one.
SEED = 0
VALID_PER_CLASS = 1000

# Seed NumPy's global RNG so the split is the same on every fresh run.
np.random.seed(SEED)

# Collect the held-out indices first and slice once at the end, instead of
# growing X_valid with a vstack (full copy) on every loop iteration.
# flatnonzero always yields a 1-D index array, even for a single match.
valid_idx = np.concatenate([
    np.random.choice(
        np.flatnonzero(y_train == c), size=VALID_PER_CLASS, replace=False
    )
    for c in range(10)
]).tolist()

# Kept as float64, which is the dtype that stacking the images onto a float
# zeros array used to produce, so downstream code sees the same type.
X_valid = X_train[valid_idx].astype(np.float64)
y_valid = y_train[valid_idx]

# Remove the held-out samples from the training set.
X_train = np.delete(X_train, valid_idx, axis=0)
y_train = np.delete(y_train, valid_idx)

assert X_valid.shape[0] == y_valid.shape[0] == 10 * VALID_PER_CLASS

# Set architecture, hyperparameters

## Architecture

In [5]:
# ======================================================
# Convolutional portion
# ======================================================
# Two identical stages: BatchNorm -> Conv -> ReLU -> 2x2 MaxPool.
# Channels go 1 -> 4 -> 5; kernels are 5x5 then 3x3.
cv_layers = OrderedDict([
    ('batch_norm_0', nn.BatchNorm2d(num_features=1)),
    ('conv_0', nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5)),
    ('relu_0', nn.ReLU()),
    ('max_pool_0', nn.MaxPool2d(kernel_size=(2, 2))),
    ('batch_norm_1', nn.BatchNorm2d(num_features=4)),
    ('conv_1', nn.Conv2d(in_channels=4, out_channels=5, kernel_size=3)),
    ('relu_1', nn.ReLU()),
    ('max_pool_1', nn.MaxPool2d(kernel_size=(2, 2))),
])

# Presumably the spatial (height, width) of the conv stack's output for a
# 28x28 input -- see src.network.get_output_size_2d to confirm.
h, w = get_output_size_2d(cv_layers, (28, 28))

# ======================================================
# Fully-connected portion
# ======================================================
# Flattened feature count fed to the first linear layer:
# height * width * channels of the last convolution.
fc_in_shape = h * w * cv_layers['conv_1'].out_channels

# Dense head: fc_in_shape -> 200 -> 100 -> 10 outputs.
# (BatchNorm1d around linear_0 was experimented with and is currently disabled.)
fc_layers = OrderedDict([
    ('linear_0', nn.Linear(in_features=fc_in_shape, out_features=200)),
    ('relu_0', nn.ReLU()),
    ('linear_1', nn.Linear(in_features=200, out_features=100)),
    ('relu_1', nn.ReLU()),
    ('linear_out', nn.Linear(in_features=100, out_features=10)),
])

## Training hyperparameters

In [6]:
# Assemble the network from the convolutional and fully-connected layer specs.
model = CNN(cv_layers, fc_in_shape, fc_layers)

# Mini-batch schedule.
num_epochs = 10
batch_size = 20
# Ceiling division: number of batches needed to cover the training set once.
num_batches = -(-X_train.shape[0] // batch_size)

# Objective and optimiser.
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Run

## Train

In [None]:
# Fit the network. train_CNN hands back the model together with a `stats`
# mapping; the test cell reads the best model from stats['best_model'].
model, stats = train_CNN(
    model,
    (X_train, y_train),
    optimizer,
    loss,
    num_epochs,
    batch_size,
    valid_data=(X_valid, y_valid),
    verbose=True,
)

Epoch [0/10]
Avg train loss: 0.20414
Valid acc: 0.965
Epoch [1/10]
Avg train loss: 0.12823
Valid acc: 0.961
Epoch [2/10]
Avg train loss: 0.11805
Valid acc: 0.967
Epoch [3/10]
Avg train loss: 0.11324
Valid acc: 0.970
Epoch [4/10]
Avg train loss: 0.10815
Valid acc: 0.976
Epoch [5/10]
Avg train loss: 0.10546
Valid acc: 0.972
Epoch [6/10]
Avg train loss: 0.10980
Valid acc: 0.971
Epoch [7/10]
Avg train loss: 0.10078


  probs = np.exp(scores).reshape(n, -1)
  probs /= probs.sum(axis=1).reshape(-1, 1)


Valid acc: 0.972
Epoch [8/10]
Avg train loss: 0.09292
Valid acc: 0.976
Epoch [9/10]


## Test

In [None]:
# Evaluate the best model recorded during training on the held-out test set.
best_model = stats['best_model'][0]

# Presumably nn.Module.eval(): puts layers such as BatchNorm into inference mode.
best_model.eval()

# Add a channel axis (N, 28, 28) -> (N, 1, 28, 28), then take the
# highest-scoring class per image as the prediction.
y_pred = best_model.predict(
    Variable(torch.Tensor(X_test.reshape(-1, 1, 28, 28)))
).argmax(axis=1)

# Element-wise comparison of labels and predictions, then count the matches.
is_correct = y_test.squeeze() == y_pred.squeeze()
num_correct = int(np.count_nonzero(is_correct))
acc = num_correct / y_test.shape[0]

print('Test accuracy: %.3f' % acc)

## Save model

In [None]:
# Persist the best model to disk. The context manager guarantees the file is
# closed even if pickling raises part-way through.
#
# NOTE(review): pickle stores classes by reference, so loading this file needs
# the model's class definition to be importable under the same module path.
# Only unpickle files from trusted sources -- pickle.load can execute
# arbitrary code.
with open('target_network.p', 'wb') as f:
    pkl.dump(best_model, f)