In [1]:
import math

import cupy as cp
import matplotlib.pyplot as plt
from tqdm import tqdm

file_data   = "mnist.csv"
handle_file = open(file_data, "r")
data        = handle_file.readlines()
handle_file.close()

size_row    = 28    # height of the image
size_col    = 28    # width of the image

num_image   = len(data)
num_classes = 10

#
# make a matrix each column of which represents an images in a vector form 
#
images  = cp.empty((num_image, size_row * size_col), dtype=float)
labels  = cp.empty(num_image, dtype=int)

for cnt, line in enumerate(data):

    line_data   = line.split(',')
    label       = int(line_data[0])
    im_vector   = cp.array(list(map(int, line_data[1:])))

    labels[cnt]    = label
    images[cnt]    = im_vector

# one-hot
labels = cp.eye(num_classes)[labels]
images = (images / 255 - 0.5) * 2
    
# Split Data
train_images = images[:1000]
train_labels = labels[:1000]
test_images = images[1000:]
test_labels = labels[1000:]

print(train_images.shape)
print(test_images.shape)
print(train_labels.shape)
print(test_labels.shape)


(1000, 784)
(9000, 784)
(1000, 10)
(9000, 10)


In [2]:
# Define module

def sigmoid(x):
    """Element-wise logistic function, 1 / (1 + exp(-x))."""
    neg_exp = cp.exp(-x)
    return 1 / (1 + neg_exp)


class HiddenLayer:
    """Fully connected layer followed by a sigmoid activation.

    Attributes:
        weight: array of shape (out_shape, in_shape).
        bias:   array of shape (out_shape, 1).
        input:  the batch most recently passed to ``__call__``; it only exists
                after the first forward pass and is what ``backprop`` uses.
    """

    def __init__(self, in_shape, out_shape):
        # Glorot/Xavier-style scale: std = sqrt(2 / (fan_in + fan_out)).
        # Plain float arithmetic avoids creating a 0-d device array for a scalar.
        std = (2 / (in_shape + out_shape)) ** 0.5
        self.weight = cp.random.normal(0, std, (out_shape, in_shape))
        self.bias = cp.random.normal(0, 1e-6, (out_shape, 1))

    def __call__(self, x):
        """Forward pass: cache ``x`` and return sigmoid(x @ W.T + b.T)."""
        self.input = x
        return sigmoid(x @ self.weight.T + self.bias.T)

    def backprop(self, error, lr, weight_decay=0):
        """Apply one gradient step and return the error for the previous layer.

        Args:
            error: gradient with respect to this layer's pre-activation; it is
                multiplied as ``error.T @ self.input``, so it has one row per
                sample and one column per output unit.
            lr: learning rate.
            weight_decay: L2 coefficient added to both the weight and the bias
                gradient before dividing by the batch size.

        Returns:
            ``error @ W * input * (1 - input)``. The last factor is the sigmoid
            derivative written in terms of this layer's input, so the result is
            only meaningful when that input is itself a sigmoid output (i.e.
            the previous layer is another HiddenLayer).
        """
        batch_size = self.input.shape[0]

        # The propagated error must use the weights that produced the forward
        # pass, so compute it BEFORE the parameters are updated below.
        downstream_error = error @ self.weight * (self.input * (1 - self.input))

        self.weight -= lr * (error.T @ self.input + weight_decay * self.weight) / batch_size
        self.bias -= lr * (error.T.sum(axis=1, keepdims=True) + weight_decay * self.bias) / batch_size

        return downstream_error
        
    
class Sequential:
    """Container that applies its layers in order and back-propagates in reverse."""

    def __init__(self, module_list):
        self.module_list = module_list

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for module in self.module_list:
            out = module(out)
        return out

    def backprop(self, pred, label, lr, weight_decay):
        """Start from ``pred - label`` and let each layer update itself, last layer first.

        Each layer's ``backprop`` return value becomes the error handed to the
        layer before it. Nothing is returned.
        """
        grad = pred - label
        for module in self.module_list[::-1]:
            grad = module.backprop(grad, lr, weight_decay)
        
class BinaryCrossEntropy:
    """Mean binary cross-entropy over every element of ``preds``.

    The most recent ``preds`` / ``labels`` are kept on the instance.
    """
    labels = 0
    preds = 0
    # Predictions are clipped to [eps, 1 - eps] before the logarithm so that a
    # saturated sigmoid output (exactly 0 or 1) cannot produce log(0) and turn
    # the loss into inf/NaN. 1e-12 is chosen for float64 inputs.
    eps = 1e-12

    def __call__(self, preds, labels):
        """Return -mean(labels * log(p) + (1 - labels) * log(1 - p)) with p clipped."""
        self.labels = labels
        self.preds = preds

        clipped = cp.clip(preds, self.eps, 1 - self.eps)
        return -(labels * cp.log(clipped) + (1 - labels) * cp.log(1 - clipped)).mean()
    
class LearningRateScheduler:
    """Linear warm-up followed by cosine annealing down to zero.

    ``lr`` is 0 until the first call to ``step()``.

    Args:
        init_lr: peak learning rate, reached when warm-up ends.
        final_step: step at which the cosine schedule reaches 0.
        warm_up_step: number of warm-up steps (0 disables warm-up).
    """

    def __init__(self, init_lr, final_step, warm_up_step = 0):
        self.init_lr = init_lr
        self.final_step = final_step
        self.warm_up_step = warm_up_step
        self.lr = 0
        self._step = 0

    def step(self):
        """Advance one step and recompute ``lr``."""
        self._step += 1

        # Learning rate warm-up: ramp linearly towards init_lr.
        if self._step < self.warm_up_step:
            self.lr = (self.init_lr / self.warm_up_step) * self._step
            return

        # Cosine schedule. `progress` is the fraction of the annealing phase
        # completed; it is capped at 1 so the rate stays at 0 after final_step
        # instead of climbing back up the cosine, and a zero-length (or
        # negative) annealing phase is treated as already finished rather than
        # dividing by zero.
        decay_span = self.final_step - self.warm_up_step
        if decay_span <= 0:
            progress = 1.0
        else:
            progress = min((self._step - self.warm_up_step) / decay_span, 1.0)

        # math (not cp) keeps the rate a plain host float.
        self.lr = (math.cos(math.pi * progress) + 1) / 2 * self.init_lr

    def get_lr(self):
        """Return the current learning rate."""
        return self.lr

In [3]:
# define computation
def compute_acc(preds, labels):
    """Return the fraction of rows whose arg-max class matches.

    Args:
        preds: per-class scores, one row per sample.
        labels: one-hot (or per-class score) targets with the same layout.

    Returns:
        Number of rows where ``argmax(preds) == argmax(labels)`` divided by
        the number of rows in ``preds``.
    """
    pred_nums = cp.argmax(preds, axis=1)
    label_nums = cp.argmax(labels, axis=1)
    # Reduce once and reuse the count instead of repeating the comparison.
    num_correct = (pred_nums == label_nums).sum()
    return num_correct / preds.shape[0]

In [4]:
# Declare hyperparameters
seed = 0                        # fixes the weight initialisation so runs are reproducible
epoch = 5000
warm_up_epoch = epoch // 10     # first 10% of the steps are learning-rate warm-up
initial_lr = 1
weight_decay = 1e-2

num_feature = [196, 48]         # widths of the hidden layers

# HiddenLayer draws its initial weights from cp.random, so seed it before
# any layer is constructed.
cp.random.seed(seed)

# Chain the layers: 784 input pixels -> hidden widths -> 10 class scores.
block = []
in_features = 784

for nf in num_feature:
    block += [HiddenLayer(in_features, nf)]
    in_features = nf

block += [HiddenLayer(in_features, 10)]

model = Sequential(block)

criterion = BinaryCrossEntropy()
lr = LearningRateScheduler(initial_lr, epoch, warm_up_epoch)

In [None]:
# Per-step history. Everything is stored as a plain Python float so it can be
# plotted directly without any device-to-host conversion later on.
train_accuracyes = []
test_accuracyes = []
train_losses = []
test_losses = []
lrs = []

for step in tqdm(range(epoch)):
    # predict test label (evaluated with the weights from before this step's update)
    test_preds = model(test_images)
    test_loss = criterion(test_preds, test_labels)
    test_acc = compute_acc(test_preds, test_labels)

    # predict train label
    # NOTE: this must be the last forward pass before backprop, because every
    # layer caches the input of its most recent call and backprop reads it.
    train_preds = model(train_images)
    train_loss = criterion(train_preds, train_labels)
    train_acc = compute_acc(train_preds, train_labels)

    # backpropagation
    current_lr = lr.get_lr()
    model.backprop(train_preds, train_labels, current_lr, weight_decay)

    # save results
    train_accuracyes += [float(train_acc) * 100]
    test_accuracyes += [float(test_acc) * 100]

    train_losses += [float(train_loss)]
    test_losses += [float(test_loss)]
    lrs += [float(current_lr)]

    lr.step()

 26%|████████████████████                                                          | 1289/5000 [00:31<01:28, 41.86it/s]

In [None]:
# Partition the test set by whether the predicted class matches the true class.
# The "*_labels" arrays hold rows of test_preds (the model outputs), not the
# ground-truth labels. The "worng" spelling is kept because later cells use it.

predicted_class = cp.argmax(test_preds, axis=1)
true_class = cp.argmax(test_labels, axis=1)

is_correct = predicted_class == true_class
is_wrong = cp.logical_not(is_correct)

correct_images, correct_labels = test_images[is_correct], test_preds[is_correct]
worng_images, worng_labels = test_images[is_wrong], test_preds[is_wrong]

correct_images.shape, worng_images.shape

## 1. Plot the loss curve

In [None]:
plt.figure(figsize=(8, 8))
# Convert every entry to a host float first: the history may contain 0-d GPU
# arrays, which matplotlib cannot turn into a NumPy array implicitly.
plt.plot([float(loss) for loss in train_losses], label='Train', c='b')
plt.plot([float(loss) for loss in test_losses], label='Test', c='r')
plt.xlabel('Epoch')
plt.ylabel('Binary cross-entropy loss')
plt.legend()
plt.show()

## 2. Plot the accuracy curve

In [None]:
# Train (blue) and test (red) accuracy in percent, one point per training step.
fig, ax = plt.subplots(figsize=(8, 8))
for history, name, colour in ((train_accuracyes, 'Train', 'b'),
                              (test_accuracyes, 'Test', 'r')):
    ax.plot(history, label=name, c=colour)
ax.legend()
plt.show()

## 3. Plot the accuracy value

In [None]:
# Report the accuracy recorded at the last training step, one line per split.
final_report = (
    f'Final Train Accuracy: {train_accuracyes[-1]}%',
    f'Final Test Accuracy: {test_accuracyes[-1]}%',
)
print(*final_report, sep='\n')

## 4. Plot the classification example

### Correct Sample

In [None]:
# Show up to 10 correctly classified test images in a 2 x 5 grid, each titled
# with the predicted digit. Capping at the number of available samples avoids
# an IndexError when fewer than 10 exist.
num_shown = min(10, len(correct_images))

plt.figure(figsize=(17, 8))
for i in range(num_shown):
    ax = plt.subplot(2, 5, i + 1)
    # .get() copies the row to the host before taking the arg-max
    ax.set_title(correct_labels[i].get().argmax(), size=30)
    ax.imshow(correct_images[i].reshape(size_row, size_col).get(), cmap='Greys', interpolation='None')
    # hide ticks and tick labels; the image frame stays visible
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

plt.show()

### Wrong Sample

In [None]:
# Show up to 10 misclassified test images in a 2 x 5 grid, each titled with
# the (wrong) predicted digit. Capping at the number of available samples
# avoids an IndexError when the model makes fewer than 10 mistakes.
num_shown = min(10, len(worng_images))

plt.figure(figsize=(17, 8))

for i in range(num_shown):
    ax = plt.subplot(2, 5, i + 1)
    # .get() copies the row to the host before taking the arg-max
    ax.set_title(worng_labels[i].get().argmax(), size=30)
    ax.imshow(worng_images[i].reshape(size_row, size_col).get(), cmap='Greys', interpolation='None')
    # hide ticks and tick labels; the image frame stays visible
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

plt.show()

## 5. Test Accuracy

In [None]:
# Test accuracy (in percent) recorded at the last training step.
final_test_line = f'Final Test Accuracy: {test_accuracyes[-1]}%'
print(final_test_line)