Define layer enums

In [1]:
import numpy as np
from enum import Enum, auto


class Layer(Enum):
    CONNECTED = auto()


class Activation(Enum):
    RELU = auto()
    SIGMOID = auto()
    SOFTMAX = auto()


class Mode(Enum):
    TRAIN = auto()
    TEST = auto()


class Loss(Enum):
    CROSS_ENTROPY = auto()


class Optimizer(Enum):
    SGD = auto()


Define network class with forward and backward class

In [2]:
class Network():
    def __init__(self, layers):
        # Initialize the layers in the network
        self.layers = layers

    def forward_pass(self, X):
        # Forward pass through the network and return the output
        for layer in self.layers:
            X = layer.forward_pass(X)
        self.output = X
        return self.output

    def backward_pass(self, backward_pass_input):
        # Enable backward pass if in training mode
        if (self.mode == Mode.TRAIN):
            self.cache = backward_pass_input
            # Backpropagate through the network by reversing the order of the layers
            for layer in reversed(self.layers):
                # Set the backpropagation cache variable to the output of the previous layer
                self.cache = layer.backward_pass(self.cache)

    # Set the network mode
    def set_mode(self, mode):
        self.mode = mode


Define Connected class with forward and backward pass

In [3]:
class Connected():
    def __init__(self, features):
        self.type = Layer.CONNECTED
        in_features, out_features = features
        # Initialize weights and biases
        self.w = np.random.randn(*features) * np.sqrt(2/in_features)
        self.b = np.random.randn(1, out_features) * np.sqrt(2/in_features)
        # Initialize the cache
        self.dW, self.dB = np.zeros(features), np.zeros((1, out_features))

    def forward_pass(self, X):
        # Save the cache for backward pass
        self.cache = X
        # Compute linear forward pass
        return np.dot(X, self.w) + self.b

    def backward_pass(self, dh):
        m = len(dh)
        # Compute linear backward pass
        self.dW = (1/m) * np.dot(self.cache.T, dh)
        self.dB = (1/m) * np.sum(dh, axis=0, keepdims=True)
        return np.dot(dh, self.w.T)


Define formulas and classes for Sigmoid, Softmax and ReLU

In [4]:
def sigmoid_forward(Z):
    A = 1 / (1 + np.exp(-(Z)))
    return A


def sigmoid_backward(dA, Z):
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    return dZ


class Sigmoid():
    def __init__(self):
        self.type = Activation.SIGMOID

    def forward_pass(self, Z):
        self.cache = sigmoid_forward(Z)
        return self.cache

    def backward_pass(self, dA):
        return sigmoid_backward(dA=dA, Z=self.cache)


def relu_forward(Z):
    A = np.maximum(0, Z)
    return A


def relu_backward(dA, Z):
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ


class Relu():
    def __init__(self):
        self.type = Activation.RELU

    def forward_pass(self, Z):
        self.cache = relu_forward(Z)
        return self.cache

    def backward_pass(self, dA):
        return relu_backward(dA=dA, Z=self.cache)



def softmax_forward(X):
    exps = np.exp(X - np.max(X, axis=1, keepdims=True))
    return exps / (np.sum(exps, axis=1).reshape(len(exps), 1))


def softmax_backward(Y, X):
    return X - Y

class Softmax():
    def __init__(self):
        self.type = Activation.SOFTMAX

    def forward_pass(self, X):
        self.cache = softmax_forward(X)
        return self.cache
      

    def backward_pass(self, Y):
        return softmax_backward(Y=Y, X=self.cache)
       
       

Define SGD

In [5]:
class StochasticGradientDescent():
    def __init__(self, network, learning_rate):
        self.type = Optimizer.SGD
        self.network = network
        self.learning_rate = learning_rate

    def step(self):
        for layer in self.network.layers:
            # Update weight and bias with learning rate
            if layer.type == Layer.CONNECTED:
                layer.w -= self.learning_rate * layer.dW
                layer.b -= self.learning_rate * layer.dB


Define Cross Entropy Loss

In [6]:
class CrossEntropyLoss():
    def __init__(self):
        self.type = Loss.CROSS_ENTROPY

    def __call__(self, output, targets):
        return self.forward_pass(output, targets)

    def forward_pass(self, output, targets):
        self.cache = targets
        batch_size = len(output)
        # Compute cross entropy loss
        targets = targets.argmax(axis=1).reshape(batch_size, 1)
        targets = targets.reshape(batch_size)
        log_likelihood = -np.log(output[np.arange(batch_size), targets])
        return np.sum(log_likelihood)

    def backward_pass(self):
        return self.cache


Download and import dataset using Tourch Vision

In [7]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Define the transform
mean = [0.5]
std = [0.5]

transform_method = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

batch_size = 128

# Download and transform datasets
training = torchvision.datasets.FashionMNIST(
    root='./FashionMNIST',
    train=True,
    download=True,
    transform=transform_method
)

testing = torchvision.datasets.FashionMNIST(
    root='./FashionMNIST',
    train=False,
    download=True,
    transform=transform_method
)

# Create loaders for training and testing datasets
training_loader = DataLoader(
    dataset=training,
    batch_size=batch_size,
    # Shuffle training dataset
    shuffle=True
)

testing_loader = DataLoader(
    dataset=testing,
    batch_size=batch_size,
    shuffle=False
)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/FashionMNIST/raw



Define network and class layers as well as optimizer and criterion

In [8]:
# Define network and layers
net = Network([
    Connected(features=(28*28, 128)),
    Sigmoid(),
    Connected(features=(128, 64)),
    Relu(),
    Connected(features=(64, 10)),
    Softmax()
])
learning_rate = 0.009
optimizer = StochasticGradientDescent(network=net, learning_rate=learning_rate)
criterion = CrossEntropyLoss()


Define function for one hot encoder

In [9]:
# One hot encoder
def encode(tensor, amount):
    tensor_size = len(tensor)
    tensor = tensor.reshape(tensor_size)
    one_hots = np.zeros((tensor_size, amount))
    one_hots[np.arange(tensor_size), tensor] = 1
    return one_hots


Train network using defined network model

In [10]:
import torch
from tqdm import tqdm


number_of_epochs = 1
num_classes = len(training.classes)
net.set_mode(Mode.TRAIN)
for epoch in range(number_of_epochs):
    epoch_loss = 0.0
    with tqdm(training_loader, unit='batch') as tqdm_epoch:
        for _, data in enumerate(tqdm_epoch):
            tqdm_epoch.set_description(f"epoch {epoch + 1}")
            inputs, labels = data
            # Reshape inputs
            inputs = torch.reshape(inputs, (-1, 28*28))
            # One hot encode labels
            labels = encode(tensor=labels, amount=num_classes)

            # Forward pass
            output = net.forward_pass(inputs)

            # Compute loss
            epoch_loss += criterion(output, labels)

            # Backward pass
            net.backward_pass(labels)

            # Optimization Step
            optimizer.step()

            tqdm_epoch.set_postfix(loss=(epoch_loss/len(training.data)))


epoch 1: 100%|██████████| 469/469 [00:23<00:00, 19.76batch/s, loss=1.35]


Save trained network to file

In [11]:
# Set trained network 
import pickle as pkl
pkl.dump(net, open('trained.network', 'wb'))


Load trained network from file and test against trained network

In [12]:
# Load trained network
import pickle as pkl
loaded_net = pkl.load(open('trained.network', 'rb'))

correct_predictions = 0
num_classes = len(testing.classes)
# Set the model in test mode, bypassing backward pass
loaded_net.set_mode(Mode.TEST)
for data in testing_loader:
    inputs, labels = data
    # Reshape inputs and labels
    inputs = torch.reshape(inputs, (-1, 28*28))
    # One hot encode labels
    labels = encode(tensor=labels, amount=num_classes)
    # Forward Propagation
    output = loaded_net.forward_pass(inputs)
    predicted = np.argmax(output, axis=1)
    predicted = predicted.reshape(len(predicted), 1)
    correct_predictions += np.sum(predicted ==
                                  labels.argmax(axis=1).reshape(len(inputs), 1))

print(
    f'accuracy: {(100 * correct_predictions / len(testing.data))}%')


accuracy: 70.87%
