## 1. Downloading the dataset

In [1]:
import pip
def install(package):
    """Install ``package`` with pip into the running interpreter's environment.

    pip is launched as a subprocess (``<python> -m pip install <package>``)
    instead of calling ``pip.main``: that function is not a supported API and
    is absent from pip 10 and later, so calling it raises ``AttributeError``
    on current pip versions.

    Args:
        package: requirement specifier handed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    # Imported locally so the helper does not depend on any other cell.
    import subprocess
    import sys

    # sys.executable guarantees the package lands in the kernel's own
    # environment rather than whichever ``pip`` happens to be first on PATH.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

In [2]:
# Probe for torchvision and install it on the fly when it is missing.
try:
    from torchvision.datasets import MNIST
    print("module 'torchvision' is installed")
except ModuleNotFoundError:
    print("module 'torchvision' is not installed")
    # Fall back to the install() helper defined in the previous cell.
    install("torchvision")

In [3]:
import numpy as np
from torchvision.datasets import MNIST
# Placeholders for the two MNIST splits; both are assigned further down in this cell.
train_dataset = test_dataset = None
def download_mnist(is_train: bool, root: str = './data'):
    """Download (if needed) and load one split of the MNIST dataset.

    Args:
        is_train: forwarded as the ``train`` argument of ``MNIST``; selects
            which split is loaded.
        root: directory handed to ``MNIST`` as its ``root`` argument.
            Defaults to ``'./data'``.

    Returns:
        The ``torchvision.datasets.MNIST`` object. Every image it yields is
        passed through the transform below, i.e. converted with ``np.array``
        and flattened to one dimension.
    """
    return MNIST(
        root=root,
        # Turn each image into a flat 1-D NumPy vector.
        transform=lambda image: np.array(image).flatten(),
        download=True,
        train=is_train,
    )

# Load both splits: first the one flagged as training data, then the other.
train_dataset = download_mnist(is_train=True)
test_dataset = download_mnist(is_train=False)

## 2. Processing the dataset

In [4]:
def processData(dataset):
    """Convert a dataset of ``(image, label)`` items into two NumPy arrays.

    The dataset is walked exactly once, so each item is loaded (and any
    per-item transform is applied) a single time.

    Args:
        dataset: sized iterable whose items expose the flattened image at
            index 0 and the integer label at index 1.

    Returns:
        tuple: ``(images, labels)`` where ``images`` is a boolean array of
        shape ``(number_of_items, 28 * 28)`` and ``labels`` is a ``uint8``
        array with one entry per item. The boolean cast binarises the
        pixels: every non-zero value becomes ``True``.
    """
    images = []
    labels = []
    for sample in dataset:
        images.append(sample[0])
        labels.append(sample[1])

    np_dataset_images = np.array(images, dtype=np.bool_).reshape(len(images), 28 * 28)
    np_dataset_labels = np.array(labels, dtype=np.uint8)

    return np_dataset_images, np_dataset_labels

# Each result is the (images, labels) pair returned by processData.
np_train_dataset = processData(dataset=train_dataset)
np_test_dataset = processData(dataset=test_dataset)

## 3. Implementing a perceptron handler
We need to have the following:

    - an array of the weights of the system
    - function to propagate forward and find the results
    - function to recalculate the weights based on the results (backwards propagation)

In [5]:
import random 
class Perceptron:
    """A single linear unit: one weight per input feature plus a scalar bias."""

    def __init__(self, input_size=28 * 28):
        """Create a unit with randomly initialised parameters.

        Args:
            input_size: number of input features, i.e. the length of the
                weight vector. Defaults to ``28 * 28``.
        """
        # Every weight, and then the bias, is drawn from random.random(),
        # i.e. uniformly from [0, 1).
        self.weights = np.array([random.random() for _ in range(input_size)])
        self.bias = random.random()

    def forwardPropagation(self, inputs):
        """Return the raw score ``sum(weights * inputs) + bias`` (no activation applied)."""
        z = np.sum(self.weights * inputs) + self.bias
        return z

    def updateWeights(self, vectorToAdd):
        """Add ``vectorToAdd`` element-wise to the weight vector, in place."""
        self.weights += vectorToAdd

    def updateBias(self, valueToAdd):
        """Add ``valueToAdd`` to the bias."""
        self.bias += valueToAdd

    def backPropagation(self, learningRate, gradient, inputVector):
        """Apply one update step scaled by ``learningRate``.

        The weights move by ``learningRate * gradient * inputVector`` and the
        bias by ``learningRate * gradient``. Both amounts are *added*, so the
        caller decides the direction through the sign of ``gradient``.
        """
        self.updateWeights(learningRate * gradient * inputVector)
        self.updateBias(learningRate * gradient)

## 4. Defining the hyper parameters
1. The learning rate -> the step size by which each accumulated gradient is scaled before it is applied to the weights and biases
2. The epoch number -> the number of full passes the algorithm makes over the training set
3. Additionally, we include the mathematical constant e here

In [6]:
# Factor applied to every accumulated weight/bias update.
LEARNING_RATE = 0.001
# Number of training epochs executed by main().
EPOCH_NUMBER = 100
# Euler's number at full double precision (previously a hand-typed, truncated literal).
E_CONSTANT = np.e

## 5. Running the algorithm
- we will have a function for running an epoch
- we will have a function for running a batch


In [7]:
import concurrent.futures

def runBatch(np_batch_train_dataset):
    """Score one mini-batch and sum the per-sample softmax updates.

    No perceptron is modified here; the caller applies the returned sums.

    Args:
        np_batch_train_dataset: indexable pair. Item 0 holds the batch inputs
            (one row per sample); item 1 holds the matching integer labels.

    Returns:
        tuple: ``(weight_updates, bias_updates, correct_count)``.
        ``weight_updates[k]`` and ``bias_updates[k]`` are the sums over the
        batch of ``-(softmax - one_hot)[k] * input`` and
        ``-(softmax - one_hot)[k]`` for perceptron ``k``. They are already
        negated, so they are meant to be *added* to the parameters.
        ``correct_count`` is the number of samples whose arg-max class
        equals the label.
    """
    batch_inputs = np_batch_train_dataset[0]
    batch_labels = np_batch_train_dataset[1]
    sample_total = batch_inputs.shape[0]
    class_total = len(perceptrons)

    weight_updates = [np.zeros_like(unit.weights) for unit in perceptrons]
    bias_updates = [0] * class_total
    hits = 0

    for row in range(sample_total):
        sample = batch_inputs[row]

        # Raw score of every perceptron, shifted by the maximum so that the
        # exponentials below cannot overflow.
        scores = [unit.forwardPropagation(sample) for unit in perceptrons]
        shifted = scores - np.max(scores)
        exponentials = np.exp(shifted)
        probabilities = exponentials / np.sum(exponentials)

        # One-hot encoding of the expected class.
        label = batch_labels[row]
        one_hot = np.zeros(class_total)
        one_hot[label] = 1

        residual = probabilities - one_hot

        for k in range(class_total):
            weight_updates[k] += -residual[k] * sample
            bias_updates[k] += -residual[k]

        if label == np.argmax(probabilities):
            hits += 1

    return weight_updates, bias_updates, hits

def runEpoch(batch_size=100):
    """Run one mini-batch training pass over the whole training set.

    The training arrays are cut into consecutive slices of ``batch_size``
    samples. When ``batch_size`` does not divide the number of samples, the
    final, shorter slice is processed as well, so no sample is skipped.
    After each batch the sums returned by ``runBatch`` are scaled by
    ``LEARNING_RATE`` and added to every perceptron.

    Args:
        batch_size: number of samples per mini-batch; must be at least 1.

    Returns:
        Number of training samples classified correctly during the pass.
        Each batch is scored before its own update is applied.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    train_images = np_train_dataset[0]
    train_labels = np_train_dataset[1]
    total_samples = train_images.shape[0]
    totalCorrect = 0

    for batchStartIndex in range(0, total_samples, batch_size):
        # Slicing clamps at the end of the array, which yields the shorter
        # trailing batch when total_samples is not a multiple of batch_size.
        batchEndIndex = batchStartIndex + batch_size
        batch_train_dataset = [train_images[batchStartIndex:batchEndIndex],
                               train_labels[batchStartIndex:batchEndIndex]]

        gradients_accumulated, bias_accumulated, batchCorrect = runBatch(batch_train_dataset)

        # runBatch returns already-negated gradient sums, so adding them
        # moves the parameters in the descent direction.
        for i, perceptron in enumerate(perceptrons):
            perceptron.weights += LEARNING_RATE * gradients_accumulated[i]
            perceptron.bias += LEARNING_RATE * bias_accumulated[i]

        totalCorrect += batchCorrect

    return totalCorrect
    
def runTest(inputs):
    """Return the index of the perceptron with the highest softmax probability.

    Args:
        inputs: one input vector, forwarded to every perceptron's
            ``forwardPropagation``.

    Returns:
        The arg-max index over the softmax of the perceptron scores, i.e. the
        predicted class.
    """
    z = [perceptron.forwardPropagation(inputs) for perceptron in perceptrons]
    # Shift by the maximum score so the exponentials cannot overflow.
    z_stable = z - np.max(z)
    exp_scores = np.exp(z_stable)
    softMaxArray = exp_scores / np.sum(exp_scores)
    return np.argmax(softMaxArray)


## 6. Running the main function and initializing the perceptrons

In [None]:
# Number of perceptrons created below; each one produces the score of one class.
NUM_CLASSES = 10
# Seed the stdlib RNG used by Perceptron.__init__ so that a fresh
# "Restart & Run All" starts from the same initial weights every time.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
perceptrons = [Perceptron() for _ in range(NUM_CLASSES)]
def main():
    """Train for ``EPOCH_NUMBER`` epochs and report accuracy after each one.

    Every epoch calls ``runEpoch()`` and prints the number of correctly
    classified training samples, the training-set size and their ratio. It
    then classifies every test sample with ``runTest`` and prints the
    fraction that matches the test labels.
    """
    # Sample counts come straight from the leading array dimension and are
    # loop-invariant, so they are computed once.
    train_sample_count = np_train_dataset[0].shape[0]
    tests = np_test_dataset[0]
    correctPredictions = np_test_dataset[1]
    test_sample_count = tests.shape[0]

    for epochIndex in range(EPOCH_NUMBER):
        totalCorrect = runEpoch()
        print('trainingAccuracy = ', totalCorrect, train_sample_count, totalCorrect / train_sample_count)

        correctlyPredicted = 0
        for index in range(test_sample_count):
            prediction = runTest(tests[index])
            if prediction == correctPredictions[index]:
                correctlyPredicted += 1
        print('Accuracy on tests at epoch ' + str(epochIndex) + " : " + str(correctlyPredicted / test_sample_count))
main()

trainingAccuracy =  46810 60000 0.7801666666666667
Accuracy on tests at epoch 0 : 0.8576
trainingAccuracy =  52251 60000 0.87085
Accuracy on tests at epoch 1 : 0.8797
trainingAccuracy =  53073 60000 0.88455
Accuracy on tests at epoch 2 : 0.8884
trainingAccuracy =  53560 60000 0.8926666666666667
Accuracy on tests at epoch 3 : 0.894
trainingAccuracy =  53884 60000 0.8980666666666667
Accuracy on tests at epoch 4 : 0.8989
trainingAccuracy =  54107 60000 0.9017833333333334
Accuracy on tests at epoch 5 : 0.9019
trainingAccuracy =  54294 60000 0.9049
Accuracy on tests at epoch 6 : 0.9038
trainingAccuracy =  54437 60000 0.9072833333333333
Accuracy on tests at epoch 7 : 0.9064
trainingAccuracy =  54558 60000 0.9093
Accuracy on tests at epoch 8 : 0.9081
trainingAccuracy =  54656 60000 0.9109333333333334
Accuracy on tests at epoch 9 : 0.9093
trainingAccuracy =  54740 60000 0.9123333333333333
Accuracy on tests at epoch 10 : 0.9107
trainingAccuracy =  54811 60000 0.9135166666666666
Accuracy on test