# Multi-class Classification MLP

In [321]:
import numpy as np

### References
<li> https://www.deeplearningbook.org/contents/mlp.html <br>
<li> https://deepnotes.io/softmax-crossentropy <br>


# Data Set Information

Data downloaded from: https://archive.ics.uci.edu/ml/datasets/seeds

The examined group comprised kernels belonging to three different varieties of wheat: Kama, Rosa and Canadian, 70 elements each, randomly selected for the experiment. High quality visualization of the internal kernel structure was detected using a soft X-ray technique. It is non-destructive and considerably cheaper than other more sophisticated imaging techniques like scanning microscopy or laser technology. The images were recorded on 13x18 cm X-ray KODAK plates. Studies were conducted using combine harvested wheat grain originating from experimental fields, explored at the Institute of Agrophysics of the Polish Academy of Sciences in Lublin. 

The data set can be used for the tasks of classification and cluster analysis.

In [322]:
class EmbeddingDataSet:
    """
       Isomorphi DataSet class
       Takes a dataset, loads, scales, shuffles, splits & batches data
       !!! Assumes all features are already Continuous Numeric Features !!! 
    """

    def __init__(self, filename, **kwargs):
        """

        """
        self.filename = filename
        self.splitsize = kwargs.get('splitsize')
        self.valsize = kwargs.get('valsize')
        self.batchsize = kwargs.get('batchsize')


    def load(self, **kwargs):
        """ 
            loads the dataset from the passed filename.
            Assumes the dataset is in a .txt file
            shuffles the dataset
            returns X and y.
        """
        with open(self.filename) as inF:
            rawdata = inF.readlines()

        data = []
        for line in rawdata:
            line = line.strip()
            line = line.split('\t')
            line = [x for x in line if x != '']
            data.append(line)

        self.X = np.array(data).astype(float)
        np.random.seed(kwargs.get('seed'))
        np.random.shuffle(self.X)

        self.splits = self.split(validation=kwargs.get('validation'), valsize=kwargs.get('valsize'))
        self.datasets = [self.scale(dataset[:, :-1]) for dataset in self.splits]
        self.labels = [dataset[:, -1].astype(int) - 1 for dataset in self.splits]
        self.name()
        return self.datasets, self.labels
    

    def scale(self, X, **kwargs):
        """
            Normalizes a numpy array via Standard Normal Distribution
            This can be useful in algorithms that do not assume any distribution,
            and is a required steps for Networks that learn via Gradient Descent
        """
        min = X.min()
        max = X.max()
        range = max - min
        return (X - min) / range
    

    def split(self, testsize:float=.10, **kwargs):
        """
            segregates the dataset into distinct splits
        """
        validation_set = kwargs.get('validation')
        if validation_set:
            valsize = kwargs.get('valsize')
            trainsize =  1 - testsize - valsize
            train_n = round(self.X.shape[0] * trainsize)
            val_n = round(self.X.shape[0] * valsize)
            train = self.X[:train_n]
            val = self.X[train_n:train_n+val_n]
            test = self.X[train_n+val_n:]
            return train, val, test
        
        else:
            trainsize =  1 - testsize
            train_n = round(self.X.shape[0] * trainsize)
            train = self.X[:train_n]
            test = self.X[train_n:]
            return train, test
    

    def name(self):
        """
            Assigns named attributes to specific datasets
        """
        self.X_train = self.datasets[0]
        self.y_train = self.labels[0]
        if len(self.datasets) > 2:
            self.X_val = self.datasets[1]
            self.y_val = self.labels[1]
            self.X_test = self.datasets[2]
            self.y_test = self.labels[2]

        else:
            self.X_test = self.datasets[1]
            self.y_test = self.labels[1]


    def batch(self, dataset, labels, batch_size:int=1):
        """
        """
        num_batches = len(dataset) // batch_size
        if len(dataset) % batch_size != 0:
            num_batches += 1

        batches = []
        for i in range(num_batches):
            start_index = i * batch_size
            end_index = (i + 1) * batch_size
            batch = (dataset[start_index:end_index], labels[start_index:end_index])
            batches.append(batch)
        
        return batches



In [323]:
seeds = EmbeddingDataSet('data/seeds.data')
seeds.load(validation=True, valsize=.10)
train_batches = seeds.batch(seeds.X_train, seeds.y_train, 1)
val_batches = seeds.batch(seeds.X_val, seeds.y_val)
test_batches = seeds.batch(seeds.X_test, seeds.y_test)

print(f"{len(train_batches)} training batches")
print(f"{len(val_batches)} validation batches")
print(f"{len(test_batches)} test batches")

168 training batches
21 validation batches
21 test batches


In [324]:
class FFMLP(object):
    
    """
        Feed-Forward Multi Layer Perceptron for Classification
    """

    def __init__(
        self, 
        epochs:int,
        features:int,
        output_nodes:int=1,
        hidden_layers:int=1, 
        hidden_nodes:int=16, 
        init_weight:float=1,
        alpha:float=.01):
        """
            initializes a Feed Forward Multilayer Perceptron
            for classification tasks
            
            x is an numpy array of training data
            output_nodes is the number of classes
        """
        self.epochs = epochs
        self.alpha = alpha

        self.params = {
            "epochs" : epochs,
            "hidden layers": hidden_layers,
            "hidden nodes": hidden_nodes,
            "initialization weight": init_weight,
            "learning rate": alpha,
        }
        
        self.hidden_layer = {
            "weights": np.random.uniform(low=0, high=init_weight, size=(features, hidden_nodes)),
            "bias": np.random.uniform(low=0, high=init_weight, size=hidden_nodes)
        }

        if hidden_layers > 1:
            self.hidden_layers = [
                {
                    "weights": np.random.uniform(low=0, high=init_weight, size=(hidden_nodes, hidden_nodes)),
                    "bias": np.random.uniform(low=0, high=init_weight, size=hidden_nodes)
                } for layer in range(hidden_layers-1)
            ]
        else:
            self.hidden_layers = []

        self.output_layer = {
            "weights": np.random.uniform(low=0, high=init_weight, size=(hidden_nodes, output_nodes)),
            "bias": np.random.uniform(low=0, high=init_weight, size=output_nodes)
        }

        self.network = dict() # a data structure to hold our layers
        self.network['hidden layer 1'] = self.hidden_layer
        for idx, layer in enumerate(self.hidden_layers, start=2):
            self.network[f'hidden layer {idx}'] = layer        
        self.network['output layer'] = self.output_layer # hidden to output
        print('Initialized Network')


    def __str__(self):
        return f'Parameters: {self.params}\n, Network: {self.network}'
        

    def ReLU(self, x):
        """
        """
        return np.maximum(0, x)
    
    def delta_ReLU(self, x):
        """
        """
        return np.where(x > 0, 1, 0)

    def sigmoid(self, x):
        """
        """
        return 1/(1 + np.exp(-x))


    def delta_sigmoid(self, x):
        """
        """
        return x * (1.0 - x)   
    

    def softmax_stable(self, X):
        """
            stable softmax
        """
        exps = np.exp(X - np.max(X))
        return exps / np.sum(exps)
    

    def cross_entropy(self, X, y):
        """
        X is the output from fully connected layer (num_examples x num_classes)
          after softmax()
        y is labels (num_examples x 1)
            Note that y is not one-hot encoded vector. 
            It can be computed as y.argmax(axis=1) from one-hot encoded vectors of labels if required.
        """
        p = X
        m = y.shape[0]
        # takes the log of the highest values in p (which is the probability of the class)
        log_likelihood = -np.log(p[range(m), y]) 
        loss = np.sum(log_likelihood) / m
        return loss


    def delta_cross_entropy(self, X, y):
        """
        X is the output from fully connected layer (num_examples x num_classes)
        y is labels (num_examples x 1)
            Note that y is not one-hot encoded vector. 
            It can be computed as y.argmax(axis=1) from one-hot encoded vectors of labels if required.
        """
        m = y.shape[0]
        grad = X.copy()
        grad[range(m), y] -= 1
        grad = grad / m
        return grad


    def forward(self, batch):
        """
        """
        forward = batch[0]
        for key, value in self.network.items():
            self.network[key]['z'] = forward # inputs
            forward = np.dot(forward, value['weights']) + value['bias']
            forward = self.ReLU(forward) 
            self.network[key]['a'] = forward # activations
            
        outputs = np.apply_along_axis(self.softmax_stable, 1, forward)
        loss = self.cross_entropy(outputs, batch[1])
        return loss, outputs


    def backward(self, batch, outputs):
        """
            Naive backward propogation that involves 
            recomputation of the gradient
            At every step
        """
        grad = self.delta_cross_entropy(outputs, batch[1])

        self.partials = []
        for key in reversed(list(network.network.keys())):
            z = network.network[key]['z']
            db = np.sum(grad, axis=0)
            dW = np.dot(z.T, grad) # the current layers inputs were the previous layers activations
            self.partials.append([dW, db])
            grad = np.dot(grad, network.network[key]['weights'].T) * self.delta_ReLU(z)


    def fit(self, batches):

        for epoch in range(self.epochs):
            for batch in batches:
                pass_loss, outputs = self.forward(batch)
                self.backward(batch, outputs)
                for idx, key in enumerate(reversed(list(self.network.keys()))):
                    self.network[key]['weights'] -= self.alpha * self.partials[idx][0]
                    self.network[key]['bias'] -= self.alpha * self.partials[idx][1]

            # Print loss every 10 epochs
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, loss: {pass_loss:.4f}")

# Multi Layer Perceptron
EPOCHS = 60
CLASSES = 3
HIDDEN_NODES = 64
HIDDEN_LAYERS = 1
INITWEIGHT = 1
ALPHA = .10


#np.random.seed(45)
network = FFMLP(
    epochs=EPOCHS,
    features=7,
    hidden_layers = HIDDEN_LAYERS,
    hidden_nodes = HIDDEN_NODES,
    output_nodes = CLASSES,
    init_weight=INITWEIGHT,
    alpha=ALPHA
)

network.fit(train_batches)

Initialized Network
Epoch 0, loss: 0.7822
Epoch 10, loss: 1.1614
Epoch 20, loss: 1.4756
Epoch 30, loss: 1.6693
Epoch 40, loss: 1.9621
Epoch 50, loss: 2.0988


In [325]:
def predict(batches):
   predictions = []
   for batch in batches:
      _, batch_pred_probs = network.forward(batch)
      predictions.append(np.argmax(batch_pred_probs))
   return predictions

train_predictions = predict(train_batches)
val_predictions = predict(val_batches)
test_predictions = predict(test_batches)

In [326]:
def accuracy(predictions:list, labels:list):
   assert len(predictions) == len(labels)
   total = 0
   for pred, label in zip(predictions, labels):
      if pred == label:
         total += 1 

   return total / len(predictions)

train_accuracy = accuracy(train_predictions, seeds.y_train.tolist())
val_accuracy = accuracy(val_predictions, seeds.y_val.tolist())
test_accuracy = accuracy(test_predictions, seeds.y_test.tolist())

In [327]:
print(f"train accuracy: {round(train_accuracy, 4)*100}%")
print(f"validation accuracy: {round(val_accuracy, 4)*100}%")
print(f"test accuracy: {round(test_accuracy, 4)*100}%") 

train accuracy: 89.29%
validation accuracy: 76.19%
test accuracy: 85.71%
