In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
np.random.seed(1)

In [13]:
iris = pd.read_csv("data/iris.csv")

X = iris.loc[:, iris.columns != "label"].values

y = iris["label"].values

for i in range(len(y)):
    if y[i] == 'Iris-setosa':
        y[i] = np.array([1, 0, 0])
    elif y[i] == 'Iris-versicolor':
        y[i] = np.array([0, 1, 0])
    else:
        y[i] = np.array([0, 0, 1])

train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 0)

In [4]:
class NeuralNetwork():
    def __init__(self, input_nodes, hidden_nodes, output_nodes, train_x, train_y, learning_rate):
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes
        self.train_x = train_x
        self.train_y = train_y
        self.learning_rate = learning_rate

        self.weightsl1 = np.random.randn(input_nodes, hidden_nodes)
        self.weightsl2 = np.random.randn(hidden_nodes, output_nodes)
        self.biasl1 = np.random.randn(hidden_nodes)
        self.biasl2 = np.random.randn(output_nodes)
        
    def relu(self, x, deriv=False):
        if deriv: return 1 * (x > 0)
        return np.maximum(x, 0)

    def softmax(self, A):
        expA = np.exp(A)
        return expA / expA.sum()

    def cross_entropy(self, X, y):
        m = y.shape[0]
        p = self.softmax(X)
        log_likelihood = -np.log(p[range(m),y])
        loss = np.sum(log_likelihood) / m
        return loss

    def feedforward(self, input):
        hidden = self.relu(np.dot(input, self.weightsl1) + self.biasl1)
        z = self.softmax(np.dot(hidden, self.weightsl2) + self.biasl2)
        return z

    def train(self, iterations=10000):
        loss = 0
        for i in range(iterations):
            ########## Feedforward

            ri = np.random.randint(len(self.train_x)) # Random datapoint
            layer_0 = self.train_x[ri:ri+1]
            layer_1 = self.relu(np.dot(layer_0, self.weightsl1) + self.biasl1)
            layer_2 = self.softmax(np.dot(layer_1, self.weightsl2) + self.biasl2)

            ########## Back Propagation

            loss += self.cross_entropy(layer_2, np.array([self.train_y[ri]])) # Define the loss function

            layer_2_delta = layer_2 - self.train_y[ri] # Derivative of the cross entropy loss function

            dcost_wl2 = layer_1.T.dot(layer_2_delta) # layer 2 weights change
            dcost_bl2 = layer_2_delta # Layer 2 bias change
            
            layer_1_delta = layer_2_delta.dot(self.weightsl2.T) * self.relu(layer_1, deriv=True) # change with respect to the first layer

            dcost_wl1 = layer_0.T.dot(layer_1_delta) # layer 1 weights change
            dcost_bl1 = layer_1_delta # layer 1 bias change

            ########## Update weights using gradient descent

            self.weightsl1 -= self.learning_rate * dcost_wl1
            self.biasl1 -= self.learning_rate * dcost_bl1.sum(axis=0)

            self.weightsl2 -= self.learning_rate * dcost_wl2
            self.biasl2 -= self.learning_rate * dcost_bl2.sum(axis=0)

            ########## print average loss

            if (i-999) % 1000 == 0:
                print("Iteration: {}, loss: {}".format(i+1, loss/i))
                loss = 0

    def check_output(self, test_x, test_y):
        correct = 0
        for i in range(len(test_x)):
            point = test_x[i]
            target = list(test_y[i])
            z = list(self.feedforward(point))
            prediction_index = z.index(max(z))
            target_index = target.index(max(target))
            if prediction_index == target_index: correct += 1
            print(z, target)
        print("Test Accuracy: {}".format((correct / len(test_x)) * 100))

In [5]:
n = NeuralNetwork(4, 5, 3, train_X, train_y, 0.005)

In [6]:
n.train()

Iteration: 1000, loss: 3.622456126086912
Iteration: 2000, loss: 1.7762067114326412
Iteration: 3000, loss: 1.1771434104453187
Iteration: 4000, loss: 0.8861488698147758
Iteration: 5000, loss: 0.7130297548483933
Iteration: 6000, loss: 0.5977097303098513
Iteration: 7000, loss: 0.5177761852750062
Iteration: 8000, loss: 0.4550965589974584
Iteration: 9000, loss: 0.40052487065864645
Iteration: 10000, loss: 0.366717621760664


In [7]:
n.check_output(val_X, val_y)

[1.7965026528682994e-09, 0.007891633209732832, 0.9921083649937645] [0, 0, 1]
[0.00010798324549016774, 0.9842105057714257, 0.015681510983084263] [0, 1, 0]
[0.9957615368477596, 0.004238463107079104, 4.516130538959267e-11] [1, 0, 0]
[8.186945048404458e-12, 0.004346744111761302, 0.9956532558800517] [0, 0, 1]
[0.9932988115682815, 0.0067011881085231005, 3.231954630520379e-10] [1, 0, 0]
[1.1634409577517125e-11, 0.0008481521118028136, 0.9991518478765629] [0, 0, 1]
[0.9933103305697687, 0.006689669109418423, 3.208130240905355e-10] [1, 0, 0]
[9.102794069856454e-05, 0.9834794654933662, 0.01642950656593519] [0, 1, 0]
[1.3591250492376239e-05, 0.9473513680793284, 0.05263504067017924] [0, 1, 0]
[0.0013792063296849972, 0.9959682514654562, 0.002652542204858828] [0, 1, 0]
[2.3004638897800847e-10, 0.008412607262520093, 0.9915873925074336] [0, 0, 1]
[0.0004195448052721252, 0.991476055961072, 0.008104399233655806] [0, 1, 0]
[2.114617115752166e-05, 0.8832958743049445, 0.116682979523898] [0, 1, 0]
[3.18634336