In [191]:
import numpy as np
from scipy.special import expit
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sklearn.preprocessing as pre
from sklearn import model_selection

sns.set_palette(sns.color_palette("Set1", n_colors=8, desat=1))

In [1510]:
class NN(object):

    def __init__(self, n_nodes):
        self.n_nodes = n_nodes
        self.n_layers = len(n_nodes)
        self.weights, self.bias = self.init_weights()

    # get final output of nn from input X based on current weights and biases
    # Assumes X a numpy representation  of the data in wide format. 
    def feed_forward(self, X, train=False, debug=False):
        x = np.atleast_2d(X).T
        N = len(self.n_nodes)
        iters = np.arange(1, N)
        activations = [x]
        zs = []
        for i, b, w in zip(iters, self.bias, self.weights): 
            if debug:
                print(i)
                print(w.shape, activations[i-1].shape, b.shape)
            a = self.activate_logistic(activations[i - 1], w, b)
            activations.append(a)
            zs.append(w @ activations[i - 1] + b)

        if train:
            return activations, zs
        return activations[-1]
    
    def back_prop(self, X, Y, debug=False):
        del_w  = [np.zeros(w.shape) for w in self.weights]
        del_b  = [np.zeros(b.shape) for b in self.bias]
        
        # feed forward: 
        A, Z = self.feed_forward(X, train=True)
        
        # get error heuristic of output layer:
        delta_l = self.cost_derivative(Y, A[-1]) * self.sigmoid_prime(Z[-1])
        del_w[-1] = delta_l @ A[-2].T 
        del_b[-1] = delta_l
        
        # back propagate: 
        for i in np.arange(2, len(self.n_nodes)):
            sp = self.sigmoid_prime(Z[-i])
            delta_l    = (self.weights[-i + 1].T @ delta_l) * sp
            del_b[-i]  = delta_l
            del_w[-i]  = delta_l @ A[-i - 1].T 
            
        return del_w, del_b
    

    
    # helper functions --------------------
    def update(self, X, y, lr):
        del_w, del_b = self.back_prop(X, y)
        print(f'del_w: \n{del_w}')
        print(f'del_b: \n{del_b}')
        for i in range(self.n_layers - 1):
            self.weights[i] -= lr*del_w[i]
            self.bias[i]    -= lr*del_b[i]
            
        
    def activate_logistic(self, X, weights, bias):
        return expit(weights @ X + bias)
    
    def activate_softmax(self, X, weights, bias):
        Z = weights @ X + b
        return np.exp(Z[i])/np.exp(Z).sum()
    
    def sigmoid_prime(self, X):
        return expit(X) * (1 - expit(X))
    
    # cross entropy loss derivative wrt final activation layer
    def cost_derivative(self, y, y_hat):
        return (y_hat - y) / (1e7 + y_hat * (1 - y_hat))

    def init_weights(self):
        weights = [np.random.randn(m, n) for m, n in zip(self.n_nodes[1:], self.n_nodes[:-1])]
        bias = [np.zeros(m).reshape(-1,1) for m in self.n_nodes[1:]]  
        return weights, bias
    # ------------------------------------
        

In [1511]:
np.random.seed(44)
net = NN((3,2,1))

In [1512]:
data = pd.read_csv('student_data.csv')
data.head()

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [1513]:
X = data[:3].drop('admit', axis=1).to_numpy()
y = data[:3]['admit'].to_numpy()

In [1514]:
x = np.atleast_2d(X[0])

In [1515]:
net.weights

[array([[-0.75061472,  1.31635732,  1.24614003],
        [-1.60491574, -1.46814368, -1.71507046]]),
 array([[1.85878369, 0.08758798]])]

In [1516]:
net.bias

[array([[0.],
        [0.]]),
 array([[0.]])]

In [1517]:
net.update(x, y[0], 1)

del_w: 
[array([[5.72679231e-126, 5.44045269e-128, 4.52115182e-128],
       [1.66165887e-276, 1.57857592e-278, 1.31183595e-278]]), array([[8.10772450e-129, 4.99245065e-278]])]
del_b: 
[array([[1.50705061e-128],
       [4.37278649e-279]]), array([[1.24999997e-08]])]


In [1518]:
net.weights

[array([[-0.75061472,  1.31635732,  1.24614003],
        [-1.60491574, -1.46814368, -1.71507046]]),
 array([[1.85878369, 0.08758798]])]

In [1519]:
net.bias

[array([[-1.50705061e-128],
        [-4.37278649e-279]]),
 array([[-1.24999997e-08]])]