### Lecture 12 - Building a Neural Network Framework from Scratch

In [1]:
import numpy as np

In [4]:
class Node:
    """
    Base class for every node of the computation graph.

    Each node keeps track of:
    -- inputs: the nodes whose values feed this node
    -- outputs: the nodes that consume this node's value
    -- value: the result of the forward calculation (None until calculated)
    -- gradients: partial derivatives collected during backward-propagation,
        keyed by the node each derivative is taken with respect to

    Subclasses implement `calculate` (forward pass) and `backward_partial`
    (backward pass).
    """
    def __init__(self, inputs=None):
        # Compare against None explicitly so that any iterable is accepted,
        # and copy it so the caller's list is never shared with the node.
        self.inputs = [] if inputs is None else list(inputs)
        self.outputs = []

        # Register this node as a consumer of each of its inputs.
        for n in self.inputs:
            n.outputs.append(self)

        # These must be set outside the loop above: a node without inputs
        # still needs a value slot and a gradient table.
        self.value = None

        # A dictionary where the key is a node and the value is the partial
        # derivative with respect to that node.
        # If the functional form is wx + b, then the partial derivatives are:
        # w: x
        # x: w
        # b: 1
        self.gradients = {}

    def calculate(self):
        """Compute `self.value` from the inputs; must be overridden."""
        # NotImplementedError is the exception class; `NotImplemented` is a
        # comparison sentinel and cannot be raised.
        raise NotImplementedError

    def backward_partial(self):
        """Fill `self.gradients`; the default does nothing.

        Returns the `NotImplemented` sentinel, so a subclass that does not
        override this method can still be visited by a backward sweep.
        """
        return NotImplemented
    

In [5]:
class Input(Node):
    """
    A source node of the graph: it has no inputs of its own and simply
    holds a value that is supplied from outside.
    """
    def __init__(self):
        Node.__init__(self)
        # An Input has no upstream nodes; make sure the value slot and the
        # gradient table exist regardless.
        self.value = None
        self.gradients = {self: 0}

    def calculate(self, value=None):
        """Store `value` if one is given; otherwise keep the current value.

        Calling this without an argument must not erase a value that was
        assigned earlier.
        """
        if value is not None:
            self.value = value

    def backward_partial(self):
        """Sum the gradients that every consumer reports for this node."""
        # Start from zero on every pass so gradients from a previous pass
        # do not leak into this one.
        self.gradients = {self: 0}

        for n in self.outputs:
            self.gradients[self] = self.gradients[self] + n.gradients[self] * 1
            

In [6]:
class Add(Node):
    """
    Define a subclass of "Node" where the calculation is simply adding all inputs.
    """

    def __init__(self, nodes):
        Node.__init__(self, nodes)

    def calculate(self):
        """Set `self.value` to the sum of the input values."""
        self.value = sum(n.value for n in self.inputs)

    def backward_partial(self):
        """Relay the incoming gradient to every input unchanged.

        Addition has no parameters of its own, but the partial derivative
        of the sum with respect to each input is 1, so upstream nodes still
        need the gradient handed through.
        """
        self.gradients = {n: 0 for n in self.inputs}

        for out in self.outputs:
            grad_cost = out.gradients[self]
            for n in self.inputs:
                self.gradients[n] = self.gradients[n] + grad_cost

In [7]:
class Linear(Node):
    """
    Define a subclass of "Node" where the calculation is a linear function
    f(X) = XW + b.

    `nodes` is the node providing X (a one-element list/tuple is accepted
    too); `weights` and `bias` are the nodes providing W and b.
    Assumes X holds one sample per row and W maps input features (rows) to
    output features (columns); the backward pass additionally assumes the
    incoming gradient is 2-D with one row per sample.
    """
    def __init__(self, nodes, weights, bias):
        x_node = nodes[0] if isinstance(nodes, (list, tuple)) else nodes
        # All three nodes are inputs of this node, so each of them gets
        # this node registered in its `outputs`.
        Node.__init__(self, [x_node, weights, bias])
        self.x_node = x_node
        self.weights = weights
        self.bias = bias

    def calculate(self):
        """Set `self.value` to X.dot(W) + b using the input nodes' values."""
        x = self.x_node.value
        self.value = np.dot(x, self.weights.value) + self.bias.value

    def backward_partial(self):
        """Accumulate the gradients with respect to X, W and b."""
        # Start from zero so contributions from several consumers add up
        # and nothing is carried over from an earlier pass.
        self.gradients = {n: 0 for n in self.inputs}

        for n in self.outputs:

            grad_cost = n.gradients[self]

            # d f / d X = W
            self.gradients[self.x_node] = self.gradients[self.x_node] + np.dot(
                grad_cost, self.weights.value.T
            )

            # d f / d W = X
            self.gradients[self.weights] = self.gradients[self.weights] + np.dot(
                self.x_node.value.T, grad_cost
            )

            # d f / d b = 1; b is shared by every row, so the per-sample
            # gradients are summed over the sample axis.
            self.gradients[self.bias] = self.gradients[self.bias] + np.sum(
                grad_cost, axis=0, keepdims=False
            )
                
             

In [9]:
class sigmoid(Node):
    """
    Define a subclass of "Node" where the calculation is a sigmoid function
    f(x) = 1 / (1 + e^(-x))
    """
    def __init__(self, node):
        # The single upstream node must be registered as this node's input;
        # `calculate` reads it back through `self.inputs[0]`.
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        """Return the element-wise sigmoid of `x`."""
        return 1 / (1 + np.exp(-1 * x))

    def calculate(self):
        """Apply the sigmoid to the input node's value."""
        self.x = self.inputs[0].value
        self.value = self._sigmoid(self.x)

    # note that for a sigmoid function, its derivative has the following property:
    # f'(x) = f(x)[1-f(x)]

    def backward_partial(self):
        """Accumulate the gradient with respect to the input node."""
        # `self.value` already holds f(x) from the forward pass, so the
        # sigmoid does not have to be evaluated again.
        self.partial = self.value * (1 - self.value)

        # Start from zero so several consumers add up instead of the last
        # one overwriting the others.
        self.gradients = {self.inputs[0]: 0}

        for n in self.outputs:
            grad_cost = n.gradients[self]

            self.gradients[self.inputs[0]] = (
                self.gradients[self.inputs[0]] + grad_cost * self.partial
            )
            

In [10]:
class LOSS(Node):
    """
    Define the loss function: the mean squared error between the true
    values (first input) and the predictions (second input).
    """
    def __init__(self, y_true, y_hat):
        Node.__init__(self, [y_true, y_hat])

    def calculate(self):
        """Set `self.value` to mean((y_true - y_hat) ** 2)."""
        # reshape(-1, 1) turns both operands into column vectors so the
        # subtraction is element-wise rather than broadcast into a matrix.
        y_true = self.inputs[0].value.reshape(-1, 1)
        y_hat = self.inputs[1].value.reshape(-1, 1)

        self.diff = y_true - y_hat

        self.value = np.mean(self.diff ** 2)

    def backward_partial(self):
        """Store the gradient of the loss with respect to both inputs.

        Relies on `self.diff` computed by `calculate`.
        """
        # Number of elements the mean was taken over.
        n = self.diff.shape[0]

        self.gradients = {
            self.inputs[0]: (2 / n) * self.diff,
            self.inputs[1]: -1 * (2 / n) * self.diff,
        }
    

In [11]:
def run_one_epoch(output_node, topological_sorted_graph: list):
    """
    Define a procedure to connect all the nodes defined above
    (i.e., run the neural net for one round --- forward and backward)

    Every node's `calculate` is called in the given order, then every
    node's `backward_partial` is called in the reverse order.
    `output_node` is accepted for interface compatibility and is not used.
    """
    # Forward pass: a node's inputs come earlier in the sorted list, so
    # their values are ready by the time the node is reached.
    for n in topological_sorted_graph:
        n.calculate()

    # Backward pass: consumers are visited before the nodes feeding them.
    for n in reversed(topological_sorted_graph):
        n.backward_partial()



In [12]:
def topological_sort(graph):
    """
    Define a topological sort procedure where the input is a @graph,
    and the output is a @sorted_list

    @graph is an iterable of nodes belonging to the graph (a dict keyed by
    nodes works as well); every node must expose `inputs` and `outputs`
    lists. Nodes reachable from the given ones through either list are
    included automatically, so passing only the source nodes is enough.

    Returns a list in which every node appears after all of its inputs.
    Raises ValueError if the graph contains a cycle.
    """
    from collections import deque

    # Step 1: collect every node connected to the seeds, in a
    # deterministic discovery order.
    discovered = []
    seen = set()
    frontier = deque(() if graph is None else graph)
    while frontier:
        node = frontier.popleft()
        if node in seen:
            continue
        seen.add(node)
        discovered.append(node)
        frontier.extend(node.inputs)
        frontier.extend(node.outputs)

    # Step 2: Kahn's algorithm. `pending` counts the inputs of each node
    # that have not been emitted yet.
    pending = {node: len(node.inputs) for node in discovered}
    ready = deque(node for node in discovered if pending[node] == 0)
    sorted_list = []
    while ready:
        node = ready.popleft()
        sorted_list.append(node)
        for consumer in node.outputs:
            pending[consumer] -= 1
            if pending[consumer] == 0:
                ready.append(consumer)

    # Nodes on a cycle never reach a pending count of zero.
    if len(sorted_list) != len(discovered):
        raise ValueError("graph contains a cycle; no topological order exists")

    return sorted_list

In [13]:
def gradient_descent_update(trainable_nodes, learning_rate=1e-3):
    """
    Move the value of every trainable node one step against its gradient:
    value <- value - learning_rate * gradient.

    Each node is expected to store the gradient with respect to itself
    under `node.gradients[node]`.
    """
    for node in trainable_nodes:
        # Rebind rather than update in place so that an array shared with
        # the caller is never modified behind its back.
        node.value = node.value - learning_rate * node.gradients[node]

In [14]:
from sklearn.datasets import load_boston

In [15]:
# Load the dataset used for the regression example below.
# NOTE(review): load_boston was deprecated and then removed in
# scikit-learn 1.2 — confirm the installed version still provides it.
data = load_boston()

In [16]:
# Entry stored under the 'data' key of the loaded dataset
# (presumably the feature matrix, one row per sample — TODO confirm).
X_ = data['data']

In [17]:
# Entry stored under the 'target' key of the loaded dataset
# (presumably the value to predict for each sample — TODO confirm).
y_ = data['target']

In [None]:
# Standardise along axis 0: subtract the mean and divide by the standard
# deviation. A zero standard deviation (constant column) is replaced by 1
# so the division cannot produce NaN or inf.
_feature_std = np.std(X_, axis=0)
_feature_std = np.where(_feature_std == 0, 1.0, _feature_std)
X_ = (X_ - np.mean(X_, axis=0)) / _feature_std # normalize X

In [18]:
# Two Input nodes; `y` is later handed to the loss node as its first argument.
x, y = Input(), Input()

In [19]:
# Input nodes passed as the weights and bias of the first Linear layer.
W1, b1 = Input(), Input()

In [20]:
# Input nodes passed as the weights and bias of the second Linear layer.
W2, b2 = Input(), Input()

In [None]:
# Wire up the network: Linear -> sigmoid -> Linear -> loss.
# The first layer takes the Input node `x` (not the raw array `X_`), and
# each later node is fed by the one created just before it.
output_1 = Linear(x, W1, b1)
sigmoid_1 = sigmoid(output_1)
linear2 = Linear(sigmoid_1, W2, b2)
loss = LOSS(y, linear2)

In [21]:
from sklearn.utils import resample, shuffle

In [None]:
# topological_sort requires the graph as its argument; the Input nodes
# are passed here as the graph's entry points.
topological_sorted_list = topological_sort([x, y, W1, b1, W2, b2])

In [None]:
epochs = 1000
batch_size = 16  # we supply 16 values for each input
# Floor division keeps this an int; range() below rejects a float.
batch_num = X_.shape[0] // batch_size

for epoch in range(epochs):
    loss = 0
    
    for batch in range(batch_num):
        
    