In [55]:
import numpy as np
from scipy.special import expit as sigmoid
from typing import List, Union, Callable
import sys

In [104]:
# Eight one-hot rows. The targets are the inputs themselves: `y` is the very same
# array object as `X`, not a copy.
X = y = np.eye(8)

[[1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]]


In [124]:
def init_network(input_size, hidden_size, output_size):
    """Return a list of three zero-filled arrays, one per layer.

    Each array has shape ``(size, 2)`` for the corresponding layer size, in the
    order input, hidden, output.
    NOTE(review): the meaning of the two columns is not visible here - confirm.
    """
    layer_sizes = (input_size, hidden_size, output_size)
    return [np.zeros((size, 2)) for size in layer_sizes]


network = init_network(8, 3, 8)
network

[array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]),
 array([[0., 0.],
        [0., 0.],
        [0., 0.]]),
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])]

In [133]:
def sigmoid_derivative(z: float) -> float:
    """Derivative of the logistic function evaluated at the pre-activation ``z``.

    Computes ``sigmoid(z) * (1 - sigmoid(z))``. Because ``sigmoid`` is
    element-wise, array input yields an array of the same shape.
    """
    activation_value = sigmoid(z)
    return activation_value*(1-activation_value)

def quadratic_loss(predictions: np.ndarray, actuals: np.ndarray) -> np.ndarray:
    """Half the squared Euclidean norm of ``predictions - actuals``, taken along axis 0.

    Args:
        predictions: predicted values.
        actuals: target values, broadcastable against ``predictions``.

    Returns:
        ``0.5 * sum((predictions - actuals)**2, axis=0)``: a scalar for 1-D input,
        one value per column for 2-D input.

    NOTE(review): the reduction runs over axis 0, as the original code requested.
    If samples are stored as rows, the per-sample loss would need the last axis
    instead - confirm with the caller.
    """
    residuals = np.asarray(predictions) - np.asarray(actuals)
    # The squared norm is just the sum of squares; computing it directly avoids
    # the square-root round trip and also works when the result is a scalar.
    return 0.5*np.sum(np.square(residuals), axis=0)

def cost_function(loss_function: Callable, predictions: np.ndarray, actuals: np.ndarray, weigths: np.ndarray, decay_parameter: float) -> float:
    """Average loss plus an L2 weight-decay penalty.

    Args:
        loss_function: callable ``(predictions, actuals) -> array`` of losses.
        predictions: predicted values, passed through to ``loss_function``.
        actuals: target values, passed through to ``loss_function``.
        weigths: the weights to penalise, either a single array or an iterable of
            arrays (e.g. one per layer). The misspelled parameter name is kept so
            that existing keyword callers continue to work.
        decay_parameter: weight-decay coefficient.

    Returns:
        ``mean(loss) + 0.5 * decay_parameter * (sum of all squared weights)``.
    """
    avg_loss: float = float(np.mean(loss_function(predictions, actuals)))
    # Accept a single array as well as a per-layer collection whose members may
    # have different shapes.
    weight_arrays = [weigths] if isinstance(weigths, np.ndarray) else list(weigths)
    squared_total = sum(float(np.sum(np.square(w))) for w in weight_arrays)
    regularization: float = 0.5*decay_parameter*squared_total
    return avg_loss+regularization

class Layer:
    """A layer together with the weights that connect it to the next layer.

    ``weights`` has shape ``(num_nodes_n1, num_nodes + include_bias)``: one row per
    node of the next layer and one column per node of this layer, plus one extra
    column when the layer has a bias node.
    """

    # declaration of instance variables
    weights: np.ndarray
    has_bias: bool

    def __init__(self, num_nodes: int, num_nodes_n1: int, include_bias: bool = True, epsilon: float = 0.01) -> None:
        """Draw the initial weights from a zero-mean normal distribution.

        Args:
            num_nodes: number of nodes in this layer, not counting the bias node.
            num_nodes_n1: number of nodes in the next layer (0 gives an empty
                weight matrix).
            include_bias: whether this layer has a bias node.
            epsilon: controls the spread of the initial weights.
        """
        num_columns = num_nodes + int(include_bias)
        # NOTE(review): np.random.normal's `scale` is a standard deviation, so the
        # weights are drawn with std epsilon**2 - confirm whether epsilon itself
        # was meant to be the standard deviation.
        self.weights = np.random.normal(loc=0, scale=np.power(epsilon, 2), size=(num_nodes_n1, num_columns))
        self.has_bias = include_bias

    def print_weights(self) -> None:
        """Print this layer's weight matrix."""
        print(self.weights)


class Network:
    """Three-layer sigmoid network trained by batch gradient descent on the quadratic loss.

    Only the layout "input and hidden layer with a bias node, output layer without"
    is supported. ``weights[i]`` and ``layers[i].weights`` refer to the same array;
    ``update_weights`` keeps the two in sync.
    """

    # declaration of instance variables
    layers: List["Layer"]
    weights: List[np.ndarray]

    def __init__(self, num_nodes: List[int], include_biases: List[bool]) -> None:
        """Create one ``Layer`` per entry of ``num_nodes``.

        Args:
            num_nodes: number of nodes for all layers, not counting the bias node.
            include_biases: per layer, whether it has a bias node.
        """
        # TO-DO: code won't work if include_biases != [True, True, False], see prop_forward
        assert (include_biases == [True, True, False]), 'error when initializing Network class: include_bias parameter not available'
        assert (len(num_nodes) == len(include_biases)), 'error when initializing Network class: num_nodes and include_biases differ in length'

        self.layers = [Layer(num_nodes[i], num_nodes[i+1], include_biases[i]) for i in range(len(num_nodes)-1)]
        # The output layer has no successor, hence an empty (0 x n) weight matrix.
        self.layers.append(Layer(num_nodes[-1], 0, include_biases[-1]))
        self.weights = self.get_weights(form='list')

    def get_weights(self, form: str = 'vector') -> Union[List[np.ndarray], np.ndarray]:
        """Return the weights of all layers.

        Args:
            form: ``'list'`` returns a list whose entry ``i`` is the weight matrix
                stored on layer ``i``; ``'vector'`` returns all weights flattened
                and concatenated into one 1-D array.
        """
        assert (form in ['vector', 'list']), 'Error in get_weights function: form parameter ill-defined'

        per_layer: List[np.ndarray] = [layer.weights for layer in self.layers]
        if form == 'list':
            return per_layer
        # The matrices have different shapes, so they are flattened before joining
        # (stacking them directly would give a ragged array).
        return np.concatenate([matrix.ravel() for matrix in per_layer])

    def print_weights(self) -> None:
        """Print the weight matrix of every layer."""
        print('Printing weights of network:')
        for index, layer in enumerate(self.layers):
            print(f'Layer {index+1}')
            layer.print_weights()

    def prop_forward(self, features: np.ndarray) -> List[np.ndarray]:
        """Forward pass for a single instance.

        Returns a list, where list[i] stores the activations for neurons in layer
        i+1. The activation of a bias node is the first value in the array and is
        always 1.
        """
        # TO-DO: right now it is hard coded that input layer & hidden layer have a bias node, but output layer has not
        a_1 = np.append(1, features)
        a_2 = np.append(1, sigmoid(np.matmul(self.layers[0].weights, a_1)))
        a_3 = sigmoid(np.matmul(self.layers[1].weights, a_2))
        return [a_1, a_2, a_3]

    def print_activations(self, features: np.ndarray) -> None:
        """Print the activations of every layer for one input instance."""
        print(f'Printing activations for input: {features}')
        for index, array in enumerate(self.prop_forward(features)):
            print(f'Layer {index+1}: {array}')

    def get_deltas(self, X: np.ndarray, y: np.ndarray, activations: List[np.ndarray]=None) -> List[np.ndarray]:
        """Back-propagate the error terms for a single instance.

        Args:
            X: input features of the instance (only used when ``activations`` is
                not supplied).
            y: target values of the instance.
            activations: result of ``prop_forward(X)``, if already available.

        Returns:
            A list of error terms, one per non-input layer, ordered from the first
            hidden layer to the output layer. Bias nodes have no error term.
        """
        # TO-DO: adapt code to accept different cost functions
        # Right now: hard coded to use quadratic loss
        if activations is None:
            activations = self.prop_forward(X)
        weights = self.weights
        output = activations[-1]
        # The activations are already sigmoid outputs, so the derivative of the
        # sigmoid at the pre-activation is simply a * (1 - a).
        deltas = [-np.multiply((y-output), output*(1-output))]
        for i in range(len(activations)-2):
            layer_activations = activations[-(i+2)]
            # deltas[0] is always the most recently computed (next layer's) error term
            back_propagated = np.matmul(np.transpose(weights[-(i+2)]), deltas[0])
            delta = np.multiply(back_propagated, layer_activations*(1-layer_activations))
            # remove 'bias delta', as activation of bias cannot be changed
            deltas.insert(0, delta[1:])
        return deltas

    def partial_derivatives(self, X: np.ndarray, y: np.ndarray, verbose: bool =False) -> List[np.ndarray]:
        """Gradient of the quadratic loss of one instance w.r.t. each weight matrix.

        Returns a list whose entry ``i`` has the same shape as ``weights[i]``; the
        empty weight matrix of the output layer has no entry.
        """
        activations = self.prop_forward(X)
        deltas = self.get_deltas(X, y, activations)
        partial_derivatives = []
        for index in range(len(deltas)):
            if verbose:
                # for testing/debugging purposes
                print(f'Layer {index+1}: dimension deltas {index+2} {deltas[index].shape}, dimension activations {index+1} {activations[index].shape}')
                print(f'activations: {activations[index]}')
                print(f'deltas: {deltas[index]}')
            partial = np.outer(deltas[index], activations[index])
            if verbose:
                # deltas should be equal to partial derivatives of the bias node
                print(f'Partial of bias: {partial[:,0]}')
            partial_derivatives.append(partial)
        return partial_derivatives

    def update_weights(self, big_delta: List[np.ndarray], regularization_parameter: float, learning_rate: float=0.01) -> None:
        """Apply one gradient step with weight decay to every weight matrix.

        The new matrix is stored both in ``self.weights`` and on the corresponding
        layer, because ``prop_forward`` reads the weights from the layers.
        """
        for num_layer in range(len(self.weights)):
            current = self.weights[num_layer]
            updated = current-learning_rate*(big_delta[num_layer] + regularization_parameter*current)
            self.weights[num_layer] = updated
            self.layers[num_layer].weights = updated

    def gradient_descent(self, X_train: np.ndarray, y_train: np.ndarray, regularization_parameter: float, learning_rate: float=0.01) -> None:
        """Run one batch gradient-descent step over all training instances.

        Args:
            X_train: training inputs, one instance per row.
            y_train: training targets, one instance per row.
            regularization_parameter: weight-decay coefficient.
            learning_rate: step size.

        Raises:
            ValueError: if the training set is empty or the number of rows of
                ``X_train`` and ``y_train`` differ.
        """
        num_instances = X_train.shape[0]
        if num_instances == 0:
            raise ValueError('gradient_descent needs at least one training instance')
        if y_train.shape[0] != num_instances:
            raise ValueError('X_train and y_train must contain the same number of instances')

        big_delta: List[np.ndarray] = [np.zeros(matrix.shape) for matrix in self.weights]
        for num_instance in range(num_instances):
            partials = self.partial_derivatives(X_train[num_instance], y_train[num_instance])
            # partials has no entry for the (empty) weight matrix of the output layer
            for num_layer, partial in enumerate(partials):
                big_delta[num_layer] = big_delta[num_layer]+partial
        for num_layer in range(len(big_delta)):
            big_delta[num_layer] = (1/num_instances)*big_delta[num_layer]
        self.update_weights(big_delta, regularization_parameter, learning_rate)

    def train_network(self, n_iter: int, X_train: np.ndarray, y_train: np.ndarray, regul_param: float, learning_rate: float=0.01) -> None:
        """Run ``n_iter`` batch gradient-descent steps on the training data."""
        # TO-DO: print/plot loss after each iteration
        for _ in range(n_iter):
            self.gradient_descent(X_train, y_train, regul_param, learning_rate)


# 8-3-8 network; input and hidden layer carry a bias node, the output layer does not.
test_network = Network(num_nodes=[8, 3, 8], include_biases=[True, True, False])

In [134]:
# Debugging helpers, uncomment as needed:
# test_network.print_weights()
# test_network.print_activations(X[0])
# print(test_network.get_deltas(X[0], y[0]))
# print(test_network.partial_derivatives(X[0], X[0], verbose=False)[0])

# One batch gradient-descent step over the whole data set, regularization parameter 1.
test_network.gradient_descent(X_train=X, y_train=y, regularization_parameter=1)

In [123]:
def activation(inputs, weights, bias=True):
    """Sigmoid of the weighted sum of ``inputs``, optionally offset by a bias of 1.

    ``inputs`` and ``weights`` are walked pairwise; the products are added to a
    running total that starts at 1 when ``bias == True`` and at 0 otherwise.
    """
    # The explicit comparison with True is intentional: only True (or a value
    # equal to it) switches the bias on.
    term = 1 if bias == True else 0
    for value, weight in zip(inputs, weights):
        term += value * weight
    return sigmoid(term)


# activation function for top node hidden layer.
print(activation([1, 0, 0, 0, 0, 0, 0, 0], np.zeros((8, 1)), False))


[0.5]


In [121]:
def forward(network, input):
    """Fill ``network`` in place, layer by layer, and return it.

    Layer 0 is replaced by ``input``. Every node of each later layer is set to
    ``activation`` of the previous layer, using all-zero weights of the previous
    layer's shape. Layer 1 is computed without a bias; all later layers use the
    default bias.
    """
    for layer_index in range(len(network)):
        if layer_index == 0:
            network[layer_index] = input
            continue
        previous = network[layer_index - 1]
        # only the first computed layer is evaluated without a bias term
        use_bias = layer_index != 1
        for node_index in range(len(network[layer_index])):
            network[layer_index][node_index] = activation(previous, np.zeros(previous.shape), bias=use_bias)
    return network

In [184]:
# sigmoid is applied element-wise: one output value per input value.
sigmoid(np.array([1, 0.5, 0]))

array([0.73105858, 0.62245933, 0.5       ])

In [194]:
# NOTE: this rebinds the name `activation`, which an earlier cell defines as a
# function; after this cell runs, `activation(...)` can no longer be called until
# that cell is executed again.
activation = np.ones(shape=(1, 8))
weights = np.ones(shape=(8, 2))
# (1 x 8) row of activations times (8 x 2) weights gives one value per weight column
activation @ weights

array([[8., 8.]])