<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Hyperparameters" data-toc-modified-id="Hyperparameters-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Hyperparameters</a></span></li></ul></div>

# Hyperparameters

* Input layer: $s_0$ and $s_2$, samples.
* Output layer: $\hat{s}_1$, a prediction.
* Initial prediction:
$$
\hat{s}_1 = \frac{s_0 + s_2}{2}.
$$

In [None]:
import numpy as np
import ipdb

def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

class Network:

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.zeros((y, 1)) for y in sizes[1:]] # All biases init to 0
        self.weights = [np.full((y, x), 0.5) for x, y in zip(sizes[:-1], sizes[1:])] # All weights init to 0.5
        #for x, y in zip(sizes[:-1], sizes[1:]):
        #    print (x, y)
    
    def propagate_stimuli(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    
    def learn_stimuli(self, _in, ideal_out, learning_rate):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        delta_nabla_b, delta_nabla_w = self.backprop(_in, ideal_out)
        nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
        nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        
        # Gradient descend optimization on weights and biases
        
        # w^l_jk -= \alpha/len(x)\nabla_w^_jk
        self.weights = [w-(learning_rate/len(_in))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        
        # b^l_k -= \alpha/len(x)\nabla_b^l_k
        self.biases = [b-(learning_rate/len(_in))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)

    def backprop(self, _in, ideal_out):
        ipdb.set_trace() # <-------------------------- breakpoint
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        print(len(nabla_b), len(nabla_w))

        # Forward pass
        activation = _in
        print(_in)
        activations = [_in] # list to store all the activations, layer by layer
        print(activations)
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass
        delta = self.cost_derivative(activations[-1], ideal_out) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        #print(delta.shape, activations[-2].transpose().shape)
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())

        return (nabla_b, nabla_w)

In [None]:
net = Network([2, 3, 1])
net.learn_stimuli(np.array([2,4]), np.array([3]), 1.0)

In [14]:
net = Network([748, 30, 10])
net.learn_stimuli(np.array([2,4]), np.array([3]), 1.0)

748 30
30 10


ValueError: shapes (30,748) and (2,) not aligned: 748 (dim 1) != 2 (dim 0)