In [91]:
import numpy as np
import pickle
import gzip

In [98]:
#Write a Data Loader

def load_data(file = "data/mnist.pkl.gz"):
    f = gzip.open(file,"rb")
    u = pickle._Unpickler(f)
    u.encoding = "latin1"
    p = u.load()
    return p

def vectorized_results(j):
    e = np.zeros((10,1))
    e[j] = 1
    return e

def load_data_wrapper():
    tr_d , va_d , te_d = load_data()
    training_inputs = [
        np.reshape(x,(784,1)) for x in tr_d[0]
    ]
    training_results = [
        vectorized_results(y) for y in tr_d[1]
    ]
    training_data = zip(
        training_inputs , training_results
    )
    validation_inputs = [
        np.reshape(x,(784,1)) for x in va_d[0]
    ]
    validation_data = zip(
        validation_inputs , va_d[1]
    )
    test_inputs = [
        np.reshape(x , (784,1)) for x in te_d[0]
    ]
    test_data = zip(test_inputs , te_d[1])
    return list(training_data) , np.array(list(validation_data)) , np.array(list(test_data))

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x)*(1 - sigmoid(x))

In [106]:
#Cost Definitions

class QuadraticCost:
    
    @staticmethod
    def fn(output , input):
        return 0.5 * np.linalg.norm(y-a)**2
    
    @staticmethod
    def delta(activation , z, output):
        return (activation - output)*sigmoid_prime(z)

In [138]:

class BaseNetwork:
    
    def __init__(self , size , cost = QuadraticCost , batch_size = 1):
        self.size = size
        self.num_layers = len(size)
        self.cost = cost
        self.batch_size = batch_size
        self.weights = self.initialize_weights()
        self.biases = self.initialize_biases()
        
    def _get_weight_dimensions(self):
        '''
        Return an iterable with the sizes of weights for each layer
        '''
        return zip(
            self.size[:-1] , self.size[1:]
        )
    def _get_bias_dimensions(self):
        '''
        Return an iterable with the sizes of the biases for each layer
        '''
        return self.size[1:]
    
    def initialize_weights(self):
        '''
        Initialize the weights using Gaussian distribution with mean 0
        and sd 1 over the square root of the number of weights connectin
        to the same neuron
        '''
        sizes = self._get_weight_dimensions()
        
        return [
            np.random.randn(y,x)/np.sqrt(x)
            for x,y in sizes
        ]
    
    def initialize_biases(self):
        size = self._get_bias_dimensions()
        return [
            np.random.randn(self.batch_size , y,1) for y in size
        ]
    
    def activation(self , w , b,a):
        return sigmoid(
            np.matmul(w,a) + b
        )
        
    def inspect(self):
        print("Weights")
        print([np.shape(e) for e in self.weights ])
        print("Biases")
        print([np.shape(e) for e in self.biases ])
        
    def feedforward(self , x):
        a = x
        for w,b in zip(self.weights , self.biases):
            a = self.activation(w,b,a)
        return a
    
    def backprop(self , x , y):
        '''
        Implement the backprop Pipeline
        1. Feedforward with the x,y
        2. Calculate backward errors at each step
        '''
        nabla_b = [
            np.zeros(b.shape) for b in self.biases
        ]
        nabla_w = [
            np.zeros(w.shape) for w in self.weights
        ]
        activation = x
        activations = [x]
        zs = []
    
        for b,w in zip(self.biases, self.weights):
            z = self.activation(w,b,activation)
            activation = sigmoid(z)
            zs.append(z)
            activations.append(activation)
        
        #At this point I have all the zs and activations
        
        #Error at the output layer
        delta = (self.cost).delta(
            activations[-1] , zs[-1] , y
        )
        nabla_b[-1] = delta
        nabla_w[-1] = np.matmul(
            delta , np.transpose(activations[-2]  ,axes = [0,2,1])
        )
        
        for l in range(2, self.num_layers):
            delta = np.matmul(
                np.transpose(
                    self.weights[-l+1]
                ),
                delta
            ) * sigmoid_prime(zs[-l])
            nabla_b[-l] = delta
            nabla_w[-l] = np.matmul(
                delta,
                np.transpose(
                    activations[-l-1]
                )
            )
            
        #Now that we have the error at the output layer, we backpropagate it
        
        return nabla_b , nabla_w
    

In [139]:
tr_d , va_d , te_d = load_data_wrapper()
net = BaseNetwork([784 , 30 , 10] , batch_size = 1)

net.backprop(tr_d[0][0] , tr_d[0][1])

([array([[[ 0.00631094],
          [ 0.02475288],
          [-0.0158375 ],
          [-0.01633883],
          [-0.01199943],
          [ 0.014742  ],
          [ 0.0140955 ],
          [ 0.03165616],
          [-0.0272028 ],
          [-0.01774997],
          [-0.01978159],
          [ 0.00028084],
          [ 0.02080956],
          [ 0.01393356],
          [ 0.0108351 ],
          [-0.01732825],
          [-0.01575855],
          [-0.01584315],
          [-0.00773702],
          [-0.00615239],
          [-0.03207866],
          [ 0.01582908],
          [ 0.00891985],
          [ 0.00359077],
          [-0.00742408],
          [ 0.04956248],
          [-0.01392205],
          [ 0.00107689],
          [ 0.00474457],
          [-0.0053545 ]]]), array([[[ 0.14755358],
          [ 0.1307305 ],
          [ 0.14803519],
          [ 0.14810396],
          [ 0.13394327],
          [-0.10353604],
          [ 0.14787448],
          [ 0.14767859],
          [ 0.14789961],
          [ 0.14017782]]

In [51]:
a = np.random.randn(10,20)
b = np.random.randn(20,30)

np.matmul(net.weights[0] , batch[:,0])



TypeError: list indices must be integers or slices, not tuple

In [131]:
a = np.random.randn(1,10,1)
np.shape(a)

(1, 10, 1)

In [129]:
np.shape(np.transpose(a , axes = [0,2,1]))

(1, 1, 10)