In [102]:
import numpy as np
import matplotlib.pyplot as plt
import copy

In [24]:
"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data.  For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``.  In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import _pickle
import gzip

# Third-party libraries
import numpy as np

def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('./data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = _pickle.load(f, encoding='latin1')
    f.close()
    return (training_data, validation_data, test_data)

def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
    containing the input image.  ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``.  In each case, ``x`` is a 784-dimensional
    numpy.ndarry containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit values (integers)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = [ [inputs, results] for inputs, results in zip(training_inputs, training_results) ] # zip(training_inputs, training_results)
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = [ [ va_i, va_o   ] for va_i, va_o in zip(validation_inputs, va_d[1])]#zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = [ [te_i, te_o] for te_i, te_o in zip(test_inputs, te_d[1]) ]#zip(test_inputs, te_d[1])
    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e

In [13]:
training, validation, test = load_data_wrapper()

In [16]:
np.shape(training)
np.shape(training[0][0])

(784, 1)

In [28]:
def sigmoid(x):
    return (1/(1+np.exp(-x)))

def P_sigmoid(x):
    return sigmoid(x)*(1-sigmoid(x))
def Cost_der(h, y):
    return (h-y)

In [123]:
class Network(object):
    
    def __init__(self, net_dim, lr=0.1, Ws=None, Bs=None): # net_dim = [#nodes layer, start from input nodes]
        Nlayer = len(net_dim)
        if (weights == None) or (biases == None):
            self.weights = [np.random.randn(o, i) for i, o in zip(net_dim[:-1], net_dim[1:] )]
            self.biases = [np.random.randn(o, 1) for o in net_dim[1:] ]
        
        else:
            self.weights = Ws
            self.biases = Bs
        self.lr = lr
        #self.activations = np.zeros((Nlayer, ))
        #self.output = 
        
    def stochastic_gradient_descent(self,train, epochs, test=None, lr=0.5, Bsize = 200):
        # Mini_batch with k partitioning will be used
        N = len(training)
        idx = np.arange(0, N, 1)
        for epoch in range(epochs):

            #devide train_data_set
            #np.random.shuffle(train)
            #np.arange(0, N, partition)
            #batches = [ train[i : i+k] for i in range(0, N, Bsize)]

            np.random.shuffle(training)
            #training_shuffled = [ [training[0][i],training[1][i]] for i in idx]
            batches = [ training[i: i+Bsize] for i in range(0, N, Bsize)]

            #Learning from the batch data
            for batch in batches:
                self.update_from_batch(batch)

            # Evaluate from the test data
            if test :
                print ("Epoch {0} : {1} accuracy".format(epoch, self.evaluate(test)) )
            else:
                print ("Epoch {0} Complete. No test data available")
        return (self.weights, self.biases)

    def update_from_batch(self,batch):
        #batch is composed of x, y of input node, labels
        N = len(batch)
        #make placeholders for delta_parameters
        accum_delta_w = [np.zeros_like(w) for w in self.weights]
        accum_delta_b = [np.zeros_like(b) for b in self.biases]
        
        #calculate backpropagated derivatives of Cost function w.r.t weights and biases
        # This 
        for sample in batch: # Suppose the sample consists of [input, label], where shape(input) = n,1/
            del_w, del_b = self.backpropagation(sample) # accum_delta_w, accum_delta_b #
        
            #add calculated sample into (accumulate)
            accum_delta_w = [acc_del_w + del_w for acc_del_w, del_w in zip(accum_delta_w,del_w )]
            accum_delta_b = [acc_del_b + del_b for acc_del_b, del_b in zip(accum_delta_b, del_b)]

        # weight correction using accumulated delta with learning rate(lr)
        self.weights = [w - (self.lr/N)*del_w for w, del_w in zip(self.weights, accum_delta_w)]
        self.biases = [b - (self.lr/N)*del_b for b, del_b in zip(self.biases, accum_delta_b)]
    def backpropagation(self, sample):
        #Suppose sample is One case with N feasures in shape of matrix(N,1)
        #activations and sum values of each layer should be counted and listed in proccess of feedforward 
        #
        #Layer_val = np.zeros(1,1)
        #Storing intermediate output values will be appended from existing list
        in_a = [sample[0]] #first value will be the input features
        out_z = []
        ina = copy.deepcopy(sample[0])
        #Feed_forward and save each layer output
        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, ina)+b
            ina = sigmoid(z)
            out_z.append(z)
            in_a.append(ina)
        #Backpropagation using parameters
        dw = [np.zeros_like(w) for w in self.weights]
        db = [np.zeros_like(b) for b in self.biases]
        N = len(out_z)
        #1. Get Cost derivative values(dz, dw, db) of the Last layer
        dz = Cost_der(in_a[-1], sample[1])* P_sigmoid(out_z[-1]) #(dz is the delta value)
        dw[-1] = in_a[-2].transpose()*dz # (1,N) * (N,1)
        db[-1] = dz #(N,1)
        #2. Get iterative, # for i the iterative (from last layerN)
        #a0 --- (z0)a1 --- (z1)a2 ---(z2) : a3 --- C
        #    w0         w1         w2
        for i in range(N-2, -1,-1) : 
            dz = np.dot(np.transpose(self.weights[i+1]),dz) * P_sigmoid(out_z[i])
            dw[i] = in_a[i].transpose()*dz # (1,N) * (N,1)
            db[i] = dz #(N,1)
        #3. 
        return (dw, db)

    def feedforward(self, X):
        a=[]
        
        for in_a, label in X:
            ina = copy.deepcopy(in_a)
            #weights and biases are yet seperated.
            for w, b in zip(self.weights, self.biases):
                ina = sigmoid(np.dot(w, ina) + b)
            a.append(np.argmax(ina) == label)
        return (np.array(a))


    def evaluate(self, test):
        #Return Onehot Encoding | Assume x:0, y:1
        return (np.sum( self.feedforward(test))/len(test))


In [128]:
#(784, 10, 10)
net_dim = [784, 10, 10]
epochs = 10

In [None]:
for i in range(epochs):
    nn = Network(net_dim , Ws=weights, Bs = biases)
    weights, biases = nn.stochastic_gradient_descent(training, i, test=test)


Epoch 0 : 0.1744 accuracy
Epoch 0 : 0.1923 accuracy
Epoch 1 : 0.2065 accuracy
Epoch 0 : 0.2217 accuracy
Epoch 1 : 0.2441 accuracy
Epoch 2 : 0.2711 accuracy
Epoch 0 : 0.2977 accuracy
Epoch 1 : 0.3396 accuracy
Epoch 2 : 0.3585 accuracy


array([7, 2, 1, ..., 4, 5, 6])

In [27]:
net_dim = [784, 10,10]

weights =  [ np.random.randn(o,i) for o, i in zip(net_dim[1:] , net_dim[:-1])]
biases = [np.random.randn(o,1) for o in net_dim[1:]]



In [290]:
def backpropagation(sample, weights, biases): #Cal for One sample case

    #Suppose sample is One case with N feasures in shape of matrix(N,1)
    #activations and sum values of each layer should be counted and listed in proccess of feedforward 
    #
    #Layer_val = np.zeros(1,1)
    #Storing intermediate output values will be appended from existing list
    #N = 
    in_a = [sample[0]] #first value will be the input features
    out_z = []
    
    #Feed_forward and save each layer output
    for w, b in zip(weights, biases):
        z = np.dot(w, in_a[-1])+b
        a = sigmoid(z)
        out_z.append(z)
        in_a.append(a)
    #Backpropagation using parameters
    #1. Get Cost derivative values(dz, dw, db) of the Last layer
    dw = [np.zeros_like(w) for w in weights]
    db = [np.zeros_like(b) for b in biases]
    N = len(out_z)
    dz = Cost_der(in_a[-1], sample[1])* P_sigmoid(out_z[-1]) #(dz is the delta value)
    dw[-1] = in_a[-2].transpose()*dz # (1,N) * (N,1)
    db[-1] = dz #(N,1)
    #2. Get iterative, # for i the iterative (from last layerN)
    #a0 --- (z0)a1 --- (z1)a2 ---(z2) : a3 --- C
    #    w0         w1         w2
    for i in range(N-2, -1,-1) : 
        dz = np.dot(np.transpose(weights[i+1]),dz) * P_sigmoid(z[i])
        dw[i] = in_a[i].transpose()*dz # (1,N) * (N,1)
        db[i] = dz #(N,1)

    #3.
    return (dw, db)

In [317]:
dw = [np.zeros_like(w) for w in weights]
db = [np.zeros_like(b) for b in biases]
batch_in = training[0][:200]
batch_lbl= np.array(training[1][:200])

In [319]:
for inp, out in zip(batch_in, batch_lbl):
    Dw, Db = backpropagation([inp,out], weights, biases)
    dw = [ dw + Dw for dw, Dw in zip(dw, Dw)]
    db = [ db + Db for db, Db in zip(db, Db)]

In [321]:
np.average(dw[0][0])

1.0045748028676951

In [256]:
np.shape(dw[0][0])
np.where(dw[0][0] != 0)

(array([152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 176,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
        190, 191, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
        214, 215, 216, 217, 218, 231, 232, 233, 234, 235, 236, 237, 238,
        239, 240, 241, 260, 261, 262, 263, 264, 265, 266, 268, 269, 289,
        290, 291, 292, 293, 319, 320, 321, 322, 347, 348, 349, 350, 376,
        377, 378, 379, 380, 381, 405, 406, 407, 408, 409, 410, 434, 435,
        436, 437, 438, 439, 463, 464, 465, 466, 467, 493, 494, 495, 496,
        518, 519, 520, 521, 522, 523, 524, 544, 545, 546, 547, 548, 549,
        550, 551, 570, 571, 572, 573, 574, 575, 576, 577, 578, 596, 597,
        598, 599, 600, 601, 602, 603, 604, 605, 622, 623, 624, 625, 626,
        627, 628, 629, 630, 631, 648, 649, 650, 651, 652, 653, 654, 655,
        656, 657, 676, 677, 678, 679, 680, 681, 682, 683]),)

In [200]:
dz = Cost_der(As[-1], training[1][0])*P_sigmoid(Zs[-1])
dw[-1] = As[-2].transpose()*dz
db[-1] = dz

In [201]:
for i in range(len(Zs)-2, -1,-1):
    print(i)
    dz = np.dot(weights[i+1].transpose(), dz) * P_sigmoid(Zs[i])
    dw[i] = As[i].transpose()*dz
    db[i] = dz

print (dw, db)

0
[array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [-0., -0., -0., ..., -0., -0., -0.],
       ..., 
       [-0., -0., -0., ..., -0., -0., -0.],
       [-0., -0., -0., ..., -0., -0., -0.],
       [-0., -0., -0., ..., -0., -0., -0.]]), array([[  1.39291766e-01,   2.29537404e-03,   1.76381838e-02,
          1.39561404e-01,   7.72245085e-02,   1.39746121e-01,
          1.34929086e-01,   4.51977709e-03,   1.26357874e-01,
          3.63729785e-07],
       [  3.45410781e-02,   5.69198712e-04,   4.37385425e-03,
          3.46079420e-02,   1.91498597e-02,   3.46537473e-02,
          3.34592360e-02,   1.12079829e-03,   3.13337775e-02,
          9.01964219e-08],
       [  9.25102919e-02,   1.52446715e-03,   1.17143574e-02,
          9.26893714e-02,   5.12884718e-02,   9.28120503e-02,
          8.96128281e-02,   3.00179910e-03,   8.39202787e-02,
          2.41570263e-07],
       [  1.46870319e-01,   2.42026020e-03,   1.85978379e-02,
          1.47154

In [196]:
np.shape(weights[0].transpose())

np.shape(Zs[0])

(10, 1)

In [155]:
np.shape(training[0][2])

(784, 1)

In [179]:
P_sigmoid(Zs[0])

array([[  6.22457135e-03],
       [  1.61075093e-02],
       [  1.10000135e-01],
       [  4.32131601e-03],
       [  2.47405567e-01],
       [  3.01321376e-03],
       [  3.59902021e-02],
       [  3.12052807e-02],
       [  8.88273462e-02],
       [  2.59491659e-06]])

In [177]:
np.shape(weights[0].transpose())

(784, 10)

AttributeError: 'list' object has no attribute 'shuffled'

In [19]:
np.random.shuffle(training)

In [65]:
training[0][0]

array([[ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0

In [68]:
l= [[1,2],[3,4]]
for i, j in l:
    print (i+ j)

3
7
