In [14]:
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline

class Network:
    """Fully connected feed-forward network with sigmoid units.

    Parameters are kept as plain Python lists holding one NumPy array per
    layer, so layers of different widths can coexist.
    """

    def __init__(self, sizes):
        """Create randomly initialised weights and biases.

        Args:
            sizes: Sequence of layer widths, input layer first.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes

        # One (y, 1) bias column per non-input layer, standard-normal samples.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # One (y, x) weight matrix per pair of consecutive layers.
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def training(self, training_data, training_label, eta, epochs):
        """Train with per-sample gradient descent and plot the loss curve.

        After every epoch the sum of ||d - y|| over the samples, divided by
        ``len(training_data)``, is printed and recorded; once all epochs are
        done the recorded values are plotted with matplotlib.

        Args:
            training_data: Sized iterable of input samples. Each sample is
                presumably an (n_in, 1) column vector -- confirm with caller.
            training_label: Targets, paired with ``training_data`` via zip.
            eta: Learning rate applied to every parameter update.
            epochs: Number of passes over the training data.

        Raises:
            ZeroDivisionError: If ``training_data`` is empty and
                ``epochs`` > 0.
        """
        train_loss = []

        for _ in range(epochs):
            epoch_loss = 0

            for data, label in zip(training_data, training_label):
                # Forward pass: pre-activations v and activations y per layer.
                lbl_v, lbl_y = feedforward(self, data)
                # Prepend the input so lbl_y[i] is what layer i receives.
                lbl_y.insert(0, data)

                # Error signal e(n) = d - y.
                e = label - lbl_y[-1]
                epoch_loss = epoch_loss + np.linalg.norm(e)

                # Output layer: delta_L = e(n) * f'(v_L(n)).
                delta = [local_gradient_L(self, e, lbl_v[-1])]

                # Hidden layers, walking from the last one back to the first:
                # delta_l = (W_{l+1}^T . delta_{l+1}) * f'(v_l).
                for w, v in zip(reversed(self.weights[1:]), reversed(lbl_v[:-1])):
                    delta.append(np.multiply(np.dot(w.T, delta[-1]), sigmoid_prime(v)))

                # Put the local gradients back into input-to-output order.
                delta.reverse()

                # Update layer by layer. Stacking the per-layer arrays with
                # np.array() fails when layers differ in shape (ragged input),
                # so the parameters stay lists of independent arrays.
                self.weights = [
                    w + eta * np.dot(d, y.T)
                    for w, d, y in zip(self.weights, delta, lbl_y[:-1])
                ]
                self.biases = [b + eta * d for b, d in zip(self.biases, delta)]

            mean_loss = epoch_loss / len(training_data)
            print('training loss --', mean_loss)
            train_loss.append(mean_loss)

        plt.figure(figsize=(5, 5))
        plt.plot(train_loss)
        plt.title('Training Loss--Σ|d-y| divide by number of training data')

    def testing(self, testing_data, testing_label):
        """Print the fraction of samples classified correctly.

        A sample counts as correct when the arg-max of the network output
        equals the arg-max of its label.

        Args:
            testing_data: Sized iterable of input samples.
            testing_label: Labels, paired with ``testing_data`` via zip.

        Raises:
            ZeroDivisionError: If ``testing_data`` is empty.
        """
        score = 0
        for y, label in zip(testing_data, testing_label):
            # Propagate the sample through every layer.
            for b, w in zip(self.biases, self.weights):
                y = sigmoid(np.dot(w, y) + b)
            if np.argmax(y) == np.argmax(label):
                score = score + 1
        print("Testing Accuracy =", (score / len(testing_data)))
    
        
def feedforward(self, y):
    """Propagate an input through every layer of the network ``self``.

    Args:
        self: Object exposing ``weights`` and ``biases`` iterables with one
            entry per layer.
        y: Input fed to the first layer.

    Returns:
        A pair ``(pre_activations, activations)``: two lists with one entry
        per layer, holding ``w . y + b`` and its sigmoid respectively.
    """
    pre_activations, activations = [], []
    signal = y
    for weight, bias in zip(self.weights, self.biases):
        weighted_sum = np.dot(weight, signal) + bias
        signal = sigmoid(weighted_sum)
        pre_activations.append(weighted_sum)
        activations.append(signal)
    return pre_activations, activations
    

def local_gradient_L(self, e, v):
    """Return the output-layer local gradient ``e * f'(v)`` (element-wise).

    Args:
        self: Unused; kept so the call signature stays the same.
        e: Error signal at the output layer.
        v: Pre-activation of the output layer.
    """
    return np.multiply(e, sigmoid_prime(v))

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))``, element-wise.

    Args:
        z: Scalar or NumPy array.

    Returns:
        The sigmoid of ``z``, with the same shape as ``z``.
    """
    # For very negative z, exp(-z) overflows to inf and the quotient becomes
    # 0.0, which is the correct limit; silence only that overflow warning so
    # saturated units do not flood the output.
    with np.errstate(over='ignore'):
        return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    """Return the derivative of the logistic sigmoid at ``z``.

    Uses the identity ``s'(z) = s(z) * (1 - s(z))``.

    Args:
        z: Scalar or NumPy array.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(z)
    return s * (1 - s)

