In [10]:
#此练习是全过程演示神经网络如何工作，同时也可以使用如Keras等库，屏蔽神经网络内部过程
# Import our dependencies
import numpy as np
from numpy import exp, array, random, dot, ones_like, where, log10

# Create our Artificial Neural Network class
class ArtificialNeuralNetwork():
    
    # initializing the class
    def __init__(self):
        
        # generating the same synaptic weights every time the program runs
        random.seed(1)
        
        # synaptic weights (3 × 4 Matrix) of the hidden layer 
        self.w_ij = 2 * random.rand(3, 4) - 1
        
        # synaptic weights (4 × 1 Matrix) of the output layer
        self.w_jk = 2 * random.rand(4, 1) - 1
        
    def Sigmoid(self, x):
        
        # The Sigmoid activation function will turn every input value into 
        # probabilities between 0 and 1
        # the probabilistic values help us assert which class x belongs to        
        return 1 / (1 + exp(-x))
    
    def SigmoidDerivative(self, x):
        
        # The derivative of Sigmoid will be used to calculate the gradient during 
        # the backpropagation process
        # and help optimize the random starting synaptic weights        
        return x * (1 - x)
        
    def crossentropyerror(self, a, y):
        
        # The cross entropy loss function
        # we use it to evaluate the performance of our model        
        return - sum(y * log10(a) + (1 - y) * log10(1 - a))
    
    def train(self, x, y, learning_rate, iterations):
        
        # x: training set of data
        # y: the actual output of the training data
        
        for i in range(iterations):
            
            z_ij = dot(x, self.w_ij) # the dot product of the weights of the hidden layer and the inputs
            a_ij = self.Sigmoid(z_ij) # applying the Sigmoid activation function
            
            z_jk = dot(a_ij, self.w_jk) # the same previous process will be applied to find the predicted output
            a_jk = self.Sigmoid(z_jk)  
            
            dl_jk = -y/a_jk + (1 - y)/(1 - a_jk) # the derivative of the cross entropy loss wrt output
            da_jk = self.SigmoidDerivative(a_jk) # the derivative of Sigmoid  wrt the input (before activ.) of the output layer
            dz_jk = a_ij # the derivative of the inputs of the hidden layer (before activation) wrt weights of the output layer
            
            dl_ij = dot(da_jk * dl_jk, self.w_jk.T) # the derivative of cross entropy loss wrt hidden layer input (after activ.)
            da_ij = self.SigmoidDerivative(a_ij) # the derivative of Sigmoid wrt the inputs of the hidden layer (before activ.)
            dz_ij = x # the derivative of the inputs of the hidden layer (before activation) wrt weights of the hidden layer
            
            # calculating the gradient using the chain rule
            gradient_ij = dot(dz_ij.T , dl_ij * da_ij)
            gradient_jk = dot(dz_jk.T , dl_jk * da_jk)
            
            # calculating the new optimal weights
            self.w_ij = self.w_ij - learning_rate * gradient_ij 
            self.w_jk = self.w_jk - learning_rate * gradient_jk
            
            # printing the loss of our neural network after each 1000 iteration
            if i % 1000 == 0 in range(iterations):
                print("loss: ", self.crossentropyerror(a_jk, y))
                #print("loss: ", self.mael1(a_jk, y))
                  
    def predict(self, inputs):
        
        # predicting the class of the input data after weights optimization
        # the output of the hidden layer
        output_from_layer1 = self.Sigmoid(dot(inputs, self.w_ij)) 
        # the output of the output layer
        output_from_layer2 = self.Sigmoid(dot(output_from_layer1, self.w_jk))   
        
        return output_from_layer2
    
    # the function will print the initial starting weights before training
    def SynapticWeights(self):
        
        print("Layer 1 (4 neurons, each with 3 inputs): w_ij ")        
        print(self.w_ij)
        
        print("Layer 2 (1 neuron, with 4 inputs): w_jk ")        
        print(self.w_jk)   

In [12]:
def main():
    
    ANN = ArtificialNeuralNetwork()    
    #ANN.SynapticWeights()
    
    # the training inputs 
    # the last column is used to add non linearity to the clasification task

    x = array([[0, 0, 1], 
               [0, 1, 1], 
               [1, 0, 1], 
               [0, 1, 0], 
               [1, 0, 0], 
               [1, 1, 1], 
               [0, 0, 0]])
    
    # the training outputs
    y = array([[0, 1, 1, 1, 1, 0, 0]]).T

    ANN.train(x, y, 1, 10000)
    
    # Printing the new synaptic weights after training
    #print("New synaptic weights after training: ")
    #print("Layer 1  w_ij:\n", ANN.w_ij)
    #print("Layer 2  w_jk:\n", ANN.w_jk)
    
    # Our prediction after feeding the ANN with new set of data
    print("Considering new situation [1, 0, 0] -> ?: ")
    print(ANN.predict(array([[1, 0, 0]])))
    
if __name__=="__main__":
    main()

loss:  [2.07175185]
loss:  [0.02522354]
loss:  [0.00812448]
loss:  [0.00483746]
loss:  [0.00344286]
loss:  [0.00267204]
loss:  [0.00218308]
loss:  [0.00184531]
loss:  [0.00159802]
loss:  [0.00140915]
Considering new situation [1, 0, 0] -> ?: 
[[0.99967377]]


In [1]:
#此练习是全过程演示神经网络如何工作，同时也可以使用如Keras等库，屏蔽神经网络内部过程
# Import our dependencies
import numpy as np
from numpy import exp, array, random, dot, ones_like, where, log10
#import tensorflow as tf

# Create our Artificial Neural Network class
class ANN_Gelu():
    
    # initializing the class
    def __init__(self):        
        # generating the same synaptic weights every time the program runs
        random.seed(1)        
        # synaptic weights (2 × 64 Matrix) of the hidden layer 
        self.w_ij = 2 * random.rand(2, 4) - 1        
        # synaptic weights (64 × 1 Matrix) of the output layer
        self.w_jk = 2 * random.rand(4, 1) - 1

    def Gelu(self, x):
        #return 0.5*x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
        return 0.5*x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3))))
    
    def GeluDerivative(self, x): 
        """
        ax = (0.0356774 * tf.pow(x, 3) + 0.797885 * x)
        bx = (0.5 * tf.tanh(ax) +
              (0.0535161 * tf.pow(x, 3) + 0.398942 * x) * tf.pow(tf.sech(ax), 2) + 0.5)
        """
        ax = (0.0356774 * np.power(x, 3) + 0.797885 * x)
        #　sech / 双曲正割： sech(x) = 1 / cosh(x) 
        bx = 0.5 * np.tanh(ax)
        cx = (0.0535161 * np.power(x, 3) + 0.398942 * x) * np.power(1/np.cosh(ax), 2)
              
        return bx + cx + 0.5
        
    def crossentropyerror(self, a, y):        
        # The cross entropy loss function
        # we use it to evaluate the performance of our model        
        return - sum(y * log10(a) + (1 - y) * log10(1 - a))
    
    def mael1(self, a, y):        
        return np.sum(np.absolute(a - y))
    
    def train(self, x, y, learning_rate=1, iterations=10000):        
        # x: training set of data
        # y: the actual output of the training data        
        for i in range(iterations):            
            z_ij = dot(x, self.w_ij) # the dot product of the weights of the hidden layer and the inputs
            a_ij = self.Gelu(z_ij) # applying the Sigmoid activation function
            
            z_jk = dot(a_ij, self.w_jk) # the same previous process will be applied to find the predicted output
            a_jk = self.Gelu(z_jk)  
            
            dl_jk = -y/a_jk + (1 - y)/(1 - a_jk) # the derivative of the cross entropy loss wrt output
            da_jk = self.GeluDerivative(a_jk) # the derivative of Gelu wrt the input (before activ.) of the output layer
            dz_jk = a_ij # the derivative of the inputs of the hidden layer (before activation) wrt weights of the output layer
            
            dl_ij = dot(da_jk * dl_jk, self.w_jk.T) # the derivative of cross entropy loss wrt hidden layer input (after activ.)
            da_ij = self.GeluDerivative(a_ij) # the derivative of Sigmoid wrt the inputs of the hidden layer (before activ.)
            dz_ij = x # the derivative of the inputs of the hidden layer (before activation) wrt weights of the hidden layer
            
            # calculating the gradient using the chain rule
            gradient_ij = dot(dz_ij.T , dl_ij * da_ij)
            gradient_jk = dot(dz_jk.T , dl_jk * da_jk)
            
            # calculating the new optimal weights
            self.w_ij = self.w_ij - learning_rate * gradient_ij 
            self.w_jk = self.w_jk - learning_rate * gradient_jk
            
            # printing the loss of our neural network after each 1000 iteration
            if i % 1000 == 0 in range(iterations):
                print("loss: ", self.crossentropyerror(a_jk, y))
                #print("loss: ", self.mael1(a_jk, y))
                  
    def predict(self, inputs):
        
        # predicting the class of the input data after weights optimization
        # the output of the hidden layer
        output_from_layer1 = self.Gelu(dot(inputs, self.w_ij)) 
        # the output of the output layer
        output_from_layer2 = self.Gelu(dot(output_from_layer1, self.w_jk))   
        
        return output_from_layer2
    
    # the function will print the initial starting weights before training
    def SynapticWeights(self):
        
        print("Layer 1 : w_ij ")        
        print(self.w_ij)
        
        print("Layer 2 : w_jk ")        
        print(self.w_jk)   

In [2]:
def main():
    
    ANN = ANN_Gelu()    
    #ANN.SynapticWeights()
    
    # the training inputs 
    # LOAD DATA
    train_data = np.load('add_2.npz')
    x = train_data['x']
    y = train_data['y']
    
    #print(x)
    #print(y)

    ANN.train(x, y, 1, 10000)
    
    # Printing the new synaptic weights after training
    #print("New synaptic weights after training: ")
    #print("Layer 1  w_ij:\n", ANN.w_ij)
    #print("Layer 2  w_jk:\n", ANN.w_jk)
    
    # Our prediction after feeding the ANN with new set of data
    print("Considering new situation [1, 0, 0] -> ?: ")
    print(ANN.predict(array([[1, 0]])))
    
if __name__=="__main__":
    main()



loss:  [nan]
loss:  [nan]
loss:  [nan]
loss:  [nan]
loss:  [nan]
loss:  [nan]
loss:  [nan]
loss:  [nan]
loss:  [nan]
loss:  [nan]
Considering new situation [1, 0, 0] -> ?: 
[[nan]]
