<a href="https://colab.research.google.com/github/vladgap/MLNN/blob/main/MLNN_2_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Single Neuron

In [2]:
class Neuron:
    """A single neuron with an activation function.
       Attributes:
          bias:     The bias term. By defaul it's 1.0.
          weights:  List of weights incl. bias
          activ:    The activation function: linear (default), relu, mrelu (modified relu), sigmoid.
          param:    parameter of mrelu activ function -- slope of negatives. 0.01 by default."""

    def __init__(self, inputs, bias = 1.0, activ = 'linear'):
        """Return a new Perceptron object with the specified number of inputs (+1 for the bias) and random initial weights.
        inputs:   The number of inputs in the perceptron, not counting the bias."""
        self.weights = (np.random.rand(inputs+1) * 2) - 1
        self.bias = bias
        self.activ = activ

    def run(self, x):
        """Run the perceptron according the activ function. x is a list with a single row of the input data."""
        sum = np.dot(np.append(x,self.bias),self.weights)
        if self.activ == 'linear':
          return sum
        if self.activ == 'sigmoid':
          return self.sigmoid(sum)
        if self.activ == 'relu':
          return self.relu(sum)
        if self.activ == 'mrelu':
          return self.mrelu(sum)

    def set_weights(self, w_init):
        """Overrides the np.random.rand() weights and the bias weight.
           w_init is a list of numbers. Turns to a numpy array of doubles."""
        self.weights=np.array(w_init, dtype='double')

    def set_activ(self, activ, param=0):
        """Overrides the 'linear' activation function."""
        self.activ = activ
        self.param = param

    def sigmoid(self, x):
        """Returns the sigmoid of the input"""
        # return the output of the sigmoid function applied to x
        return 1/(1+np.exp(-x))

    def relu(self, x):
        """Returns the RELU of the input"""
        # return the output of the relu function applied to x
        if x >= 0:
          return x
        return 0

    def mrelu(self, x):
        """Returns the modified RELU of the input. The parameter is a slope for negatives"""
        # return the output of the modified relu function applied to x
        if x >= 0:
          return x
        return self.param*x

## AND gate

In [3]:
neuron = Neuron(inputs=2, activ='sigmoid')
neuron.set_weights([10,10,-15]) #AND gate

print("AND Gate:")
print ("0 0 = {0:.10f}".format(neuron.run([0,0])))
print ("0 1 = {0:.10f}".format(neuron.run([0,1])))
print ("1 0 = {0:.10f}".format(neuron.run([1,0])))
print ("1 1 = {0:.10f}".format(neuron.run([1,1])))


AND Gate:
0 0 = 0.0000003059
0 1 = 0.0066928509
1 0 = 0.0066928509
1 1 = 0.9933071491


## OR gate

In [4]:
neuron = Neuron(inputs=2, activ='sigmoid')
neuron.set_weights([10,10,-5]) #OR gate

print("OR Gate:")
print ("0 0 = {0:.10f}".format(neuron.run([0,0])))
print ("0 1 = {0:.10f}".format(neuron.run([0,1])))
print ("1 0 = {0:.10f}".format(neuron.run([1,0])))
print ("1 1 = {0:.10f}".format(neuron.run([1,1])))


OR Gate:
0 0 = 0.0066928509
0 1 = 0.9933071491
1 0 = 0.9933071491
1 1 = 0.9999996941


## NAND gate

In [5]:
neuron = Neuron(inputs=2, activ='sigmoid')
neuron.set_weights([-10,-10,15]) #NAND gate

print("NAND Gate:")
print ("0 0 = {0:.10f}".format(neuron.run([0,0])))
print ("0 1 = {0:.10f}".format(neuron.run([0,1])))
print ("1 0 = {0:.10f}".format(neuron.run([1,0])))
print ("1 1 = {0:.10f}".format(neuron.run([1,1])))

NAND Gate:
0 0 = 0.9999996941
0 1 = 0.9933071491
1 0 = 0.9933071491
1 1 = 0.0066928509


# Multilayer neuron

In [6]:
class MultiLayerNeuron:
    """A multilayer neuron class that uses the Neuron class.
       Builds a list of neurons with the specific activation function.
       The activation function may be modified later using the set_activ method.
       For example: self.network[layer][neuron].set_activ('linear'). Layer 0 is an input.
       Attributes:
          layers:  A list with the number of neurons per layer. Including the input (0) and the output (last) layers.
          bias:    The bias term. The same bias is used for all neurons.
          network: self.network[layer][neuron] -- list of lists of Neurons. Layer 0 is an inputs.
          param:   Parameter of mrelu activ function -- slope of negatives. 0.01 by default.
          eta:     Learning rate."""

    def __init__(self, layers, bias = 1.0, activ='linear'):
        """Return a new MLP object with the specified parameters.
           layers -- a list of layers. [0] is a number of the model's features.
           Activation function is linear by default."""
        self.layers = np.array(layers,dtype=object)
        self.bias = bias
        self.network = [] # The list of lists of neurons (perceptrons).
        self.values = []  # The list of lists of neurons' (perceptrons') output values.
        self.d = []       # The list of lists of error terms (lowercase deltas)
        self.activ = activ

        # 2 nested loops to create neurons layer by layer
        for i in range(len(self.layers)): # outer loop iterates on each layer
            self.values.append([]) #The new list of values will be filled with zeros, for every neuron in the layer.
            self.values[i] = [0.0 for j in range(self.layers[i])]
            self.d.append([])
            self.d[i] = [0.0 for j in range(self.layers[i])]
            self.network.append([])
            if i > 0:      #network[0] is the input layer, so it has no neurons
                for j in range(self.layers[i]): # inner loop iterates on each neuron in a layer
                    neur=Neuron(inputs = self.layers[i-1], bias = self.bias, activ = self.activ) #
                    self.network[i].append(neur) # adding j perceptrons
        self.network = np.array([np.array(x) for x in self.network],dtype=object) #transforms list of lists to numpy array
        self.values = np.array([np.array(x) for x in self.values],dtype=object)
        self.d = np.array([np.array(x) for x in self.d],dtype=object)

    def set_weights(self, w_init): # set_weights of the MultiLayer class
        """Set the weights.
           Overrides the np.random.rand() weights and the bias weight.
           w_init -- a list of lists with the weights for all but the input layer. Incl. the bias. """
        for i in range(len(w_init)):
            for j in range(len(w_init[i])):
                self.network[i+1][j].set_weights(w_init[i][j]) # set_weights for each perceptron i

    def set_activ(self, activ, param=0):
        """Set the activation function to every neurons.
           activ -- a string of 'linear' (default), 'relu', 'mrelu' (modified relu), 'sigmoid'.
           param -- parameter for mrelu."""
        for i in range(1,len(self.network)):
            for j in range(self.layers[i]):
                self.network[i][j].set_activ(activ, param) # set_activ for each neuron
        self.param=param

    def set_output_activ(self, activ, param=0):
        """Set the activation function to the last (output) neurons.
           activ -- a string of 'linear' (default), 'relu', 'mrelu' (modified relu), 'sigmoid'.
           param -- parameter for mrelu."""
        i = len(self.network)-1
        for j in range(self.layers[i]):
            self.network[i][j].set_activ(activ, param)

    def printWeights(self):
        """Displays a summary of weights and activation functions per layer and neuron."""
        print()
        print('Layer 0 is the Input Layer')
        for i in range(1,len(self.network)):
            for j in range(self.layers[i]):
                print("Layer",i,"Neuron",j,":",self.network[i][j].weights,self.network[i][j].activ)
        print()

    def run(self, x):
        """Feed a single row of x into the MultiLayer Neuron.
           Returns the output of the last neuron."""
        x = np.array(x,dtype=object)
        self.values[0] = x
        for i in range(1,len(self.network)):
            for j in range(self.layers[i]):
                self.values[i][j] = self.network[i][j].run(self.values[i-1]) #runs preceptrons with the previous outputs
        return self.values[-1]

    def bp_classif(self, x, y, eta=0.2):
        """Run a single (x,y) pair with the backpropagation algorithm - Gradient Descent.
           Uses the derivative of the SIGMOID function."""
        x = np.array(x,dtype=object)
        y = np.array(y,dtype=object)
        self.eta=eta
        # STEP 1: Feed a sample to the network
        outputs = self.run(x)
        # STEP 2: Calculate the MSE
        error = 2*(y - outputs) # A list of outputs
        MSE = sum( error ** 2) / self.layers[-1]
        # ∂MSE/∂weight=∂MSE/∂output*∂output/∂weight
        # STEP 3: Calculate the OUTPUT error terms
        # ∂MSE/∂output -- depends on neuron's activation function
        self.d[-1] = outputs * (1 - outputs) * (error) # derivative of the SIGMOID function
        # STEP 4: Calculate the error term of EACH UNIT on each layer
        for i in reversed(range(1,len(self.network)-1)):
            for h in range(len(self.network[i])):
                fwd_error = 0.0
                for k in range(self.layers[i+1]):
                    fwd_error += self.network[i+1][k].weights[h] * self.d[i+1][k]
                self.d[i][h] = self.values[i][h] * (1-self.values[i][h]) * fwd_error # derivative of the SIGMOID function
        # STEPS 5 & 6: Calculate the deltas and update the weights
        for i in range(1,len(self.network)): # runs on layers
            for j in range(self.layers[i]): # runs on neurons
                for k in range(self.layers[i-1]+1): # runs on inputs. +1 for bias
                    if k==self.layers[i-1]:
                        delta = self.eta * self.d[i][j] * self.bias
                    else:
                        delta = self.eta * self.d[i][j] * self.values[i-1][k] # applying the delta rule
                    self.network[i][j].weights[k] += delta
        return MSE

    def sigmoid(self, x):
        """Return the output of the sigmoid function applied to x"""
        return 1/(1+np.exp(-x))

    def deriv(self, value, i, j=0):
        '''Calculates the derivative of the activ function for the back propagation'''
        if self.network[i][j].activ == 'linear':
          # print ('lin')
          return 1
        if self.network[i][j].activ == 'sigmoid':
          # print ('sig')
          return self.sigmoid(value)*(1-self.sigmoid(value))
        if self.network[i][j].activ == 'relu':
          if value > 0:
            # print ('re>')
            return 1
          else:
            # print ('re<')
            return 0
        if self.network[i][j].activ == 'mrelu':
          if value > 0:
            return 1
          else:
            return self.param

    def bp_regres(self, x, y, eta=0.01):
        """Run a single (x,y) pair with the backpropagation algorithm - Gradient Descent.
           Uses the derivative according each neuron's activation function.
           Modifies the weights of the neurons, calculates and returns updated MSE.
           eta -- learning rate."""
        x = np.array(x,dtype=object)
        y = np.array(y,dtype=object)
        self.eta=eta
        # STEP 1: Feed a sample to the network
        outputs = self.run(x)
        # STEP 2: Calculate the MSE
        error = 2*(y - outputs) # A list of outputs
        MSE = sum( error ** 2) / self.layers[-1]
        # ∂MSE/∂weight=∂MSE/∂output*∂output/∂weight
        # STEP 3: Calculate the OUTPUT error terms
        # ∂MSE/∂output -- depends on neuron's activation function
        for j in range (len(outputs)):
            self.d[-1][j] = self.deriv(outputs[j], len(self.network)-1) * error
        # STEP 4: Calculate the error term of EACH UNIT on each layer
        for i in reversed(range(1,len(self.network)-1)):
            for h in range(len(self.network[i])):
                fwd_error = 0.0
                for k in range(self.layers[i+1]):
                    fwd_error += self.network[i+1][k].weights[h] * self.d[i+1][k]
                self.d[i][h] = self.deriv(self.values[i][h], i, h) * fwd_error
        # STEPS 5 & 6: Calculate the deltas and update the weights
        for i in range(1,len(self.network)): # runs on layers
            for j in range(self.layers[i]): # runs on neurons
                for k in range(self.layers[i-1]+1): # runs on inputs. +1 for bias
                    # output=sum(weight*value)+bias*bias_weight
                    if k==self.layers[i-1]:
                        # ∂output/∂bias_weight=bias
                        delta = self.eta * self.d[i][j] * self.bias
                    else:
                        # ∂output/∂weight=value
                        delta = self.eta * self.d[i][j] * self.values[i-1][k]
                    self.network[i][j].weights[k] += delta # applying the delta rule
        return MSE

## XOR gate=(OR+NAND)+AND

In [7]:
#test code
mln1 = MultiLayerNeuron(layers=[2,2,1])  #mln1
mln1.set_weights([[[-10,-10,15],[15,15,-10]],[[10,10,-15]]])
mln1.set_activ('sigmoid') #linear is by default

mln1.printWeights()
print("XOR Gate:")
print ("0 0 = {0:.10f}".format(mln1.run([0,0])[0]))
print ("0 1 = {0:.10f}".format(mln1.run([0,1])[0]))
print ("1 0 = {0:.10f}".format(mln1.run([1,0])[0]))
print ("1 1 = {0:.10f}".format(mln1.run([1,1])[0]))


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [-10. -10.  15.] sigmoid
Layer 1 Neuron 1 : [ 15.  15. -10.] sigmoid
Layer 2 Neuron 0 : [ 10.  10. -15.] sigmoid

XOR Gate:
0 0 = 0.0066958493
0 1 = 0.9923558642
1 0 = 0.9923558642
1 1 = 0.0071528098


## Training

In [8]:
mln2 = MultiLayerNeuron(layers=[2,2,1])
mln2.set_activ('sigmoid') #linear is by default
print("\nTraining Neural Network as an XOR Gate...\n")
for i in range(2000):
    MSE = 0.0
    MSE += mln2.bp_classif([0,0],[0])
    MSE += mln2.bp_classif([0,1],[1])
    MSE += mln2.bp_classif([1,0],[1])
    MSE += mln2.bp_classif([1,1],[0])
    MSE = MSE / 4
    if(i%200 == 0):
        print (MSE)

mln2.printWeights()

print("XOR Gate:")
print ("0 0 = {0:.10f}".format(mln2.run([0,0])[0]))
print ("0 1 = {0:.10f}".format(mln2.run([0,1])[0]))
print ("1 0 = {0:.10f}".format(mln2.run([1,0])[0]))
print ("1 1 = {0:.10f}".format(mln2.run([1,1])[0]))


Training Neural Network as an XOR Gate...

1.3865346614894274
0.981728759957696
0.8416211819188844
0.7510105408677401
0.6956784569061343
0.6043135194484166
0.2425380200975286
0.08437612938122538
0.04499006045279842
0.02959963735955659

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [ 3.53159829  3.55055132 -5.47094891] sigmoid
Layer 1 Neuron 1 : [-5.71711817 -5.91280545  1.99010299] sigmoid
Layer 2 Neuron 0 : [-7.20929649 -7.13466441  3.55701391] sigmoid

XOR Gate:
0 0 = 0.0600900373
0 1 = 0.9239385076
1 0 = 0.9229420625
1 1 = 0.0792222481


# Regres

In [9]:
class Regres:
    """Creates a multilayer neuron network.
       Used for regression. Fits the model by running the MultiLayer Neuron Network in a
       loop for each row of X and calculating the error. Each running modifies the weights of the objects.
       Attributes:
          layers:               A list with the number of neurons per layer. Including the input (first) and the output (last) layers.
          regres_network:       MultiLayerNeuron Class. self.regres_network.network[layer][neuron] -- Neuron Class (layer 0 is an input).
          epochs:               Number of iterations
          eta:                  Learning rate
          weight_history:       List of lists of weights propagation
          weight_history_table: Pandas table of weights propagation
          MSE_history:          List of MSEs propagation"""

    def __init__(self, layers, bias=1.0):
        """Return a new MLP object with the specified parameters.
           layers -- a list of layers. [0] is a number of the model's features.
           Activation function is linear by default."""
        self.layers = layers
        self.bias = bias
        self.regres_network = MultiLayerNeuron(layers=layers, bias=bias)

    def set_weights(self, w_init):
        """Set the weights.
           Overrides the np.random.rand() weights and the bias weight.
           w_init -- a list of lists with the weights for all but the input layer. Incl. the bias. """
        self.regres_network.set_weights(w_init)

    def set_hidden_activ(self, activ, param=0):
        """Sets the activ function of the hidden layers.
           activ -- a string of 'linear' (default), 'relu', 'mrelu' (modified relu), 'sigmoid'.
           param -- parameter for mrelu."""
        self.regres_network.set_activ(activ, param=0)
        self.regres_network.set_output_activ('linear')

    def fit(self, X, y, epochs, eta=0.01):
        """Runs the MLNs epochs times. Each time the weights are being modified and the error is being calculated.
           MSEs and weights are stored.
           X,y -- an array and a list of data."""
        self.epochs=epochs
        self.eta=eta
        self.weight_history=[]
        self.weight_history_table=[]
        self.MSE_history=[]
        for i in range(self.epochs):
            weight_epoch=[]
            weight_epoch_table=[]
            MSE = 0.0
            for j in range (len(X)):
                MSE +=  self.regres_network.bp_regres(X[j],[y[j]], self.eta)
            MSE = MSE / len(X)
            self.MSE_history.append(MSE)
            for m in range(1,len(self.layers)):
                weight_layer=[]
                for n in range(self.layers[m]):
                    neuron_w=self.regres_network.network[m][n].weights
                    neuron_w_list=[x for x in neuron_w]
                    weight_layer.append(neuron_w_list)
                    weight_epoch_table+=neuron_w_list
                weight_epoch.append(weight_layer)
            self.weight_history.append(weight_epoch)
            self.weight_history_table.append(weight_epoch_table)
        self.weight_history_table=pd.DataFrame(data=self.weight_history_table, columns=self.get_cols())
        print ("""Model fitted.
self.weight_history - list of lists of weights propagation
self.weight_history_table - Pandas table of weights propagation
self.MSE_history - list of MSEs propagation""")

    def get_cols(self):
        """Gets a list of names for weights. Used for pandas table of weights propagation as column names."""
        cols=[]
        for i in range(1,len(self.layers)):
            for h in range(self.layers[i]):
                for k in range(self.layers[i-1]+1):
                    col="{}_{}_{}".format(i,h,k)
                    cols.append(col)
        return cols

    def printWeights(self):
        """Displays a summary of weights and activation functions per layer and neuron."""
        self.regres_network.printWeights()

    def run(self, x):
        """Calculates the output of a single row of X with the weights set"""
        return self.regres_network.run(x)

    def predict(self, x):
        """Returns the list of output. Runs every row of the data."""
        y=[]
        for i in range(len(x)):
            y.append(self.run(x[i])[0])
        return y

In [10]:
weight_history=[]
a=[[1,2],[1,3],[4,1]]
b=[]
for m in range(3):
  for n in range(2):
    b.append(a[m][n])
print ("b",b)
weight_history.append(b)
print (weight_history)

b [1, 2, 1, 3, 4, 1]
[[1, 2, 1, 3, 4, 1]]


# Example 1 -- weights

In [11]:
print ("1 1 =",mln1.run([1,1]))
# print (mln1.network[1][0].weights) # network is list of lists of perceptrons. Each has attribute "weights"
w1=mln1.network[1][0].weights
w2=[x for x in w1]
w2

1 1 = [0.00715281]


[-10.0, -10.0, 15.0]

In [12]:
mln1=MultiLayerNeuron(layers=[2,2,1])
mln1.set_activ('relu')
mln1.printWeights()

mln1.set_output_activ('sigmoid') # setting the output activ func
mln1.printWeights()

mln1.network[1][0].set_activ('linear') # changing specific activ func
mln1.printWeights()


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [0.63700643 0.99416301 0.29961022] relu
Layer 1 Neuron 1 : [0.87385944 0.16299017 0.16205695] relu
Layer 2 Neuron 0 : [0.16231712 0.16206876 0.16786803] relu


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [0.63700643 0.99416301 0.29961022] relu
Layer 1 Neuron 1 : [0.87385944 0.16299017 0.16205695] relu
Layer 2 Neuron 0 : [0.16231712 0.16206876 0.16786803] sigmoid


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [0.63700643 0.99416301 0.29961022] linear
Layer 1 Neuron 1 : [0.87385944 0.16299017 0.16205695] relu
Layer 2 Neuron 0 : [0.16231712 0.16206876 0.16786803] sigmoid



# Example 3 -- linear activs, bp_regres. מתכנס למשקלים טוב מאוד

In [13]:
mln3 = MultiLayerNeuron(layers=[3,1])
# mln3.set_weights([[[5.,-0.9,2.,0.]]])
mln3.set_weights([[[0,0,0,0]]])
mln3.printWeights()
print("\nTraining Neural Network...\n")
for i in range(400):
    MSE = 0.0
    MSE += mln3.bp_regres([0.7759,	0.1104,	0.9977,],5.764286995)
    MSE += mln3.bp_regres([0.9692,	0.6961,	0.8483,],5.84646758)
    MSE += mln3.bp_regres([0.0265,	0.399,	0.5375,],0.808633075)
    MSE += mln3.bp_regres([0.7694,	0.5051,	0.2542,],3.850298589)
    MSE = MSE / 4
    if(i%200 == 0):
        print (MSE)

mln3.printWeights()




Layer 0 is the Input Layer
Layer 1 Neuron 0 : [0. 0. 0. 0.] linear


Training Neural Network...

75.38887198247966
0.43096181977352294

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [ 4.74597852 -0.63634893  2.10160426 -0.05486685] linear



# Example 4 -- sigmoid, bp_regres. מקרב רגרסיה עם סיגמויד. עובד טוב

In [14]:
# generating output values
mln4 = MultiLayerNeuron(layers=[3,2,1])
mln4.set_activ('sigmoid')
mln4.set_output_activ('linear')
mln4.set_weights([[[0,1,-2,1],[1,-2,0,1]],[[-1,2,3]]])
mln4.printWeights()

y4=[]
y4.append(mln4.run([2,1,0])[0])
y4.append(mln4.run([-1,0,1])[0])
y4.append(mln4.run([0,0,0])[0])
y4.append(mln4.run([1,1,0])[0])
y4.append(mln4.run([-1,-1,-1])[0])
y4.append(mln4.run([-1,1,-1])[0])
display ('original values:',y4)
print ()

# training model and predicting values
mln4 = MultiLayerNeuron(layers=[3,2,1])
mln4.set_activ('sigmoid')
mln4.set_output_activ('linear')

for i in range(1000):
    MSE = 0.0
    MSE += mln4.bp_regres([2,1,0],y4[0])
    MSE += mln4.bp_regres([-1,0,1],y4[1])
    MSE += mln4.bp_regres([0,0,0],y4[2])
    MSE += mln4.bp_regres([1,1,0],y4[3])
    MSE += mln4.bp_regres([-1,-1,-1],y4[4])
    MSE += mln4.bp_regres([-1,1,-1],y4[5])
    MSE = MSE / 6
    if(i%200 == 0):
        print ('MSE=',MSE)
mln4.printWeights()

y4=[]
y4.append(mln4.run([2,1,0])[0])
y4.append(mln4.run([-1,0,1])[0])
y4.append(mln4.run([0,0,0])[0])
y4.append(mln4.run([1,1,0])[0])
y4.append(mln4.run([-1,-1,-1])[0])
y4.append(mln4.run([-1,1,-1])[0])
display ('predicted values:',y4)
print ()


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [ 0.  1. -2.  1.] sigmoid
Layer 1 Neuron 1 : [ 1. -2.  0.  1.] sigmoid
Layer 2 Neuron 0 : [-1.  2.  3.] linear



'original values:'

[3.5813200792821274,
 3.731058578630005,
 3.731058578630005,
 3.119202922022118,
 3.880797077977882,
 2.2563920540063265]


MSE= 101.19594574073945
MSE= 0.06608902000082345
MSE= 0.024345593761805406
MSE= 0.030028804098242
MSE= 0.03577215798546097

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [ 1.37090635 -2.90990385  1.52153656  0.58876187] sigmoid
Layer 1 Neuron 1 : [ 0.12587554 -0.03011433 -0.60007478  0.58625456] sigmoid
Layer 2 Neuron 0 : [2.31028717 0.5287845  1.93761073] linear



'predicted values:'

[3.6979401707467336,
 3.747313403656686,
 3.763059381864765,
 2.9330178324310756,
 3.8283375492873826,
 2.339820056822494]




# Example 5 -- relu-sigmoid, bp_regres. מתכנס למשקלים לא משהו

In [15]:
mln4 = MultiLayerNeuron(layers=[3,2,1,1]) # generating MLN with random weights
mln4.set_activ('relu')
mln4.network[2][0].set_activ('sigmoid') # changing specific activ func
mln4.set_output_activ('linear')
mln4.set_weights([[[0,1,-2,1],[1,-2,0,1]],[[1,0,1]],[[-1,2]]]) # setting weights
mln4.printWeights()

y4=[] # generating output values
y4.append(mln4.run([2,1,0])[0])
y4.append(mln4.run([-1,0,1])[0])
y4.append(mln4.run([0,0,0])[0])
y4.append(mln4.run([1,1,0])[0])
y4.append(mln4.run([-1,-1,-1])[0])
y4.append(mln4.run([-1,1,-1])[0])
display ('original values:',y4)
print ()

# training model and predicting values
mln4 = MultiLayerNeuron(layers=[3,2,1,1]) # generating MLN with random weights
mln4.set_activ('relu')
mln4.network[2][0].set_activ('sigmoid') # changing specific activ func
mln4.set_output_activ('linear')

for i in range(400):
    MSE = 0.0
    MSE += mln4.bp_regres([2,1,0],y4[0])
    MSE += mln4.bp_regres([-1,0,1],y4[1])
    MSE += mln4.bp_regres([0,0,0],y4[2])
    MSE += mln4.bp_regres([1,1,0],y4[3])
    MSE += mln4.bp_regres([-1,-1,-1],y4[4])
    MSE += mln4.bp_regres([-1,1,-1],y4[5])
    MSE = MSE / 6
    if(i%20 == 0):
        print ('MSE', i, '=',MSE)
mln4.printWeights()

y4=[]
y4.append(mln4.run([2,1,0])[0])
y4.append(mln4.run([-1,0,1])[0])
y4.append(mln4.run([0,0,0])[0])
y4.append(mln4.run([1,1,0])[0])
y4.append(mln4.run([-1,-1,-1])[0])
y4.append(mln4.run([-1,1,-1])[0])
display ('predicted values:',y4)
print ()


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [ 0.  1. -2.  1.] relu
Layer 1 Neuron 1 : [ 1. -2.  0.  1.] relu
Layer 2 Neuron 0 : [1. 0. 1.] sigmoid
Layer 3 Neuron 0 : [-1.  2.] linear



'original values:'

[1.0474258731775667,
 1.2689414213699952,
 1.1192029220221178,
 1.0474258731775667,
 1.0474258731775667,
 1.0066928509242847]


MSE 0 = 0.6764640166091156
MSE 20 = 0.033936577782530265
MSE 40 = 0.03297245249818461
MSE 60 = 0.03281070473333864
MSE 80 = 0.03266266646051252
MSE 100 = 0.03252132755456654
MSE 120 = 0.03238592934112636
MSE 140 = 0.03225592730817455
MSE 160 = 0.03213081605422936
MSE 180 = 0.032010120350574094
MSE 200 = 0.03189339173639965
MSE 220 = 0.031780205587341695
MSE 240 = 0.03167015846045972
MSE 260 = 0.03156286567641128
MSE 280 = 0.031457959108908994
MSE 300 = 0.031355085155580026
MSE 320 = 0.03125390286767832
MSE 340 = 0.03115408221896301
MSE 360 = 0.03105530249652462
MSE 380 = 0.03095725079849281

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [-0.63835869  0.59372006  0.2499894   0.48905591] relu
Layer 1 Neuron 1 : [-0.1238511   0.1393931   0.33091861  0.229071  ] relu
Layer 2 Neuron 0 : [ 0.46681038 -0.61170518  0.41603618] sigmoid
Layer 3 Neuron 0 : [-0.17284869  1.19828658] linear



'predicted values:'

[1.097219107474325,
 1.0850854174816522,
 1.0905244337065187,
 1.0917603054944132,
 1.0887383357482223,
 1.0717863892019408]




# Example 7 -- Regres class. עובד באופן עקרוני
# relu בעייתי

In [16]:
reg1=Regres(layers=[3,2,1,1])
reg1.set_weights([[[1,1,0,0],[0,0,0,0]],[[0,0,0]],[[0,0]]])
X=[
[2,1,0],
[-1,0,1],
[0,0,0],
[1,1,0],
[-1,-1,-1],
[-1,1,-1],
]
y=[1.0474258731775667,
 1.2689414213699952,
 1.1192029220221178,
 1.0474258731775667,
 1.0474258731775667,
 1.0066928509242847]
reg1.set_hidden_activ('relu')
reg1.regres_network.network[2][0].set_activ('sigmoid') # changing specific activ func
reg1.fit(X,y, epochs=5, eta=0.1)
reg1.printWeights()
pred=reg1.predict(X)
print (pred)
print()
# display ('weight_history',reg1.weight_history)
display ('MSE',reg1.MSE_history)

Model fitted.
self.weight_history - list of lists of weights propagation
self.weight_history_table - Pandas table of weights propagation
self.MSE_history - list of MSEs propagation

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [9.99993917e-01 9.99950434e-01 1.19553645e-05 2.60048657e-06] relu
Layer 1 Neuron 1 : [0. 0. 0. 0.] relu
Layer 2 Neuron 0 : [0.00850294 0.         0.03500299] sigmoid
Layer 3 Neuron 0 : [0.42542099 0.84771537] linear

[1.0668595952299427, 1.0641482356911771, 1.06414823804216, 1.0659559852988165, 1.0641482356911771, 1.0641482356911771]



'MSE'

[1.8232957676324437,
 0.10904793902185193,
 0.04350360405968031,
 0.03926659710245326,
 0.03860509075063843]

# Example 6 -- https://mmuratarat.github.io/2020-01-09/backpropagation

In [17]:
# generating output values
mln6 = MultiLayerNeuron(layers=[5,3,1])
mln6.set_activ('sigmoid')
mln6.set_output_activ('linear')
mln6.set_weights([[[.19,.33,.4,.51,.54,.1],
                   [.55,.16,.35,.85,.49,.1],
                   [.76,.97,.7,.85,.57,.1]],
                  [[.1,.03,-.17,.1]]
                  ]) # setting initial weights
mln6.printWeights()

# training
for i in range(100):
    MSE = 0.0
    MSE += mln6.bp_regres([.5,.1,1,0,0],[0.1])
    MSE += mln6.bp_regres([.3,.2,0,1,0],[.6])
    MSE += mln6.bp_regres([.7,.9,0,0,1],[.4])
    MSE += mln6.bp_regres([.8,.1,1,0,0],[.1])
    MSE = MSE / 4
    if(i%10 == 0):
        print ('MSE', i, '=',MSE)

mln6.printWeights()


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [0.19 0.33 0.4  0.51 0.54 0.1 ] sigmoid
Layer 1 Neuron 1 : [0.55 0.16 0.35 0.85 0.49 0.1 ] sigmoid
Layer 1 Neuron 2 : [0.76 0.97 0.7  0.85 0.57 0.1 ] sigmoid
Layer 2 Neuron 0 : [ 0.1   0.03 -0.17  0.1 ] linear

MSE 0 = 0.400087605270782
MSE 10 = 0.18417190060692476
MSE 20 = 0.18077017778530885
MSE 30 = 0.18024931127980853
MSE 40 = 0.17978341258857397
MSE 50 = 0.1793093069058859
MSE 60 = 0.17882249169509304
MSE 70 = 0.17832192452872242
MSE 80 = 0.1778069002177498
MSE 90 = 0.17727674108839087

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [0.18271499 0.33792424 0.37105698 0.53292503 0.54692616 0.10090817] sigmoid
Layer 1 Neuron 1 : [0.54479069 0.16504893 0.33058568 0.86459823 0.49452302 0.09970693] sigmoid
Layer 1 Neuron 2 : [0.76307447 0.96504968 0.71560515 0.83584425 0.56591146 0.09736086] sigmoid
Layer 2 Neuron 0 : [ 0.17686676  0.13173312 -0.10165984  0.16890483] linear



# Example 8 -- Regres_linear. עובד טוב

In [18]:
reg2=Regres(layers=[1,2,1])
X=[[0],[1],[2],[3],[4],[5]]
y=[0,1,2,3,4,5]
reg2.printWeights()

reg2.set_hidden_activ('linear')
reg2.fit(X,y,epochs=5, eta=0.001)
reg2.printWeights()
pred=reg2.predict(X)
print ('predictions',pred)
display ('weight_history_table',reg2.weight_history_table)
display ('MSE',reg2.MSE_history)


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [-0.8864883  -0.75942606] linear
Layer 1 Neuron 1 : [-0.62025963 -0.86124906] linear
Layer 2 Neuron 0 : [ 0.73330911  0.19804708 -0.91880648] linear

Model fitted.
self.weight_history - list of lists of weights propagation
self.weight_history_table - Pandas table of weights propagation
self.MSE_history - list of MSEs propagation

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [-0.59875103 -0.66857635] linear
Layer 1 Neuron 1 : [-0.70607799 -0.88340691] linear
Layer 2 Neuron 0 : [ 0.04069452 -0.46332177 -0.69195868] linear

predictions [-0.30986443085689963, -0.00708901611358248, 0.2956863986297348, 0.5984618133730518, 0.9012372281163689, 1.2040126428596865]


'weight_history_table'

Unnamed: 0,1_0_0,1_0_1,1_1_0,1_1_1,2_0_0,2_0_1,2_0_2
0,-0.745639,-0.714683,-0.593759,-0.851819,0.500168,0.006576,-0.851414
1,-0.669945,-0.690893,-0.602875,-0.853816,0.338748,-0.141477,-0.798819
2,-0.628524,-0.677942,-0.629995,-0.861457,0.21619,-0.265237,-0.756002
3,-0.607424,-0.671338,-0.666289,-0.871915,0.119023,-0.371651,-0.720774
4,-0.598751,-0.668576,-0.706078,-0.883407,0.040695,-0.463322,-0.691959


'MSE'

[151.74448071537702,
 93.71898825757285,
 62.94780766049748,
 43.21310175413254,
 29.38236534590513]

# Example 9 -- mrelu check. working

MultiLayerNeuron

In [19]:
mln4 = MultiLayerNeuron(layers=[1,2,1]) # generating MLN with random weights
mln4.set_activ('mrelu', param=0.01)
# mln4.set_activ('relu')
mln4.set_output_activ('linear')
mln4.set_weights([[[-1.1,1.74],[3.35,-6.44]],[[-1.41,2.05,2.2]]]) # setting weights
mln4.printWeights()

y4=[] # generating output values
y4.append(mln4.run([0])[0])
y4.append(mln4.run([1])[0])
y4.append(mln4.run([2])[0])
y4.append(mln4.run([3])[0])
y4.append(mln4.run([4])[0])
y4.append(mln4.run([5])[0])
display ('original values:',y4)
print ()

# training model and predicting values
mln4 = MultiLayerNeuron(layers=[1,2,1]) # generating MLN with random weights
mln4.set_activ('mrelu', param=0.01)
# mln4.set_activ('relu')
mln4.set_output_activ('linear')

eta=0.01
for i in range(4000):
    MSE = 0.0
    MSE += mln4.bp_regres([0],y4[0], eta)
    MSE += mln4.bp_regres([1],y4[1], eta)
    MSE += mln4.bp_regres([2],y4[2], eta)
    MSE += mln4.bp_regres([3],y4[3], eta)
    MSE += mln4.bp_regres([4],y4[4], eta)
    MSE += mln4.bp_regres([5],y4[5], eta)
    MSE = MSE / 6
    if(i%200 == 0):
        print ('MSE', i, '=',MSE)
mln4.printWeights()

y4=[]
y4.append(mln4.run([0])[0])
y4.append(mln4.run([1])[0])
y4.append(mln4.run([2])[0])
y4.append(mln4.run([3])[0])
y4.append(mln4.run([4])[0])
y4.append(mln4.run([5])[0])
display ('predicted values:',y4)
print ()


Layer 0 is the Input Layer
Layer 1 Neuron 0 : [-1.1   1.74] mrelu
Layer 1 Neuron 1 : [ 3.35 -6.44] mrelu
Layer 2 Neuron 0 : [-1.41  2.05  2.2 ] linear



'original values:'

[-0.38541999999999943,
 1.2342550000000005,
 2.7394859999999994,
 9.622496,
 16.505506,
 23.388515999999996]


MSE 0 = 645.4459284363264
MSE 200 = 63.788422571534035
MSE 400 = 63.82003947078695
MSE 600 = 63.807330281430154
MSE 800 = 63.78325466552849
MSE 1000 = 63.73775185157839
MSE 1200 = 63.65194160347127
MSE 1400 = 63.490759553188816
MSE 1600 = 63.190196195812085
MSE 1800 = 62.63713824789733
MSE 2000 = 61.64360281078441
MSE 2200 = 59.93171288435729
MSE 2400 = 57.17636935179345
MSE 2600 = 40.611708594536374
MSE 2800 = 82.09708950781682
MSE 3000 = 32.16232432191669
MSE 3200 = 45.1244869858785
MSE 3400 = 29.577118970265122
MSE 3600 = 28.20156585806213
MSE 3800 = 28.674039636115733

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [-17.04151821  -0.25920931] mrelu
Layer 1 Neuron 1 : [-1.20328799  5.80532124] mrelu
Layer 2 Neuron 0 : [-16.97104966  -2.09428867   8.28972662] linear



'predicted values:'

[-3.824301375565808,
 1.5878555429188728,
 7.000012461403554,
 12.412169379888235,
 17.824326298372917,
 22.79876118185951]




# Example 10 - Regres_mrelu. working

In [20]:
reg2=Regres(layers=[1,2,1])
X=[[-3],[-2],[-1],[0],[1],[2],[3],[4],[5]]
y=[9,4,1,0,1,4,9,16,25]
# y=[0,1,2,3,4,5]
reg2.set_hidden_activ('mrelu', param=0.01)
# reg2.set_weights([[[0,0],[0,0]],[[0,0,0]]])
reg2.fit(X,y,epochs=5000, eta=0.0001)
reg2.printWeights()
pred=reg2.predict(X)
print ('predictions',pred)
# display ('weight_history_table',reg2.weight_history_table.iloc[-1])
# display ('MSE',reg2.MSE_history)

fig=px.line(y=reg2.MSE_history[500:])
fig.show()

Model fitted.
self.weight_history - list of lists of weights propagation
self.weight_history_table - Pandas table of weights propagation
self.MSE_history - list of MSEs propagation

Layer 0 is the Input Layer
Layer 1 Neuron 0 : [ 2.11675703 -1.98503632] mrelu
Layer 1 Neuron 1 : [-1.85992396  0.24118815] mrelu
Layer 2 Neuron 0 : [ 2.84739136  1.61398592 -1.39657678] linear

predictions [7.9983707643243385, 4.996479675893627, 1.9945885874629166, -1.0073025009677945, -1.0215163733073391, 5.005719288010193, 11.032954949327726, 17.060190610645257, 23.08742627196279]


In [21]:
history=reg2.weight_history_table.T
# history
fig=px.parallel_coordinates(history)
fig.show()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns a

# VectorBackProp

In [112]:
class VectorBackProp:
    """Creates a two layer neuron network: the hidden layer and the output layer.
       The output layer may be multiple.
       The activation functions of the layers may be set.

       X and T are numpy column vectors or set of column vectors as numpy matrices.
       If the data is pandas dataframe, it should be turned to numpy:
          X=np.array(DataFrame[[x0,x1,...]])
          T=np.array(DataFrame[[t0,...]])

       Attributes:
          layers: A list of layers. [2,3,1] -- 2 inputs, 3 neurons in hidden layer, 1 neuron in output layer
          hidden_activation: Activation function of the hidden layer. 'linear' (by default), 'sigmoid', 'prelu'.
          output_activation: Activation function of the output layer. 'linear' (by default) for regression, 'softmax' for classification.
          Wh, bh, Wo, bo: Matrices of weights.
          JWh, Jbh, JWo, Jbo: Matrices of weight Jacobian.
          Wh_history, bh_history, Wo_history, bo_history: 3D numpy arrays with histories of weights."""

    def __init__(self, layers, hidden_activation = 'linear', output_activation = 'linear'):
        """Return a new MLP object with the specified parameters.
           layers: A list of layers. [2,3,1] -- 2 inputs, 3 neurons in hidden layer, 1 neuron in output layer
           hidden_activation: Activation function of the hidden layer. 'linear' (by default), 'sigmoid', 'prelu'.
           output_activation: Activation function of the output layer. 'linear' (by default) for regression, 'softmax' for classification."""
        self.layers = layers
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation

    def sigmoid(self, x):
        """Sigmoid activation function."""
        return 1. / (1. + np.exp(-x))

    def sigmoid_deriv(self, x):
        """Sigmoid derivative function."""
        return np.multiply(x,(1 - x))

    def PReLU(self, x):
        """Parametric ReLU activation function.
           Parameter = 0.001"""
        x[x<0] *= 0.001
        return x

    def PReLU_deriv(self, x):
        """Parametric ReLU derivative function:
           Matrix with ones for positives and 0.001s for negatives."""
        y=np.ones(x.shape)
        y[x<0]=0.001
        return y

    def softmax(self, x):
        """Softmax activation function"""
        return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

    def hidden_activations(self, X):
        """Compute the hidden activations H.
           self.hidden_activation may be 'linear' (default), 'sigmoid' or 'prelu'."""
        if self.hidden_activation == 'sigmoid':
            return self.sigmoid((X @ self.Wh) + self.bh)
        if self.hidden_activation == 'prelu':
            return self.PReLU((X @ self.Wh) + self.bh)
        if self.hidden_activation == 'linear':
            return (X @ self.Wh) + self.bh
        return (X @ self.Wh) + self.bh # default

    def output_activations(self, H):
        """Compute the output Y.
           self.output_activation may be 'linear' (default) or 'softmax'."""
        if self.output_activation == 'softmax': # softmax for classification
            return self.softmax((H @ self.Wo) + self.bo)
        if self.output_activation == 'linear': #linear for regression
            return (H @ self.Wo) + self.bo
        return (H @ self.Wo) + self.bo # default

    def run(self, X):
        """Runs the input X vectors of input layer, returns the Y vectors of output layer."""
        return self.output_activations(self.hidden_activations(X))

    def nn_predict(self, X):
        """Neural network prediction function that only returns
        1 or 0 depending on the predicted class"""
        return np.around(self.run(X))

    def loss(self, Y, T):
        """Loss function.
           Regression: MSE (output_activation -- 'linear').
           Classification: cross-entropy (output_activation -- 'softmax')."""
        SE=(Y-T)**2 # MSE
        abs_err=np.abs(Y-T) # ABS
        MSE=SE.sum()/Y.shape[0]
        if self.output_activation == 'linear':
            return MSE
        if self.output_activation == 'softmax':
            return - (T * np.log(Y)).sum() # softmax layer with corresponding cross-entropy loss function
        return MSE # default

    def error_output(self, Y, T):
        """Error function at the output"""
        return Y - T # same for softmax and MSE

    def gradient_weight_out(self, H, Eo):
        """Gradients for the weight parameters at the output layer"""
        return  H.T @ Eo

    def gradient_bias_out(self, Eo):
        """Gradients for the bias parameters at the output layer"""
        return  np.sum(Eo, axis=0, keepdims=True)

    def error_hidden(self, H, Eo):
        """Error at the hidden layer.
        H * (1-H) * (E . Wo^T) for sigmoid
        [1 for pos, 0.001 for neg] * (E . Wo^T) for PReLU
        (E . Wo^T) for linear (default)."""
        if self.hidden_activation == 'sigmoid':
            return np.multiply(self.sigmoid_deriv(H), (Eo @ self.Wo.T))
        if self.hidden_activation == 'prelu':
            return np.multiply(self.PReLU_deriv(H), (Eo @ self.Wo.T))
        if self.hidden_activation == 'linear':
            return (Eo @ self.Wo.T)
        return (Eo @ self.Wo.T) # default

    def gradient_weight_hidden(self, X, Eh):
        """Gradient for the weight parameters at the hidden layer"""
        return X.T @ Eh

    def gradient_bias_hidden(self, Eh):
        """Gradient for the bias parameters at the output layer"""
        return  np.sum(Eh, axis=0, keepdims=True)

    def generate_weights(self, init_var=0.1):
        # Initialize hidden layer parameters
        bh = np.random.randn(1, self.layers[1]) * init_var
        Wh = np.random.randn(self.layers[0], self.layers[1]) * init_var
        # Initialize output layer parameters
        bo = np.random.randn(1, self.layers[2]) * init_var
        Wo = np.random.randn(self.layers[1], self.layers[2]) * init_var
        return Wh, bh, Wo, bo

    def get_gradients(self, X, T):
        """Update the network parameters over 1 iteration."""
        # Compute the output of the network
        # Compute the activations of the layers
        H = self.hidden_activations(X)
        Y = self.output_activations(H)
        # Compute the gradients of the output layer
        Eo = self.error_output(Y, T)
        self.JWo = self.gradient_weight_out(H, Eo)
        self.Jbo = self.gradient_bias_out(Eo)
        # Compute the gradients of the hidden layer
        Eh = self.error_hidden(H, Eo)
        self.JWh = self.gradient_weight_hidden(X, Eh)
        self.Jbh = self.gradient_bias_hidden(Eh)

    def update_momentum(self, X, T, Ms, lr_decay, momentum_term):
        """Update the momentum term."""
        # list_of_weights = [Wh, bh, Wo, bo]
        self.get_gradients(X, T)
        Js = [self.JWh, self.Jbh, self.JWo, self.Jbo]
        return [momentum_term * M - lr_decay * J
                for M,J in zip(Ms, Js)]

    def update_weights(self, Ms):
        """Update the weights."""
        Ws = [self.Wh, self.bh, self.Wo, self.bo]
        # Ms = [MWh, Mbh, MWo, Mbo]
        return [P + M for P,M in zip(Ws, Ms)]

    def fit(self, X, T, epochs, X_valid=[], T_valid=[], learning_rate = 0.01, learning_rate_decay = 0, momentum_term = 0.9, init_var=0.1, repeat=False):
        """Run backpropagation:
              1. Initilizes weights matrices (if repeat is False)
              2. Creates list of losses and calculates initial loss by rinning self.loss() for train data and validation data (if present)
              3. Creates lists of weight matrices and puts initial matrices
              4. Starts epoch iterations for weights and loss update
                a. Calculates learning rate decay:
                   lr_decay = learning_rate / (1 + learning_rate_decay * epoch)
                   learning_rate_decay = 0 in case of constant learning rate
                b. Runs update_momentum() function, which calls get_gradients() function.
                   get_gradients() calculates H, Y, Eo, Eh and returns weights' Jacobians: self.JWh, self.Jbh, self.JWo, self.Jbo
                   update_momentum() returns list of momentums and replaces the previous ones
                c. Runs update_weights() which returns new weight matrices and replaces the previous ones
                d. Calculates loss and addes to the list of losses for train data and validation data (if present)
                e. New weight matrices are added to their lists
              5. Lists of weight matrices are turned to .self numpy arrays for better slicing option

           X and T are numpy column vectors or set of column vectors as numpy matrices.
           If the data is pandas dataframe, it should be turned to numpy:
              X=np.array(DataFrame[[x0,x1,...]])
              T=np.array(DataFrame[[t0,...]])
           epochs: int number of epochs
           learning_rate: learning rate, 0.01 by default
           learning_rate_decay: learning rate decay, 0 by default, integer or float
           momentum_term: momentum term, 0.9 by default, 0 for simple gradiend descent
           init_var: initial variance of generated weights, multiplies np.random.randn(), 0.1 by default
           repeat: False -- new weights are generated,
                   True -- old weights are used from previous fit() or import_weights()

           The results are:
           self.loss_list -- list of floats
           self.valid_loss_list -- list of floats (if validation data is present)
           self.Wh_history -- 3D numpy array of weights [epochs, starting neuron, target neuron]
           self.bh_history
           self.Wo_history
           self.bo_history

           self.JWh_history -- 3D numpy array of weights' Jacobians
           self.Jbh_history
           self.JWo_history
           self.Jbo_history
           """
        # Run backpropagation
        # Initialize weights and biases
        if repeat:
            self.Wh, self.bh, self.Wo, self.bo = self.Wh, self.bh, self.Wo, self.bo
        else:
            self.Wh, self.bh, self.Wo, self.bo = self.generate_weights(init_var)
        # Moments Ms = [MWh, Mbh, MWo, Mbo]
        Ms = [np.zeros_like(M) for M in [self.Wh, self.bh, self.Wo, self.bo]]
        # Start the gradient descent updates
        # list of loss over the iterations
        self.loss_list = [self.loss(self.run(X), T)]
        if len(X_valid)>0: # list of loss of validation data over the iterations
            self.valid_loss_list = [self.loss(self.run(X_valid), T_valid)]
        else:
            self.valid_loss_list = []
        Wh_hist, bh_hist, Wo_hist, bo_hist = [self.Wh], [self.bh], [self.Wo], [self.bo]
        JWh_hist, Jbh_hist, JWo_hist, Jbo_hist = [],[],[],[]
        for i in range(epochs):
            # learning rate decay
            lr_decay = learning_rate / (1 + learning_rate_decay * 1.0 * i)
            # Update the moments and the parameters
            Ms = self.update_momentum(X, T, Ms, lr_decay, momentum_term)
            self.Wh, self.bh, self.Wo, self.bo = self.update_weights(Ms)
            self.loss_list.append(self.loss(self.run(X), T))
            if len(X_valid)>0: # list of loss of validation data over the iterations
                self.valid_loss_list.append(self.loss(self.run(X_valid), T_valid))
            Wh_hist.append(self.Wh)
            bh_hist.append(self.bh)
            Wo_hist.append(self.Wo)
            bo_hist.append(self.bo)

            JWh_hist.append(self.JWh)
            Jbh_hist.append(self.Jbh)
            JWo_hist.append(self.JWo)
            Jbo_hist.append(self.Jbo)

        self.Wh_history=np.array(Wh_hist)
        self.bh_history=np.array(bh_hist)
        self.Wo_history=np.array(Wo_hist)
        self.bo_history=np.array(bo_hist)

        self.JWh_history=np.array(JWh_hist)
        self.Jbh_history=np.array(Jbh_hist)
        self.JWo_history=np.array(JWo_hist)
        self.Jbo_history=np.array(Jbo_hist)

    def export_weights(self):
        """
        return [self.Wh.tolist(), self.bh.tolist(), self.Wo.tolist(), self.bo.tolist()]
        Arranges weights without word "array", in a way that makes possible copy/paste and import as self.import_weights().
        """
        return [self.Wh.tolist(), self.bh.tolist(), self.Wo.tolist(), self.bo.tolist()]

    def export_weights_as_numpy(self):
        """
        return [self.Wh, self.bh, self.Wo, self.bo]"""
        return [self.Wh, self.bh, self.Wo, self.bo]

    def print_weights(self):
        """
        Prints the layers and weights.
        """
        print ('Layers (input, hidden, output): ', self.layers, self.hidden_activation, self.output_activation)
        print ('Hidden layer weights: ', self.Wh.tolist())
        print ('Hidden layer biases: ', self.bh.tolist())
        print ('Outlet layer weights: ', self.Wo.tolist())
        print ('Outlet layer biases: ', self.bo.tolist())

    def import_weights(self, weights):
        """
        Puts values to Wh, bh, Wo and bo from the list of lists as it is from self.export_weights().
        """
        self.Wh, self.bh, self.Wo, self.bo = np.array(weights[0]), np.array(weights[1]), np.array(weights[2]), np.array(weights[3])

######################## grad_descent #############################
    def fit_graddescent(self, X, T, epochs, eps, init_var=1, repeat=False):
        if repeat:
            self.Wh, self.bh, self.Wo, self.bo = self.Wh, self.bh, self.Wo, self.bo
        else:
            self.Wh, self.bh, self.Wo, self.bo = self.generate_weights(init_var)
        self.Wh_history, self.bh_history, self.Wo_history, self.bo_history = [self.Wh], [self.bh], [self.Wo], [self.bo]
        self.JWh_history, self.Jbh_history, self.JWo_history, self.Jbo_history = [], [], [], []
        self.loss_list = [self.loss(self.run(X), T)]
        for i in range(epochs):
            self.get_gradients(X, T)

            self.Wh = self.Wh.copy() - eps*self.JWh.copy()
            self.bh = self.bh.copy() - eps*self.Jbh.copy()
            self.Wo = self.Wo.copy() - eps*self.JWo.copy()
            self.bo = self.bo.copy() - eps*self.Jbo.copy()

            self.Wh_history.append(self.Wh)
            self.bh_history.append(self.bh)
            self.Wo_history.append(self.Wo)
            self.bo_history.append(self.bo)
            self.JWh_history.append(self.JWh)
            self.Jbh_history.append(self.Jbh)
            self.JWo_history.append(self.JWo)
            self.Jbo_history.append(self.Jbo)
            self.loss_list.append(self.loss(self.run(X), T))

        self.Wh_history_numpy=np.array(self.Wh_history)
        self.bh_history_numpy=np.array(self.bh_history)
        self.Wo_history_numpy=np.array(self.Wo_history)
        self.bo_history_numpy=np.array(self.bo_history)

        self.JWh_history_numpy=np.array(self.JWh_history)
        self.Jbh_history_numpy=np.array(self.Jbh_history)
        self.JWo_history_numpy=np.array(self.JWo_history)
        self.Jbo_history_numpy=np.array(self.Jbo_history)
###################################################

# Example 11 -- grad_descent 2D

In [23]:
data=[
[0.32,	0.99,	0.77,	0.69,	0.04,	0.37,	0.25,	0.44,	0.42,	0.64,	0.8,],
[0.4,	0.23,	0.22,	0.57,	0.78,	0.69,	0.2,	0.45,	0.17,	0.98,	0.96,],
[4.952768,	5.924299,	5.412533,	5.214509,	4.844064,	4.912653,	4.975625,	4.995184,	5.040088,	5.066144,	5.32,],
]
data=np.array(data).T
X=data[:,[0,1]]
T=data[:,[2]]

## pandas check

In [24]:
data_pand=pd.DataFrame(data)
data_pand

Unnamed: 0,0,1,2
0,0.32,0.4,4.952768
1,0.99,0.23,5.924299
2,0.77,0.22,5.412533
3,0.69,0.57,5.214509
4,0.04,0.78,4.844064
5,0.37,0.69,4.912653
6,0.25,0.2,4.975625
7,0.44,0.45,4.995184
8,0.42,0.17,5.040088
9,0.64,0.98,5.066144


In [25]:
X_pand=np.array(data_pand[[0,1]])
T_pand=np.array(data_pand[[2]])

In [26]:
bp1=VectorBackProp(layers=[2,2,1], hidden_activation = 'prelu')

In [27]:
bp1.fit_graddescent(X_pand, T_pand, epochs=1000, init_var=0.1, eps=0.01, repeat=False)
# 0.03 for prelu

In [28]:
fig1=go.Figure()
fig1.add_trace(go.Scatter(y=bp1.loss_list[20:], name='sigmoid'))
print('Initial loss =', bp1.loss_list[0])
print('Final loss =', bp1.loss_list[-1])

# fig1.update_yaxes(range=[0, 1])
fig1.show()

Initial loss = 28.22482968098826
Final loss = 0.087620669674238


## ad kan

In [29]:
bp1=VectorBackProp(layers=[2,2,1], hidden_activation = 'prelu')

In [30]:
bp1.fit_graddescent(X, T, epochs=1000, init_var=0.1, eps=0.01, repeat=False)
# 0.03 for prelu

In [31]:
# a=bp1.loss_list[:-1]
# b=[0]+a
error=[(b-a)/a*100 for a,b in zip(bp1.loss_list,[0]+bp1.loss_list[:-1])]

In [32]:
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(y=bp1.loss_list[20:], name='loss',),secondary_y=False)
fig.add_trace(go.Scatter(y=error, name='errors', ),secondary_y=True)

fig.update_xaxes(title_text="data")

# Set y-axes titles
fig.update_yaxes(title_text="loss", secondary_y=False)
fig.update_yaxes(title_text="% error", secondary_y=True, showgrid=False, range=[-0.01, 0.01], tick0=-0.01, dtick=.0025)

fig.show()

In [33]:
fig1=go.Figure()
fig1.add_trace(go.Scatter(y=bp1.loss_list[20:], name='sigmoid'))
print('Initial loss =', bp1.loss_list[0])
print('Final loss =', bp1.loss_list[-1])

# fig1.update_yaxes(range=[0, 1])
fig1.show()

Initial loss = 28.098416427034355
Final loss = 0.08762207745350058


In [34]:
fig=go.Figure(data=(go.Scatter(y=bp1.Wh_history_numpy[:,1,1])))
fig.show()

In [35]:
# bp1.JWh_history_numpy[:,0,0]

In [36]:
fig=go.Figure(data=(go.Scatter(y=bp1.JWh_history_numpy[:,0,0])))
fig.show()

In [37]:
predics=bp1.run(X)
predics[:,0]

array([5.1507152 , 5.1507126 , 5.15071315, 5.15071498, 5.15071772,
       5.15071641, 5.15071447, 5.15071511, 5.15071387, 5.15071701,
       5.15071649])

# np.meshgrid

In [38]:
x_mesh, y_mesh = np.linspace(-1, 2, 31), np.linspace(-1, 2, 31)
# x_mesh, y_mesh = np.arange(-1, 2, 0.1), np.arange(-1, 2, 0.1)
x_mesh, y_mesh = np.meshgrid(x_mesh, y_mesh)
x_mesh = x_mesh.flatten().reshape(31*31,1)
y_mesh = y_mesh.flatten().reshape(31*31,1)
mesh=np.concatenate((x_mesh, y_mesh), axis=1)
x_mesh[:5]

array([[-1. ],
       [-0.9],
       [-0.8],
       [-0.7],
       [-0.6]])

In [39]:
mesh_predics1=bp1.run( mesh )

In [40]:
fig_predic=go.Figure()
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2], mode='markers', name='data'))
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=predics[:,0], mode='markers', name='predics'))
fig_predic.add_trace(go.Scatter3d(x=x_mesh[:,0], y=y_mesh[:,0], z=mesh_predics1[:,0], mode='markers', marker_size=2, name='mesh-predics'))
x=np.linspace(0, 1, 11)
y=np.linspace(0, 1, 11)
x, y = np.meshgrid(x,y)
z=x**3-.2*y+5
fig_predic.add_trace(go.Surface(x=x, y=y, z=z, colorbar_x=0, opacity=0.5,))
fig_predic.update_scenes(camera_projection_type="orthographic")

fig_predic.show()

# Example 12 -- momentum 2D

In [41]:
data=[
[0.32,	0.99,	0.77,	0.69,	0.04,	0.37,	0.25,	0.44,	0.42,	0.64,	0.8,],
[0.4,	0.23,	0.22,	0.57,	0.78,	0.69,	0.2,	0.45,	0.17,	0.98,	0.96,],
[4.952768,	5.924299,	5.412533,	5.214509,	4.844064,	4.912653,	4.975625,	4.995184,	5.040088,	5.066144,	5.32,],
]
data=np.array(data).T
X=data[:,[0,1]]
T=data[:,[2]]

In [42]:
bp2=VectorBackProp(layers=[2,2,1], hidden_activation = 'prelu')

In [43]:
bp2.fit(X, T, epochs=1000, learning_rate = 0.01, momentum_term = 0.7, init_var=0.1, repeat=False)
# prelu -- lr=0.01, mom=0.7

In [88]:
fig1=go.Figure()
fig1.add_trace(go.Scatter(y=bp2.loss_list[20:], name='sigmoid'))
print('Initial loss =', bp2.loss_list[0])
print('Final loss =', bp2.loss_list[-1])

# fig1.update_yaxes(range=[0, 1])
fig1.show()

Initial loss = 26.12290114279699
Final loss = 0.08762072931717096


## export weights

In [45]:
bp2.export_weights()

[[[-0.0526105740408863, 0.007986309725454551],
  [0.04043377526750987, 0.02097641410251582]],
 [[-0.09883227753834535, -0.11177218810051237]],
 [[-0.08405307864213879], [-0.16096577571431467]],
 [[5.150690730514402]]]

In [46]:
bp2.export_weights_as_numpy()

[array([[-0.05261057,  0.00798631],
        [ 0.04043378,  0.02097641]]),
 array([[-0.09883228, -0.11177219]]),
 array([[-0.08405308],
        [-0.16096578]]),
 array([[5.15069073]])]

In [47]:
bp2.import_weights([[[-0.07139359382924526, -0.005635384074571046],
  [-0.13928120287417017, -0.0023288530991011093]],
 [[0.018212944845399585, -0.1367965313682959]],
 [[-0.07102051518922774], [0.12666011059549018]],
 [[-3]]])

In [48]:
bp2.export_weights_as_numpy()

[array([[-0.07139359, -0.00563538],
        [-0.1392812 , -0.00232885]]),
 array([[ 0.01821294, -0.13679653]]),
 array([[-0.07102052],
        [ 0.12666011]]),
 array([[-3]])]

In [49]:
bp2.print_weights()

Layers (input, hidden, output):  [2, 2, 1] prelu linear
Hidden layer weights:  [[-0.07139359382924526, -0.005635384074571046], [-0.13928120287417017, -0.0023288530991011093]]
Hidden layer biases:  [[0.018212944845399585, -0.1367965313682959]]
Outlet layer weights:  [[-0.07102051518922774], [0.12666011059549018]]
Outlet layer biases:  [[-3]]


## ad kan

In [50]:
bp2.Wh_history[:,0][:,1]

array([-0.02511775, -0.01817881,  0.00209527, ...,  0.00799458,
        0.00799044,  0.00798631])

In [51]:
fig=go.Figure(data=(go.Scatter(y=bp2.Wh_history[:,1,1])))
fig.show()

In [52]:
predics2=bp2.run(X)
predics2[:,0]

array([-3.00001339, -3.0000121 , -3.00001315, -3.00001014, -3.00001096,
       -3.00001039, -3.00001561, -3.00001238, -3.00001516, -3.00000643,
       -3.00000592])

In [53]:
mesh_predics2=bp2.run( mesh)

In [54]:
fig_predic=go.Figure()
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2], mode='markers', name='data'))
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=predics2[:,0], mode='markers', name='predics'))
fig_predic.add_trace(go.Scatter3d(x=x_mesh[:,0], y=y_mesh[:,0], z=mesh_predics2[:,0], mode='markers', marker_size=2, name='mesh-predics'))
x=np.linspace(0, 1, 11)
y=np.linspace(0, 1, 11)
x, y = np.meshgrid(x,y)
z=x**3-.2*y+5
fig_predic.add_trace(go.Surface(x=x, y=y, z=z, colorbar_x=0, opacity=0.5,))
fig_predic.update_scenes(camera_projection_type="orthographic")

fig_predic.show()

# Example 13 -- scaled grad_descent 2D

In [55]:
from sklearn import preprocessing

In [56]:
scaler_X = preprocessing.StandardScaler().fit(X)
scaler_T = preprocessing.StandardScaler().fit(T)

# scaler_X.mean_, scaler_X.scale_ = 0, 1 # disable scaling
# scaler_T.mean_, scaler_T.scale_ = 0, 1 # disable scaling

scaler_data = [scaler_X.mean_, scaler_X.scale_, scaler_T.mean_, scaler_T.scale_]
print('scaler X -- mean, stdev:  ',scaler_data[0], scaler_data[1])
print('scaler T -- mean, stdev:  ',scaler_data[2], scaler_data[3])

X_sc = scaler_X.transform(X)
T_sc = scaler_T.transform(T)
X_sc[0:3]

scaler X -- mean, stdev:   [0.52090909 0.51363636] [0.26752925 0.28952226]
scaler T -- mean, stdev:   [5.15071518] [0.29600926]


array([[-0.7509799 , -0.39249612],
       [ 1.75341913, -0.97967031],
       [ 0.93107915, -1.01420996]])

In [57]:
bp3=VectorBackProp(layers=[2,2,1], hidden_activation = 'prelu')

In [91]:
bp3.fit_graddescent(X_sc, T_sc, epochs=3000, eps=0.001, init_var=1, repeat=False)
# 0.03 for prelu

In [92]:
fig3=go.Figure()
fig3.add_trace(go.Scatter(y=bp3.loss_list[20:], name='sigmoid'))
print('Initial loss =', bp3.loss_list[0])
print('Final loss =', bp3.loss_list[-1])

# fig3.update_yaxes(range=[0, 1])
fig3.show()

Initial loss = 2.4697892470132583
Final loss = 0.005993100034698901


In [60]:
predics3=scaler_T.inverse_transform(bp3.run(X_sc))
predics3[:,0]

array([5.07323996, 5.21468812, 5.21474342, 5.21487464, 5.15227833,
       5.21499736, 4.90027259, 5.17842845, 4.98341401, 5.21501675,
       5.21496797])

In [61]:
mesh_predics3=scaler_T.inverse_transform(  bp3.run( scaler_X.transform(mesh) )  )

In [62]:
fig_predic=go.Figure()
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2], mode='markers', name='data'))
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=predics3[:,0], mode='markers', name='predics'))
fig_predic.add_trace(go.Scatter3d(x=x_mesh[:,0], y=y_mesh[:,0], z=mesh_predics3[:,0], mode='markers', marker_size=2, name='mesh-predics'))

x=np.linspace(0, 1, 11)
y=np.linspace(0, 1, 11)
x, y = np.meshgrid(x,y)
z=x**3-.2*y+5
fig_predic.add_trace(go.Surface(x=x, y=y, z=z, colorbar_x=0, opacity=0.5,))
fig_predic.update_scenes(camera_projection_type="orthographic")

fig_predic.show()

# Example 14 -- scaled momentum 2D

In [63]:
bp4=VectorBackProp(layers=[2,4,1], hidden_activation = 'prelu')

In [64]:
bp4.fit(X_sc, T_sc, epochs=1000, learning_rate = 0.01, momentum_term = 0.95, learning_rate_decay = 1, init_var=.1, repeat=False)

In [65]:
fig1=go.Figure()
fig1.add_trace(go.Scatter(y=bp4.loss_list[20:], name='sigmoid'))
print('Initial loss =', bp4.loss_list[0])
print('Final loss =', bp4.loss_list[-1])

# fig1.update_yaxes(range=[0, 1])
fig1.show()

Initial loss = 0.9934874194746929
Final loss = 0.027280056947632015


In [66]:
predics4=scaler_T.inverse_transform(bp4.run(X_sc))
predics4[:,0]

array([4.94631906, 5.83904909, 5.51703605, 5.27700007, 4.85235624,
       4.94234345, 4.93468691, 4.97311867, 5.01449436, 5.05603171,
       5.30303037])

In [67]:
mesh_predics4=scaler_T.inverse_transform(  bp4.run( scaler_X.transform(mesh) )  )

In [68]:
fig_predic=go.Figure()
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2], mode='markers', name='data'))
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=predics4[:,0], mode='markers', name='predics'))
fig_predic.add_trace(go.Scatter3d(x=x_mesh[:,0], y=y_mesh[:,0], z=mesh_predics4[:,0], mode='markers', marker_size=2, name='mesh-predics'))
x=np.linspace(0, 1, 11)
y=np.linspace(0, 1, 11)
x, y = np.meshgrid(x,y)
z=x**3-.2*y+5
fig_predic.add_trace(go.Surface(x=x, y=y, z=z, colorbar_x=0, opacity=0.5,))
fig_predic.update_scenes(camera_projection_type="orthographic")

fig_predic.show()

In [69]:
bp4.JWh_history[-1]

array([[ 0.02136063, -0.01626555, -0.08900418,  0.00915825],
       [ 0.00661098, -0.02247385, -0.0786699 , -0.12828081]])

# Example 15 -- grad_descent 1D

In [70]:
def func(x):
    return x**2-1

In [71]:
x=np.linspace(-1,2,11)
data1D=np.array([
               x,
               func(x)
               ]).T
X1D=data1D[:,[0]]
T1D=data1D[:,[1]]

In [72]:
scaler_X1D = preprocessing.StandardScaler().fit(X1D)
scaler_T1D = preprocessing.StandardScaler().fit(T1D)

# scaler_X1D.mean_, scaler_X1D.scale_ = 0, 1 # disable scaling
# scaler_T1D.mean_, scaler_T1D.scale_ = 0, 1 # disable scaling

print('scaler X -- mean, stdev:  ',scaler_X1D.mean_, scaler_X1D.scale_)
print('scaler T -- mean, stdev:  ',scaler_T1D.mean_, scaler_T1D.scale_)

X_sc1D = scaler_X1D.transform(X1D)
T_sc1D = scaler_T1D.transform(T1D)
X_sc1D[0:3]

scaler X -- mean, stdev:   [0.5] [0.9486833]
scaler T -- mean, stdev:   [0.15] [1.23765908]


array([[-1.58113883],
       [-1.26491106],
       [-0.9486833 ]])

In [73]:
bp5=VectorBackProp(layers=[1,2,1], hidden_activation = 'prelu')

In [74]:
# bp5.fit_graddescent(X_sc, T_sc, epochs=3000, eps=0.03, repeat=False)
bp5.fit(X_sc1D, T_sc1D, epochs=500, learning_rate=0.02, momentum_term = 0.7, repeat=False)
# 0.03 for prelu

In [75]:
fig5=go.Figure()
fig5.add_trace(go.Scatter(y=bp5.loss_list[20:], name='sigmoid'))
print('Initial loss =', bp5.loss_list[0])
print('Final loss =', bp5.loss_list[-1])

# fig5.update_yaxes(range=[0, 1])
fig5.show()

Initial loss = 1.0108240303327496
Final loss = 0.41245593419506466


In [76]:
predics5=scaler_T1D.inverse_transform(bp5.run(X_sc1D))
predics5[:,0]
# errors5=(predics5-T[:,0])/T[:,0] #divide by zero

array([-1.35, -1.05, -0.75, -0.45, -0.15,  0.15,  0.45,  0.75,  1.05,
        1.35,  1.65])

In [77]:
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=X1D[:,0], y=T1D[:,0], mode='markers', name='data'),secondary_y=False)
# fig.add_trace(go.Scatter(x=X[:,0], y=errors5, mode='markers', name='errors'),secondary_y=True)

X_mean, X_wigth = X1D[:,0].mean(), X1D[:,0].max()-X1D[:,0].mean()
X_extended=np.linspace(X_mean-2*X_wigth, X_mean+2*X_wigth, 51)
fig.add_trace(go.Scatter(x=X_extended, y=scaler_T1D.inverse_transform(bp5.run(scaler_X1D.transform(X_extended.reshape(51,1))))[:,0], mode='markers', marker_size=2, name='extenden prediction'),secondary_y=False)
fig.add_trace(go.Scatter(x=X1D[:,0], y=predics5[:,0], mode='lines', name='prediction'),secondary_y=False)

fig.update_xaxes(title_text="X")

# Set y-axes titles
fig.update_yaxes(title_text="data, predics", secondary_y=False)
fig.update_yaxes(title_text="% error", range=[-5, 5], secondary_y=True, showgrid=False, tick0=-5, dtick=2)

fig.show()

# X_valid

In [78]:
data=[
[0.32,	0.99,	0.77,	0.69,	0.04,	0.37,	0.25,	0.44,	0.42,	0.64,	0.8,],
[0.4,	0.23,	0.22,	0.57,	0.78,	0.69,	0.2,	0.45,	0.17,	0.98,	0.96,],
[4.952768,	5.924299,	5.412533,	5.214509,	4.844064,	4.912653,	4.975625,	4.995184,	5.040088,	5.066144,	5.32,],
]
data=np.array(data).T
X=data[:,[0,1]]
T=data[:,[2]]

In [79]:
arr1, arr2 = np.array([.3, .8, .3, .8]), np.array([.3, .3, .8, .8])
data_valid=np.array([arr1, arr2, arr1**3-.2*arr2+5]).T
data_valid
X_valid=data_valid[:,[0,1]]
T_valid=data_valid[:,[2]]

In [80]:
scaler_X = preprocessing.StandardScaler().fit(X)
scaler_T = preprocessing.StandardScaler().fit(T)

# scaler_X.mean_, scaler_X.scale_ = 0, 1 # disable scaling
# scaler_T.mean_, scaler_T.scale_ = 0, 1 # disable scaling

scaler_data = [scaler_X.mean_, scaler_X.scale_, scaler_T.mean_, scaler_T.scale_]
print('scaler X -- mean, stdev:  ',scaler_data[0], scaler_data[1])
print('scaler T -- mean, stdev:  ',scaler_data[2], scaler_data[3])

X_sc = scaler_X.transform(X)
T_sc = scaler_T.transform(T)
X_sc[0:3]

scaler X -- mean, stdev:   [0.52090909 0.51363636] [0.26752925 0.28952226]
scaler T -- mean, stdev:   [5.15071518] [0.29600926]


array([[-0.7509799 , -0.39249612],
       [ 1.75341913, -0.97967031],
       [ 0.93107915, -1.01420996]])

In [113]:
bp6=VectorBackProp(layers=[2,4,1], hidden_activation = 'prelu')

In [119]:
bp6.fit(X_sc, T_sc, X_valid=scaler_X.transform(X_valid),  T_valid=scaler_T.transform(T_valid), epochs=1000, learning_rate = 0.01, momentum_term = 0.95, learning_rate_decay = 1, init_var=.1, repeat=False)

In [118]:
fig1=go.Figure()
fig1.add_trace(go.Scatter(y=bp6.loss_list[20:], name='loss'))
fig1.add_trace(go.Scatter(y=bp6.valid_loss_list[20:], name='validation'))
print('Initial loss =', bp6.loss_list[0])
print('Final loss =', bp6.loss_list[-1])

# fig1.update_yaxes(range=[0, 1])
fig1.show()

Initial loss = 0.9807368466460216
Final loss = 0.14773686028907804


In [84]:
predics6=scaler_T.inverse_transform(bp6.run(X_sc))
predics6[:,0]

array([4.95253713, 5.81936547, 5.5268519 , 5.29412878, 4.82968341,
       4.95567281, 4.93732355, 4.99400481, 5.01775782, 5.04420674,
       5.30832067])

In [85]:
mesh_predics6=scaler_T.inverse_transform(bp6.run(scaler_X.transform(mesh) )  )

In [86]:
fig_predic=go.Figure()
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2], mode='markers', name='data'))
fig_predic.add_trace(go.Scatter3d(x=data_valid[:,0], y=data_valid[:,1], z=data_valid[:,2], mode='markers', name='data_valid'))
fig_predic.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=predics6[:,0], mode='markers', name='predics'))
fig_predic.add_trace(go.Scatter3d(x=x_mesh[:,0], y=y_mesh[:,0], z=mesh_predics6[:,0], mode='markers', marker_size=2, name='mesh-predics'))
x=np.linspace(0, 1, 11)
y=np.linspace(0, 1, 11)
x, y = np.meshgrid(x,y)
z=x**3-.2*y+5
fig_predic.add_trace(go.Surface(x=x, y=y, z=z, colorbar_x=0, opacity=0.5,))
fig_predic.update_scenes(camera_projection_type="orthographic")

fig_predic.show()

In [87]:
bp6.JWh_history[-1]

array([[-0.14020859, -0.02293728, -0.11540787, -0.19928647],
       [-0.08409726, -0.08688781, -0.06457948,  0.07841002]])