# Perceptron

## 01.1 Create a function (single neuron) with some inputs and outputs

In [20]:
import numpy as np

# Define the true weights and bias for a single neuron
# (see also http://d2l.ai/chapter_linear-networks/linear-regression-scratch.html)
true_weights = np.array([2, -3.4])
true_bias = 4.2

# Create some inputs using a standard normal distribution
number_examples = 1000
number_features = len(true_weights)
true_inputs = np.random.normal(loc=0.0, scale=1.0, size=(number_examples, number_features))

# Create some noise for all the examples using a normal distribution
true_noise = np.random.normal(loc=0.0, scale=0.01, size=number_examples)
    
# Compute the true outputs using the inputs, and the true weights, bias, and noise
true_outputs = np.matmul(true_inputs, true_weights) + true_bias + true_noise

## 01.2. Learn the parameters of the neuron using gradient descent from scratch

In [30]:
# Define the training parameters
number_epochs = 10
batch_size = 10
learning_rate = 0.03

# Initialize the predicted weights and bias
predicted_weights = np.random.normal(loc=0.0, scale=0.01, size=number_features)
predicted_bias = 0

# Initialize the loss for all the batches
predicted_loss = np.zeros(int(np.ceil(number_examples/batch_size)))

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the batches
    k = 0
    for j in range(0, number_examples, batch_size):
        
        # Derive the end index of the batch
        j2 = min(j+100, number_examples)
    
        # Compute the predicted outputs using the inputs, and the predicted weights and bias
        predicted_outputs = np.matmul(true_inputs[j:j2, :], predicted_weights) + predicted_bias

        # Compute the difference between the predicted outputs and the true outputs
        difference_outputs = predicted_outputs-true_outputs[j:j2]

        # Compute the loss using the mean squared error
        predicted_loss[k] = np.mean(0.5*np.power(difference_outputs, 2))

        # Update the predicted weights and bias using gradient descent, taking the derivative of the loss function
        predicted_weights = predicted_weights-learning_rate*np.mean(true_inputs[j:j2, :]*(difference_outputs)[:, np.newaxis], axis=0)
        predicted_bias = predicted_bias-learning_rate*np.mean(difference_outputs, axis=0)
        
        # Update the index
        k = k+1
        
    # Print the epoch and loss
    print(f"Epoch: {i}; loss: {np.mean(predicted_loss)}")
    
# Print the predicted weights and bias
print("")
print(f"Predicted weights: {predicted_weights}")
print(f"Predicted bias: {predicted_bias}")

Epoch: 0; loss: 2.8037546806144213
Epoch: 1; loss: 0.00927178390146684
Epoch: 2; loss: 8.412635819532869e-05
Epoch: 3; loss: 5.009629749576156e-05
Epoch: 4; loss: 4.993365863470035e-05
Epoch: 5; loss: 4.993087456283108e-05
Epoch: 6; loss: 4.99307166890457e-05
Epoch: 7; loss: 4.993070621734263e-05
Epoch: 8; loss: 4.9930705525756386e-05
Epoch: 9; loss: 4.993070548048438e-05

Predicted weights: [ 1.99955669 -3.40039523]
Predicted bias: 4.200038276450225


## 01.3. Learn the parameters of the neuron using gradient descent in Keras

In [35]:
import tensorflow as tf

# Define the training parameters
number_epochs = 10
batch_size = 10
learning_rate = 0.03

# Initialize the model (as a feedforward NN)
model = tf.keras.Sequential()

# Add an input with the number of features
model.add(tf.keras.Input(shape=number_features))

# Add a densely-connected NN layer without activation and with initialized weights and bias
model.add(tf.keras.layers.Dense(1, activation=None, \
                                kernel_initializer=tf.initializers.RandomNormal(mean=0, stddev=0.01), \
                                bias_initializer="zeros"))

# Configure the model for training with gradient descent optimizer and mean squared error loss
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), loss="mean_squared_error")

# Train the model give the batch size and number of epochs
model.fit(x=true_inputs, y=true_outputs, batch_size=batch_size, epochs=number_epochs, verbose=1)

# Print the predicted weights and bias
print("")
print(f"Predicted weights: {model.get_weights()[0][:, 0]}")
print(f"Predicted bias: {model.get_weights()[1][0]}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Predicted weights: [ 2.000095  -3.3992987]
Predicted bias: 4.200600624084473


## 01.2. Learn the parameters of the neuron using an evolutionary algorithm

In [51]:
# Initialize the training parameters
number_epochs = 10
batch_size = 10
number_individuals = 10
number_parents = 1
mutation_rate = 0.2

# Initialize the predicted weights and bias
predicted_weights = np.random.normal(loc=0.0, scale=0.01, size=(number_features, number_individuals))
predicted_bias = np.zeros((1, number_individuals))

# Initialize the loss for all the batches and individuals
predicted_loss = np.zeros((int(np.ceil(number_examples/batch_size)), number_individuals))

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the batches
    k = 0
    for j in range(0, number_examples, batch_size):
        
        # Derive the end index of the batch
        j2 = min(j+100, number_examples)
    
        # Compute the predicted outputs using the inputs, and the predicted weights and bias, for every individual
        predicted_outputs = np.matmul(true_inputs[j:j2, :], predicted_weights) + predicted_bias

        # Compute the loss using the squared error, for every individual
        predicted_loss[k, :] = np.mean(np.power(predicted_outputs-true_outputs[j:j2, np.newaxis], 2), axis=0)
        
        # Get the indices of the parents, the fittest individuals
        parent_indices = np.argsort(predicted_loss[k, :])[0:number_parents]
        
        # Compute the mutation scale
        mutation_scale = mutation_rate*np.mean(predicted_loss[k, parent_indices])
        
        # Update the predicted weights and bias using evolutionary algorithm, doing crossover and mutation
        predicted_weights = np.mean(predicted_weights[:, parent_indices], axis=1)[:, np.newaxis] \
        + np.random.normal(loc=0.0, scale=mutation_scale, size=(number_features, number_individuals))
        predicted_bias = np.mean(predicted_bias[:, parent_indices]) \
        + np.random.normal(loc=0.0, scale=mutation_scale, size=(1, number_individuals))
        
        # Update the index
        k = k+1
        
    # Print the epoch and loss
    print(f"Epoch: {i}; loss: {np.mean(predicted_loss)}")
    
# Print the predicted weights and bias
print("")
print(f"Predicted weights: {np.mean(predicted_weights, axis=1)}")
print(f"Predicted bias: {np.mean(predicted_bias)}")

Epoch: 0; loss: 10.126777128069596
Epoch: 1; loss: 0.0005799359993380929
Epoch: 2; loss: 0.00020963962076826672
Epoch: 3; loss: 0.00013234465227642445
Epoch: 4; loss: 0.00010788320805971668
Epoch: 5; loss: 0.00010101878563914387
Epoch: 6; loss: 9.998486872936858e-05
Epoch: 7; loss: 0.00010015609962209404
Epoch: 8; loss: 0.00010006813528823452
Epoch: 9; loss: 9.992032914625884e-05

Predicted weights: [ 1.99979901 -3.39991128]
Predicted bias: 4.2000524349572945
