# Perceptron

## 01.1 Create a function (single neuron) with some inputs and outputs

In [2]:
import numpy as np

# Define the true weights and bias for a single neuron
# (see http://d2l.ai/chapter_linear-networks/linear-regression-scratch.html)
true_weights = np.array([2, -3.4])
true_bias = 4.2

# Create some inputs using a standard normal distribution
number_examples = 1000
number_features = len(true_weights)
true_inputs = np.random.normal(loc=0.0, scale=1.0, size=(number_examples, number_features))

# Create some noise for all the examples using a normal distribution
true_noise = np.random.normal(loc=0.0, scale=0.01, size=number_examples)
    
# Compute the true outputs using the inputs, and the true weights, bias, and noise
true_outputs = np.matmul(true_inputs, true_weights) + true_bias + true_noise

## 01.2. Learn the parameters of the neuron using gradient descent from scratch

In [3]:
# Initialize the training parameters
number_epochs = 10
batch_size = 10
learning_rate = 0.03

# Initialize the predicted weights and bias
predicted_weights = np.random.normal(loc=0.0, scale=0.01, size=number_features)
predicted_bias = 0

# Initialize the loss for all the batches
predicted_loss = np.zeros(int(np.ceil(number_examples/batch_size)))

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the batches
    k = 0
    for j in range(0, number_examples, batch_size):
        
        # Derive the end index of the batch
        j2 = min(j+100, number_examples)
    
        # Compute the predicted outputs using the inputs, and the predicted weights and bias
        predicted_outputs = np.matmul(true_inputs[j:j2, :], predicted_weights) + predicted_bias

        # Compute the difference between the predicted outputs and the true outputs
        difference_outputs = predicted_outputs-true_outputs[j:j2]

        # Compute the loss using the squared error
        predicted_loss[k] = np.mean(0.5*np.power(difference_outputs, 2))

        # Update the predicted weights and bias using gradient descent, taking the derivative of the loss function
        predicted_weights = predicted_weights-learning_rate*np.mean(true_inputs[j:j2, :]*(difference_outputs)[:, np.newaxis], axis=0)
        predicted_bias = predicted_bias-learning_rate*np.mean(difference_outputs, axis=0)
        
        # Update the index
        k = k+1
        
    # Print the epoch and loss
    print(f"Epoch: {i}; loss: {np.mean(predicted_loss)}")

Epoch: 0; loss: 2.8158391659305777
Epoch: 1; loss: 0.008063429559107032
Epoch: 2; loss: 7.47349006122696e-05
Epoch: 3; loss: 5.2165294669915916e-05
Epoch: 4; loss: 5.212232965254876e-05
Epoch: 5; loss: 5.212337552022014e-05
Epoch: 6; loss: 5.2123439776608544e-05
Epoch: 7; loss: 5.2123443155928826e-05
Epoch: 8; loss: 5.212344333184311e-05
Epoch: 9; loss: 5.212344334097856e-05


## 01.3. Learn the parameters of the neuron using Keras

In [8]:
import tensorflow as tf

number_epochs = 10
batch_size = 10
learning_rate = 0.03

model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(np.shape(true_inputs)[1],)))
model.add(tf.keras.layers.Dense(1, activation=None, kernel_initializer=tf.initializers.RandomNormal(mean=0, stddev=0.01)))
opt = keras.optimizers.SGD(learning_rate=learning_rate)



## 01.2. Learn the parameters of the neuron using an evolutionary algorithm

In [25]:
# Initialize the training parameters
number_epochs = 10
batch_size = 10
number_individuals = 10
number_parents = 1
mutation_rate = 0.2

# Initialize the predicted weights and bias
predicted_weights = np.random.normal(loc=0.0, scale=0.01, size=(number_features, number_individuals))
predicted_bias = np.zeros((1, number_individuals))

# Initialize the loss for all the batches and individuals
predicted_loss = np.zeros((int(np.ceil(number_examples/batch_size)), number_individuals))

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the batches
    k = 0
    for j in range(0, number_examples, batch_size):
        
        # Derive the end index of the batch
        j2 = min(j+100, number_examples)
    
        # Compute the predicted outputs using the inputs, and the predicted weights and bias, for every individual
        predicted_outputs = np.matmul(true_inputs[j:j2, :], predicted_weights) + predicted_bias

        # Compute the loss using the squared error, for every individual
        predicted_loss[k, :] = np.mean(np.power(predicted_outputs-true_outputs[j:j2, np.newaxis], 2), axis=0)
        
        # Get the indices of the parents, the fittest individuals
        parent_indices = np.argsort(predicted_loss[k, :])[0:number_parents]
        
        # Compute the mutation scale
        mutation_scale = mutation_rate*np.mean(predicted_loss[k, parent_indices])
        
        # Update the predicted weights and bias using evolutionary algorithm, doing crossover and mutation
        predicted_weights = np.mean(predicted_weights[:, parent_indices], axis=1)[:, np.newaxis] \
        + np.random.normal(loc=0.0, scale=mutation_scale, size=(number_features, number_individuals))
        predicted_bias = np.mean(predicted_bias[:, parent_indices]) \
        + np.random.normal(loc=0.0, scale=mutation_scale, size=(1, number_individuals))
        
        # Update the index
        k = k+1
        
    # Print the epoch and loss
    print(f"Epoch: {i}; loss: {np.mean(predicted_loss)}")

Epoch: 0; loss: 2.744160890040827
Epoch: 1; loss: 0.0005916940774964454
Epoch: 2; loss: 0.00022320956614667802
Epoch: 3; loss: 0.00013476951981175844
Epoch: 4; loss: 0.0001065308802221738
Epoch: 5; loss: 9.845686483952831e-05
Epoch: 6; loss: 9.7595533402721e-05
Epoch: 7; loss: 9.737403067618305e-05
Epoch: 8; loss: 9.75313417395032e-05
Epoch: 9; loss: 9.747489960243033e-05
