<center><h1>MLP training using PSO</h1></center>
<center><h2>Objective: Train an MLP by using the Particle Swarm Optimization (PSO) algorithm to find its weights and biases.</h2></center>

In [1]:
# Import modules
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris


# Import PySwarms
import pyswarms as ps

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

## Dataset: Iris Dataset

In [2]:
# Fetch the Iris dataset bundle
data = load_iris()

# Features go into X, class labels into y
X, y = data.data, data.target

## Forward propagation of the Single Hidden Layer Feed Forward Network with:
### Input data dimension = 4;
### Hidden layer dimension = 20;
### Number of classes = 3

In [3]:
# Forward propagation
def forward_prop(params):
    """Forward propagation as objective function

    Unrolls a flat parameter vector into the weights and biases of a
    4-20-3 network (tanh hidden layer, softmax output), pushes the
    module-level dataset ``X`` through it and returns the mean negative
    log-likelihood of the module-level labels ``y``.

    Inputs
    ------
    params: np.ndarray
        Flat vector holding, in order, W1 (4*20 values), b1 (20),
        W2 (20*3) and b2 (3) -- 163 values in total.

    Returns
    -------
    float
        The computed negative log-likelihood loss given the parameters
    """
    # Neural network architecture
    n_inputs = 4
    n_hidden = 20
    n_classes = 3

    # Boundaries of each parameter group inside the flat vector,
    # derived from the layer sizes instead of hard-coded indices
    w1_end = n_inputs * n_hidden               # 80
    b1_end = w1_end + n_hidden                 # 100
    w2_end = b1_end + n_hidden * n_classes     # 160
    b2_end = w2_end + n_classes                # 163

    # Roll-back the weights and biases
    W1 = params[0:w1_end].reshape((n_inputs, n_hidden))
    b1 = params[w1_end:b1_end].reshape((n_hidden,))
    W2 = params[b1_end:w2_end].reshape((n_hidden, n_classes))
    b2 = params[w2_end:b2_end].reshape((n_classes,))

    # Perform forward propagation
    z1 = X.dot(W1) + b1       # Pre-activation in Layer 1
    a1 = np.tanh(z1)          # Activation in Layer 1
    logits = a1.dot(W2) + b2  # Pre-activation in Layer 2 (class scores)

    # Log-softmax computed with the row maximum subtracted first.
    # Swarm positions are unbounded, so raw np.exp(logits) can overflow
    # to inf and turn the loss into NaN; shifting keeps every exponent <= 0
    # without changing the softmax value.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    # Mean negative log-likelihood of the true classes.
    # The sample count is taken from the data rather than assumed to be 150.
    n_samples = X.shape[0]
    correct_logprobs = -log_probs[np.arange(n_samples), y]
    loss = np.sum(correct_logprobs) / n_samples

    return loss


## Objective function: Error from negative log likelihood

In [4]:

def f(x):
    """Evaluate the forward_prop loss for every particle in the swarm.

    Inputs
    ------
    x: numpy.ndarray of shape (n_particles, dimensions)
        The swarm that will perform the search; each row is one
        particle's flat parameter vector.

    Returns
    -------
    numpy.ndarray of shape (n_particles, )
        The computed loss for each particle
    """
    # Iterating a 2-D array yields its rows, i.e. one particle at a time
    per_particle_loss = list(map(forward_prop, x))
    return np.array(per_particle_loss)

## Global best PSO algorithm for finding the optimal weights and biases:

In [5]:
%%time
# Initialize swarm
options = {'c1': 0.5, 'c2': 0.3, 'w':0.9}  #weights for the current velocity, self best position and global best position

# Call instance of PSO
dimensions = (4 * 20) + (20 * 3) + 20 + 3
optimizer = ps.single.GlobalBestPSO(n_particles=100, dimensions=dimensions, options=options)

# Perform optimization
cost, pos = optimizer.optimize(f, iters=1000)

2020-02-23 11:09:39,445 - pyswarms.single.global_best - INFO - Optimize for 1000 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|█████████████████████████████████████████████████████████|1000/1000, best_cost=0.0383
2020-02-23 11:09:58,629 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.03828562888861999, best pos: [ 2.70000414e-01  1.09725913e+00  1.97964842e+00 -1.33152029e-02
 -8.28573961e-02  8.40499757e-01 -1.01932135e-01  2.86622194e-01
  1.32289640e+00 -3.05055239e-01 -1.40236652e-01  2.68665422e-01
  3.14637885e-01  6.56860289e-01 -1.46792360e+00 -5.26579488e-02
  8.82050289e-01 -3.00764704e-02 -1.42874057e-01 -8.25965772e-01
 -7.29540369e-01  3.23015620e-01 -3.53272998e-01 -1.05470920e-01
 -1.25965185e+00  9.12792075e-02  1.16912576e+00  1.88137214e+00
 -9.06006058e-01  8.61098627e-02  3.12868859e-01  9.55791842e-01
  1.56219963e+00  2.59951714e+00  1.53260982e+00  9.04517580e-01
  1.37225624e+00  6.16479477e-01 -1.44546

Wall time: 19.2 s


## Checking the accuracy using the obtained weights and biases from the PSO:

In [6]:
def predict(X, pos):
    """
    Classify samples with the network encoded by a swarm position.

    Inputs
    ------
    X: numpy.ndarray
        Input Iris dataset
    pos: numpy.ndarray
        Position vector found by the swarm. It is unrolled here into
        the weights and biases of the 4-20-3 network.

    Returns
    -------
    numpy.ndarray
        Index of the highest-scoring class for each row of X.
    """
    # Layer sizes of the network
    n_inputs, n_hidden, n_classes = 4, 20, 3

    # Where each parameter group stops inside the flat vector
    w1_stop = n_inputs * n_hidden               # 80
    b1_stop = w1_stop + n_hidden                # 100
    w2_stop = b1_stop + n_hidden * n_classes    # 160
    b2_stop = w2_stop + n_classes               # 163

    # Unroll the flat vector into per-layer weights and biases
    hidden_weights = pos[:w1_stop].reshape((n_inputs, n_hidden))
    hidden_bias = pos[w1_stop:b1_stop].reshape((n_hidden,))
    output_weights = pos[b1_stop:w2_stop].reshape((n_hidden, n_classes))
    output_bias = pos[w2_stop:b2_stop].reshape((n_classes,))

    # Forward pass: tanh hidden layer followed by a linear output layer
    hidden = np.tanh(X.dot(hidden_weights) + hidden_bias)
    scores = hidden.dot(output_weights) + output_bias

    # The predicted class is the one with the largest score;
    # softmax is monotonic, so it is not needed for the argmax
    return np.argmax(scores, axis=1)

## Accuracy (measured on the same data the swarm was optimized on):

In [7]:
(predict(X, pos) == y).mean()

0.9866666666666667

##### Reference: https://pyswarms.readthedocs.io/en/latest/examples/usecases/train_neural_network.html