<a href="https://colab.research.google.com/github/vothane/whistling-birds/blob/master/whistling_birds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Replacing back-propagation with PSO (Particle Swarm Optimization)

In [0]:
from __future__ import print_function, division

import copy

import numpy as np
from keras import backend as K

class ParticleSwarmOptimizedNN():
  def __init__(self, population_size, model_builder, inertia_weight=0.8, cognitive_weight=2, social_weight=2, max_velocity=20):
    self.population_size = population_size
    self.model_builder = model_builder
    self.best_individual = None
    # Parameters used to update velocity
    self.cognitive_w = cognitive_weight
    self.inertia_w = inertia_weight
    self.social_w = social_weight
    self.min_v = -max_velocity
    self.max_v = max_velocity

  def _build_model(self, id):
    """ Returns a new particle"""
    model = self.model_builder(n_inputs=self.X.shape[1], n_outputs=self.y.shape[1])
    model.id = id
    model.fitness = 0
    model.highest_fitness = 0
    model.accuracy = 0
    # Set intial best as the current initialization
    model.best_layers = copy.copy(model.model.layers)

    # Set initial velocity to zero
    model.velocity = []
    for layer in model.model.layers:
      velocity = {"W": 0, "w0": 0}
      weights = layer.get_weights()[0]
      biases = layer.get_weights()[1]
      velocity = {"W": np.zeros_like(weights), "w0": np.zeros_like(biases)}
      model.velocity.append(velocity)

    return model

  def _initialize_population(self):
    """ Initialization of the neural networks forming the population"""
    self.population = []
    for i in range(self.population_size):
      model = self._build_model(id=i)
      self.population.append(model)
 
  def _update_weights(self, individual):
    """ Calculate the new velocity and update weights for each layer """
    # Two random parameters used to update the velocity
    r1 = np.random.uniform()
    r2 = np.random.uniform()

    for i, layer in enumerate(individual.model.layers):
      # Layer weights velocity
      weights = layer.get_weights()[0]
      biases = layer.get_weights()[1]
             
      first_term_W = self.inertia_w * individual.velocity[i]["W"]
      second_term_W = self.cognitive_w * r1 * (individual.best_layers[i].get_weights()[0] - weights)
      third_term_W = self.social_w * r2 * (self.best_individual.layers[i].get_weights()[0] - weights)
      new_velocity = first_term_W + second_term_W + third_term_W
      individual.velocity[i]["W"] = np.clip(new_velocity, self.min_v, self.max_v)

      # Bias weight velocity
      first_term_w0 = self.inertia_w * individual.velocity[i]["w0"]
      second_term_w0 = self.cognitive_w * r1 * (individual.best_layers[i].get_weights()[1] - biases)
      third_term_w0 = self.social_w * r2 * (self.best_individual.layers[i].get_weights()[1] - biases)
      new_velocity = first_term_w0 + second_term_w0 + third_term_w0
      individual.velocity[i]["w0"] = np.clip(new_velocity, self.min_v, self.max_v)

      # Update layer weights with velocity
      weights += individual.velocity[i]["W"]
      K.set_value(layer.weights[0],  weights)
      biases += individual.velocity[i]["w0"]
      K.set_value(layer.weights[1],  biases)
        
  def _calculate_fitness(self, individual):
    """ Evaluate the individual on the test set to get fitness scores """
    loss, acc = individual.test_on_batch(self.X, self.y)
    individual.fitness = 1 / (loss + 1e-8)
    individual.accuracy = acc

  def optimize(self, X, y, n_generations):
    """ Will evolve the population for n_generations based on dataset X and labels y"""
    self.X, self.y = X, y
    self._initialize_population()

    # The best individual of the population is initialized as population's first ind.
    self.best_individual = copy.copy(self.population[0])

    for epoch in range(n_generations):
      for individual in self.population:
        # Calculate new velocity and update the NN weights
        self._update_weights(individual)
        # Calculate the fitness of the updated individual
        self._calculate_fitness(individual)

        # If the current fitness is higher than the individual's previous highest
        # => update the individual's best layer setup
        if individual.fitness > individual.highest_fitness:
          individual.best_layers = copy.copy(individual.layers)
          individual.highest_fitness = individual.fitness

        # If the individual's fitness is higher than the highest recorded fitness for the
        # whole population => update the best individual
        if individual.fitness > self.best_individual.fitness:
          self.best_individual = copy.copy(individual)

      print ("[%d Best Individual - ID: %d Fitness: %.5f, Accuracy: %.1f%%]" % 
             (epoch,
              self.best_individual.id,
              self.best_individual.fitness,
              100*float(self.best_individual.accuracy)))
      
    return self.best_individual

In [0]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

import numpy as np
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from __future__ import print_function
import matplotlib.pyplot as plt

def main():
  """ Trains a small dense network on the Iris data set with PSO and plots the result.

  The data is one-hot encoded, standardized and split in half into a training
  and a test part. A swarm of networks is optimized on the training part; the
  best network is then scored on the test part and its predictions are shown
  in a 2D PCA projection.
  """
  # Imported here because only this demo needs it
  from sklearn.decomposition import PCA

  iris = datasets.load_iris()
  X = iris['data']
  y = iris['target']
  names = iris['target_names']

  # One hot encoding
  enc = preprocessing.OneHotEncoder()
  Y = enc.fit_transform(y[:, np.newaxis]).toarray()

  # Scale data to have mean 0 and variance 1
  # which is important for convergence of the neural network
  scaler = preprocessing.StandardScaler()
  X_scaled = scaler.fit_transform(X)
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.5, random_state=2)

  n_features = X.shape[1]
  n_classes = Y.shape[1]

  # Model builder
  def model_builder(n_inputs, n_outputs):
    """ Returns a compiled network with two hidden layers of 10 units each """
    model = Sequential()
    model.add(Dense(10, input_shape=(n_inputs,), activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))

    # Adam optimizer with learning rate of 0.001
    # (compile() needs an optimizer; this demo never calls fit(), the weights
    # are moved by the particle swarm instead)
    optimizer = Adam(lr=0.001)
    model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

  # Print the model summary of an individual in the population
  print ("")
  model_builder(n_inputs=n_features, n_outputs=n_classes).summary()

  population_size = 100
  n_generations = 10

  inertia_weight = 0.8
  cognitive_weight = 0.8
  social_weight = 0.8

  print ("Population Size: %d" % population_size)
  print ("Generations: %d" % n_generations)
  print ("")
  print ("Inertia Weight: %.2f" % inertia_weight)
  print ("Cognitive Weight: %.2f" % cognitive_weight)
  print ("Social Weight: %.2f" % social_weight)
  print ("")

  swarm = ParticleSwarmOptimizedNN(
            population_size=population_size,
            inertia_weight=inertia_weight,
            cognitive_weight=cognitive_weight,
            social_weight=social_weight,
            max_velocity=5,
            model_builder=model_builder)

  best_model = swarm.optimize(X_train, y_train, n_generations=n_generations)

  loss, accuracy = best_model.test_on_batch(X_test, y_test)

  print ("Accuracy: %.1f%%" % float(100*accuracy))

  # Reduce dimension to 2D using PCA and plot the results
  y_pred = np.argmax(best_model.predict(X_test), axis=1)
  X_2d = PCA(n_components=2).fit_transform(X_test)
  # One scatter call per class so that every predicted class gets a legend entry
  for label in range(n_classes):
    is_label = y_pred == label
    plt.scatter(X_2d[is_label, 0], X_2d[is_label, 1], label=str(names[label]))
  plt.legend(title="Predicted class")
  plt.suptitle("Particle Swarm Optimized Neural Network")
  plt.title("Accuracy: %.1f%%" % float(100*accuracy))
  plt.xlabel("Principal Component 1")
  plt.ylabel("Principal Component 2")
  plt.show()


# Run the demo only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
  main()