# Episode 57. Genetic Algorithm: Neural Networks
Boston housing prices - 2 hidden layers

## Data: Boston Housing Prices
### Keras Built-in function - https://keras.io/api/datasets/boston_housing/
### CSV file from Kaggle - https://www.kaggle.com/fedesoriano/the-boston-houseprice-data

Original Data - http://lib.stat.cmu.edu/datasets/boston

The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic
prices and the demand for clean air', J. Environ. Economics & Management,
vol.5, 81-102, 1978.

There are 14 attributes in each case of the dataset. They are:

CRIM - per capita crime rate by town

ZN - proportion of residential land zoned for lots over 25,000 sq.ft.

INDUS - proportion of non-retail business acres per town.

CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)

NOX - nitric oxides concentration (parts per 10 million)

RM - average number of rooms per dwelling

AGE - proportion of owner-occupied units built prior to 1940

DIS - weighted distances to five Boston employment centres

RAD - index of accessibility to radial highways

TAX - full-value property-tax rate per USD 10,000

PTRATIO - pupil-teacher ratio by town

B - 1000*(Bk - 0.63)^2 where Bk is the proportion of blacks by town

LSTAT - % lower status of the population

MEDV - Median value of owner-occupied homes in $1000's

## Step 1. Import Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import random
import math
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

## Step 2. Load Data

In [None]:
# Load the Boston house-price data: upload boston.csv into the Colab runtime
# Original Data Source: https://www.kaggle.com/fedesoriano/the-boston-houseprice-data
from google.colab import files
# The return value is kept in `uploaded`; the cells below read the file by name instead.
uploaded = files.upload()

Saving boston.csv to boston (1).csv


In [None]:
# Store data in DataFrame (the MEDV column is split off as the target in Step 3)
df = pd.read_csv("boston.csv")

## Step 3. Set Training Variables

In [None]:
RANDOM_SEED = 27

# Hold out 20% of the rows for testing; RANDOM_SEED makes the split repeatable
X_train, X_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)

# Separate the MEDV target from the feature columns
y_train, y_test = X_train['MEDV'], X_test['MEDV']
X_train, X_test = X_train.drop(columns='MEDV'), X_test.drop(columns='MEDV')

# Standardize both splits using statistics of the training features only
mean, std = X_train.mean(axis=0), X_train.std(axis=0)
X_train, X_test = (X_train - mean) / std, (X_test - mean) / std

# Hand plain NumPy arrays to the network code
X_train, y_train, X_test, y_test = (
    part.to_numpy() for part in (X_train, y_train, X_test, y_test)
)

## Step 4. Setup Neural Network

In [None]:
# Network Parameters
# Search space of the genetic algorithm
max_node = 5            # largest node count a hidden layer may be given
num_hiddenlayers = 2    # number of hidden-layer sizes in one policy

# Fixed input/output widths of the network
nn_in = X_train.shape[1]   # one input per feature column
nn_out = 1

# Default hidden-layer widths
nn_hidden1 = nn_hidden2 = max_node

In [None]:
def network(nn_in, nn_hidden1, nn_hidden2, nn_out):
  """Build and compile a small feed-forward regression network.

  The policies produced by the genetic algorithm in this file hold NumPy
  integers in the range 0..max_node, so this function has to cope with two
  things the plain `Dense(n)` call does not handle well:

  * sizes are cast to built-in `int`, because some Keras versions only accept
    built-in integers for `units`;
  * a hidden size of 0 is read as "leave this hidden layer out". A zero-width
    Dense layer is either rejected by Keras or cuts the inputs off from the
    output, so neither outcome is a useful candidate architecture.

  Args:
    nn_in: Number of input features.
    nn_hidden1: Node count of the first hidden layer (0 omits the layer).
    nn_hidden2: Node count of the second hidden layer (0 omits the layer).
    nn_out: Number of output units.

  Returns:
    A compiled Sequential model: ReLU hidden layers, linear output,
    MSE loss, Adam optimizer and MAE as the reported metric.
  """
  model = Sequential()
  # Declare the input explicitly so the first Dense layer can be any of the
  # hidden layers or, when both are omitted, the output layer itself.
  model.add(tf.keras.Input(shape=(int(nn_in),)))
  for width in (nn_hidden1, nn_hidden2):
    width = int(width)
    if width == 0:
      continue  # layer omitted by the policy
    model.add(Dense(width, activation='relu'))
  model.add(Dense(int(nn_out), activation='linear'))
  # compile model
  model.compile(loss='mse', optimizer='adam', metrics=['mae'])
  return model

## Step 5. Run Single Episode

In [None]:
def run_episode(X_train,y_train,X_test,y_test,nn_in,nn_out, policy):
  """Train one network described by `policy` and score it on the test split.

  Args:
    X_train, y_train: Training features and targets passed to `model.fit`.
    X_test, y_test: Held-out features and targets passed to `model.evaluate`.
    nn_in: Number of input features.
    nn_out: Number of output units.
    policy: Pair `(nn_hidden1, nn_hidden2)` of hidden-layer sizes.

  Returns:
    The second value returned by `model.evaluate`, i.e. the mean absolute
    error on the test split (the model is compiled with `metrics=['mae']`).
    Lower is better.
  """
  nn_hidden1, nn_hidden2 = policy
  # The genetic algorithm builds a fresh model for every candidate; release
  # the Keras global state left by earlier models so memory does not keep
  # growing over the run.
  tf.keras.backend.clear_session()
  # build model
  model = network(nn_in, nn_hidden1, nn_hidden2, nn_out)
  # train model (5% of the training rows are set aside for validation)
  model.fit(X_train, y_train, epochs=100, verbose=0, validation_split=0.05)
  # performance: evaluate returns [loss, metric]; only the metric is used
  _, mae = model.evaluate(X_test, y_test)
  return mae

In [None]:
# Sanity-check three hand-picked architectures, given as [hidden1, hidden2].
# run_episode returns the MAE metric of the trained model, so lower is better.
trial_scores = []
for policy in ([5, 2], [2, 5], [5, 5]):
  trial_scores.append(run_episode(X_train,y_train,X_test,y_test,nn_in,nn_out, policy))
acc1, acc2, acc3 = trial_scores

print("Model 1:", acc1, "Model 2:", acc2, "Model 3:", acc3)

Model 1: 4.1494855880737305 Model 2: 7.256877422332764 Model 3: 4.179955005645752


## Step 6. Policy Functions: Genetic Algorithm

In [None]:
def evaluate_policy(X_train,y_train,X_test,y_test,nn_in,nn_out, policy, n_episodes=1):
  """Score a policy as the mean reciprocal of its `run_episode` results.

  `run_episode` is called `n_episodes` times with the same arguments; each
  result is inverted so that a smaller episode value gives a larger reward,
  and the rewards are averaged.
  """
  reward_sum = sum(
      (1/run_episode(X_train,y_train,X_test,y_test,nn_in,nn_out, policy)
       for _ in range(n_episodes)),
      0.0,
  )
  return reward_sum / n_episodes

In [None]:
def gen_random_policy(max_node,num_hiddenlayers):
  """Draw a random policy: one node count in 0..max_node per hidden layer."""
  # np.random.choice(k) samples from range(k), hence the +1 to include max_node
  n_choices = max_node + 1
  return np.random.choice(n_choices, size=num_hiddenlayers)

In [None]:
def crossover(policy1, policy2,num_hiddenlayers):
  """Return a child policy that mixes the genes of two parents.

  The child starts as a copy of `policy1`; for each of the first
  `num_hiddenlayers` positions one uniform draw decides whether the value is
  replaced by the one from `policy2` (draw above 0.5) or kept.
  Neither parent is modified.
  """
  child = policy1.copy()
  for gene in range(num_hiddenlayers):
    if np.random.uniform() <= 0.5:
      continue  # keep the gene inherited from policy1
    child[gene] = policy2[gene]
  return child

In [None]:
def mutation(policy,num_hiddenlayers,max_node, p=0.05):
  """Return a copy of `policy` with each gene randomly re-drawn with probability `p`.

  For every one of the first `num_hiddenlayers` positions a uniform draw is
  compared with `p`; when it falls below, the position receives a fresh value
  from 0..max_node. The input policy is left untouched.
  """
  mutant = policy.copy()
  layer = 0
  while layer < num_hiddenlayers:
    if np.random.uniform() < p:
      mutant[layer] = np.random.choice(max_node + 1)
    layer += 1
  return mutant

In [None]:
if __name__ == '__main__':
    # Genetic-algorithm search over the two hidden-layer sizes.
    # Each generation: score every policy, keep the best `n_elite` unchanged,
    # and fill the rest of the population with mutated crossover children whose
    # parents are drawn with probability proportional to their score.

    # Seed every random source used by the search. The TensorFlow seed is set
    # too, so that TensorFlow-side randomness during model building/training
    # is not left unseeded while `random` and NumPy are.
    random.seed(1234)
    np.random.seed(1234)
    tf.random.set_seed(1234)

    ## Policy search
    n_policy = 10   # population size
    n_steps = 5     # number of generations
    n_elite = 5     # top-ranked policies copied unchanged into the next generation
    start = time.time()
    policy_pop = [gen_random_policy(max_node,num_hiddenlayers) for _ in range(n_policy)]
    for idx in range(n_steps):
        policy_scores = [evaluate_policy(X_train,y_train,X_test,y_test,nn_in,nn_out, p) for p in policy_pop]
        print('Generation %d : MaxScore=%0.2f & AvgScore=%0.3f' %(idx+1, max(policy_scores), sum(policy_scores)/len(policy_scores)))

        # Rank policies from best to worst score and keep the elite
        policy_ranks = np.argsort(policy_scores)[::-1]
        elite_set = [policy_pop[x] for x in policy_ranks[:n_elite]]

        # Fitness-proportional selection; check the degenerate all-zero case
        # first so the normalisation never divides by zero.
        score_total = np.sum(policy_scores)
        if score_total == 0:
            select_probs = np.full(len(policy_scores), 1/len(policy_scores))
        else:
            select_probs = np.array(policy_scores) / score_total

        child_set = [crossover(
            policy_pop[np.random.choice(range(n_policy), p=select_probs)],
            policy_pop[np.random.choice(range(n_policy), p=select_probs)],num_hiddenlayers)
            for _ in range(n_policy - n_elite)]
        mutated_list = [mutation(p,num_hiddenlayers,max_node) for p in child_set]
        policy_pop = elite_set + mutated_list

    # Score the final population once more (each policy is retrained, so the
    # numbers can differ from those printed for the last generation).
    policy_score = [evaluate_policy(X_train,y_train,X_test,y_test,nn_in,nn_out, p) for p in policy_pop]
    best_policy = policy_pop[np.argmax(policy_score)]

    end = time.time()
    print('Best policy score=%0.2f Time taken(seconds)=%4.4f Average Score=%0.3f'
            %(np.max(policy_score), (end-start), sum(policy_score)/len(policy_score)))
    print("Best Policy is:", best_policy)

Generation 1 : MaxScore=0.23 & AvgScore=0.150
Generation 2 : MaxScore=0.25 & AvgScore=0.152
Generation 3 : MaxScore=0.23 & AvgScore=0.149
Generation 4 : MaxScore=0.26 & AvgScore=0.182
Generation 5 : MaxScore=0.28 & AvgScore=0.189
Best policy score=0.24 Time taken(seconds)=325.5767 Average Score=0.170
