# Grammatical Evolution using GRAPE

Vítor Amorim Fróis - 12543440

### Grape 
Is an open source Github project. For using it here, I cloned the repo and turned into a package: https://github.com/vitorfrois/grape/tree/packaging. You can install it on a virtual env using 

```
pip install --upgrade https://github.com/vitorfrois/grape/tarball/packaging/
```

### Imports

In [9]:
from grape import algorithms, grape
from grape.functions import *

import textwrap
import random

import pandas as pd
import numpy as np
from deap import creator, base, tools
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import warnings
warnings.filterwarnings("ignore")

np.random.seed(0)

### Dataset

In [10]:
dataset = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target)

### Fitness Evaluation

In [61]:
def fitness_eval(individual, points):
    #points = [X, Y]
    x = points[0]
    y = points[1]
    
    if individual.invalid == True:
        return np.nan,

    try:
        pred = eval(individual.phenotype)
    except (FloatingPointError, ZeroDivisionError, OverflowError,
            MemoryError, ValueError):
        return np.nan,
    except Exception as err:
            # Other errors should not usually happen (unless we have
            # an unprotected operator) so user would prefer to see them.
            print("evaluation error", err)
            raise
    assert np.isrealobj(pred)
    
    try:
        # fitness = np.mean(np.square(y - pred))
        fitness = r2_score(y, pred)
    except (FloatingPointError, ZeroDivisionError, OverflowError,
            MemoryError, ValueError):
        fitness = np.nan
    except Exception as err:
            # Other errors should not usually happen (unless we have
            # an unprotected operator) so user would prefer to see them.
            print("fitness error", err)
            raise
        
    if fitness == float("inf"):
        return np.nan,
    
    return fitness,


### BNF Grammar

In [62]:
BNF_GRAMMAR = grape.Grammar("grammar.bnf")

### Config

In [150]:
toolbox = base.Toolbox()

# define a single objective, minimising fitness strategy:
creator.create("FitnessMin", base.Fitness, weights=(1.0,))

creator.create('Individual', grape.Individual, fitness=creator.FitnessMin)

toolbox.register("populationCreator", grape.sensible_initialisation, creator.Individual)

toolbox.register("evaluate", fitness_eval)

# Tournament selection:
toolbox.register("select", tools.selTournament, tournsize=3)

# Single-point crossover:
toolbox.register("mate", grape.crossover_onepoint)

# Flip-int mutation:
toolbox.register("mutate", grape.mutation_int_flip_per_codon)
    
POPULATION_SIZE = 100
MAX_INIT_TREE_DEPTH = 20
MIN_INIT_TREE_DEPTH = 25

MAX_GENERATIONS = 100
P_CROSSOVER = 0.5
P_MUTATION = 0.01
ELITE_SIZE = 1
HALLOFFAME_SIZE = 4

random_initilisation = False #put True if you use random initialisation

CODON_CONSUMPTION = 'lazy'
GENOME_REPRESENTATION = 'list'
MAX_GENOME_LENGTH = None#'auto'

MAX_TREE_DEPTH = 40 #equivalent to 17 in GP with this grammar
MAX_WRAPS = 0
CODON_SIZE = 255


REPORT_ITEMS = [
    'gen', 'invalid', 'avg', 'std', 'min', 'max', 
    'fitness_test',
    'best_ind_length', 'avg_length', 
    'best_ind_nodes', 'avg_nodes', 
    'best_ind_depth', 'avg_depth', 
    'avg_used_codons', 'best_ind_used_codons', 
    'structural_diversity',
    'selection_time', 'generation_time'
]

### Initialize population

In [151]:
population = toolbox.populationCreator(
    pop_size=POPULATION_SIZE,
    bnf_grammar=BNF_GRAMMAR,
    min_init_depth=MIN_INIT_TREE_DEPTH,
    max_init_depth=MAX_INIT_TREE_DEPTH,
    codon_size=CODON_SIZE,
    codon_consumption=CODON_CONSUMPTION,
    genome_representation=GENOME_REPRESENTATION
)

In [152]:
hof = tools.HallOfFame(HALLOFFAME_SIZE)
    
stats = tools.Statistics(key=lambda ind: ind.fitness.values)
stats.register("avg", np.nanmean)
stats.register("std", np.nanstd)
stats.register("min", np.nanmin)
stats.register("max", np.nanmax)

population, logbook = algorithms.ge_eaSimpleWithElitism(
    population, toolbox, cxpb=P_CROSSOVER, mutpb=P_MUTATION,
    ngen=MAX_GENERATIONS, elite_size=ELITE_SIZE,
    bnf_grammar=BNF_GRAMMAR,
    codon_size=CODON_SIZE,
    max_tree_depth=MAX_TREE_DEPTH,
    max_genome_length=MAX_GENOME_LENGTH,
    points_train=[X_train, y_train],
    points_test=[X_test, y_test],
    codon_consumption=CODON_CONSUMPTION,
    report_items=REPORT_ITEMS,
    genome_representation=GENOME_REPRESENTATION,
    stats=stats, halloffame=hof, verbose=False
)

gen = 0 , Best fitness = (-3.686046943896157,)
gen = 1 , Best fitness = (-3.62663708604651,) , Number of invalids = 0
gen = 2 , Best fitness = (-3.62663708604651,) , Number of invalids = 0
gen = 3 , Best fitness = (-2.7133481751652107,) , Number of invalids = 0
gen = 4 , Best fitness = (-0.7180304462053289,) , Number of invalids = 0
gen = 5 , Best fitness = (-0.04707112632929289,) , Number of invalids = 0
gen = 6 , Best fitness = (-0.04707112632929289,) , Number of invalids = 0
gen = 7 , Best fitness = (-0.04707112632929289,) , Number of invalids = 0
gen = 8 , Best fitness = (-0.04707112632929289,) , Number of invalids = 0
gen = 9 , Best fitness = (-0.04704042631984984,) , Number of invalids = 0
gen = 10 , Best fitness = (-0.04581271054919944,) , Number of invalids = 0
gen = 11 , Best fitness = (-0.04581247348554651,) , Number of invalids = 0
gen = 12 , Best fitness = (-0.04580964343112548,) , Number of invalids = 0
gen = 13 , Best fitness = (-0.015974667252808494,) , Number of invalid

In [153]:
for p in population:
    print(p.phenotype, p.fitness)

add(sub(add(pdiv(mul(add(sub(add(x.T[3],60.66),sub(sub(x.T[1],x.T[5]),x.T[7])),pdiv(sub(mul(mul(mul(mul(mul(x.T[8],sub(sub(x.T[6],36.87),62.01)),48.62),98.73),sub(x.T[7],mul(x.T[2],mul(x.T[6],x.T[4])))),pdiv(x.T[3],30.36)),sub(sub(x.T[4],58.16),mul(x.T[8],x.T[4]))),add(add(mul(pdiv(pdiv(add(x.T[3],x.T[2]),68.67),add(sub(sub(add(add(x.T[8],89.71),x.T[2]),51.03),69.87),64.20)),x.T[6]),sub(sub(x.T[4],58.16),mul(x.T[8],x.T[4]))),add(mul(mul(pdiv(pdiv(add(x.T[3],x.T[2]),68.67),add(sub(sub(add(add(x.T[8],89.71),x.T[2]),91.25),x.T[9]),x.T[3])),21.82),add(x.T[7],85.78)),sub(sub(x.T[6],69.87),54.62))))),31.92),add(sub(pdiv(x.T[5],x.T[5]),x.T[8]),sub(sub(x.T[3],69.87),64.20))),x.T[6]),sub(sub(x.T[4],58.16),mul(x.T[8],x.T[4]))),add(add(mul(pdiv(pdiv(add(x.T[3],x.T[2]),68.67),add(sub(sub(add(add(x.T[8],89.71),x.T[2]),51.06),x.T[7]),x.T[0])),x.T[3]),24.51),27.68)) (-0.5033401261707375,)
add(sub(add(pdiv(mul(add(sub(add(x.T[3],49.36),sub(sub(x.T[5],58.46),x.T[5])),x.T[9]),x.T[3]),21.82),mul(x.T[7],8

### Testing

In [154]:
best = hof.items[0].phenotype
print("Best individual: \n","\n".join(textwrap.wrap(best,80)))
print("\nTraining Fitness: ", hof.items[0].fitness.values[0])
print("Test Fitness: ", fitness_eval(hof.items[0], [X_test, y_test])[0])
print("Depth: ", hof.items[0].depth)
print("Length of the genome: ", len(hof.items[0].genome))
print(f'Used portion of the genome: {hof.items[0].used_codons/len(hof.items[0].genome):.2f}')

Best individual: 
 add(sub(add(pdiv(mul(add(sub(add(x.T[3],60.66),sub(sub(x.T[1],58.76),mul(x.T[8],
x.T[4]))),add(sub(mul(sub(pdiv(add(x.T[3],x.T[2]),68.77),add(sub(mul(add(pdiv(x.
T[8],39.71),x.T[2]),35.06),x.T[6]),x.T[4])),pdiv(x.T[3],30.36)),sub(sub(x.T[4],5
8.16),mul(x.T[8],x.T[4]))),add(add(mul(pdiv(pdiv(add(x.T[3],x.T[0]),68.67),add(s
ub(sub(add(add(x.T[8],89.71),x.T[2]),51.06),x.T[7]),x.T[0])),x.T[3]),24.51),31.6
8))),mul(x.T[2],89.51)),21.62),mul(x.T[7],85.78)),sub(sub(x.T[6],53.87),64.62)),
36.14)

Training Fitness:  0.4076133853026691
Test Fitness:  0.1787296239676761
Depth:  36
Length of the genome:  2098
Used portion of the genome: 0.10
