# Symbolic Regression through Genetic Programming 
## Importing relevant packages

In [1]:
import math, random, operator
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from deap import algorithms, base, creator, tools, gp

## Data Prep
### Pulling Data In

In [2]:
data1 = pd.read_csv("data1.csv")
data2 = pd.read_csv("data2.csv")

### Splitting Data

In [3]:
train1, test1 = train_test_split(data1, test_size=0.2, shuffle=True)
train2, test2 = train_test_split(data2, test_size=0.2, shuffle=True)

## Defining Operations

In [4]:
def protectedDiv(left, right):
    try:
        return left / right
    except ZeroDivisionError:
        return 1
def create_pset(data_set):
    n_var = len(data_set.columns) - 1
    bounds = max(data_set['y'])
    pset = gp.PrimitiveSet("MAIN", n_var)
    pset.addPrimitive(operator.add, 2)
    pset.addPrimitive(operator.sub, 2)
    pset.addPrimitive(operator.pow, 2)
    pset.addPrimitive(operator.mul, 2)
    pset.addPrimitive(protectedDiv, 2)
    pset.addPrimitive(operator.neg, 1)
    pset.addPrimitive(math.cos, 1)
    pset.addPrimitive(math.sin, 1)    
    pset.addEphemeralConstant("rand2", lambda: random.randint(-2,2))
    pset.renameArguments(ARG0='x')

pset = create_pset(test1)

## Fitness Function

In [5]:
def evalSymbReg(individual, points):
    # Transform the tree expression in a callable function
    func = toolbox.compile(expr=individual)
    y_set = points['y'].to_numpy()
    x_set = points[points.columns.difference(['y'])].to_numpy()

    # Evaluate the mean squared error between the expression
    # and the real function : x**4 + x**3 + x**2 + x
    sqerrors = ((func(*list(x)) - y)**2 for x,y in zip(x_set, y_set))
    return math.fsum(sqerrors) / len(points),

## Configuring our GP

In [6]:
## Fitness function
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

toolbox.register("evaluate", evalSymbReg, points = train1)

#deap.tools.selDoubleTournament(individuals, k, fitness_size, parsimony_size, fitness_first, fit_attr='fitness')
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))

pop = toolbox.population(n=300)
hof = tools.HallOfFame(1)

stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", numpy.mean)
mstats.register("std", numpy.std)
mstats.register("min", numpy.min)
mstats.register("max", numpy.max)

pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 100, stats=mstats,
                               halloffame=hof, verbose=True)
# print log
return pop, log, hof

AttributeError: 'NoneType' object has no attribute 'ret'