In [1]:
# general
import os,sys,inspect

currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)

import numpy as np

# validation
from sklearn.model_selection import cross_validate

# modleing
from sklearn.ensemble import RandomForestClassifier

# bayesian optimization
from skopt import BayesSearchCV

# genetic algorithm optimization
from pygad import GA

# custom
import src.custom_exceptions

class Tuner():

    def __init__(self, model, x, y, params=None):
        self.model = model
        self.x = x
        self.y = y
        self.modelName = type(model).__name__
        self.choose_model_params(self.modelName)
        self.params = params
        global X
        global Y
        global CV
        X = self.x
        Y = self.y
        CV = self.cv


    def choose_model_params(self, modelType):
        if modelType == 'RandomForestClassifier':
            self.space = {
                         'n_estimators':(10, 500),
                         'max_depth':(10, 100),
                         'min_samples_leaf':(1, 100),
                         'min_samples_split':(2, 25),
                         'max_features':['auto', 'sqrt', 'log2'] }
            self.cv = 3
            self.n_iter = 32

    def tune(self, optMethod):
        if optMethod==None:
            optMethod = self.optMethod

        if optMethod == 'bayes':
            opt = self.bayes_opt()
        elif optMethod == 'genetic':
            opt = self.genetic_opt(self.params)
        else:
            opt = None
            raise custom_exceptions.InvalidTuningMethod(optMethod)
        return opt

    def bayes_opt(self):
        opt = BayesSearchCV(self.model, self.space, self.n_iter, self.cv)
        print(self.x)
        print(self.y)
        print(type(self.x))
        print(type(self.y))
        print(self.space)
        opt.fit(self.x, self.y)
        return opt

    def genetic_opt(self, params=None):
        fitnessFunc, geneSpace = self.choose_fitness(self.modelName)
        if params==None:
            cardinality = len(geneSpace)
            params = self.choose_genetic_params(cardinality)
        opt = GA(fitness_func=fitnessFunc,
                 gene_space=geneSpace,
                 **params)
        opt.run()
        return opt

    def choose_genetic_params(self, cardinality):

        if self.modelName == 'RandomForestClassifier':
            params = {
                'sol_per_pop':4,
                'num_genes':cardinality,
                'num_generations':10,
                'num_parents_mating':3,
                'parent_selection_type':"sss",
                'keep_parents':1,
                'crossover_type':"single_point",
                'mutation_type':"random",
                'mutation_percent_genes':20}
        return params


    def choose_fitness(self, modelName):
        if modelName == 'RandomForestClassifier':
            fitnessFunc = random_forest_fitness
            geneSpace = []
        for k in self.space:
            s = self.space[k]
            stype = type(s[0])
            if stype==int:
                geneSpace.append(s)
        return fitnessFunc, geneSpace

def random_forest_fitness(solution, solution_idx):
    # TODO: integrate genetic opt with common bayes optimization api
    rfc = RandomForestClassifier(n_estimators=int(solution[0]),
                                 max_depth=int(solution[1]),
                                 min_samples_leaf=int(solution[2]),
                                 min_samples_split=int(solution[3]),
                                oob_score=True)
    oobScores = []
    for i in range(CV):
        rfc.fit(X, Y)
        oobScores.append(rfc.oob_score_)
    fitness = np.mean(oobScores)
    return fitness

if __name__ == "__main__":
    pass

In [5]:
# data
import pandas as pd
import numpy as np
# modeling
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


In [6]:
data = load_iris()
x = pd.DataFrame(data['data'],columns=data['feature_names'])
y = data['target']
xtrn, xtst, ytrn, ytst = train_test_split(x,y,test_size=0.2)
xtrn = xtrn.to_numpy()


In [9]:
rfc = RandomForestClassifier()
t = Tuner(rfc, xtrn, ytrn)
t.cv = 3
t.n_iter = 4
print(type(t.cv))

<class 'int'>


In [28]:
doesntspace = {'n_estimators':(10, 500),
                 'max_depth':(10, 100),
                 'min_samples_leaf':(1, 100),
                 'min_samples_split':(2, 25),
                 'max_features':['auto', 'sqrt', 'log2'] 
}

In [18]:
cv = 3
n_iter=4

In [26]:
space = {  'n_estimators':(10, 500),
    'max_depth':(10, 100),
    'min_samples_leaf':(1, 100),
    'min_samples_split':(2, 25),
    'max_features':['auto', 'sqrt', 'log2']
    }

In [29]:
opt = BayesSearchCV(
    rfc,doesntspace,
    n_iter=3,
    cv=3
)
opt.fit(xtrn, ytrn)

BayesSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=3,
              search_spaces={'max_depth': (10, 100),
                             'max_features': ['auto', 'sqrt', 'log2'],
                             'min_samples_leaf': (1, 100),
                             'min_samples_split': (2, 25),
                             'n_estimators': (10, 500)})

In [30]:
opt = BayesSearchCV(rfc, space, n_iter, cv)
opt.fit(xtrn, ytrn)

TypeError: 'int' object is not iterable

In [8]:
opt = t.tune('bayes')

[[4.9 3.6 1.4 0.1]
 [6.2 2.9 4.3 1.3]
 [6.8 2.8 4.8 1.4]
 [5.9 3.  5.1 1.8]
 [6.5 3.2 5.1 2. ]
 [6.3 2.3 4.4 1.3]
 [4.9 3.  1.4 0.2]
 [6.3 2.8 5.1 1.5]
 [4.6 3.6 1.  0.2]
 [7.2 3.2 6.  1.8]
 [5.7 2.6 3.5 1. ]
 [5.1 3.8 1.5 0.3]
 [6.3 3.3 6.  2.5]
 [6.7 2.5 5.8 1.8]
 [6.3 2.5 4.9 1.5]
 [7.7 3.8 6.7 2.2]
 [7.2 3.  5.8 1.6]
 [5.4 3.4 1.5 0.4]
 [6.1 2.8 4.  1.3]
 [5.7 3.8 1.7 0.3]
 [5.7 2.5 5.  2. ]
 [5.5 2.4 3.8 1.1]
 [5.4 3.7 1.5 0.2]
 [5.  2.  3.5 1. ]
 [6.5 3.  5.8 2.2]
 [7.3 2.9 6.3 1.8]
 [5.1 2.5 3.  1.1]
 [4.8 3.1 1.6 0.2]
 [5.5 4.2 1.4 0.2]
 [7.7 2.8 6.7 2. ]
 [4.8 3.  1.4 0.1]
 [4.4 2.9 1.4 0.2]
 [6.1 3.  4.9 1.8]
 [5.1 3.7 1.5 0.4]
 [4.4 3.  1.3 0.2]
 [5.8 4.  1.2 0.2]
 [5.8 2.7 5.1 1.9]
 [6.7 3.1 4.4 1.4]
 [6.9 3.1 5.1 2.3]
 [5.7 4.4 1.5 0.4]
 [5.  3.4 1.6 0.4]
 [6.7 3.1 4.7 1.5]
 [7.2 3.6 6.1 2.5]
 [6.5 3.  5.5 1.8]
 [5.  3.  1.6 0.2]
 [6.1 2.8 4.7 1.2]
 [6.8 3.2 5.9 2.3]
 [5.8 2.8 5.1 2.4]
 [6.7 3.  5.  1.7]
 [6.2 2.2 4.5 1.5]
 [4.5 2.3 1.3 0.3]
 [5.6 3.  4.5 1.5]
 [4.9 3.1 1.

TypeError: 'int' object is not iterable