In [28]:
import random
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris

In [89]:
class GASearhCV:
    """Genetic-algorithm hyper-parameter search scored by cross-validation.

    Each individual is a dict mapping a parameter name to one value picked
    from ``param_grid``. A generation is produced by tournament selection,
    uniform crossover and per-gene mutation; optionally the best individuals
    are carried over unchanged (elitism).

    Parameters
    ----------
    model : estimator
        Object exposing ``set_params(**params)`` that can be passed to
        ``cross_val_score``. It is reconfigured in place for every evaluation.
    param_grid : dict
        Maps each parameter name to the list of values it may take.
    num_generations : int
        Number of generations bred by ``fit``.
    scoring : str
        Forwarded to ``cross_val_score`` as ``scoring``.
    population_size : int
        Number of individuals kept in every generation.
    mutation_rate : float
        Probability, per parameter, of replacing its value by a random one.
    elitism : bool
        When true, the ``elitism_size`` best individuals survive unchanged.
    elitism_size : int
        Number of elites; ignored when ``elitism`` is false.
    tournament_size : int
        Number of individuals drawn for each tournament.
    cv : int
        Forwarded to ``cross_val_score`` as ``cv``.

    Attributes set by ``fit``
    -------------------------
    best_params : dict
        Best individual of the final population.
    best_score : float
        Fitness of ``best_params``.
    """

    def __init__(self, model, param_grid, num_generations=50, scoring='accuracy',
                 population_size=100, mutation_rate=0.05, elitism=False,
                 elitism_size=0, tournament_size=5, cv=5):
        self.model = model
        self.X = None
        self.y = None
        self.population_size = population_size
        self.param_grid = param_grid
        self.scoring = scoring
        self.mutation_rate = mutation_rate
        # The initial population is drawn here, so any RNG seeding must happen
        # before the object is constructed.
        self.population = [self.generate_random_parameters() for _ in range(self.population_size)]
        self.elitism = elitism
        self.tournament_size = tournament_size
        self.cv = cv
        self.elitism_size = elitism_size
        self.best_params = None
        self.best_score = None
        self.num_generations = num_generations

    def generate_random_parameters(self):
        """Return one random individual: a value chosen per parameter of the grid."""
        return {param: random.choice(value_list) for param, value_list in self.param_grid.items()}

    def _evaluate(self, params, fitness_score):
        """Return the fitness of ``params``, computing it only if not cached.

        ``fitness_score`` is keyed by ``str(params)``; identical parameter sets
        recur often in small grids, so caching avoids repeated cross-validation.
        """
        key = str(params)
        if key not in fitness_score:
            fitness_score[key] = self.fitness(params)
        return fitness_score[key]

    def fit(self, X, y):
        """Evolve the population for ``num_generations`` generations.

        Stores ``X`` and ``y`` on the instance, then sets ``best_params`` and
        ``best_score`` from the final population.

        Raises
        ------
        ValueError
            If ``population_size`` is smaller than 1.
        """
        self.X = X
        self.y = y

        if self.population_size < 1:
            raise ValueError("population_size must be at least 1")

        # Re-draw the population if population_size was changed after __init__.
        if len(self.population) != self.population_size:
            self.population = [self.generate_random_parameters() for _ in range(self.population_size)]

        # Elites exist only when elitism is enabled, and can never outnumber the
        # population. The hyper-parameter itself is left untouched.
        if self.elitism:
            n_elite = min(max(self.elitism_size, 0), self.population_size)
        else:
            n_elite = 0

        fitness_score = {}
        for candidate in self.population:
            self._evaluate(candidate, fitness_score)

        def score_of(candidate):
            return fitness_score[str(candidate)]

        for _ in range(self.num_generations):
            ranked = sorted(self.population, key=score_of, reverse=True)
            # Copy elites so later in-place mutation can never alter them.
            next_population = [dict(elite) for elite in ranked[:n_elite]]

            while len(next_population) < self.population_size:
                parent1 = self.population[
                    self.selection(self.population, self.tournament_size, fitness_score)]
                parent2 = self.population[
                    self.selection(self.population, self.tournament_size, fitness_score)]

                # Children are fresh dicts: parents stay intact for later tournaments.
                child1, child2 = {}, {}
                self.crossover(parent1, parent2, child1, child2)

                # With an odd number of free slots the surplus second child is dropped.
                room = self.population_size - len(next_population)
                for child in (child1, child2)[:room]:
                    self.mutation(child)
                    self._evaluate(child, fitness_score)
                    next_population.append(child)

            self.population[:] = next_population

        best = max(self.population, key=score_of)
        self.best_params = dict(best)
        self.best_score = score_of(best)

    def selection(self, population, tournament_size, fitness_score):
        """Tournament selection.

        Draws ``tournament_size`` distinct individuals (clamped to the range
        ``1..len(population)``) and returns the index *in ``population``* of the
        fittest one. ``fitness_score`` must be keyed by ``str(individual)``.

        Raises
        ------
        ValueError
            If ``population`` is empty.
        """
        if not population:
            raise ValueError("cannot select from an empty population")
        k = max(1, min(tournament_size, len(population)))
        # Sample indices rather than individuals so the result indexes `population`.
        contenders = random.sample(range(len(population)), k)
        return max(contenders, key=lambda idx: fitness_score[str(population[idx])])

    def mutation(self, parameters):
        """Mutate ``parameters`` in place.

        Each parameter is, with probability ``mutation_rate``, replaced by a
        random value from its list in ``param_grid``.
        """
        for param, values in self.param_grid.items():
            if random.random() < self.mutation_rate:
                parameters[param] = random.choice(values)

    def crossover(self, parent1, parent2, child1, child2):
        """Uniform crossover, writing the result into ``child1`` and ``child2``.

        For every parameter one child receives parent1's value and the other
        receives parent2's value; which child gets which is a fair coin flip.
        """
        for param, value in parent1.items():
            if random.random() < 0.5:
                child1[param] = value
                child2[param] = parent2[param]
            else:
                child1[param] = parent2[param]
                child2[param] = value

    def fitness(self, params):
        """Return the mean cross-validation score of ``model`` configured with ``params``.

        Note: ``self.model`` is reconfigured in place via ``set_params``.
        """
        self.model.set_params(**params)
        scores = cross_val_score(self.model, self.X, self.y, cv=self.cv, scoring=self.scoring)
        return scores.mean()


# Correctly spelled alias; the original (misspelled) name is kept for existing callers.
GASearchCV = GASearhCV

In [90]:
# Search space for the decision tree: each key is an estimator parameter and
# each list holds the candidate values the genetic search may pick from.
param_grid = dict(
    max_depth=[4, 5, 6],
    criterion=['gini', 'entropy'],
)

In [91]:
# Load the iris dataset and split it into the feature matrix and label vector.
iris = load_iris()
X, y = iris.data, iris.target

In [92]:
# Fix the tree's random_state so the same hyper-parameters always get the same
# cross-validation score; otherwise fitness values are noisy between evaluations.
model = DecisionTreeClassifier(random_state=42)

In [93]:
# Seed the stdlib RNG before construction: GASearhCV draws its initial population
# in __init__ and uses `random` for selection, crossover and mutation.
random.seed(42)
ga = GASearhCV(
    model,
    param_grid=param_grid,
    num_generations=20,
    population_size=30,
    elitism=True,
    elitism_size=5,
)

In [94]:
# Run the genetic search on the iris features/labels; the outcome is stored on
# `ga` (best_params / best_score).
ga.fit(X,y)

In [95]:
# Display the best parameter combination found in the final population.
ga.best_params

{'max_depth': 6, 'criterion': 'gini'}