In [559]:
import warnings
warnings.filterwarnings('ignore')

In [560]:
import math
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from random import shuffle, randint
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import lightgbm as lgb

In [561]:
df = pd.read_csv('Breast_cancer_data.csv')
print(df.head())
print(df.shape)

   mean_radius  mean_texture  mean_perimeter  mean_area  mean_smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   diagnosis  
0          0  
1          0  
2          0  
3          0  
4          0  
(569, 6)


In [562]:
class GAHyperOpt:
    def __init__(self, features, target, data, params, model, generations, populationSize, mutationPercentage, elites):
        self.X = df[features]
        self.y = df[target]
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size = 0.3)
        self.model=model
        self.params=params
        self.currPopulation=[]
        self.nextPopulation=[]
        self.generations=generations
        self.accArr=[]
        self.populationSize=populationSize
        self.mutationPercentage=mutationPercentage
        self.elites=elites
    
    def createPopulation(self):
        for childNum in range(self.populationSize):
            myParams={}
            for i in self.params:
                decider=np.random.uniform(0,1)
                paramValue=i[1]+(decider*(i[2]-i[1]))
                if type(i[1])==float:
                    paramValue=round(paramValue, 2)
                elif type(i[1])==int:
                    paramValue=int(paramValue)
                myParams[i[0]]=paramValue
            self.currPopulation.append([myParams,self.fitness(myParams)])
        self.sortCurrPopulation()
#         print("self.currPopulation",self.currPopulation)
        
    def fitness(self,params):
        self.model.set_params(**params)
        self.model.fit(self.X_train, self.y_train)
        self.y_pred=self.model.predict(self.X_test)
        return accuracy_score(self.y_pred, self.y_test)
            
    def mutate(self,offspring):
        probability=100-self.mutationPercentage
        decider=randint(0,100)
        if decider >= probability:
#             print("mutating!")
            sign=np.random.choice([-1,1])
            pc=randint(0,len(self.params)-1)
            paramchoice=self.params[pc][0]
            currValue=offspring[paramchoice]
            decider2=np.random.uniform(0,1)
            paramValue=(sign*decider2*(self.params[pc][2]-self.params[pc][1]))+currValue
            if type(self.params[pc][1])==float:
                paramValue=min(max(round(paramValue, 2),self.params[pc][1]),self.params[pc][2])
            elif type(self.params[pc][1])==int:
                paramValue=min(max(int(paramValue),self.params[pc][1]),self.params[pc][2])
            offspring[paramchoice]=paramValue
        return offspring
                                        
    def sortCurrPopulation(self):
        self.currPopulation=sorted(self.currPopulation, key=lambda x: x[1], reverse=True)
        
    def sortNextPopulation(self):
        self.nextPopulation=sorted(self.nextPopulation, key=lambda x: x[1], reverse=True)
        
    def elitism(self):
        self.sortCurrPopulation()
        for i in range(self.elites):
            self.nextPopulation.append(self.currPopulation[i][:2].copy())
        
    def makeCrossoverProbabilities(self):
        sum=0
        for child in self.currPopulation:
            sum=sum+child[1]
        multiplier=360/sum
        cumulativeAngle=0
        for child in self.currPopulation:
            child.append(cumulativeAngle)
            cumulativeAngle=(child[1]*multiplier)+cumulativeAngle
            child.append(cumulativeAngle)
    
    def crossover(self):
        if(self.currPopulation[0][0]==self.currPopulation[-1][0]):
            print("Generation converged on following hyperparameters :",self.currPopulation[0][0])
        while(len(self.nextPopulation) != self.populationSize):
            decider1=randint(0,360)
            decider2=randint(0,360)
            parent1={}
            parent2={}
            offspring={}
            for child in self.currPopulation:
                if decider1>child[2] and decider1<=child[3]:
                    parent1=child[0].copy()
                if decider2>child[2] and decider2<=child[3]:
                    parent2=child[0].copy()
            if parent1!=parent2 and parent1!={} and parent2!={}:
#                 print("Crossover!")
#                 print("self.currPopulation",self.currPopulation)
#                 print("decider1",decider1)
#                 print("decider2",decider2)
#                 print("parent1",parent1)
#                 print("parent2",parent2)
                for param in self.params:
                    parentChoice=np.random.choice([1,2])
                    if parentChoice==1:
                        offspring[param[0]]=parent1[param[0]]
                    else:
                        offspring[param[0]]=parent2[param[0]]
                mutatedOffspring=self.mutate(offspring)
                newChild=[mutatedOffspring,self.fitness(mutatedOffspring)]
                self.nextPopulation.append(newChild)
        self.sortNextPopulation()
    
    def runGA(self):
        self.createPopulation()
        print("Current Population:",self.currPopulation)
        for i in range(self.generations):
            print("Generation ",i)
            self.makeCrossoverProbabilities()
            self.elitism()
            self.crossover()
            self.currPopulation=self.nextPopulation.copy()
            self.nextPopulation=[]
        print("Current Population:",self.currPopulation)
            
                

In [563]:
features=['mean_radius','mean_texture','mean_perimeter','mean_area','mean_smoothness']
target=['diagnosis']

params=[["num_leaves",2,500],
    ["max_depth",1,50],
    ["learning_rate",0.01,5],
    ["n_estimators",1,1000],
    ["colsample_bytree",0.01,1]]

myGenerations, myPopulationSize, myMutationPercentage, myElites = 50, 5, 3, 1

#               (features, target, data, params, model, generations, populationSize, mutationPercentage, elites)
myObj=GAHyperOpt(features, target, df, params, lgb.LGBMClassifier(), myGenerations, myPopulationSize, myMutationPercentage, myElites)

In [564]:
myObj.runGA()

Current Population: [[{'num_leaves': 184, 'max_depth': 15, 'learning_rate': 0.39, 'n_estimators': 111, 'colsample_bytree': 0.53}, 0.9181286549707602], [{'num_leaves': 338, 'max_depth': 35, 'learning_rate': 1.06, 'n_estimators': 82, 'colsample_bytree': 0.47}, 0.9181286549707602], [{'num_leaves': 436, 'max_depth': 31, 'learning_rate': 4.87, 'n_estimators': 188, 'colsample_bytree': 0.57}, 0.9122807017543859], [{'num_leaves': 409, 'max_depth': 28, 'learning_rate': 2.81, 'n_estimators': 482, 'colsample_bytree': 0.54}, 0.8421052631578947], [{'num_leaves': 388, 'max_depth': 38, 'learning_rate': 2.85, 'n_estimators': 380, 'colsample_bytree': 0.06}, 0.47953216374269003]]
Generation  0
Generation  1
Generation  2
Generation  3
Generation  4
Generation  5
Generation  6
Generation  7
Generation  8
Generation  9
Generation  10
Generation  11
Generation  12
Generation  13
Generation  14
Generation  15
Generation  16
Generation  17


KeyboardInterrupt: 