In [18]:
#import packages for data (pandas/numpy), optimization (gurobipy), gradient boost(xgbm,lgbm), explainability (shap)
import pandas as pd
import numpy as np
from gurobipy import *
import lightgbm as lgb
import xgboost as xgb
import shap


This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [19]:
#import data for LGBM/XGBM
TrainData = pd.read_excel("P_C_BOP_CleanInsureData.xlsx", sheet_name = "Train_Set", index_col=0)
TestData = pd.read_excel("P_C_BOP_CleanInsureData.xlsx", sheet_name = "Test_Set", index_col=0)


In [20]:
TrainData.head(3)

Unnamed: 0_level_0,Prod_ID,State,Agency_Yr,Agency_Life,Employ_Cnt,Max_Age,Min_Age,Prd_Ernd_Prem,3Yr_LossRatio,Prd_Incrd_Loss,Loss_Ratio
AgencyUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,BOP,IN,1998,22,56,87,36,570.36,0.285437,0.0,0.0
2,BOP,IN,1970,50,14,89,32,2842.47,1.215365,0.0,0.0
3,BOP,IN,1970,50,14,89,32,2868.84,0.438894,1381.46,0.48154


In [21]:
#features defined for Train Set
TrainFeatures=TrainData.loc[:, 'Agency_Life':'3Yr_LossRatio']
#predictor defined for Train Set
TrainOutput=TrainData['Loss_Ratio']
#features defined for Test Set
TestFeatures=TestData.loc[:, 'Agency_Life':'3Yr_LossRatio']
#predictor defined for Test Set
TestOutput=TestData['Loss_Ratio']

In [23]:
#define early params
AvgError = 0
TotalError = 0
AvgErrorHold = 10000
TotalErrorHold = 10000
colsampleshold = .03 
learnrateshold = .1
maxdepthshold = 10
alphashold = 10
estimatorshold = 5

#set up lists for hyperparameter ranges
colsamples = [x * 0.005 for x in range(3, 10)] #colsamplebytree.... .005,.01,.015,.02 up to  .1
learnrates = [x * 0.005 for x in range(15, 25)]#learningrate.... .005, .01, .015, .02 up to .50
maxdepths = [x * 1 for x in range(3, 18)] #maxdepth... 3,4,5,6 up to 100
alphas = [x * 1 for x in range(3, 18)] #alpha... 1,2,3,4 up to 100
estimators = [x * 1 for x in range(1, 10)] #n_estimators... 1,2,3,4 up to 100 


#colsample_bytree = .03
#learning_rate = 0.1x * 1 for x in range(3, 101)
#max_depth = 10
#alpha = 10
#n_estimators = 5

#brute-force loop for searching hyperparameter space
for colsample in colsamples:
    for learnrate in learnrates:
        for maxdepth in maxdepths:
            for alpha in alphas:
                for estimator in estimators:
                        #implement model
                        Insur_LossRat_Predict = xgb.XGBRegressor(objective ='reg:squarederror', colsample_bytree = colsample, learning_rate = learnrate, max_depth = maxdepth, alpha = alpha, n_estimators = estimator)
                        #train model
                        Insur_LossRat_Predict.fit(TrainFeatures,TrainOutput)
                        #test model
                        Predictions = Insur_LossRat_Predict.predict(TestFeatures)
                        #determine error on iteration
                        TotalError=0
                        for i in range(len(Predictions)):
                            Error = abs(Predictions[i]-TestOutput.iloc[i])
                            TotalError = TotalError+Error
                            Denominator = len(Predictions)
                            AvgError=TotalError/Denominator
                        #compare to prior best error
                        #save values in parent variable if superior
                        if AvgError < AvgErrorHold:
                            if TotalError < TotalErrorHold:
                                AvgErrorHold = AvgError
                                TotalErrorHold = TotalError
                                colsampleshold = colsample
                                learnrateshold = learnrate
                                maxdepthshold = maxdepth
                                alphashold = alpha 
                                estimatorshold = estimator
#print best results
print ('Average Error: %d' % AvgErrorHold)
print ('Total Error  : %d' % TotalErrorHold)
print ('Col Samples  : %d' % colsampleshold)
print ('Learn Rate   : %d' % learnrateshold)
print ('Max Depth    : %d' % maxdepthshold)
print ('Alpha        : %d' % alphashold)
print ('Estimators   : %d' % estimatorshold)
                                
                
            
    


Average Error: 1
Total Error  : 245
Col Samples  : 0
Learn Rate   : 0
Max Depth    : 14
Alpha        : 3
Estimators   : 1


[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
