# Problem Statement : Predict Customer Credit Risk Flag .

# Imports

In [1]:
#general
import os
import numpy as np
import gc
import sys
import pandas as pd
import time


#preprocessing
from category_encoders import CatBoostEncoder
from sklearn.preprocessing import OneHotEncoder as ohe
from sklearn.model_selection import StratifiedKFold,train_test_split
from sklearn.preprocessing import StandardScaler as Scaler

#feature selection:
from sklearn.feature_selection import mutual_info_classif as mic

#Minority Oversampling
from imblearn.over_sampling import SMOTE


#modelling
#metrics
from sklearn.metrics import roc_auc_score as auc
from catboost import CatBoostClassifier as cb

#hyperparam optimization
import optuna
from optuna import Trial

#thresholding
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import confusion_matrix

import warnings 
warnings.filterwarnings('ignore')

# Loading data

In [2]:
train=pd.read_csv('../input/ghf1-hackathon/Training Data.csv')
test=pd.read_csv('../input/ghf1-hackathon/Test Data.csv')
sample_sub=pd.read_csv('../input/ghf1-hackathon/Sample Prediction Dataset.csv')

# Preprocessing

In [3]:
train.reset_index(inplace=True,drop=True)
test.reset_index(inplace=True,drop=True)

y=train.pop('risk_flag')

train_ids=train.pop('Id')
test_ids=test.pop('id')

**Mapping Categorical Features to relevant Values**

In [4]:
def mapping_features(train,test):
    car_ownership={'yes':1,
                    'no':0}
    
    married={'single':0,
             'married':1}
    
    house_own={'rented':1,
             'norent_noown':0,
             'owned':2}

    
    train['car_ownership'].replace(car_ownership,
                               inplace=True)
    test['car_ownership'].replace(car_ownership,
                               inplace=True)

    train['married'].replace(married,
                         inplace=True)
    test['married'].replace(married,
                         inplace=True)

    
    
    train['house_ownership'].replace(house_own,
                                 inplace=True)
    test['house_ownership'].replace(house_own,
                                 inplace=True)
    
    
    return train,test



train,test=mapping_features(train,
                            test)

**Convert Location Address to Coordinates**

In [5]:
from geopy.geocoders import Nominatim
geocoder= Nominatim(user_agent="myGeocoder")


def get_coordinates(address,geocoder=geocoder):
    
    loc=geocoder.geocode(address)
    
    
    #added sleep because API allows only one call per sec
    time.sleep(1)
    return loc

get_coordinates('Fergusson college')

Location(Fergussson College, Dhondiba Gunaji Bane Path, Model Colony, Pune City, Pune District, Maharashtra, 411004, India, (18.524148500000003, 73.8385914880175, 0.0))

In [6]:
def city_features(row):
    '''Mark capital cities and cities of economic Importance'''
    
    row['city']=row['city'].split('[')[0]
    row['state']=row['state'].lower()
    
    
    # adding features based on location:
    
    of_economic_imp=['Chennai','Kalyan-Dombivli','Hyderabad','Malegaon','Mumbai',
                     'Delhi_city','Bangalore','New_Delhi','Mira-Bhayandar','Navi_Mumbai'
                    'Vasai-Virar','Kolkata','Mangalore','Jabalpur','Thane','Pune',
                    'Ambarnath','Panvel','Secunderabad','Noida','Visakhapatnam',
                    'Jamshedpur','Pimpri-Chinchwad','Ahmedabad','Surat','Jaipur',
                    'Nagpur','Indore','Patna']
    
    capital_cities=['Bhopal','Pondicherry','Chennai','Shimla','Hyderabad','Aizawl',
                    'Mumbai','Kochi','Srinagar','Patna','Delhi_city',
                    'Chandigarh_city','Jammu','Lucknow','Bangalore','New_Delhi',
                   'Imphal','Kolkata','Gangtok','Dehradun','Aizawl','Raipur',
                   'Ranchi','Thiruvananthapuram','Gandhinagar','Shillong','Kohima',
                   'Itanagar','Dispur','Panaji','Jaipur','Agartala']
    
    
    row['capital']=int(str(row['city']) in capital_cities)
    row['econ_imp']=int(str(row['city']) in of_economic_imp)
    
    return row
    

In [7]:
def location_data(train,test,y):
    '''Features from Location Data'''
    
    
    #city features
    train=train.apply(lambda x: city_features(x),axis=1)
    test=test.apply(lambda x: city_features(x),axis=1)
    
    
#     # location
    train['location']=train.city +','+ train.state
    test['location']=test.city + ','+ train.state
    
    train.drop(['state','city'],axis=1,inplace=True)
    test.drop(['state','city'],axis=1,inplace=True)
    
    enc=CatBoostEncoder(cols=['location'])
    
    enc.fit(train,y)
    
    train=enc.transform(train)
    test=enc.transform(test)
   
    return train,test
    
    

In [8]:
%%time
train,test=location_data(train,test,y)

CPU times: user 7min 46s, sys: 1.99 s, total: 7min 48s
Wall time: 7min 48s


**NOTE: The P in  'P(feature)' denote possibly, as I am using crude hueristics to make these features**

In [9]:
def ratio_features(row):
    '''features created out of ratios of given features'''
    
    #income to experience ratio
    if row['experience']!=0:
        row['income_exp_ratio']=row['income']/row['experience']
        row['age_exp_ratio']=row['age']/row['experience']
        
    else:
        row['income_exp_ratio']=row['income']/2
        row['age_exp_ratio']=row['age']/2
    
    #income to current job years ratio
    if row['current_job_years']!=0:
        row['income_joby_ratio']=row['income']/row['current_job_years']
        row['age_curjob_ratio']=row['age']/row['current_job_years']
        
    else:
        row['income_joby_ratio']=row['income']
        row['age_curjob_ratio']=row['age']
        
    return row     
    
def other_work_exp(row):
    '''Probable other work exp based on age and given experience'''
    
    #probable other work exp     
    #considering 25 to be the average age of starting earning:    
          
    if ((row['age']>35 and row['age']<45) and row['experience']<10):
        row['P(oth_work)']=row['age']-(25+row['experience'])
        
    elif ((row['age']>45 and row['age']<60) and row['experience']<15):
        row['P(oth_work)']=row['age']-(25+row['experience'])
    
    elif ((row['age']>65) and row['experience']<20):
        row['P(oth_work)']=row['age']-(25+row['experience'])
        
    elif ((row['age']>30 and row['age']<35)and row['experience']<4):
        row['P(oth_work)']=max(row['age']-(25+row['experience']),2)
        
    else:
        # about 2 years of workexp is req for loans
        row['P(oth_work)']=2        

    return row

def emi_features(row):
    
    '''guess around if a person pays emi'''
    # considering 15 years to be average repayment tenure and 
    # assuming person is paying for the house he owns.
    row['P(pays_house_emi)'] = int((row['house_ownership']==2) and (row['current_house_years']<=15))
    
    #considering people above 50 probably would not buy car with a EMI,
    #also assuming people without family wont buy a car on EMI
    row['P(pays_car_emi)']  = int((row['car_ownership']==1) and \
                                  (row['age']<55) and \
                                  (row['married']==1))
    return row
    
    
def create_features(df1):
    
    '''create some features from existing features'''
    
    
    df=df1.copy(deep=True)  
    
    #ratio features
    df=df.apply(lambda x:ratio_features(x),axis=1)
    
    #age income ratio
    df['income_age_ratio']=df['income']/df['age']

    #documented other exp
    df['other_exp']=df['experience']-df['current_job_years']
    
    #probable other exp based on age
    df=df.apply(lambda x: other_work_exp(x),axis=1)
    
    #probable total exp:
    df['P(total_exp)']=df['experience']+df['P(oth_work)']
    
    # in working age(18-65)
    df['working_age']=df['age'].apply(lambda x : int((x>18) and (x<65)))
    
    #income to probable total exp:
    df['P(income/total_exp)']=df['income'] /df['P(total_exp)']
    
    #pays house rent
    df['pays_rent']=df['house_ownership'].apply(lambda x: int(x==1))
    
    #Possibly pays EMIs
    df=df.apply(lambda x: emi_features(x),axis=1)
    
    return df
    

In [10]:
%%time
X=create_features(train)
X_test=create_features(test)

CPU times: user 26min 20s, sys: 3.85 s, total: 26min 24s
Wall time: 26min 24s


In [11]:
#there are ' ' in the test profession against the '_' in train profession

X_test['profession']=X_test['profession'].apply(lambda x: str(x).replace(' ','_'))

**Checking Feature Importances**

In [12]:
# def feature_selection(n_iters):
def feature_selector(n_iters=10):
    scores=[]
    n=0
    x=X.drop(['profession'],axis=1)
    
    while n<=n_iters:
        mir_data=mic(x,y)
        scores.append(mir_data)
        n+=1
        
    mean_score=np.mean(a=np.array(scores),axis=0)
    
    mutual_info=pd.Series(mean_score,index=x.columns).sort_values(ascending=False)
    
    return mutual_info

In [13]:
feature_selector(1)

income_age_ratio       0.160716
income_exp_ratio       0.160524
P(income/total_exp)    0.159919
income_joby_ratio      0.159785
income                 0.159388
pays_rent              0.040937
house_ownership        0.040604
working_age            0.026973
age_exp_ratio          0.022147
age_curjob_ratio       0.017372
location               0.009215
current_house_years    0.008334
car_ownership          0.005149
current_job_years      0.003848
P(oth_work)            0.003609
P(total_exp)           0.003176
experience             0.003011
other_exp              0.002808
age                    0.002388
married                0.001028
econ_imp               0.000531
capital                0.000402
P(pays_car_emi)        0.000267
P(pays_house_emi)      0.000000
dtype: float64

In [14]:
def job_segments(df):
    '''Cluster the jobs into super segments'''
    df1=df.copy(deep=True)
    
    def get_key(prof,d):
        for i,j in d.items():
            if  prof in j:
                return i
            
    
    
    prof_map={'Engineering':['Mechanical_engineer','Software_Developer',
                             'Technical_writer','Design_Engineer',
                            'Chemical_engineer','Biomedical_Engineer',
                            'Computer_hardware_engineer','Petroleum_Engineer',
                            'Engineer','Analyst','Web_designer','Civil_engineer',
                            'Industrial_Engineer','Financial_Analyst','Design_Engineer'],

                'Design':['Architect','Designer',
                        'Artist',
                        'Graphic_Designer','Fashion_Designer'],

                'Academic':['Librarian','Economist',
                         'Microbiologist','Geologist',
                          'Statistician','Scientist'],

                'Service':['Flight_attendant','Hotel_Manager',
                         'Secretary','Computer_operator',
                        'Technician','Drafter','Chef'],

                'Specialist':['Physician','Air_traffic_controller',
                            'Surveyor','Dentist','Chartered_Accountant',
                           'Aviator','Psychologist','Lawyer','Surgeon',
                            'Technology_specialist'],

                'Public':['Civil_servant','Police_officer',
                        'Politician','Magistrate',
                        'Firefighter','Official',
                       'Army_officer'],
                'Gig':['Comedian','Consultant']}

    
    
    for prof in df1['profession'].unique():
        
        sup_prof=get_key(prof=prof,
                        d=prof_map)
        
        
        df1.loc[df1['profession']==prof,'super_profession']=sup_prof
        
        
    return df1


        
X=job_segments(X)
X_test=job_segments(X_test)

In [15]:
# ratio of Positive and Negative Samples
y.value_counts(1)

0    0.877
1    0.123
Name: risk_flag, dtype: float64

In [16]:
categorical_columns=['profession','super_profession']

# Optuna Hyperparameter optimization

In [17]:
def objective(trial:Trial):
    
    #splitting training data 
    x_train,x_test,y_train,y_test=train_test_split(X,y,
                                                   random_state=7,
                                                   shuffle=True,
                                                   train_size=0.7,
                                                   stratify=y)
    
    #hyperparam_grid
    params={   'verbose'        : 0,
               'loss_function'  :'Logloss',
               'depth'          :trial.suggest_int('depth',4,10),
               'learning_rate'  :trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
               'l2_leaf_reg'    :trial.suggest_loguniform('l2_leaf_reg', 1e-2, 10.0),
               'random_strength':trial.suggest_uniform('random_strength',1e-2,0.3),
               'max_bin'        :trial.suggest_int('max_bin',64,254),
#                'grow_policy'    :trial.suggest_categorical('grow_policy',
#                                                            ['SymmetricTree','Depthwise','Lossguide']),
               'iterations'     :trial.suggest_int('iterations',1000,2000),
#                'max_leaves'     :trial.suggest_int('max_leaves',2,64),
               "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.1, 0.8),
               "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
               "bootstrap_type": 'MVS',
#             trial.suggest_categorical("bootstrap_type",
#                                                            ["Bayesian", "MVS",'Bernoulli']),
               'eval_metric': 'AUC'
                }
    
    
    try:
        model = cb(**params)

        model.fit(x_train,y_train,
                 eval_set=[(x_test,y_test)],
                 verbose=0,
                 cat_features=categorical_columns,
                 early_stopping_rounds=300)

        
        preds=[i[1] for i in model.predict_proba(x_test)]

        acc= auc(y_test,preds)
        
        return acc

    except Exception as e:
        print(e)
        return None

**Optimizing**

In [18]:
def get_best_params(time_out=10000):
    '''time_out: time out in seconds'''
    sampler = optuna.samplers.TPESampler(seed=7)  # Make the sampler behave in a deterministic way.
    study=optuna.create_study(direction='maximize',sampler=sampler)
    study.optimize(objective, n_trials=300, timeout=time_out)
    
    print("Number of finished trials: {}".format(len(study.trials)))
    
    return study.best_trial.params


best_params=get_best_params()

[32m[I 2021-08-15 03:20:56,924][0m A new study created in memory with name: no-name-18e328ac-5287-49bc-9220-e0cfe4f207c1[0m
[32m[I 2021-08-15 03:22:55,842][0m Trial 0 finished with value: 0.8251355893766199 and parameters: {'depth': 4, 'learning_rate': 0.036294229783964016, 'l2_leaf_reg': 0.2066463288698664, 'random_strength': 0.21980490157097296, 'max_bin': 250, 'iterations': 1539, 'colsample_bylevel': 0.4507843245619566, 'boosting_type': 'Plain'}. Best is trial 0 with value: 0.8251355893766199.[0m
[32m[I 2021-08-15 03:25:47,617][0m Trial 1 finished with value: 0.892184018361035 and parameters: {'depth': 7, 'learning_rate': 0.022827586243750055, 'l2_leaf_reg': 2.577609408631827, 'random_strength': 0.12047292861307612, 'max_bin': 76, 'iterations': 1288, 'colsample_bylevel': 0.7367154694037296, 'boosting_type': 'Plain'}. Best is trial 1 with value: 0.892184018361035.[0m
[32m[I 2021-08-15 03:31:18,146][0m Trial 2 finished with value: 0.8198497295681655 and parameters: {'depth'

Number of finished trials: 51


In [19]:
best_params['verbose']       = 0
best_params['loss_function'] ='Logloss'
best_params["bootstrap_type"]= 'MVS'
best_params['eval_metric']   = 'AUC'
best_params['iterations']    = 5000


best_params

{'depth': 10,
 'learning_rate': 0.027276944955829704,
 'l2_leaf_reg': 0.027758736296405108,
 'random_strength': 0.013849312691211818,
 'max_bin': 209,
 'iterations': 5000,
 'colsample_bylevel': 0.35140007149115216,
 'boosting_type': 'Plain',
 'verbose': 0,
 'loss_function': 'Logloss',
 'bootstrap_type': 'MVS',
 'eval_metric': 'AUC'}

# Model Fitting and Prediction

In [20]:
def k_fold_predict(k,
                  params=best_params):
    
    
    skf=StratifiedKFold(n_splits=k,
                       shuffle=True,
                       random_state=7)
    
    mean_preds=np.zeros(shape=(X_test.shape[0]))
    train_check=np.zeros(shape=(X.shape[0]))   
    
    i=0
    for train_idx,val_idx in skf.split(X,y):
        x_t,x_v=X.iloc[train_idx],X.iloc[val_idx]
        y_t,y_v=y.iloc[train_idx],y.iloc[val_idx]
        
        model=cb(**params)        
        model.fit(x_t,y_t,
                 cat_features=categorical_columns
                 )
        
        
        print('Validation AUC score for fold {} = {}'.format(i,
                                                         auc(
                                            y_v,
                                            [i[1] for i in model.predict_proba(x_v)])))
        i+=1
        
        #test predictions
        test_p=np.array([i[1] for i in model.predict_proba(X_test)])
        mean_preds+=test_p
        
        
        #training preds
        train_p=np.array([i[1] for i in model.predict_proba(X)])      
        train_check+=train_p
        
        
    mean_preds=mean_preds/k
    train_check=train_check/k
    
    return mean_preds,train_check

**Training and prediction on folds**

In [21]:
# %%time
test_pred,train_pred=k_fold_predict(7)

preds=pd.DataFrame(test_pred,columns=['risk_flag'])
train_preds=pd.DataFrame(train_pred,columns=['risk_flag'])

preds['id']=test_ids
train_preds['id']=train_ids

Validation AUC score for fold 0 = 0.9279354134814206
Validation AUC score for fold 1 = 0.9216519880685102
Validation AUC score for fold 2 = 0.9256586885730338
Validation AUC score for fold 3 = 0.916780052986243
Validation AUC score for fold 4 = 0.9251918422278738
Validation AUC score for fold 5 = 0.92495164334377
Validation AUC score for fold 6 = 0.9213937957272009


In [22]:
train_auc=auc(y,
             train_preds['risk_flag'])
print(f'AUC score for train set {train_auc}')

AUC score for train set 0.9693875163512812


In [23]:
def roc_threshold(y_true,y_pred):
    '''get best threshold from a roc_auc_curve
    y_true: ground truth
    y_pred: predicted probabilities'''


    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    # get the best threshold
    J = tpr - fpr
    ix = np.argmax(J)
    best_thresh = thresholds[ix]
    thresh_tpr=tpr[ix]
    thresh_fpr=fpr[ix]
    print(f'Best Threshold (TPR-FPR)= {best_thresh} \n TPR :{thresh_tpr} \n FPR :{thresh_fpr}')

    #best threshold by geometric mean :
    # calculate the g-mean for each threshold
    gmeans = np.sqrt(tpr * (1-fpr))

    # locate the index of the largest g-mean
    ix1 = np.argmax(gmeans)
    
    
    print('Best Threshold (geometric mean) = %f \n G-Mean=%.3f' % (thresholds[ix1], gmeans[ix1]))

    return best_thresh

best_thresh=roc_threshold(y,train_preds['risk_flag'])

Best Threshold (TPR-FPR)= 0.17582549002931389 
 TPR :0.9991934443153956 
 FPR :0.08522922662033267
Best Threshold (geometric mean) = 0.175825 
 G-Mean=0.956


In [24]:
#best threshold from above method
thresh=best_thresh

thresh

0.17582549002931389

**Converting Probablities into Flags (i.e, 0:No Risk, 1: Risk)**

In [25]:
def get_bool(predictions_df,threshold):
    '''get 1/0 based on thresholds'''
    
    df=predictions_df.copy(deep=True)
    
    df['risk_flag']=df['risk_flag'].apply(lambda x: int(x>=threshold))
    
    df=df[['id','risk_flag']]
    
    return df

submissions=get_bool(predictions_df=preds,
                     threshold=thresh)

**checking the AUC score on Predicted Training Set**

In [26]:

tr=get_bool(predictions_df=train_preds,
                     threshold=thresh)

auc(y,tr['risk_flag'])

0.9569821088475313

In [27]:
# The Ratio of Positive and Negative examples to the total.
submissions['risk_flag'].value_counts(1)

0    0.850179
1    0.149821
Name: risk_flag, dtype: float64

In [28]:
submissions.shape

(28000, 2)

**Submission File**

In [29]:
submissions.to_csv('submission.csv',index=False)

# Tweaking Threshold Val

In [30]:
thresh2=thresh-0.015

In [31]:
def get_bool(predictions_df,threshold):
    '''get 1/0 based on thresholds'''
    
    df=predictions_df.copy(deep=True)
    
    df['risk_flag']=df['risk_flag'].apply(lambda x: int(x>=threshold))
    
    df=df[['id','risk_flag']]
    
    return df

submissions=get_bool(predictions_df=preds,
                     threshold=thresh2)

In [32]:

tr=get_bool(predictions_df=train_preds,
                     threshold=thresh2)

auc(y,tr['risk_flag'])

0.9569338074740362

In [33]:
# The Ratio of Positive and Negative examples to the total.
submissions['risk_flag'].value_counts(1)

0    0.840821
1    0.159179
Name: risk_flag, dtype: float64

In [34]:
submissions.to_csv('submission1.csv',index=False)

**3**

In [35]:
thresh3=thresh-0.02

In [36]:
def get_bool(predictions_df,threshold):
    '''get 1/0 based on thresholds'''
    
    df=predictions_df.copy(deep=True)
    
    df['risk_flag']=df['risk_flag'].apply(lambda x: int(x>=threshold))
    
    df=df[['id','risk_flag']]
    
    return df

submissions=get_bool(predictions_df=preds,
                     threshold=thresh3)

In [37]:
tr=get_bool(predictions_df=train_preds,
                     threshold=thresh3)

auc(y,tr['risk_flag'])

0.9569095096391589

In [38]:
# The Ratio of Positive and Negative examples to the total.
submissions['risk_flag'].value_counts(1)

0    0.837929
1    0.162071
Name: risk_flag, dtype: float64

In [39]:
submissions.to_csv('submission2.csv',index=False)

**4**

In [40]:
thresh4=thresh-0.03

In [41]:
def get_bool(predictions_df,threshold):
    '''get 1/0 based on thresholds'''
    
    df=predictions_df.copy(deep=True)
    
    df['risk_flag']=df['risk_flag'].apply(lambda x: int(x>=threshold))
    
    df=df[['id','risk_flag']]
    
    return df

submissions=get_bool(predictions_df=preds,
                     threshold=thresh4)

In [42]:
tr=get_bool(predictions_df=train_preds,
                     threshold=thresh4)

auc(y,tr['risk_flag'])

0.9567964814822171

In [43]:
# The Ratio of Positive and Negative examples to the total.
submissions['risk_flag'].value_counts(1)

0    0.833429
1    0.166571
Name: risk_flag, dtype: float64

In [44]:
submissions.to_csv('submission3.csv',index=False)

**5**

In [45]:
thresh5=thresh-0.036

In [46]:
def get_bool(predictions_df,threshold):
    '''get 1/0 based on thresholds'''
    
    df=predictions_df.copy(deep=True)
    
    df['risk_flag']=df['risk_flag'].apply(lambda x: int(x>=threshold))
    
    df=df[['id','risk_flag']]
    
    return df

submissions=get_bool(predictions_df=preds,
                     threshold=thresh5)

In [47]:
tr=get_bool(predictions_df=train_preds,
                     threshold=thresh5)

auc(y,tr['risk_flag'])

0.9567765061236431

In [48]:
# The Ratio of Positive and Negative examples to the total.
submissions['risk_flag'].value_counts(1)

0    0.830857
1    0.169143
Name: risk_flag, dtype: float64

In [49]:
submissions.to_csv('submission4.csv',index=False)

# Rough work

In [50]:
#  Old params

# 13/8/21 92.5 auc
# best_params={
#              'verbose'        : 0,
#              'loss_function'  :'Logloss',
#              'eval_metric': 'AUC',
#              "bootstrap_type": 'MVS',           
#              'depth': 9, 
#              'learning_rate': 0.07689113711214997, 
#              'l2_leaf_reg': 0.9296767227963647, 
#              'random_strength': 0.0991487399289666,
#              'max_bin': 254, 
#              'iterations': 1727,
#              'colsample_bylevel': 0.6456810156401686,
#              'boosting_type': 'Plain'}


# 12/8/21 :93 auc
# best_params={
#              'verbose'        : 0,
#              'loss_function'  :'Logloss',
#              'eval_metric': 'AUC',
#              "bootstrap_type": 'MVS',           
#              'depth': 10,
#              'learning_rate': 0.048887169045477, 
#              'l2_leaf_reg': 9.637912687253579, 
#              'random_strength': 0.14125825814463858,
#              'max_bin': 65, 
#              'iterations': 1987, 
#              'colsample_bylevel': 0.5998403438024634,
#              'boosting_type': 'Plain'}


# # 11/8/21:89auc
# best_params={'verbose'        : 0,
#              'loss_function'  :'Logloss',
#              'eval_metric': 'AUC',
#              'depth': 8,
#              'learning_rate': 0.023218155139759035,
#              'l2_leaf_reg': 0.014433979096775669,
#              'random_strength': 0.09956727861050116,
#              'max_bin': 177,
#              'iterations': 1470,
#              'colsample_bylevel': 0.6789825997216761,
#              'boosting_type': 'Ordered',
#              'bootstrap_type': 'MVS'}