In [59]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import sweetviz as sv
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split

from sklearn import svm
from xgboost.sklearn import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
from lightgbm import LGBMClassifier

from catboost import CatBoostClassifier,Pool, cv

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, confusion_matrix, cohen_kappa_score
from sklearn.model_selection import KFold, StratifiedKFold

In [60]:
# Load the labelled training split and the unlabelled test split from data/.
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')
# Show (rows, columns) of each split, one per line.
print(train.shape, test.shape, sep='\n')

(245725, 11)
(105312, 10)


# EDA

In [61]:
# Inspect column dtypes to see which features are still stored as strings (object).
train.dtypes

ID                     object
Gender                 object
Age                     int64
Region_Code            object
Occupation             object
Channel_Code           object
Vintage                 int64
Credit_Product         object
Avg_Account_Balance     int64
Is_Active              object
Is_Lead                 int64
dtype: object

#### Categorical Feature to Numerical

In [62]:
# Lookup tables that translate string categories into integer codes.
Yes_No_dict = dict(Yes=1, No=0)
MF_dict = dict(Male=1, Female=0)

# For the two tables below the code is the position of the label in the tuple.
Occupation_dict = {
    label: code
    for code, label in enumerate(("Salaried", "Other", "Self_Employed", "Entrepreneur"))
}
Channel_Code_dict = {
    label: code
    for code, label in enumerate(("X1", "X4", "X2", "X3"))
}

In [63]:
def cat_2_num(data, region_categories=None):
    """Encode the categorical columns of ``data`` as numbers.

    ``Gender``, ``Occupation``, ``Channel_Code``, ``Credit_Product`` and
    ``Is_Active`` are translated with the module-level lookup dictionaries;
    values missing from a dictionary (including NaN) are left unchanged.
    ``Region_Code`` is replaced by integer category codes.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns listed above. It is modified in place.
    region_categories : sequence, optional
        Explicit, ordered list of region labels to code against. When omitted
        the codes come from the labels present in ``data`` alone, so two frames
        only get matching codes if they contain the same set of regions. Pass
        the same list for train and test to guarantee a shared encoding; labels
        outside the list are coded as -1.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, returned for convenience.
    """
    encodings = {
        'Gender': MF_dict,
        'Occupation': Occupation_dict,
        'Channel_Code': Channel_Code_dict,
        'Credit_Product': Yes_No_dict,
        'Is_Active': Yes_No_dict,
    }
    for column, mapping in encodings.items():
        # Assign the result back rather than calling
        # ``data[column].replace(..., inplace=True)``: the in-place form acts on
        # an intermediate Series and does not update ``data`` under pandas
        # Copy-on-Write. ``infer_objects`` keeps the column numeric on pandas
        # versions where ``replace`` no longer downcasts object columns.
        data[column] = data[column].replace(mapping).infer_objects()

    if region_categories is None:
        data['Region_Code'] = data['Region_Code'].astype('category').cat.codes
    else:
        data['Region_Code'] = pd.Categorical(
            data['Region_Code'], categories=region_categories
        ).codes
    return data

In [64]:
# Encode the categorical columns of the training split, then preview the result.
train = cat_2_num(train)
train.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
0,NNVBBKZB,0,73,18,1,3,43,0.0,1045696,0,0
1,IDD62UNG,0,30,27,0,0,32,0.0,581988,0,0
2,HD3DSEMC,0,56,18,2,3,26,0.0,1484315,1,0
3,BF3NC7KV,1,34,20,0,0,19,0.0,470454,0,0
4,TEASRWXV,0,30,32,0,0,33,0.0,886787,0,0


In [65]:
# Apply the same categorical encoding to the test split.
test = cat_2_num(test)

### Sweetviz reports for EDA

In [66]:
# Profile the training split with Sweetviz, using Is_Lead as the target feature,
# and render the report to eda_report_train_2.html.
report=sv.analyze(train, target_feat="Is_Lead")
report.show_html('eda_report_train_2.html')

In [67]:
# Compare train vs. test feature distributions (target: Is_Lead) and render the
# side-by-side report to comparison_report_2.html.
comparison_report = sv.compare([train, "Training Data"], [test, "Test Data"], "Is_Lead")
comparison_report.show_html('comparison_report_2.html')

From the EDA reports I concluded the following:
1. Credit_Product contains missing values, so they need to be imputed.
2. Important features -
    i. Avg_Account_Balance
    ii. Vintage
    iii. Age
    iv. Occupation
    v. Credit_Product
3. The target "Is_Lead" is imbalanced, with a class ratio of roughly 4:1 ("0" vs. "1"), so SMOTE or another oversampling method can be used.
4. The feature distributions of the test dataset are almost the same as those of the train dataset.

## Missing Value imputation

I tried the following 4 techniques:
1. Treat missing values as a third category
2. Fill them with the mode
3. Impute missing values with KNN
4. Predict missing values by building a model for them

Out of these 4, the first one works best.

#### Treat as third category

In [68]:
# Replace missing Credit_Product values with the sentinel 3, which acts as a
# separate "unknown" category next to the 0/1 codes from Yes_No_dict.
train['Credit_Product'] = train['Credit_Product'].fillna(3)
test['Credit_Product'] = test['Credit_Product'].fillna(3)

#### Mode Imputation

In [69]:
# train['Credit_Product'] = train['Credit_Product'].fillna(train['Credit_Product'].mode())
# test['Credit_Product'] = test['Credit_Product'].fillna(train['Credit_Product'].mode())

In [70]:
# X = train.drop(['ID', 'Is_Lead'], axis=1)

#### KNN Imputation

In [71]:
# imputer = KNNImputer(n_neighbors=7, weights='uniform', metric='nan_euclidean')

In [72]:
# X = imputer.fit_transform(train.drop(['ID', 'Is_Lead'], axis=1))

In [73]:
# X_test = imputer.transform(test.drop('ID', axis=1))

In [74]:
# X_test_final = X_test.copy()

In [75]:
# import pickle
# with open('imputer2.pkl','ab') as f:
#     pickle.dump(imputer, f)

In [76]:
# import pickle
# with open('imputer2.pkl','rb') as f:
#     loaded_imp = pickle.load(f)

In [77]:
# train.shape

In [78]:
# X = loaded_imp.transform(train.drop(['ID', 'Is_Lead'], axis=1))
# X.shape

In [79]:
# Count the remaining missing values per column after imputation.
train.isna().sum()

ID                     0
Gender                 0
Age                    0
Region_Code            0
Occupation             0
Channel_Code           0
Vintage                0
Credit_Product         0
Avg_Account_Balance    0
Is_Active              0
Is_Lead                0
dtype: int64

# Feature Engineering and Selection

In [80]:
# Preview the first two encoded rows before engineering new features.
train.head(2)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
0,NNVBBKZB,0,73,18,1,3,43,0.0,1045696,0,0
1,IDD62UNG,0,30,27,0,0,32,0.0,581988,0,0


#### Bucketing Avg_Account_Balance

Dividing "Avg_Account_Balance" into 3 categories based on the 50% and 75% quantiles of its distribution.

In [128]:
# Summary statistics of the balance column; the 50% and 75% values are the
# thresholds hard-coded in new_cat.
train['Avg_Account_Balance'].describe()

count    2.457250e+05
mean     1.128403e+06
std      8.529364e+05
min      2.079000e+04
25%      6.043100e+05
50%      8.946010e+05
75%      1.366666e+06
max      1.035201e+07
Name: Avg_Account_Balance, dtype: float64

In [82]:
def new_cat(x, median=894601, upper_quartile=1366666):
    """Bucket an account balance into one of three tiers.

    Parameters
    ----------
    x : number
        The average account balance to classify.
    median : number, optional
        Upper bound (inclusive) of tier 1. Defaults to the 50% value of
        ``train['Avg_Account_Balance']``.
    upper_quartile : number, optional
        Upper bound (exclusive) of tier 2. Defaults to the 75% value of
        ``train['Avg_Account_Balance']``.

    Returns
    -------
    int
        1 if ``x <= median``, 2 if ``median < x < upper_quartile``, otherwise 3.
        The two boundaries are deliberately kept asymmetric (inclusive, then
        exclusive) to preserve the existing bucketing. A NaN input fails both
        comparisons and therefore lands in tier 3.
    """
    if x <= median:
        return 1
    if x < upper_quartile:
        return 2
    return 3
# Derive the 'Richness' tier for both splits with the same thresholds, which are
# the quantiles of the training data.
train['Richness'] = train['Avg_Account_Balance'].map(new_cat)
test['Richness'] = test['Avg_Account_Balance'].map(new_cat)

Creating new features

In [83]:
# Combine Richness and Occupation into one code. Occupation codes are 0-3, so
# multiplying Richness by 10 keeps every (Richness, Occupation) pair distinct.
train['Richness + Occupation'] = train['Richness']*10 + train['Occupation']
test['Richness + Occupation'] = test['Richness']*10 + test['Occupation']

In [85]:
# Combine Age and Occupation: the last digit is the occupation code (0-3) and
# the leading digits are the age.
train['Age + Occupation'] = train['Age']*10 + train['Occupation']
test['Age + Occupation'] = test['Age']*10 + test['Occupation']

In [86]:
# Combine Credit_Product (0, 1, or the missing-value sentinel 3) with the
# occupation code in the same tens/units layout.
train['Credit_Product + Occupation'] = train['Credit_Product']*10 + train['Occupation']
test['Credit_Product + Occupation'] = test['Credit_Product']*10 + test['Occupation']

### Dealing with the imbalanced dataset

#### Without SMOTE

In [87]:
# Feature matrix and target without any resampling.
# NOTE: X and y are reassigned by the SVMSMOTE cell further down, so when the
# notebook runs top to bottom these un-resampled values are overwritten.
X = train.drop(['Is_Lead','ID'], axis=1)
y = train['Is_Lead']

#### SMOTE

In [92]:
from imblearn.over_sampling import SVMSMOTE

In [93]:
# Shape of the training frame after feature engineering, before oversampling.
train.shape

(245725, 15)

In [94]:
# Same shape check again; `train` itself is not modified by the resampling step.
train.shape

(245725, 15)

In [95]:
# Oversample the minority class with SVM-SMOTE. The sampler is seeded so the
# synthetic rows (and every score computed from them) are reproducible.
# NOTE(review): X and y are resampled before cross_val splits them, so the
# validation folds also contain synthetic rows. Confirm that the reported AUC
# is not inflated compared with resampling inside each training fold only.
oversample = SVMSMOTE(random_state=42)
X, y = oversample.fit_resample(train.drop(['Is_Lead','ID'],axis=1), train['Is_Lead'])

Hold-out split for testing the model

In [96]:
# Hold out 33% of the rows as a fixed-seed evaluation split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


# Modeling

Metrics

In [123]:
def metric(y_true, y_pred):
    """Print a set of binary-classification scores followed by the confusion matrix.

    ``y_pred`` is handed unchanged to every scorer, in this order: accuracy,
    precision, recall, F1, Cohen's kappa and ROC AUC. Each score is printed on
    its own line, then the confusion matrix is printed.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    """
    # (output template, scorer) pairs, evaluated and printed in order.
    #   accuracy:  (tp + tn) / (p + n)
    #   precision: tp / (tp + fp)
    #   recall:    tp / (tp + fn)
    #   f1:        2 tp / (2 tp + fp + fn)
    report_lines = (
        ('Accuracy: %f', accuracy_score),
        ('Precision: %f', precision_score),
        ('Recall: %f', recall_score),
        ('F1 score: %f', f1_score),
        ('Cohens kappa: %f', cohen_kappa_score),
        ('ROC AUC: %f', roc_auc_score),
    )
    for template, scorer in report_lines:
        print(template % scorer(y_true, y_pred))

    print(confusion_matrix(y_true, y_pred))

In [124]:
def metric2(y_true, y_pred):
    """Print the ROC AUC of ``y_pred`` against ``y_true``, then a separator line.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Scores for the positive class (``cross_val`` passes the second column
        of ``predict_proba``).
    """
    # Score the arguments that were passed in. The previous body read the
    # globals `y_test` and `pred`, so it ignored its inputs and depended on
    # whatever happened to be left in the kernel namespace.
    auc_score = roc_auc_score(y_true, y_pred)
    print(f"roc_auc_score: {auc_score}")
    print("-"*50)

#### Final Submission

In [112]:
def cross_val(X, y, model, params, folds=7):
    """Fit ``model(**params)`` on each stratified fold and print its validation AUC.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; rows are selected positionally with ``.iloc``.
    y : pandas.Series
        Target aligned with ``X``; also indexed with ``.iloc``.
    model : callable
        Estimator class (or factory) called as ``model(**params)`` once per fold.
    params : dict
        Keyword arguments used to build the estimator.
    folds : int, optional
        Number of stratified splits (shuffled, fixed seed 21).

    Returns
    -------
    estimator
        The estimator fitted on the last fold only; earlier folds' estimators
        are discarded.
    """
    splitter = StratifiedKFold(n_splits=folds, shuffle=True, random_state=21)
    fold_no = 0
    for fit_rows, val_rows in splitter.split(X, y):
        print(f"Fold: {fold_no}")
        X_fit = X.iloc[fit_rows]
        y_fit = y.iloc[fit_rows]
        X_val = X.iloc[val_rows]
        y_val = y.iloc[val_rows]

        estimator = model(**params)
        # The validation fold drives early stopping.
        # NOTE(review): recent LightGBM / XGBoost releases no longer accept
        # `early_stopping_rounds` / `verbose` as fit() keywords. Confirm the
        # installed versions still do.
        estimator.fit(
            X_fit,
            y_fit,
            eval_set=[(X_val, y_val)],
            early_stopping_rounds=100,
            verbose=400,
        )
        val_scores = estimator.predict_proba(X_val)[:, 1]
        metric2(y_val, val_scores)
        fold_no += 1
    return estimator

LGBMClassifier

In [113]:
# Hyper-parameters passed to LGBMClassifier. n_estimators is set very high
# because cross_val stops training early on the validation fold.
lgb_params = dict(
    learning_rate=0.045,
    n_estimators=20000,
    max_bin=94,
    num_leaves=10,
    max_depth=27,
    reg_alpha=8.457,
    reg_lambda=6.853,
    subsample=0.749,
)

In [114]:
# Cross-validate LightGBM with the default 7 folds; cross_val returns the
# estimator fitted on the last fold, which is kept as lgb_model.
lgb_model = cross_val(X, y, LGBMClassifier, lgb_params)

Fold: 0
Training until validation scores don't improve for 100 rounds
[400]	valid_0's binary_logloss: 0.252142
[800]	valid_0's binary_logloss: 0.242673
[1200]	valid_0's binary_logloss: 0.239003
[1600]	valid_0's binary_logloss: 0.237225
[2000]	valid_0's binary_logloss: 0.235909
[2400]	valid_0's binary_logloss: 0.235164
[2800]	valid_0's binary_logloss: 0.234536
[3200]	valid_0's binary_logloss: 0.234139
[3600]	valid_0's binary_logloss: 0.233867
[4000]	valid_0's binary_logloss: 0.233473
[4400]	valid_0's binary_logloss: 0.233195
[4800]	valid_0's binary_logloss: 0.233049
[5200]	valid_0's binary_logloss: 0.232925
[5600]	valid_0's binary_logloss: 0.232847
Early stopping, best iteration is:
[5860]	valid_0's binary_logloss: 0.232775
roc_auc_score: 0.959466574986592
--------------------------------------------------
Fold: 1
Training until validation scores don't improve for 100 rounds
[400]	valid_0's binary_logloss: 0.254657
[800]	valid_0's binary_logloss: 0.245141
[1200]	valid_0's binary_logloss

In [115]:
# Hyper-parameters passed to XGBClassifier. n_estimators is set very high
# because cross_val stops training early on the validation fold.
xgb_params = dict(
    n_estimators=20000,
    max_depth=6,
    learning_rate=0.0201,
    reg_lambda=29.326,
    subsample=0.818,
    colsample_bytree=0.235,
    colsample_bynode=0.820,
    colsample_bylevel=0.453,
)

In [116]:
# from xgboost import XGBClassifier

In [117]:
# Cross-validate XGBoost with the default 7 folds; cross_val returns the
# estimator fitted on the last fold, which is kept as xgb_model.
xgb_model = cross_val(X, y, XGBClassifier, xgb_params)

Fold: 0




[0]	validation_0-logloss:0.68535
[400]	validation_0-logloss:0.30168
[800]	validation_0-logloss:0.27548
[1200]	validation_0-logloss:0.26569
[1600]	validation_0-logloss:0.25867
[2000]	validation_0-logloss:0.25477
[2400]	validation_0-logloss:0.25225
[2800]	validation_0-logloss:0.25026
[3200]	validation_0-logloss:0.24874
[3600]	validation_0-logloss:0.24727
[4000]	validation_0-logloss:0.24592
[4400]	validation_0-logloss:0.24496
[4800]	validation_0-logloss:0.24410
[5200]	validation_0-logloss:0.24330
[5600]	validation_0-logloss:0.24253
[6000]	validation_0-logloss:0.24183
[6400]	validation_0-logloss:0.24124
[6800]	validation_0-logloss:0.24075
[7200]	validation_0-logloss:0.24029
[7600]	validation_0-logloss:0.23995
[8000]	validation_0-logloss:0.23961
[8400]	validation_0-logloss:0.23927
[8800]	validation_0-logloss:0.23898
[9200]	validation_0-logloss:0.23869
[9600]	validation_0-logloss:0.23841
[10000]	validation_0-logloss:0.23817
[10400]	validation_0-logloss:0.23793
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68544
[400]	validation_0-logloss:0.30432
[800]	validation_0-logloss:0.27863
[1200]	validation_0-logloss:0.26948
[1600]	validation_0-logloss:0.26206
[2000]	validation_0-logloss:0.25792
[2400]	validation_0-logloss:0.25526
[2800]	validation_0-logloss:0.25323
[3200]	validation_0-logloss:0.25167
[3600]	validation_0-logloss:0.25024
[4000]	validation_0-logloss:0.24894
[4400]	validation_0-logloss:0.24793
[4800]	validation_0-logloss:0.24707
[5200]	validation_0-logloss:0.24619
[5600]	validation_0-logloss:0.24551
[6000]	validation_0-logloss:0.24488
[6400]	validation_0-logloss:0.24432
[6800]	validation_0-logloss:0.24391
[7200]	validation_0-logloss:0.24340
[7600]	validation_0-logloss:0.24308
[8000]	validation_0-logloss:0.24270
[8400]	validation_0-logloss:0.24233
[8800]	validation_0-logloss:0.24203
[9200]	validation_0-logloss:0.24174
[9600]	validation_0-logloss:0.24148
[10000]	validation_0-logloss:0.24123
[10400]	validation_0-logloss:0.24099
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68539
[400]	validation_0-logloss:0.30237
[800]	validation_0-logloss:0.27655
[1200]	validation_0-logloss:0.26695
[1600]	validation_0-logloss:0.25927
[2000]	validation_0-logloss:0.25556
[2400]	validation_0-logloss:0.25288
[2800]	validation_0-logloss:0.25089
[3200]	validation_0-logloss:0.24923
[3600]	validation_0-logloss:0.24780
[4000]	validation_0-logloss:0.24648
[4400]	validation_0-logloss:0.24547
[4800]	validation_0-logloss:0.24468
[5200]	validation_0-logloss:0.24388
[5600]	validation_0-logloss:0.24317
[6000]	validation_0-logloss:0.24257
[6400]	validation_0-logloss:0.24202
[6800]	validation_0-logloss:0.24158
[7200]	validation_0-logloss:0.24111
[7600]	validation_0-logloss:0.24077
[8000]	validation_0-logloss:0.24044
[8400]	validation_0-logloss:0.24005
[8800]	validation_0-logloss:0.23972
[9200]	validation_0-logloss:0.23939
[9600]	validation_0-logloss:0.23911
[10000]	validation_0-logloss:0.23892
[10400]	validation_0-logloss:0.23863
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68532
[400]	validation_0-logloss:0.30259
[800]	validation_0-logloss:0.27715
[1200]	validation_0-logloss:0.26830
[1600]	validation_0-logloss:0.26106
[2000]	validation_0-logloss:0.25733
[2400]	validation_0-logloss:0.25470
[2800]	validation_0-logloss:0.25272
[3200]	validation_0-logloss:0.25116
[3600]	validation_0-logloss:0.24981
[4000]	validation_0-logloss:0.24853
[4400]	validation_0-logloss:0.24755
[4800]	validation_0-logloss:0.24682
[5200]	validation_0-logloss:0.24604
[5600]	validation_0-logloss:0.24530
[6000]	validation_0-logloss:0.24474
[6400]	validation_0-logloss:0.24415
[6800]	validation_0-logloss:0.24376
[7200]	validation_0-logloss:0.24319
[7600]	validation_0-logloss:0.24289
[8000]	validation_0-logloss:0.24253
[8400]	validation_0-logloss:0.24224
[8800]	validation_0-logloss:0.24194
[9200]	validation_0-logloss:0.24171
[9600]	validation_0-logloss:0.24143
[10000]	validation_0-logloss:0.24119
[10400]	validation_0-logloss:0.24095
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68545
[400]	validation_0-logloss:0.30500
[800]	validation_0-logloss:0.27852
[1200]	validation_0-logloss:0.26893
[1600]	validation_0-logloss:0.26147
[2000]	validation_0-logloss:0.25734
[2400]	validation_0-logloss:0.25454
[2800]	validation_0-logloss:0.25256
[3200]	validation_0-logloss:0.25098
[3600]	validation_0-logloss:0.24945
[4000]	validation_0-logloss:0.24818
[4400]	validation_0-logloss:0.24714
[4800]	validation_0-logloss:0.24631
[5200]	validation_0-logloss:0.24555
[5600]	validation_0-logloss:0.24482
[6000]	validation_0-logloss:0.24417
[6400]	validation_0-logloss:0.24351
[6800]	validation_0-logloss:0.24305
[7200]	validation_0-logloss:0.24251
[7600]	validation_0-logloss:0.24217
[8000]	validation_0-logloss:0.24181
[8400]	validation_0-logloss:0.24139
[8800]	validation_0-logloss:0.24109
[9200]	validation_0-logloss:0.24080
[9600]	validation_0-logloss:0.24052
[10000]	validation_0-logloss:0.24028
[10400]	validation_0-logloss:0.24004
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68534
[400]	validation_0-logloss:0.30402
[800]	validation_0-logloss:0.27804
[1200]	validation_0-logloss:0.26821
[1600]	validation_0-logloss:0.26100
[2000]	validation_0-logloss:0.25718
[2400]	validation_0-logloss:0.25436
[2800]	validation_0-logloss:0.25244
[3200]	validation_0-logloss:0.25082
[3600]	validation_0-logloss:0.24943
[4000]	validation_0-logloss:0.24823
[4400]	validation_0-logloss:0.24716
[4800]	validation_0-logloss:0.24642
[5200]	validation_0-logloss:0.24559
[5600]	validation_0-logloss:0.24489
[6000]	validation_0-logloss:0.24429
[6400]	validation_0-logloss:0.24368
[6800]	validation_0-logloss:0.24325
[7200]	validation_0-logloss:0.24276
[7600]	validation_0-logloss:0.24240
[8000]	validation_0-logloss:0.24204
[8400]	validation_0-logloss:0.24168
[8800]	validation_0-logloss:0.24139
[9200]	validation_0-logloss:0.24106
[9600]	validation_0-logloss:0.24076
[10000]	validation_0-logloss:0.24054
[10400]	validation_0-logloss:0.24025
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68537
[400]	validation_0-logloss:0.29447
[800]	validation_0-logloss:0.27109
[1200]	validation_0-logloss:0.26164
[1600]	validation_0-logloss:0.25554
[2000]	validation_0-logloss:0.25170
[2400]	validation_0-logloss:0.24874
[2800]	validation_0-logloss:0.24659
[3200]	validation_0-logloss:0.24492
[3600]	validation_0-logloss:0.24331
[4000]	validation_0-logloss:0.24214
[4400]	validation_0-logloss:0.24121
[4800]	validation_0-logloss:0.24033
[5200]	validation_0-logloss:0.23956
[5600]	validation_0-logloss:0.23885
[6000]	validation_0-logloss:0.23829
[6400]	validation_0-logloss:0.23776
[6800]	validation_0-logloss:0.23730
[7200]	validation_0-logloss:0.23679
[7600]	validation_0-logloss:0.23647
[8000]	validation_0-logloss:0.23604
[8400]	validation_0-logloss:0.23563
[8800]	validation_0-logloss:0.23530
[9200]	validation_0-logloss:0.23502
[9600]	validation_0-logloss:0.23477
[10000]	validation_0-logloss:0.23445
[10400]	validation_0-logloss:0.23426
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68542
[400]	validation_0-logloss:0.29795
[800]	validation_0-logloss:0.27549
[1200]	validation_0-logloss:0.26648
[1600]	validation_0-logloss:0.26030
[2000]	validation_0-logloss:0.25621
[2400]	validation_0-logloss:0.25353
[2800]	validation_0-logloss:0.25122
[3200]	validation_0-logloss:0.24957
[3600]	validation_0-logloss:0.24791
[4000]	validation_0-logloss:0.24674
[4400]	validation_0-logloss:0.24576
[4800]	validation_0-logloss:0.24500
[5200]	validation_0-logloss:0.24430
[5600]	validation_0-logloss:0.24361
[6000]	validation_0-logloss:0.24310
[6400]	validation_0-logloss:0.24264
[6800]	validation_0-logloss:0.24219
[7200]	validation_0-logloss:0.24175
[7600]	validation_0-logloss:0.24147
[8000]	validation_0-logloss:0.24113
[8400]	validation_0-logloss:0.24080
[8800]	validation_0-logloss:0.24057
[9200]	validation_0-logloss:0.24035
[9600]	validation_0-logloss:0.24016
[10000]	validation_0-logloss:0.23988
[10400]	validation_0-logloss:0.23972
[10800]	validation_0-logloss:0.



[0]	validation_0-logloss:0.68537
[400]	validation_0-logloss:0.29854
[800]	validation_0-logloss:0.27632
[1200]	validation_0-logloss:0.26760
[1600]	validation_0-logloss:0.26193
[2000]	validation_0-logloss:0.25800
[2400]	validation_0-logloss:0.25505
[2800]	validation_0-logloss:0.25284
[3200]	validation_0-logloss:0.25129
[3600]	validation_0-logloss:0.24960
[4000]	validation_0-logloss:0.24856
[4400]	validation_0-logloss:0.24766
[4800]	validation_0-logloss:0.24687
[5200]	validation_0-logloss:0.24617
[5600]	validation_0-logloss:0.24545
[6000]	validation_0-logloss:0.24493
[6400]	validation_0-logloss:0.24443
[6800]	validation_0-logloss:0.24404
[7200]	validation_0-logloss:0.24351
[7600]	validation_0-logloss:0.24323
[8000]	validation_0-logloss:0.24288
[8400]	validation_0-logloss:0.24252
[8800]	validation_0-logloss:0.24221
[9200]	validation_0-logloss:0.24198
[9600]	validation_0-logloss:0.24177
[10000]	validation_0-logloss:0.24151
[10400]	validation_0-logloss:0.24137
[10800]	validation_0-logloss:0.

In [118]:
# Hyper-parameters passed to CatBoostClassifier. n_estimators is set very high
# because cross_val stops training early on the validation fold.
# The 'bagging_temperature' key was previously split across several lines, which
# left an unterminated string literal; it is restored to a single key here.
cat_params = {
    'n_estimators': 20000,
    'depth': 4,
    'learning_rate': 0.023,
    'colsample_bylevel': 0.655,
    'bagging_temperature': 0.921,
    'l2_leaf_reg': 10.133,
}



In [119]:
from catboost import CatBoostClassifier
# Cross-validate CatBoost with the default 7 folds; cross_val returns the
# estimator fitted on the last fold, which is kept as cat_model.
cat_model = cross_val(X, y, CatBoostClassifier, cat_params)

Fold: 0
0:	learn: 0.6695025	test: 0.6693945	best: 0.6693945 (0)	total: 56ms	remaining: 18m 39s
400:	learn: 0.2696708	test: 0.2685920	best: 0.2685920 (400)	total: 19.4s	remaining: 15m 49s
800:	learn: 0.2548027	test: 0.2536650	best: 0.2536650 (800)	total: 38.5s	remaining: 15m 22s
1200:	learn: 0.2484555	test: 0.2478100	best: 0.2478100 (1200)	total: 57.8s	remaining: 15m 4s
1600:	learn: 0.2444116	test: 0.2440473	best: 0.2440473 (1600)	total: 1m 17s	remaining: 14m 45s
2000:	learn: 0.2418406	test: 0.2417718	best: 0.2417718 (2000)	total: 1m 36s	remaining: 14m 25s
2400:	learn: 0.2397405	test: 0.2399656	best: 0.2399656 (2400)	total: 1m 55s	remaining: 14m 5s
2800:	learn: 0.2381100	test: 0.2386455	best: 0.2386452 (2799)	total: 2m 14s	remaining: 13m 47s
3200:	learn: 0.2362463	test: 0.2370550	best: 0.2370550 (3200)	total: 2m 33s	remaining: 13m 28s
3600:	learn: 0.2352657	test: 0.2364395	best: 0.2364366 (3583)	total: 2m 53s	remaining: 13m 8s
4000:	learn: 0.2344566	test: 0.2359779	best: 0.2359779 (4000

10800:	learn: 0.2264068	test: 0.2332222	best: 0.2332199 (10783)	total: 9m 21s	remaining: 7m 58s
11200:	learn: 0.2261028	test: 0.2331380	best: 0.2331378 (11199)	total: 9m 42s	remaining: 7m 37s
11600:	learn: 0.2258340	test: 0.2330862	best: 0.2330858 (11597)	total: 10m 4s	remaining: 7m 17s
12000:	learn: 0.2255350	test: 0.2330233	best: 0.2330226 (11997)	total: 10m 25s	remaining: 6m 56s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.232999114
bestIteration = 12082

Shrink model to first 12083 iterations.
roc_auc_score: 0.9592758134514379
--------------------------------------------------
Fold: 3
0:	learn: 0.6694776	test: 0.6694884	best: 0.6694884 (0)	total: 50ms	remaining: 16m 40s
400:	learn: 0.2684638	test: 0.2696863	best: 0.2696863 (400)	total: 21s	remaining: 17m 8s
800:	learn: 0.2539913	test: 0.2558047	best: 0.2558047 (800)	total: 42.3s	remaining: 16m 54s
1200:	learn: 0.2473099	test: 0.2495157	best: 0.2495157 (1200)	total: 1m 3s	remaining: 16m 34s
1600:	learn: 0.243

10000:	learn: 0.2270822	test: 0.2349050	best: 0.2349049 (9942)	total: 9m 12s	remaining: 9m 11s
10400:	learn: 0.2267655	test: 0.2348157	best: 0.2348147 (10352)	total: 9m 33s	remaining: 8m 49s
10800:	learn: 0.2264269	test: 0.2347373	best: 0.2347345 (10796)	total: 9m 55s	remaining: 8m 27s
11200:	learn: 0.2261255	test: 0.2346669	best: 0.2346638 (11195)	total: 10m 17s	remaining: 8m 5s
11600:	learn: 0.2258212	test: 0.2345903	best: 0.2345900 (11597)	total: 10m 39s	remaining: 7m 42s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.2345700564
bestIteration = 11739

Shrink model to first 11740 iterations.
roc_auc_score: 0.9592505231356971
--------------------------------------------------
Fold: 6
0:	learn: 0.6697516	test: 0.6697266	best: 0.6697266 (0)	total: 53.3ms	remaining: 17m 45s
400:	learn: 0.2692472	test: 0.2638880	best: 0.2638880 (400)	total: 21.4s	remaining: 17m 25s
800:	learn: 0.2547454	test: 0.2501606	best: 0.2501606 (800)	total: 42.6s	remaining: 17m 1s
1200:	learn:

9200:	learn: 0.2273182	test: 0.2368268	best: 0.2368235 (9193)	total: 7m 25s	remaining: 8m 43s
9600:	learn: 0.2269670	test: 0.2367226	best: 0.2367221 (9599)	total: 7m 44s	remaining: 8m 23s
10000:	learn: 0.2266236	test: 0.2366391	best: 0.2366386 (9999)	total: 8m 4s	remaining: 8m 4s
10400:	learn: 0.2262726	test: 0.2365310	best: 0.2365288 (10397)	total: 8m 24s	remaining: 7m 45s
10800:	learn: 0.2259408	test: 0.2364353	best: 0.2364290 (10765)	total: 8m 43s	remaining: 7m 26s
11200:	learn: 0.2256446	test: 0.2363780	best: 0.2363769 (11194)	total: 9m 3s	remaining: 7m 7s
11600:	learn: 0.2253397	test: 0.2363163	best: 0.2363155 (11592)	total: 9m 24s	remaining: 6m 48s
12000:	learn: 0.2250540	test: 0.2362919	best: 0.2362895 (11974)	total: 9m 44s	remaining: 6m 29s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.2362603272
bestIteration = 12172

Shrink model to first 12173 iterations.
roc_auc_score: 0.9579216885784303
--------------------------------------------------


#### Ensembling all the models

In [125]:
# Build the model input once instead of re-dropping 'ID' for every model.
test_features = test.drop('ID', axis=1)

# Positive-class probability from each model. Each *_model is the estimator that
# cross_val fitted on its last fold.
pred_test_lgb = lgb_model.predict_proba(test_features)[:,1]
pred_test_xgb = xgb_model.predict_proba(test_features)[:,1]
pred_test_cat = cat_model.predict_proba(test_features)[:,1]

# Simple unweighted average of the three probability vectors.
prediction = (pred_test_lgb + pred_test_cat+pred_test_xgb)/3

In [126]:
# Two-column submission frame: the test IDs next to the averaged probability.
# The row index is taken from test['ID'].
submission = pd.DataFrame({'ID': test['ID'], 'Is_Lead': prediction})

In [127]:
# Write the submission file without the DataFrame index column.
submission.to_csv("tenth.csv", index=False)