In [2]:
# -*- coding: utf-8 -*-
import os
import sys
import time
import pandas as pd
import numpy as np
import torch
from torch.optim import Adagrad
from sklearn.metrics import log_loss, roc_auc_score, mean_squared_error, accuracy_score, f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

import xgboost as xgb
import lightgbm as lgb
from lightgbm import log_evaluation, early_stopping
from catboost import CatBoostRegressor
import warnings

sys.path.append("/Users/wzq/Desktop/game")
from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names
from deepctr_torch.models import *
from deepctr_torch.callbacks import EarlyStopping, ModelCheckpoint

# jupyter配置
from IPython.display import display
pd.options.display.max_rows=10000 #Notebook 的一个cell的显示行数
pd.options.display.max_columns=100000#Notebook 的一个cell的显示列数
pd.set_option('display.max_colwidth', None)

In [6]:
def reduce_mem_usage(df):    
    start_mem = df.memory_usage().sum() / (1024 ** 2)    
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))    

    for col in df.columns:    
        col_type = df[col].dtype    

        if col_type != object:    
            c_min = df[col].min()    
            c_max = df[col].max()    
            if str(col_type)[:3] == 'int':    
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:    
                    df[col] = df[col].astype(np.int8)    
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:    
                    df[col] = df[col].astype(np.int16)    
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:    
                    df[col] = df[col].astype(np.int32)    
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:    
                    df[col] = df[col].astype(np.int64)    
            else:    
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:    
                    df[col] = df[col].astype(np.float16)    
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:    
                    df[col] = df[col].astype(np.float32)    
                else:    
                    df[col] = df[col].astype(np.float64)    
        else:    
            df[col] = df[col].astype('category')    

    end_mem = df.memory_usage().sum() / (1024 ** 2)    
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))    
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))    

    return df

In [7]:
data = pd.read_csv('./data/data_processed.csv')

In [8]:
reduce_mem_usage(data)

Memory usage of dataframe is 411.99 MB
Memory usage after optimization is: 101.12 MB
Decreased by 75.5%


Unnamed: 0,id,loanAmnt,term,interestRate,installment,grade,subGrade,employmentTitle,employmentLength,homeOwnership,annualIncome,verificationStatus,issueDate,isDefault,purpose,postCode,regionCode,dti,delinquency_2years,ficoRangeLow,ficoRangeHigh,openAcc,pubRec,pubRecBankruptcies,revolBal,revolUtil,totalAcc,initialListStatus,applicationType,earliesCreditLine,title,policyCode,n0,n1,n2,n3,n4,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14,issueDate_year,issueDate_month,issueDate_day,issueDate_weekday,issueDate_is_weekend,earliesCreditLine_year,earliesCreditLine_month
0,0,0.873535,1.0,0.553223,0.531250,4,21,305,0.199951,2,0.010002,2,2014-07-01,1.0,1,138,32,0.018051,0.0,0.477295,0.475098,0.077759,0.0,0.0,0.008324,0.054810,0.156250,0,0,2001-08-01,2,1.0,0.000000,0.060608,0.031738,0.031738,0.063477,0.128540,0.060608,0.048187,0.093750,0.044434,0.077759,0.0,0.0,0.0,0.066650,0.636230,0.545410,0.0,0.166626,0.0,0.802734,0.636230
1,1,0.443115,1.0,0.513184,0.263184,3,16,176661,0.500000,0,0.004181,2,2012-08-01,0.0,0,157,18,0.028824,0.0,0.340820,0.339355,0.144409,0.0,0.0,0.005196,0.043610,0.099976,1,0,2002-05-01,1510,1.0,0.000000,0.000000,0.000000,0.000000,0.158691,0.000000,0.000000,0.000000,0.000000,0.000000,0.144409,0.0,0.0,0.0,0.000000,0.454590,0.636230,0.0,0.333252,0.0,0.816895,0.363525
2,2,0.291260,1.0,0.454834,0.166992,3,17,27721,0.799805,0,0.006729,2,2015-10-01,0.0,0,338,14,0.023773,0.0,0.227295,0.226196,0.122192,0.0,0.0,0.001586,0.058044,0.156250,0,0,2006-05-01,1,1.0,0.000000,0.000000,0.047607,0.047607,0.000000,0.000000,0.159058,0.048187,0.039062,0.066650,0.122192,0.0,0.0,0.0,0.133301,0.727051,0.818359,0.0,0.500000,0.0,0.873047,0.363525
3,3,0.265869,0.0,0.075928,0.192139,0,3,40375,1.000000,1,0.010727,1,2015-08-01,0.0,4,149,11,0.018204,0.0,0.272705,0.271484,0.099976,0.0,0.0,0.003424,0.058960,0.162476,1,0,1999-05-01,5,1.0,0.117676,0.121216,0.095215,0.095215,0.063477,0.228516,0.030304,0.084351,0.164062,0.133301,0.099976,0.0,0.0,0.0,0.033325,0.727051,0.636230,0.0,0.833496,1.0,0.774414,0.363525
4,4,0.063293,0.0,0.299072,0.051178,2,11,51,0.000000,1,0.002636,2,2016-03-01,0.0,10,302,21,0.033173,0.0,0.295410,0.294189,0.133301,0.0,0.0,0.001013,0.035858,0.156250,0,0,1977-08-01,12,1.0,0.019608,0.060608,0.111084,0.111084,0.031738,0.057129,0.068176,0.120483,0.117188,0.155518,0.133301,0.0,0.0,0.0,0.133301,0.818359,0.181763,0.0,0.166626,0.0,0.464844,0.636230
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,999995,0.164551,0.0,0.227051,0.126709,1,6,262658,0.700195,1,0.002728,2,2012-10-01,,4,37,13,0.033905,0.0,0.272705,0.271484,0.088867,0.0,0.0,0.001728,0.060516,0.093750,1,0,2005-11-01,41174,1.0,0.000000,0.060608,0.031738,0.031738,0.031738,0.085693,0.083313,0.024094,0.046875,0.044434,0.088867,0.0,0.0,0.0,0.133301,0.454590,0.818359,0.0,0.000000,0.0,0.859375,0.909180
999996,999996,0.139282,0.0,0.036224,0.099426,0,1,33779,0.099976,1,0.005093,0,2015-10-01,,0,262,8,0.021378,0.0,0.431885,0.429932,0.277832,0.0,0.0,0.003498,0.040222,0.156250,0,0,2006-10-01,1,1.0,0.000000,0.151489,0.095215,0.095215,0.079346,0.071411,0.106079,0.144531,0.101562,0.133301,0.277832,0.0,0.0,0.0,0.000000,0.727051,0.818359,0.0,0.500000,0.0,0.873047,0.818359
999997,999997,0.341797,1.0,0.411621,0.191406,2,13,225414,0.799805,2,0.007275,2,2013-07-01,,4,47,27,0.016113,0.0,0.227295,0.226196,0.233276,0.0,0.0,0.010811,0.096375,0.381348,0,0,2001-12-01,5,1.0,0.000000,0.212158,0.206299,0.206299,0.142822,0.300049,0.136353,0.252930,0.328125,0.288818,0.233276,0.0,0.0,0.0,0.000000,0.545410,0.545410,0.0,0.000000,0.0,0.802734,1.000000
999998,999998,0.189819,0.0,0.496582,0.161865,3,16,94,0.399902,1,0.017273,0,2017-10-01,,0,100,8,0.010330,0.0,0.295410,0.294189,0.122192,0.0,0.0,0.009811,0.071716,0.137451,0,0,2005-08-01,1,1.0,0.000000,0.090881,0.095215,0.095215,0.063477,0.171387,0.037872,0.096375,0.148438,0.133301,0.122192,0.0,0.0,0.0,0.066650,0.909180,0.818359,0.0,1.000000,1.0,0.859375,0.636230


In [53]:
features = [f for f in data.columns if f not in ["issueDate", "isDefault", "earliesCreditLine", "policyCode"]]
target = data.iloc[:800000, :]['isDefault']
data = data[features]
train_data = data.iloc[:800000, :]
test_data = data.iloc[800000:, :]
display(train_data.shape)
display(train_data.head(10))
display(test_data.shape)
display(test_data.head(10))
display(target.shape)
display(target.head(10))

(800000, 50)

Unnamed: 0,id,loanAmnt,term,interestRate,installment,grade,subGrade,employmentTitle,employmentLength,homeOwnership,annualIncome,verificationStatus,purpose,postCode,regionCode,dti,delinquency_2years,ficoRangeLow,ficoRangeHigh,openAcc,pubRec,pubRecBankruptcies,revolBal,revolUtil,totalAcc,initialListStatus,applicationType,title,n0,n1,n2,n3,n4,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14,issueDate_year,issueDate_month,issueDate_day,issueDate_weekday,issueDate_is_weekend,earliesCreditLine_year,earliesCreditLine_month
0,0,0.873418,1.0,0.553349,0.531301,4,21,305,0.2,2,0.010001,2,1,138,32,0.01805,0.0,0.477273,0.475113,0.077778,0.0,0.0,0.008323,0.054802,0.15625,0,0,2,0.0,0.060606,0.031746,0.031746,0.063492,0.128571,0.060606,0.048193,0.09375,0.044444,0.077778,0.0,0.0,0.0,0.066667,0.636364,0.545455,0.0,0.166667,0.0,0.802817,0.636364
1,1,0.443038,1.0,0.51324,0.263246,3,16,176661,0.5,0,0.004182,2,0,157,18,0.02883,0.0,0.340909,0.339367,0.144444,0.0,0.0,0.005197,0.043595,0.1,1,0,1510,0.0,0.0,0.0,0.0,0.15873,0.0,0.0,0.0,0.0,0.0,0.144444,0.0,0.0,0.0,0.0,0.454545,0.636364,0.0,0.333333,0.0,0.816901,0.363636
2,2,0.291139,1.0,0.454829,0.167014,3,17,27721,0.8,0,0.006728,2,0,338,14,0.02377,0.0,0.227273,0.226244,0.122222,0.0,0.0,0.001586,0.058052,0.15625,0,0,1,0.0,0.0,0.047619,0.047619,0.0,0.0,0.159091,0.048193,0.039062,0.066667,0.122222,0.0,0.0,0.0,0.133333,0.727273,0.818182,0.0,0.5,0.0,0.873239,0.363636
3,3,0.265823,0.0,0.075935,0.192164,0,3,40375,1.0,1,0.010728,1,4,149,11,0.01821,0.0,0.272727,0.271493,0.1,0.0,0.0,0.003425,0.058949,0.1625,1,0,5,0.117647,0.121212,0.095238,0.095238,0.063492,0.228571,0.030303,0.084337,0.164062,0.133333,0.1,0.0,0.0,0.0,0.033333,0.727273,0.636364,0.0,0.833333,1.0,0.774648,0.363636
4,4,0.063291,0.0,0.299065,0.051169,2,11,51,0.0,1,0.002637,2,10,302,21,0.03316,0.0,0.295455,0.294118,0.133333,0.0,0.0,0.001013,0.035862,0.15625,0,0,12,0.019608,0.060606,0.111111,0.111111,0.031746,0.057143,0.068182,0.120482,0.117188,0.155556,0.133333,0.0,0.0,0.0,0.133333,0.818182,0.181818,0.0,0.166667,0.0,0.464789,0.636364
5,5,0.265823,0.0,0.104361,0.194333,0,4,44407,0.7,0,0.003546,2,9,513,21,0.01814,0.0,0.477273,0.475113,0.211111,0.0,0.0,0.001393,0.034854,0.3125,1,0,11,0.235294,0.030303,0.031746,0.031746,0.015873,0.014286,0.363636,0.024096,0.023438,0.044444,0.211111,0.0,0.0,0.0,0.0,0.909091,0.272727,0.0,0.833333,1.0,0.760563,0.545455
6,6,0.039241,0.0,0.092679,0.029352,0,3,146449,0.9,0,0.003182,0,0,518,14,0.01849,0.0,0.590909,0.588235,0.133333,0.0,0.0,0.001071,0.009526,0.13125,0,0,1,0.0,0.030303,0.047619,0.047619,0.111111,0.157143,0.022727,0.120482,0.140625,0.066667,0.133333,0.0,0.0,0.0,0.1,0.636364,0.818182,0.0,0.333333,0.0,0.873239,0.818182
7,7,0.278481,0.0,0.376558,0.226007,2,12,172154,0.1,1,0.002727,2,0,101,4,0.0336,0.0,0.181818,0.180995,0.088889,0.011628,0.083333,0.004827,0.066906,0.19375,1,0,1,0.0,0.121212,0.063492,0.063492,0.063492,0.228571,0.075758,0.060241,0.164062,0.088889,0.088889,0.0,0.0,0.0,0.066667,0.636364,0.0,0.0,0.333333,0.0,0.704225,1.0
8,8,0.291139,0.0,0.299065,0.229374,2,11,181,0.5,2,0.005455,1,0,793,13,0.02022,0.0,0.295455,0.294118,0.166667,0.0,0.0,0.009355,0.051552,0.11875,1,0,1,0.0,0.212121,0.206349,0.206349,0.111111,0.1,0.015152,0.156627,0.132812,0.244444,0.166667,0.0,0.0,0.0,0.2,0.818182,0.363636,0.0,1.0,1.0,0.704225,0.272727
9,9,0.151899,0.0,0.221184,0.116827,1,8,51,0.0,1,0.001391,2,0,60,11,0.02539,0.0,0.454545,0.452489,0.077778,0.0,0.0,0.001011,0.034293,0.38125,0,0,1,0.0,0.121212,0.079365,0.079365,0.063492,0.3,0.181818,0.072289,0.304688,0.111111,0.077778,0.0,0.0,0.0,0.266667,0.727273,0.909091,0.0,1.0,1.0,0.690141,0.0


(200000, 50)

Unnamed: 0,id,loanAmnt,term,interestRate,installment,grade,subGrade,employmentTitle,employmentLength,homeOwnership,annualIncome,verificationStatus,purpose,postCode,regionCode,dti,delinquency_2years,ficoRangeLow,ficoRangeHigh,openAcc,pubRec,pubRecBankruptcies,revolBal,revolUtil,totalAcc,initialListStatus,applicationType,title,n0,n1,n2,n3,n4,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14,issueDate_year,issueDate_month,issueDate_day,issueDate_weekday,issueDate_is_weekend,earliesCreditLine_year,earliesCreditLine_month
800000,800000,0.341772,0.0,0.221184,0.261119,1,7,6368,1.0,0,0.007273,0,0,164,21,0.01156,0.025641,0.409091,0.40724,0.188889,0.0,0.0,0.00339,0.034405,0.16875,0,0,1,0.019608,0.121212,0.095238,0.095238,0.095238,0.114286,0.030303,0.180723,0.148438,0.133333,0.188889,0.0,0.0,0.025641,0.1,0.636364,0.545455,0.0,0.166667,0.0,0.422535,0.909091
800001,800001,0.493671,1.0,0.363707,0.269265,2,14,51569,1.0,0,0.004546,0,2,236,8,0.0224,0.051282,0.204545,0.20362,0.055556,0.0,0.0,0.00308,0.063432,0.075,0,0,6,0.039216,0.030303,0.047619,0.047619,0.015873,0.014286,0.022727,0.036145,0.070312,0.066667,0.055556,0.0,0.0,0.051282,0.066667,0.727273,0.545455,0.0,0.333333,0.0,0.802817,0.545455
800002,800002,0.291139,0.0,0.571651,0.253848,3,18,20756,0.2,1,0.005455,2,0,527,20,0.0345,0.0,0.386364,0.384615,0.133333,0.0,0.0,0.000334,0.019724,0.25625,1,0,1,0.0,0.030303,0.063492,0.063492,0.015873,0.014286,0.272727,0.060241,0.046875,0.088889,0.133333,0.0,0.0,0.0,0.233333,0.818182,0.818182,0.0,0.833333,1.0,0.873239,0.636364
800003,800003,0.43038,1.0,0.350467,0.232754,2,13,592,0.4,0,0.003364,1,4,249,11,0.01495,0.0,0.272727,0.271493,0.111111,0.011628,0.083333,0.003528,0.058613,0.1,0,0,5,0.0,0.060606,0.031746,0.031746,0.063492,0.1,0.015152,0.096386,0.109375,0.044444,0.111111,0.0,0.0,0.0,0.1,0.636364,0.909091,0.0,0.833333,1.0,0.816901,0.545455
800004,800004,0.873418,0.0,0.458723,0.72611,3,15,290776,0.0,1,0.007273,1,0,116,8,0.02597,0.0,0.272727,0.271493,0.211111,0.0,0.0,0.011429,0.039897,0.125,0,0,1,0.0,0.242424,0.174603,0.174603,0.142857,0.157143,0.022727,0.192771,0.140625,0.244444,0.211111,0.0,0.0,0.0,0.033333,0.909091,0.818182,0.0,1.0,1.0,0.788732,1.0
800005,800005,0.392405,0.0,0.000389,0.274966,0,0,227,1.0,0,0.008182,0,0,481,8,0.01628,0.0,0.681818,0.678733,0.188889,0.0,0.0,0.002137,0.01356,0.20625,0,0,1,0.0,0.090909,0.079365,0.079365,0.126984,0.2,0.045455,0.156627,0.195312,0.111111,0.188889,0.0,0.0,0.0,0.1,0.909091,0.363636,0.0,0.0,0.0,0.788732,0.545455
800006,800006,0.620253,1.0,0.376947,0.341252,2,13,12,1.0,1,0.011501,2,0,311,24,0.02394,0.0,0.431818,0.429864,0.155556,0.0,0.0,0.010832,0.045164,0.14375,0,0,1,0.0,0.151515,0.111111,0.111111,0.095238,0.085714,0.045455,0.120482,0.132812,0.155556,0.155556,0.0,0.0,0.0,0.033333,0.818182,0.909091,0.0,0.166667,0.0,0.760563,0.0
800007,800007,0.101266,0.0,0.079439,0.073856,0,3,3117,0.6,0,0.004546,0,4,82,15,0.01432,0.0,0.568182,0.565611,0.088889,0.0,0.0,0.00179,0.024767,0.09375,1,1,5,0.0,0.090909,0.047619,0.047619,0.063492,0.085714,0.060606,0.048193,0.0625,0.066667,0.088889,0.0,0.0,0.0,0.033333,0.909091,0.818182,0.0,1.0,1.0,0.887324,0.090909
800008,800008,0.291139,1.0,0.437695,0.165351,3,16,327,0.3,1,0.005273,1,0,139,0,0.02417,0.051282,0.25,0.248869,0.155556,0.0,0.0,0.000476,0.020397,0.16875,0,0,1,0.019608,0.121212,0.063492,0.063492,0.063492,0.071429,0.159091,0.060241,0.0625,0.088889,0.155556,0.0,0.0,0.0,0.133333,0.727273,0.818182,0.0,0.5,0.0,0.84507,0.363636
800009,800009,0.405063,0.0,0.092679,0.294279,0,3,7325,1.0,0,0.012728,0,4,77,26,0.01493,0.0,0.409091,0.40724,0.111111,0.0,0.0,0.005826,0.084725,0.1625,0,0,5,0.039216,0.060606,0.063492,0.063492,0.031746,0.1,0.075758,0.048193,0.101562,0.088889,0.111111,0.0,0.0,0.0,0.1,0.636364,0.636364,0.0,0.666667,0.0,0.676056,0.727273


(800000,)

0    1.0
1    0.0
2    0.0
3    0.0
4    0.0
5    0.0
6    0.0
7    0.0
8    1.0
9    0.0
Name: isDefault, dtype: float64

In [121]:
def cv_model(clf, train_x, train_y, test_x, clf_name):
    folds = 5
    seed = 2020
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)

    train = np.zeros(train_x.shape[0])
    test = np.zeros(test_x.shape[0])

    cv_scores = []

    for i, (train_index, valid_index) in enumerate(kf.split(train_x, train_y)):
        print('************************************ {} ************************************'.format(str(i+1)))
        trn_x, trn_y, val_x, val_y = train_x.iloc[train_index], train_y[train_index], train_x.iloc[valid_index], train_y[valid_index]

        if clf_name == "lgb":
            train_matrix = clf.Dataset(trn_x, label=trn_y)
            valid_matrix = clf.Dataset(val_x, label=val_y)

            params = {
                'boosting_type': 'gbdt',
                'objective': 'binary',
                'metric': 'auc',
                'min_child_weight': 5,
                'num_leaves': 2 ** 5,
                'lambda_l2': 10,
                'feature_fraction': 0.8,
                'bagging_fraction': 0.8,
                'bagging_freq': 4,
                'learning_rate': 0.1,
                'seed': 2020,
                'nthread': 28,
                'n_jobs':24,
                'silent': True,
                'verbose': -1,
            }

            model = clf.train(params, train_matrix, 50000, valid_sets=[train_matrix, valid_matrix], 
                              callbacks = [log_evaluation(period=100), early_stopping(stopping_rounds=200)])
            # model = clf.train(params, train_matrix, 50000, valid_sets=[train_matrix, valid_matrix])
            val_pred = model.predict(val_x, num_iteration=model.best_iteration)
            test_pred = model.predict(test_x, num_iteration=model.best_iteration)
            
            # print(list(sorted(zip(features, model.feature_importance("gain")), key=lambda x: x[1], reverse=True))[:20])
                
        if clf_name == "xgb":
            train_matrix = clf.DMatrix(trn_x , label=trn_y)
            valid_matrix = clf.DMatrix(val_x , label=val_y)
            if i == 0:
                # 如果重复给test更改为DMatrix，会报错。因为DMatrix不支持输入DMatrix类型的数据。
                test_x = clf.DMatrix(data=test_x)
            
            
            params = {'booster': 'gbtree',
                      'objective': 'binary:logistic',
                      'eval_metric': 'auc',
                      'gamma': 1,
                      'min_child_weight': 1.5,
                      'max_depth': 5,
                      'lambda': 10,
                      'subsample': 0.7,
                      'colsample_bytree': 0.7,
                      'colsample_bylevel': 0.7,
                      'eta': 0.04,
                      'tree_method': 'exact',
                      'seed': 2020,
                      'nthread': 36,
                      "silent": True,
                      }
            
            watchlist = [(train_matrix, 'train'),(valid_matrix, 'eval')]
            
            model = clf.train(params, train_matrix, num_boost_round=50000, evals=watchlist, verbose_eval=200, early_stopping_rounds=200)
            val_pred  = model.predict(valid_matrix)
            test_pred = model.predict(test_x)
                 
        if clf_name == "cat":
            params = {'learning_rate': 0.05, 'depth': 5, 'l2_leaf_reg': 10, 'bootstrap_type': 'Bernoulli',
                      'od_type': 'Iter', 'od_wait': 50, 'random_seed': 11, 'allow_writing_files': False}
            
            model = clf(iterations=20000, **params)
            model.fit(trn_x, trn_y, eval_set=(val_x, val_y),
                      cat_features=[], use_best_model=True, verbose=500)
            
            val_pred  = model.predict(val_x)
            test_pred = model.predict(test_x)
            
        train[valid_index] = val_pred
        test += test_pred / kf.n_splits
        cv_scores.append(roc_auc_score(val_y, val_pred))
        
        print(cv_scores)
        
    print("%s_scotrainre_list:" % clf_name, cv_scores)
    print("%s_score_mean:" % clf_name, np.mean(cv_scores))
    print("%s_score_std:" % clf_name, np.std(cv_scores))
    return train, test

In [122]:
def lgb_model(x_train, y_train, x_test):
    # https://www.heywhale.com/mw/project/6585325cdcad99bb0a1f4686
    lgb_train, lgb_test = cv_model(lgb, x_train, y_train, x_test, "lgb")
    return lgb_train, lgb_test

def xgb_model(x_train, y_train, x_test):
    # https://www.cnblogs.com/Mephostopheles/p/18397154
    xgb_train, xgb_test = cv_model(xgb, x_train, y_train, x_test, "xgb")
    return xgb_train, xgb_test

def cat_model(x_train, y_train, x_test):
    cat_train, cat_test = cv_model(CatBoostRegressor, x_train, y_train, x_test, "cat")
    return cat_train, cat_test

In [123]:
# lgb_train, lgb_test = lgb_model(train_data, target, test_data)
xgb_train, xgb_test = xgb_model(train_data, target, test_data)
# cat_train, cat_test = cat_model(train_data, target, test_data)

************************************ 1 ************************************


Parameters: { "silent" } are not used.



[0]	train-auc:0.69969	eval-auc:0.70089
[200]	train-auc:0.72874	eval-auc:0.72579
[400]	train-auc:0.73605	eval-auc:0.72915
[600]	train-auc:0.74094	eval-auc:0.73051
[800]	train-auc:0.74481	eval-auc:0.73114
[1000]	train-auc:0.74859	eval-auc:0.73167
[1200]	train-auc:0.75195	eval-auc:0.73202
[1400]	train-auc:0.75501	eval-auc:0.73222
[1600]	train-auc:0.75793	eval-auc:0.73248
[1800]	train-auc:0.76079	eval-auc:0.73256
[1933]	train-auc:0.76258	eval-auc:0.73254
[0.7325413655498854]
************************************ 2 ************************************


Parameters: { "silent" } are not used.



[0]	train-auc:0.70092	eval-auc:0.69754
[200]	train-auc:0.72942	eval-auc:0.72167
[400]	train-auc:0.73661	eval-auc:0.72528
[600]	train-auc:0.74143	eval-auc:0.72667
[800]	train-auc:0.74537	eval-auc:0.72747
[1000]	train-auc:0.74902	eval-auc:0.72790
[1200]	train-auc:0.75249	eval-auc:0.72835
[1400]	train-auc:0.75573	eval-auc:0.72852
[1600]	train-auc:0.75874	eval-auc:0.72867
[1800]	train-auc:0.76160	eval-auc:0.72881
[2000]	train-auc:0.76431	eval-auc:0.72876
[2022]	train-auc:0.76463	eval-auc:0.72876
[0.7325413655498854, 0.7287646846428065]
************************************ 3 ************************************


Parameters: { "silent" } are not used.



[0]	train-auc:0.70012	eval-auc:0.69947
[200]	train-auc:0.72863	eval-auc:0.72515
[400]	train-auc:0.73582	eval-auc:0.72852
[600]	train-auc:0.74084	eval-auc:0.73012
[800]	train-auc:0.74493	eval-auc:0.73090
[1000]	train-auc:0.74849	eval-auc:0.73140


In [72]:
sample_result = pd.read_csv("./data/sample_submit.csv")
sample_result["isDefault"] = xgb_test
sample_result.to_csv("./data/sample_result_xgb_.csv", index=False)