In [1]:
import lightgbm as lgbm
import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
import matplotlib
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import pickle
from sklearn import preprocessing
from sklearn.metrics import roc_curve
from scipy import stats
from scipy.stats import zscore
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold as SKF
from sklearn import metrics
# from fancyimpute import *

from utils import *
%matplotlib inline
%load_ext autoreload
%autoreload 2

dtype = load_obj('dict_dtype')
my_dict = load_obj('my_dict')

In [2]:
data = pd.read_csv("atec_anti_fraud_train.csv",parse_dates=['date'], dtype = dtype)
test = pd.read_csv("atec_anti_fraud_test_b.csv",parse_dates=['date'], dtype = dtype)

data = process_dates(data, 'date')
test = process_dates(test, 'date')

In [3]:
# set the label of unlabeled data to 1
data.loc[data['label']==-1,'label'] = 1

#data = data[data['label']!=-1]

# sort the data with date
data.sort_values('date',inplace=True)

print(data.shape)

(994731, 304)


### xgboost without 5-fold (local: 0.409, lb: 0.309)

In [None]:
# set up the parameters
params = {'max_depth': 6, 'eta': 0.1, 'silent': 1, 'objective': 'binary:logistic'}
params['nthread'] = 4
params['eval_metric'] = ['logloss','auc']
params["scale_pos_weight"] = 2
params["subsample"] = 0.7
params["colsample_bytree "] = 0.8
num_rounds = 10000
early_stopping_rounds = 50

# set up the random seed for testing
params["seed"] = 6

xgtrain = xgb.DMatrix(data.drop(['id','date','label','Day'],axis=1).values, data.label.values)

evallist = [(xgtrain, 'train'), (xgvalid, 'valid')]
bst = xgb.train(params,
                xgtrain,
                num_rounds,
                evallist,
                # feval=my_score3,
                early_stopping_rounds=early_stopping_rounds)
# save the model
bst.save_model('model_log/single_1/0001.model')

cv_pred += bst.predict(xgtest)
valid_pred += bst.predict(xgvalid, ntree_limit=bst.best_iteration)

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))
pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0003.csv', index=False)

### 5-fold time dependent cv train on first 80% data xgboost (local:0.42, lb:0.3383)

In [None]:
# use the last 20% data as validation set
valid = data.iloc[data.shape[0]*4//5:,:]
data = data.iloc[:data.shape[0]*4//5,:]

print(data.shape[0] + valid.shape[0], data.shape[1])

test_id = test.id
cv_pred = np.zeros(len(test_id))
xgtest = xgb.DMatrix(test.drop(['id','date','Day'],axis=1).values)

valid_pred = np.zeros(valid.shape[0])
valid_id = valid.id
xgvalid = xgb.DMatrix(valid.drop(['id','date','label','Day'],axis=1).values, valid.label.values)

In [None]:
for i in range(0,9,2):
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    # print(day_target)
    train = data[data.Day.isin(day_target)]
    
    # set up the parameters
    params = {'max_depth': 6, 'eta': 0.1, 'silent': 1, 'objective': 'binary:logistic'}
    params['nthread'] = 4
    params['eval_metric'] = ['logloss','auc']
    params["scale_pos_weight"] = 2
    params["subsample"] = 0.7
    params["colsample_bytree "] = 0.8
    num_rounds = 10000
    early_stopping_rounds = 50

    # set up the random seed for testing
    params["seed"] = 6
    
    xgtrain = xgb.DMatrix(train.drop(['id','date','label','Day'],axis=1).values, train.label.values)
    
    evallist = [(xgtrain, 'train'), (xgvalid, 'valid')]
    bst = xgb.train(params,
                    xgtrain,
                    num_rounds,
                    evallist,
                    #feval=my_score3,
                    early_stopping_rounds=early_stopping_rounds)
    # save the model
    bst.save_model('model_log/multi_2/part_{}.model'.format(i//2))
    
    valid_pred += bst.predict(xgvalid, ntree_limit=bst.best_iteration)
    cv_pred += bst.predict(xgtest, ntree_limit=bst.best_iteration)

valid_pred /= 5    
cv_pred /= 5

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))

pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0002.csv', index=False)

### oversampling + 5-fold time dependent cv train on first 80% data xgboost (local: 0.418)

In [None]:
# use the last 20% data as validation set
valid = data.iloc[data.shape[0]*4//5:,:]
data = data.iloc[:data.shape[0]*4//5,:]
# positive = data[data['label']==1]
# data = data[data['label']==0]

print(data.shape[0] + valid.shape[0], data.shape[1])

test_id = test.id
cv_pred = np.zeros(len(test_id))
xgtest = xgb.DMatrix(test.drop(['id','date','Day'],axis=1).values)

valid_pred = np.zeros(valid.shape[0])
valid_id = valid.id
xgvalid = xgb.DMatrix(valid.drop(['id','date','label','Day'],axis=1).values, valid.label.values)

In [None]:
for i in range(0,9,2):
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    
    train = data[data.Day.isin(day_target)]
#     train = train.append(positive)
    
    # set up the parameters
    params = {'max_depth': 6, 'eta': 0.1, 'silent': 1, 'objective': 'binary:logistic'}
    params['nthread'] = 4
    params['eval_metric'] = ['logloss','auc']
    params["scale_pos_weight"] = 2
#     params["subsample"] = 0.7
    params["colsample_bytree "] = 0.8
    num_rounds = 10000
    early_stopping_rounds = 50

    # set up the random seed for testing
    params["seed"] = 6
    
    xgtrain = xgb.DMatrix(train.drop(['id','date','label','Day'],axis=1).values, train.label.values)
    
    evallist = [(xgtrain, 'train'), (xgvalid, 'valid')]
    bst = xgb.train(params,
                    xgtrain,
                    num_rounds,
                    evallist,
                    #feval=my_score3,
                    early_stopping_rounds=early_stopping_rounds)
    # save the model
    bst.save_model('model_log/multi_2/part_{}.model'.format(i//2))
    
    valid_pred += bst.predict(xgvalid, ntree_limit=bst.best_iteration)
    cv_pred += bst.predict(xgtest, ntree_limit=bst.best_iteration)

valid_pred /= 5    
cv_pred /= 5

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))

pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0007.csv', index=False)

### Train on the whole dataset xgboost 5-fold (no local, lb:0.3394)

In [None]:
test_id = test.id
cv_pred = np.zeros(len(test_id))
xgtest = xgb.DMatrix(test.drop(['id','date','Day'],axis=1).values)

In [None]:
tuned_rounds = [226, 327, 136, 418, 289]
for i in range(0,9,2):
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    # print(day_target)
    train = data[data.Day.isin(day_target)]
    
    # set up the parameters
    params = {'max_depth': 6, 'eta': 0.1, 'silent': 1, 'objective': 'binary:logistic'}
    params['nthread'] = 4
    params['eval_metric'] = ['logloss','auc']
    params["scale_pos_weight"] = 2
    params["subsample"] = 0.7
    params["colsample_bytree "] = 0.8
    num_rounds = tuned_rounds[i//2]
    early_stopping_rounds = 50

    # set up the random seed for testing
    params["seed"] = 6
    
    xgtrain = xgb.DMatrix(train.drop(['id','date','label','Day'],axis=1).values, train.label.values)
    
    evallist = [(xgtrain, 'train')]
    bst = xgb.train(params,
                    xgtrain,
                    num_rounds,
                    evallist,
                    #feval=my_score3,
                    early_stopping_rounds=early_stopping_rounds)
    # save the model
    bst.save_model('model_log/multi_3/part_{}.model'.format(i//2))
    
    cv_pred += bst.predict(xgtest, ntree_limit=bst.best_iteration)

cv_pred /= 5
pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0002.csv', index=False)

### oversampling + 5-fold time dependent cv train on first 80% data lightGBM (local: 0006.csv->0.4155) 

In [None]:
# use the last 20% data as validation set
valid = data.iloc[data.shape[0]*4//5:,:]
data = data.iloc[:data.shape[0]*4//5,:]
# positive = data[data['label']==1]
# data = data[data['label']==0]

# print(data.shape[0] + valid.shape[0], data.shape[1])

test_id = test.id
cv_pred = np.zeros(len(test_id))

valid_pred = np.zeros(valid.shape[0])
valid_id = valid.id

In [None]:
learning_rate = 0.1
num_leaves = 15
min_data_in_leaf = 2000
feature_fraction = 0.6
num_boost_round = 10000
params = {"objective": "binary",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "max_bin": 256,
          "feature_fraction": feature_fraction,
          "verbosity": 0,
          "drop_rate": 0.1,
          # "is_unbalance": True,
          "max_drop": 50,
          "min_child_samples": 10,
          "min_child_weight": 150,
          "min_split_gain": 0,
          "subsample": 0.9,
          "scale_pos_weight": 10
          }
for i in range(0,9,2):
    params['seed'] = i
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    
    train = data[data.Day.isin(day_target)]
#     train = train.append(positive)
    
    dtrain = lgbm.Dataset(train.drop(['id','date','label','Day'],axis=1).values, train.label.values)
    dvalid = lgbm.Dataset(valid.drop(['id','date','label','Day'],axis=1).values, valid.label.values, reference=dtrain)
    bst = lgbm.train(params, dtrain, num_boost_round, valid_sets=dvalid, feval=my_score2, verbose_eval=100,
                     early_stopping_rounds=100)
    # make the predictions
    cv_pred += bst.predict(test.drop(['id','date','Day'],axis=1).values, num_iteration=bst.best_iteration)
    valid_pred += bst.predict(valid.drop(['id','date','label','Day'],axis=1).values, num_iteration=bst.best_iteration)

valid_pred /= 5    
cv_pred /= 5

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))

pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0008.csv', index=False)

### 5-fold time dependent cv train on whole data lightGBM ( no local, lb: 0.33) 

In [4]:
# use the last 20% data as validation set
# valid = data.iloc[data.shape[0]*4//5:,:]
# data = data.iloc[:data.shape[0]*4//5,:]

# print(data.shape[0] + valid.shape[0], data.shape[1])

test_id = test.id
cv_pred = np.zeros(len(test_id))

# valid_pred = np.zeros(valid.shape[0])
# valid_id = valid.id

In [5]:
learning_rate = 0.1
num_leaves = 15
min_data_in_leaf = 2000
feature_fraction = 0.6
num_boost_rounds = [166, 125, 266, 159, 266]
params = {"objective": "binary",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "max_bin": 256,
          "feature_fraction": feature_fraction,
          "verbosity": 0,
          "drop_rate": 0.1,
          "is_unbalance": False,
          "max_drop": 50,
          "min_child_samples": 10,
          "min_child_weight": 150,
          "min_split_gain": 0,
          "subsample": 1,
          #"metric": 'auc'
          }
for i in range(0,9,2):
    params['seed'] = i
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    # print(day_target)
    train = data[data.Day.isin(day_target)]
    
    dtrain = lgbm.Dataset(train.drop(['id','date','label','Day'],axis=1).values, train.label.values)
    bst = lgbm.train(params, dtrain, num_boost_rounds[i//2],\
                     #valid_sets=dvalid,\
                     feval=my_score2,\
                     verbose_eval=100)
    # make the predictions
    cv_pred += bst.predict(test.drop(['id','date','Day'],axis=1).values, num_iteration=bst.best_iteration)

cv_pred /= 5

# print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))

pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0011.csv', index=False)

### 5-fold time dependent cv train on first 80% data lightGBM (local: 0.415, lb: 0.3413) 

#### training process

In [4]:
# use the last 20% data as validation set
valid = data.iloc[data.shape[0]*4//5:,:]
data = data.iloc[:data.shape[0]*4//5,:]

# print(data.shape[0] + valid.shape[0], data.shape[1])

test_id = test.id
cv_pred = np.zeros(len(test_id))

valid_pred = np.zeros(valid.shape[0])
#valid_id = valid.id

In [5]:
print(data.date.max(), valid.date.max())

2017-10-24 00:00:00 2017-11-05 00:00:00


In [8]:
learning_rate = 0.1
num_leaves = 15
min_data_in_leaf = 2000
feature_fraction = 0.6
num_boost_round = 10000
params = {"objective": "binary",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "max_bin": 256,
          "feature_fraction": feature_fraction,
          "verbosity": 0,
          "drop_rate": 0.1,
          "is_unbalance": False,
          "max_drop": 50,
          "min_child_samples": 10,
          "min_child_weight": 150,
          "min_split_gain": 0,
          "subsample": 0.9,
          #"metric": 'auc'
          }
for i in range(0,9,2):
    params['seed'] = i
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    # print(day_target)
    train = data[data.Day.isin(day_target)]
    
    dtrain = lgbm.Dataset(train.drop(['id','date','label','Day'],axis=1).values, train.label.values)
    dvalid = lgbm.Dataset(valid.drop(['id','date','label','Day'],axis=1).values, valid.label.values, reference=dtrain)
    bst = lgbm.train(params, dtrain, num_boost_round, valid_sets=dvalid,\
                     feval=my_score2,\
                     verbose_eval=100,
                     early_stopping_rounds=100)
    # make the predictions
    cv_pred += bst.predict(test.drop(['id','date','Day'],axis=1).values, num_iteration=bst.best_iteration)
    valid_pred += bst.predict(valid.drop(['id','date','label','Day'],axis=1).values, num_iteration=bst.best_iteration)

valid_pred /= 5    
cv_pred /= 5

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))

# pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0009.csv', index=False)

Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.0391239	valid_0's score: 0.408669
[200]	valid_0's binary_logloss: 0.0382933	valid_0's score: 0.409345
Early stopping, best iteration is:
[166]	valid_0's binary_logloss: 0.0383707	valid_0's score: 0.4139
Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.0393144	valid_0's score: 0.403379
[200]	valid_0's binary_logloss: 0.0386405	valid_0's score: 0.40047
Early stopping, best iteration is:
[125]	valid_0's binary_logloss: 0.0388871	valid_0's score: 0.408022
Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.0385751	valid_0's score: 0.411784
[200]	valid_0's binary_logloss: 0.0378128	valid_0's score: 0.414575
[300]	valid_0's binary_logloss: 0.0377245	valid_0's score: 0.414487
Early stopping, best iteration is:
[266]	valid_0's binary_logloss: 0.0376993	valid_0's score: 0.417896
Training until validation sco

#### analysis on false negatives

In [9]:
train_cate = pd.read_csv("obj/id_cate_train.csv")
valid_pred = pd.DataFrame({'id': valid.id, 'score': valid_pred, 'label':  valid.label})
valid_pred = valid_pred.merge(train_cate, how='inner', on='id')
valid_pred.head()

Unnamed: 0,id,score,label,cate
0,96c11ea394631bcc2ae77455cd10cc3dc147f78632921e...,0.000395,0,2
1,eafae698fc60c7a192a1d13074215a25ac24a2e47e12c2...,0.000187,0,0
2,02dc77c122c4036ccab9f003aa760865b6f79632f930e5...,0.000227,0,2
3,e8c3a7fbd718708534a436d217a76c86ac2e0683b65ae1...,0.003345,0,7
4,d89c7b05241c510b3b24371fccd7be3667099f89f4a2ec...,0.248242,1,7


In [11]:
percent = valid_pred.label.values.sum()/valid_pred.shape[0]

threshold = valid_pred.score.quantile(1-percent)
valid_pred['pred_label'] = valid_pred.score.apply(lambda x: int(x>threshold))
valid_pred['isCorrect'] = valid_pred['label']==valid_pred['pred_label']
valid_pred['isCorrect'] = valid_pred['isCorrect'].astype('int')
valid_pred.head()

0.01710505813106003

In [24]:
valid_pred[valid_pred['isCorrect']==0].groupby('cate').id.count()

cate
0      935
1       29
2      431
3        4
4        1
5       81
6        3
7     1303
8       12
9       51
10      15
11      16
13       1
14      18
18      10
21       1
Name: id, dtype: int64

In [25]:
valid_pred.groupby('cate').id.count()

cate
0     86839
1      5361
2     41173
3      9274
4       319
5      9410
6      3603
7     24768
8     10426
9      1549
10     1934
11      602
12       27
13      196
14     3010
15       41
16      120
17       37
18      151
19        4
20       55
21        2
22        4
23       16
24       11
25        3
26        3
28        6
30        1
31        1
32        1
Name: id, dtype: int64

In [27]:
valid_pred[valid_pred['label']==1][valid_pred['isCorrect']==0].groupby('cate').id.count()

  """Entry point for launching an IPython kernel.


cate
0     396
1      11
2     160
3       4
5      64
6       3
7     738
8      12
9      28
10     12
11     14
13      1
14     12
18      1
Name: id, dtype: int64

In [28]:
valid_pred[valid_pred['label']==1].groupby('cate').id.count()

cate
0     1300
1       21
2      486
3        4
5       64
6        3
7     1415
8       19
9       35
10      15
11      16
13       9
14      15
18       1
Name: id, dtype: int64

In [29]:
display_all(valid_pred[valid_pred['label']==1][valid_pred['isCorrect']==0])

  """Entry point for launching an IPython kernel.


Unnamed: 0,id,score,label,cate,pred_label,isCorrect
4,d89c7b05241c510b3b24371fccd7be3667099f89f4a2ec...,0.248242,1,7,0,0
124,694cf434f6355cfc7856a1568ac3ff488609fadcb5fe8a...,0.273763,1,7,0,0
244,40eb54700d247e26fb28f56a18ce9b6aa5254b7f3d41a4...,0.050994,1,14,0,0
254,d9112cda7ae661fd2dad10a5f929e61d5fa17fdef124bb...,0.002252,1,7,0,0
293,30821797d2f24d7f1dc2dcce21cc3c96be719928abd76e...,0.226805,1,7,0,0
392,27f2a4d7eaeb69a2d45f8a5a143d48677afd456a9dcffa...,0.024728,1,0,0,0
766,678e3ef9449e3d53153660ea0d9411760e2cd83390a4ba...,0.036332,1,7,0,0
875,9333010380982d928c22dc3493d0278612166f010695bd...,0.086421,1,7,0,0
1005,34f99ed9074858c2f5ae38f2e7df27619c6254144f426a...,0.181416,1,0,0,0
1085,52dafb68c5f8c8e36730f1705843422e2c418ec5867df9...,0.174863,1,14,0,0


#### add weights depending on cate (failed)

In [31]:
data = pd.read_csv("atec_anti_fraud_train.csv",parse_dates=['date'], dtype = dtype)
data = data.merge(train_cate, how='inner', on='id')
data = data[['id', 'label', 'cate']]

tmp1 = data[data['label']==0].groupby('cate').id.count()
tmp2 = data[data['label']==1].groupby('cate').id.count()

df_tmp = pd.concat([tmp1,tmp2],axis=1).fillna(0)
df_tmp.columns = ['total', 'num_positive']

df_tmp['cate'] = df_tmp.index
df_tmp['weight'] = df_tmp['num_positive']/df_tmp['total']
df_tmp['weight'] = df_tmp['weight']/df_tmp['weight'].sum()

pred_valid_withWeight = pd.merge(valid_pred, df_tmp, how='left', on='cate')
pred_valid_withWeight['weighted_score'] = pred_valid_withWeight.score.multiply(pred_valid_withWeight.weight)

print('Final score of validation is {}'.format(my_score1(valid.label.values, pred_valid_withWeight.weighted_score.values)))

In [105]:
pred_valid_withWeight['weighted_score'] = pred_valid_withWeight.score

pred_valid_withWeight.loc[pred_valid_withWeight.cate==7,'weighted_score'] = pred_valid_withWeight.loc[pred_valid_withWeight.cate==7,'weighted_score']
pred_valid_withWeight.loc[pred_valid_withWeight.cate==0,'weighted_score'] = pred_valid_withWeight.loc[pred_valid_withWeight.cate==0,'weighted_score']

In [106]:
print('Final score of validation is {}'.format(my_score1(valid.label.values, pred_valid_withWeight.weighted_score.values)))

Final score of validation is 0.268939171319424


#### just focus on category 7

In [3]:
train_cate = pd.read_csv("obj/id_cate_train.csv")
data = pd.read_csv("atec_anti_fraud_train.csv",parse_dates=['date'], dtype = dtype)
data.loc[data['label']==-1,'label'] = 1
data = process_dates(data, 'date')
data = data.merge(train_cate, how='inner', on='id')
data.sort_values('date',inplace=True)

In [4]:
temp = data[data['cate']==7].head(1)\
    .drop(['id','label','date', 'cate', 'Day', 'Dayofweek', 'Is_month_end', 'Is_month_start'], axis=1).values[0]
temp_list = list()
for j in range(297):
    if np.isnan(temp[j]):
        temp_list.append(0)
    else:
        temp_list.append(1)

feature_list = list()
for i in range(297):
    if temp_list[i]:
        feature_list.append('f{}'.format(i+1))

In [5]:
valid = data.iloc[data.shape[0]*4//5:,:][['id','label','date', 'cate', 'Day', 'Dayofweek', 'Is_month_end', 'Is_month_start']+feature_list]
data = data.iloc[:data.shape[0]*4//5,:][['id','label','date', 'cate', 'Day', 'Dayofweek', 'Is_month_end', 'Is_month_start']+feature_list]
valid = valid[valid['cate']==7]
#data = data[data['cate']==7]

In [6]:
# initialize the predictions
valid_pred = np.zeros(valid.shape[0])

In [7]:
# set up the parameters
params = {'max_depth': 5, 'eta': 0.009, 'silent': 1, 'objective': 'binary:logistic'}
params['nthread'] = 4
params['eval_metric'] = ['logloss','auc']
params["scale_pos_weight"] = 2
params["subsample"] = 0.5
params["colsample_bytree "] = 0.5
num_rounds = 10000
early_stopping_rounds = 200

# set up the random seed for testing
params["seed"] = 6

xgtrain = xgb.DMatrix(data.drop(['id','date','label','Day','cate'],axis=1).values, data.label.values)
xgvalid = xgb.DMatrix(valid.drop(['id','date','label','Day','cate'],axis=1).values, valid.label.values)

evallist = [(xgtrain, 'train'), (xgvalid, 'valid')]
bst = xgb.train(params,
                xgtrain,
                num_rounds,
                evallist,
                # feval=my_score3,
                early_stopping_rounds=early_stopping_rounds)
# # save the model
# bst.save_model('model_log/single_1/0001.model')

valid_pred += bst.predict(xgvalid, ntree_limit=bst.best_iteration)

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))
# pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0003.csv', index=False)

[0]	train-logloss:0.684848	train-auc:0.874014	valid-logloss:0.686086	valid-auc:0.808284
Multiple eval metrics have been passed: 'valid-auc' will be used for early stopping.

Will train until valid-auc hasn't improved in 200 rounds.
[1]	train-logloss:0.676695	train-auc:0.874631	valid-logloss:0.679138	valid-auc:0.811398
[2]	train-logloss:0.668684	train-auc:0.874739	valid-logloss:0.672319	valid-auc:0.814056
[3]	train-logloss:0.660814	train-auc:0.886912	valid-logloss:0.665545	valid-auc:0.822289
[4]	train-logloss:0.653086	train-auc:0.886881	valid-logloss:0.658985	valid-auc:0.82283
[5]	train-logloss:0.645485	train-auc:0.887078	valid-logloss:0.652497	valid-auc:0.823653
[6]	train-logloss:0.638013	train-auc:0.888848	valid-logloss:0.646004	valid-auc:0.826896
[7]	train-logloss:0.63066	train-auc:0.888533	valid-logloss:0.639737	valid-auc:0.825335
[8]	train-logloss:0.623428	train-auc:0.888684	valid-logloss:0.633567	valid-auc:0.825992
[9]	train-logloss:0.616321	train-auc:0.888851	valid-logloss:0.6274

[91]	train-logloss:0.277893	train-auc:0.906023	valid-logloss:0.337056	valid-auc:0.838845
[92]	train-logloss:0.27558	train-auc:0.906047	valid-logloss:0.335057	valid-auc:0.838922
[93]	train-logloss:0.273293	train-auc:0.906083	valid-logloss:0.333051	valid-auc:0.839414
[94]	train-logloss:0.27103	train-auc:0.906098	valid-logloss:0.331139	valid-auc:0.83948
[95]	train-logloss:0.268795	train-auc:0.906127	valid-logloss:0.329214	valid-auc:0.839313
[96]	train-logloss:0.266585	train-auc:0.906195	valid-logloss:0.327246	valid-auc:0.840025
[97]	train-logloss:0.264398	train-auc:0.906203	valid-logloss:0.325385	valid-auc:0.839863
[98]	train-logloss:0.26224	train-auc:0.906228	valid-logloss:0.323552	valid-auc:0.839796
[99]	train-logloss:0.260108	train-auc:0.906195	valid-logloss:0.321776	valid-auc:0.839733
[100]	train-logloss:0.257999	train-auc:0.906241	valid-logloss:0.319925	valid-auc:0.839806
[101]	train-logloss:0.255913	train-auc:0.906239	valid-logloss:0.318153	valid-auc:0.839955
[102]	train-logloss:0.2

[183]	train-logloss:0.143453	train-auc:0.912499	valid-logloss:0.223325	valid-auc:0.855414
[184]	train-logloss:0.142598	train-auc:0.912486	valid-logloss:0.222619	valid-auc:0.855394
[185]	train-logloss:0.14175	train-auc:0.912462	valid-logloss:0.221929	valid-auc:0.855253
[186]	train-logloss:0.140908	train-auc:0.912464	valid-logloss:0.221225	valid-auc:0.85524
[187]	train-logloss:0.140071	train-auc:0.912506	valid-logloss:0.220547	valid-auc:0.855474
[188]	train-logloss:0.139243	train-auc:0.913772	valid-logloss:0.219901	valid-auc:0.855586
[189]	train-logloss:0.138425	train-auc:0.91384	valid-logloss:0.219267	valid-auc:0.855956
[190]	train-logloss:0.137615	train-auc:0.913895	valid-logloss:0.218609	valid-auc:0.856159
[191]	train-logloss:0.136816	train-auc:0.913942	valid-logloss:0.217961	valid-auc:0.856696
[192]	train-logloss:0.136018	train-auc:0.914048	valid-logloss:0.217304	valid-auc:0.856934
[193]	train-logloss:0.135236	train-auc:0.914067	valid-logloss:0.216693	valid-auc:0.857107
[194]	train-l

[275]	train-logloss:0.091314	train-auc:0.923783	valid-logloss:0.181506	valid-auc:0.865044
[276]	train-logloss:0.090969	train-auc:0.923821	valid-logloss:0.181238	valid-auc:0.865142
[277]	train-logloss:0.090624	train-auc:0.923847	valid-logloss:0.180972	valid-auc:0.865244
[278]	train-logloss:0.09028	train-auc:0.923934	valid-logloss:0.18069	valid-auc:0.865322
[279]	train-logloss:0.089941	train-auc:0.923999	valid-logloss:0.18041	valid-auc:0.865338
[280]	train-logloss:0.089607	train-auc:0.923995	valid-logloss:0.180152	valid-auc:0.865482
[281]	train-logloss:0.089276	train-auc:0.923993	valid-logloss:0.179899	valid-auc:0.86566
[282]	train-logloss:0.088948	train-auc:0.923986	valid-logloss:0.179622	valid-auc:0.866315
[283]	train-logloss:0.088623	train-auc:0.92402	valid-logloss:0.179352	valid-auc:0.86635
[284]	train-logloss:0.0883	train-auc:0.924059	valid-logloss:0.179109	valid-auc:0.866501
[285]	train-logloss:0.087982	train-auc:0.924109	valid-logloss:0.178872	valid-auc:0.866502
[286]	train-loglos

[367]	train-logloss:0.069831	train-auc:0.930787	valid-logloss:0.164873	valid-auc:0.869907
[368]	train-logloss:0.069681	train-auc:0.930827	valid-logloss:0.164769	valid-auc:0.869986
[369]	train-logloss:0.069534	train-auc:0.930906	valid-logloss:0.164641	valid-auc:0.87012
[370]	train-logloss:0.06939	train-auc:0.930969	valid-logloss:0.164536	valid-auc:0.87019
[371]	train-logloss:0.069247	train-auc:0.931023	valid-logloss:0.164425	valid-auc:0.870177
[372]	train-logloss:0.069106	train-auc:0.931059	valid-logloss:0.164326	valid-auc:0.870238
[373]	train-logloss:0.068967	train-auc:0.931121	valid-logloss:0.164186	valid-auc:0.870425
[374]	train-logloss:0.068827	train-auc:0.931199	valid-logloss:0.164093	valid-auc:0.870434
[375]	train-logloss:0.068692	train-auc:0.93124	valid-logloss:0.163995	valid-auc:0.870571
[376]	train-logloss:0.068557	train-auc:0.931322	valid-logloss:0.163926	valid-auc:0.87049
[377]	train-logloss:0.068423	train-auc:0.931676	valid-logloss:0.163805	valid-auc:0.870587
[378]	train-log

[459]	train-logloss:0.060583	train-auc:0.936827	valid-logloss:0.157925	valid-auc:0.875064
[460]	train-logloss:0.060518	train-auc:0.936867	valid-logloss:0.157882	valid-auc:0.875102
[461]	train-logloss:0.060453	train-auc:0.936911	valid-logloss:0.157823	valid-auc:0.875207
[462]	train-logloss:0.060387	train-auc:0.936987	valid-logloss:0.157759	valid-auc:0.875276
[463]	train-logloss:0.060323	train-auc:0.937015	valid-logloss:0.15772	valid-auc:0.875276
[464]	train-logloss:0.060261	train-auc:0.937056	valid-logloss:0.157673	valid-auc:0.875306
[465]	train-logloss:0.060198	train-auc:0.937063	valid-logloss:0.157647	valid-auc:0.875306
[466]	train-logloss:0.060138	train-auc:0.937091	valid-logloss:0.157589	valid-auc:0.875323
[467]	train-logloss:0.060078	train-auc:0.937119	valid-logloss:0.157553	valid-auc:0.875356
[468]	train-logloss:0.060016	train-auc:0.937157	valid-logloss:0.157516	valid-auc:0.8754
[469]	train-logloss:0.059957	train-auc:0.937191	valid-logloss:0.157467	valid-auc:0.875485
[470]	train-l

[551]	train-logloss:0.056411	train-auc:0.939878	valid-logloss:0.154907	valid-auc:0.878049
[552]	train-logloss:0.056379	train-auc:0.939898	valid-logloss:0.154873	valid-auc:0.878146
[553]	train-logloss:0.056349	train-auc:0.939932	valid-logloss:0.15486	valid-auc:0.878191
[554]	train-logloss:0.05632	train-auc:0.939948	valid-logloss:0.154858	valid-auc:0.878156
[555]	train-logloss:0.05629	train-auc:0.939976	valid-logloss:0.154858	valid-auc:0.878145
[556]	train-logloss:0.056261	train-auc:0.940014	valid-logloss:0.154844	valid-auc:0.878155
[557]	train-logloss:0.056232	train-auc:0.940037	valid-logloss:0.154819	valid-auc:0.878191
[558]	train-logloss:0.0562	train-auc:0.940071	valid-logloss:0.154786	valid-auc:0.878215
[559]	train-logloss:0.056169	train-auc:0.940102	valid-logloss:0.154767	valid-auc:0.878252
[560]	train-logloss:0.05614	train-auc:0.940133	valid-logloss:0.154736	valid-auc:0.878281
[561]	train-logloss:0.056107	train-auc:0.940184	valid-logloss:0.154721	valid-auc:0.878308
[562]	train-logl

[643]	train-logloss:0.054302	train-auc:0.94247	valid-logloss:0.153525	valid-auc:0.879964
[644]	train-logloss:0.054287	train-auc:0.942506	valid-logloss:0.153519	valid-auc:0.88001
[645]	train-logloss:0.054274	train-auc:0.942516	valid-logloss:0.153512	valid-auc:0.880034
[646]	train-logloss:0.05426	train-auc:0.942524	valid-logloss:0.153501	valid-auc:0.880045
[647]	train-logloss:0.054242	train-auc:0.94254	valid-logloss:0.153475	valid-auc:0.880134
[648]	train-logloss:0.054226	train-auc:0.942569	valid-logloss:0.153459	valid-auc:0.880185
[649]	train-logloss:0.054209	train-auc:0.942599	valid-logloss:0.153467	valid-auc:0.880211
[650]	train-logloss:0.054193	train-auc:0.942612	valid-logloss:0.153473	valid-auc:0.88018
[651]	train-logloss:0.054177	train-auc:0.942635	valid-logloss:0.153462	valid-auc:0.88026
[652]	train-logloss:0.05416	train-auc:0.942674	valid-logloss:0.15345	valid-auc:0.880279
[653]	train-logloss:0.054141	train-auc:0.942707	valid-logloss:0.153451	valid-auc:0.880241
[654]	train-loglos

[735]	train-logloss:0.053083	train-auc:0.944476	valid-logloss:0.152991	valid-auc:0.881251
[736]	train-logloss:0.053076	train-auc:0.944487	valid-logloss:0.152996	valid-auc:0.881251
[737]	train-logloss:0.053063	train-auc:0.944509	valid-logloss:0.152996	valid-auc:0.881236
[738]	train-logloss:0.053052	train-auc:0.944525	valid-logloss:0.15298	valid-auc:0.881227
[739]	train-logloss:0.053041	train-auc:0.944551	valid-logloss:0.152978	valid-auc:0.881276
[740]	train-logloss:0.053029	train-auc:0.944572	valid-logloss:0.15297	valid-auc:0.88131
[741]	train-logloss:0.05302	train-auc:0.944607	valid-logloss:0.152967	valid-auc:0.8813
[742]	train-logloss:0.053009	train-auc:0.944633	valid-logloss:0.152951	valid-auc:0.881314
[743]	train-logloss:0.052999	train-auc:0.944656	valid-logloss:0.152941	valid-auc:0.88133
[744]	train-logloss:0.052989	train-auc:0.944676	valid-logloss:0.152947	valid-auc:0.881316
[745]	train-logloss:0.05298	train-auc:0.944699	valid-logloss:0.152965	valid-auc:0.881274
[746]	train-loglos

[827]	train-logloss:0.052243	train-auc:0.946247	valid-logloss:0.152538	valid-auc:0.882668
[828]	train-logloss:0.052233	train-auc:0.94627	valid-logloss:0.152528	valid-auc:0.882684
[829]	train-logloss:0.052224	train-auc:0.946292	valid-logloss:0.152533	valid-auc:0.882697
[830]	train-logloss:0.052217	train-auc:0.9463	valid-logloss:0.152536	valid-auc:0.882691
[831]	train-logloss:0.052211	train-auc:0.946323	valid-logloss:0.152543	valid-auc:0.882704
[832]	train-logloss:0.052205	train-auc:0.946341	valid-logloss:0.152535	valid-auc:0.882737
[833]	train-logloss:0.052197	train-auc:0.946362	valid-logloss:0.15254	valid-auc:0.882737
[834]	train-logloss:0.052189	train-auc:0.946379	valid-logloss:0.15251	valid-auc:0.882781
[835]	train-logloss:0.05218	train-auc:0.9464	valid-logloss:0.152504	valid-auc:0.882792
[836]	train-logloss:0.052174	train-auc:0.946415	valid-logloss:0.152512	valid-auc:0.882789
[837]	train-logloss:0.052166	train-auc:0.946427	valid-logloss:0.152511	valid-auc:0.882793
[838]	train-loglos

[919]	train-logloss:0.051599	train-auc:0.947829	valid-logloss:0.15237	valid-auc:0.8835
[920]	train-logloss:0.051595	train-auc:0.947833	valid-logloss:0.152377	valid-auc:0.883515
[921]	train-logloss:0.051589	train-auc:0.947848	valid-logloss:0.152372	valid-auc:0.883545
[922]	train-logloss:0.051581	train-auc:0.947865	valid-logloss:0.15237	valid-auc:0.883539
[923]	train-logloss:0.051576	train-auc:0.947872	valid-logloss:0.15237	valid-auc:0.88354
[924]	train-logloss:0.051573	train-auc:0.94788	valid-logloss:0.152374	valid-auc:0.883539
[925]	train-logloss:0.051566	train-auc:0.9479	valid-logloss:0.152366	valid-auc:0.883573
[926]	train-logloss:0.051559	train-auc:0.947917	valid-logloss:0.152379	valid-auc:0.883591
[927]	train-logloss:0.051554	train-auc:0.947932	valid-logloss:0.152378	valid-auc:0.883601
[928]	train-logloss:0.051546	train-auc:0.947958	valid-logloss:0.152371	valid-auc:0.88362
[929]	train-logloss:0.05154	train-auc:0.947975	valid-logloss:0.152371	valid-auc:0.883608
[930]	train-logloss:0

[1011]	train-logloss:0.051089	train-auc:0.949138	valid-logloss:0.152217	valid-auc:0.884364
[1012]	train-logloss:0.05108	train-auc:0.949158	valid-logloss:0.152215	valid-auc:0.88436
[1013]	train-logloss:0.051075	train-auc:0.949172	valid-logloss:0.152218	valid-auc:0.884363
[1014]	train-logloss:0.051069	train-auc:0.949185	valid-logloss:0.152218	valid-auc:0.884365
[1015]	train-logloss:0.051063	train-auc:0.949197	valid-logloss:0.152212	valid-auc:0.884362
[1016]	train-logloss:0.051055	train-auc:0.949209	valid-logloss:0.152211	valid-auc:0.884368
[1017]	train-logloss:0.051052	train-auc:0.949224	valid-logloss:0.152209	valid-auc:0.884366
[1018]	train-logloss:0.051043	train-auc:0.949236	valid-logloss:0.152214	valid-auc:0.884355
[1019]	train-logloss:0.051038	train-auc:0.949256	valid-logloss:0.152215	valid-auc:0.884358
[1020]	train-logloss:0.051036	train-auc:0.949264	valid-logloss:0.15222	valid-auc:0.884361
[1021]	train-logloss:0.051033	train-auc:0.949272	valid-logloss:0.152236	valid-auc:0.884354
[1

[1102]	train-logloss:0.050651	train-auc:0.950219	valid-logloss:0.152036	valid-auc:0.88506
[1103]	train-logloss:0.050646	train-auc:0.950225	valid-logloss:0.152023	valid-auc:0.885087
[1104]	train-logloss:0.050643	train-auc:0.950231	valid-logloss:0.152031	valid-auc:0.885057
[1105]	train-logloss:0.050639	train-auc:0.950235	valid-logloss:0.152024	valid-auc:0.88506
[1106]	train-logloss:0.050634	train-auc:0.950248	valid-logloss:0.152019	valid-auc:0.885073
[1107]	train-logloss:0.050629	train-auc:0.950257	valid-logloss:0.152022	valid-auc:0.885061
[1108]	train-logloss:0.050621	train-auc:0.950269	valid-logloss:0.152028	valid-auc:0.88505
[1109]	train-logloss:0.050617	train-auc:0.950282	valid-logloss:0.15203	valid-auc:0.885081
[1110]	train-logloss:0.050613	train-auc:0.950292	valid-logloss:0.152037	valid-auc:0.885081
[1111]	train-logloss:0.050609	train-auc:0.950299	valid-logloss:0.152044	valid-auc:0.885068
[1112]	train-logloss:0.050605	train-auc:0.950304	valid-logloss:0.152042	valid-auc:0.885068
[11

[1193]	train-logloss:0.050254	train-auc:0.951208	valid-logloss:0.151934	valid-auc:0.885804
[1194]	train-logloss:0.050249	train-auc:0.951215	valid-logloss:0.151944	valid-auc:0.885786
[1195]	train-logloss:0.050245	train-auc:0.951225	valid-logloss:0.151935	valid-auc:0.885808
[1196]	train-logloss:0.05024	train-auc:0.95124	valid-logloss:0.151915	valid-auc:0.885842
[1197]	train-logloss:0.050237	train-auc:0.951247	valid-logloss:0.151925	valid-auc:0.885842
[1198]	train-logloss:0.050233	train-auc:0.951259	valid-logloss:0.151923	valid-auc:0.885847
[1199]	train-logloss:0.050229	train-auc:0.951263	valid-logloss:0.151926	valid-auc:0.885848
[1200]	train-logloss:0.050225	train-auc:0.951278	valid-logloss:0.151935	valid-auc:0.885832
[1201]	train-logloss:0.05022	train-auc:0.951292	valid-logloss:0.15194	valid-auc:0.885829
[1202]	train-logloss:0.050218	train-auc:0.951297	valid-logloss:0.151935	valid-auc:0.885832
[1203]	train-logloss:0.050214	train-auc:0.951309	valid-logloss:0.151938	valid-auc:0.885827
[12

[1284]	train-logloss:0.049887	train-auc:0.952125	valid-logloss:0.151829	valid-auc:0.886618
[1285]	train-logloss:0.049882	train-auc:0.952138	valid-logloss:0.151828	valid-auc:0.886623
[1286]	train-logloss:0.049878	train-auc:0.952149	valid-logloss:0.151826	valid-auc:0.886616
[1287]	train-logloss:0.049875	train-auc:0.952156	valid-logloss:0.151824	valid-auc:0.886615
[1288]	train-logloss:0.049871	train-auc:0.952163	valid-logloss:0.151823	valid-auc:0.886609
[1289]	train-logloss:0.049868	train-auc:0.952175	valid-logloss:0.151826	valid-auc:0.886614
[1290]	train-logloss:0.049862	train-auc:0.952187	valid-logloss:0.151831	valid-auc:0.88661
[1291]	train-logloss:0.049858	train-auc:0.952204	valid-logloss:0.15183	valid-auc:0.886606
[1292]	train-logloss:0.049855	train-auc:0.95221	valid-logloss:0.151826	valid-auc:0.886608
[1293]	train-logloss:0.049852	train-auc:0.952217	valid-logloss:0.15182	valid-auc:0.886613
[1294]	train-logloss:0.049848	train-auc:0.952225	valid-logloss:0.151823	valid-auc:0.886635
[12

[1375]	train-logloss:0.049545	train-auc:0.952951	valid-logloss:0.151692	valid-auc:0.887104
[1376]	train-logloss:0.049542	train-auc:0.952963	valid-logloss:0.151693	valid-auc:0.887089
[1377]	train-logloss:0.049538	train-auc:0.952972	valid-logloss:0.151683	valid-auc:0.887096
[1378]	train-logloss:0.049537	train-auc:0.952977	valid-logloss:0.151696	valid-auc:0.887091
[1379]	train-logloss:0.049534	train-auc:0.952984	valid-logloss:0.151688	valid-auc:0.887114
[1380]	train-logloss:0.04953	train-auc:0.952993	valid-logloss:0.151693	valid-auc:0.887116
[1381]	train-logloss:0.049528	train-auc:0.952999	valid-logloss:0.151694	valid-auc:0.887115
[1382]	train-logloss:0.049525	train-auc:0.953012	valid-logloss:0.151697	valid-auc:0.887116
[1383]	train-logloss:0.049521	train-auc:0.953026	valid-logloss:0.151687	valid-auc:0.887135
[1384]	train-logloss:0.049518	train-auc:0.953035	valid-logloss:0.151687	valid-auc:0.887138
[1385]	train-logloss:0.049516	train-auc:0.953044	valid-logloss:0.151689	valid-auc:0.887137


[1466]	train-logloss:0.049248	train-auc:0.953752	valid-logloss:0.151522	valid-auc:0.88792
[1467]	train-logloss:0.049246	train-auc:0.953755	valid-logloss:0.151516	valid-auc:0.887925
[1468]	train-logloss:0.049243	train-auc:0.953764	valid-logloss:0.151519	valid-auc:0.88794
[1469]	train-logloss:0.049241	train-auc:0.95377	valid-logloss:0.151514	valid-auc:0.887944
[1470]	train-logloss:0.049234	train-auc:0.953774	valid-logloss:0.151507	valid-auc:0.887944
[1471]	train-logloss:0.049229	train-auc:0.953782	valid-logloss:0.151492	valid-auc:0.887969
[1472]	train-logloss:0.049225	train-auc:0.953795	valid-logloss:0.151489	valid-auc:0.88796
[1473]	train-logloss:0.049223	train-auc:0.953804	valid-logloss:0.151497	valid-auc:0.887953
[1474]	train-logloss:0.04922	train-auc:0.95381	valid-logloss:0.151496	valid-auc:0.887953
[1475]	train-logloss:0.049216	train-auc:0.953822	valid-logloss:0.151504	valid-auc:0.887927
[1476]	train-logloss:0.049211	train-auc:0.953831	valid-logloss:0.151505	valid-auc:0.887923
[1477

[1557]	train-logloss:0.048947	train-auc:0.954441	valid-logloss:0.151311	valid-auc:0.888393
[1558]	train-logloss:0.048946	train-auc:0.95445	valid-logloss:0.151311	valid-auc:0.888417
[1559]	train-logloss:0.048944	train-auc:0.95446	valid-logloss:0.151313	valid-auc:0.888419
[1560]	train-logloss:0.048939	train-auc:0.954469	valid-logloss:0.151303	valid-auc:0.888447
[1561]	train-logloss:0.048937	train-auc:0.954476	valid-logloss:0.151302	valid-auc:0.888451
[1562]	train-logloss:0.048933	train-auc:0.954487	valid-logloss:0.15132	valid-auc:0.888422
[1563]	train-logloss:0.048927	train-auc:0.954495	valid-logloss:0.151299	valid-auc:0.888443
[1564]	train-logloss:0.048922	train-auc:0.954502	valid-logloss:0.151297	valid-auc:0.88844
[1565]	train-logloss:0.04892	train-auc:0.954505	valid-logloss:0.151294	valid-auc:0.888442
[1566]	train-logloss:0.048917	train-auc:0.954511	valid-logloss:0.151294	valid-auc:0.888437
[1567]	train-logloss:0.048914	train-auc:0.954518	valid-logloss:0.151294	valid-auc:0.888454
[156

[1648]	train-logloss:0.048671	train-auc:0.955138	valid-logloss:0.151219	valid-auc:0.888705
[1649]	train-logloss:0.048667	train-auc:0.955145	valid-logloss:0.151226	valid-auc:0.888699
[1650]	train-logloss:0.048662	train-auc:0.955153	valid-logloss:0.151219	valid-auc:0.8887
[1651]	train-logloss:0.048658	train-auc:0.955161	valid-logloss:0.151215	valid-auc:0.888703
[1652]	train-logloss:0.048654	train-auc:0.955175	valid-logloss:0.151206	valid-auc:0.888718
[1653]	train-logloss:0.04865	train-auc:0.955183	valid-logloss:0.151194	valid-auc:0.888741
[1654]	train-logloss:0.048646	train-auc:0.955192	valid-logloss:0.151178	valid-auc:0.888746
[1655]	train-logloss:0.048642	train-auc:0.9552	valid-logloss:0.151178	valid-auc:0.888754
[1656]	train-logloss:0.048639	train-auc:0.955208	valid-logloss:0.15118	valid-auc:0.888748
[1657]	train-logloss:0.048637	train-auc:0.955217	valid-logloss:0.151173	valid-auc:0.888766
[1658]	train-logloss:0.048633	train-auc:0.955225	valid-logloss:0.151179	valid-auc:0.88876
[1659]

[1739]	train-logloss:0.048404	train-auc:0.955835	valid-logloss:0.150912	valid-auc:0.88941
[1740]	train-logloss:0.048402	train-auc:0.955842	valid-logloss:0.150906	valid-auc:0.889414
[1741]	train-logloss:0.048398	train-auc:0.955852	valid-logloss:0.1509	valid-auc:0.88942
[1742]	train-logloss:0.048393	train-auc:0.955862	valid-logloss:0.15091	valid-auc:0.889427
[1743]	train-logloss:0.048391	train-auc:0.955868	valid-logloss:0.150911	valid-auc:0.889419
[1744]	train-logloss:0.048389	train-auc:0.955875	valid-logloss:0.150907	valid-auc:0.889436
[1745]	train-logloss:0.048385	train-auc:0.955886	valid-logloss:0.150909	valid-auc:0.889454
[1746]	train-logloss:0.048382	train-auc:0.955892	valid-logloss:0.150914	valid-auc:0.889448
[1747]	train-logloss:0.04838	train-auc:0.955902	valid-logloss:0.150909	valid-auc:0.889456
[1748]	train-logloss:0.048377	train-auc:0.955907	valid-logloss:0.150906	valid-auc:0.889452
[1749]	train-logloss:0.048375	train-auc:0.955918	valid-logloss:0.150905	valid-auc:0.889455
[1750

[1830]	train-logloss:0.048136	train-auc:0.956475	valid-logloss:0.150815	valid-auc:0.889615
[1831]	train-logloss:0.048132	train-auc:0.95648	valid-logloss:0.150811	valid-auc:0.889611
[1832]	train-logloss:0.048128	train-auc:0.956489	valid-logloss:0.150816	valid-auc:0.889611
[1833]	train-logloss:0.048126	train-auc:0.956496	valid-logloss:0.150814	valid-auc:0.889631
[1834]	train-logloss:0.048123	train-auc:0.956505	valid-logloss:0.15081	valid-auc:0.889629
[1835]	train-logloss:0.048122	train-auc:0.956513	valid-logloss:0.150824	valid-auc:0.889601
[1836]	train-logloss:0.048119	train-auc:0.956516	valid-logloss:0.150827	valid-auc:0.88959
[1837]	train-logloss:0.048118	train-auc:0.95652	valid-logloss:0.150828	valid-auc:0.889591
[1838]	train-logloss:0.048116	train-auc:0.956525	valid-logloss:0.150828	valid-auc:0.889589
[1839]	train-logloss:0.048113	train-auc:0.95653	valid-logloss:0.150824	valid-auc:0.889599
[1840]	train-logloss:0.048109	train-auc:0.956541	valid-logloss:0.150821	valid-auc:0.889598
[184

[1921]	train-logloss:0.047902	train-auc:0.957043	valid-logloss:0.150794	valid-auc:0.889829
[1922]	train-logloss:0.047899	train-auc:0.957051	valid-logloss:0.150795	valid-auc:0.889832
[1923]	train-logloss:0.047897	train-auc:0.957056	valid-logloss:0.150792	valid-auc:0.889833
[1924]	train-logloss:0.047894	train-auc:0.957062	valid-logloss:0.150791	valid-auc:0.889827
[1925]	train-logloss:0.047888	train-auc:0.957072	valid-logloss:0.150786	valid-auc:0.889825
[1926]	train-logloss:0.047886	train-auc:0.957075	valid-logloss:0.150784	valid-auc:0.889835
[1927]	train-logloss:0.047884	train-auc:0.957081	valid-logloss:0.150789	valid-auc:0.889837
[1928]	train-logloss:0.047881	train-auc:0.957089	valid-logloss:0.150783	valid-auc:0.889846
[1929]	train-logloss:0.04788	train-auc:0.957092	valid-logloss:0.150793	valid-auc:0.889856
[1930]	train-logloss:0.047874	train-auc:0.957102	valid-logloss:0.150781	valid-auc:0.889905
[1931]	train-logloss:0.047873	train-auc:0.95711	valid-logloss:0.150778	valid-auc:0.889915
[

[2012]	train-logloss:0.047657	train-auc:0.957624	valid-logloss:0.150691	valid-auc:0.890313
[2013]	train-logloss:0.047654	train-auc:0.957629	valid-logloss:0.150694	valid-auc:0.890319
[2014]	train-logloss:0.047652	train-auc:0.957637	valid-logloss:0.150688	valid-auc:0.890324
[2015]	train-logloss:0.047649	train-auc:0.95764	valid-logloss:0.150685	valid-auc:0.890329
[2016]	train-logloss:0.047647	train-auc:0.957643	valid-logloss:0.15068	valid-auc:0.890333
[2017]	train-logloss:0.047643	train-auc:0.957652	valid-logloss:0.150672	valid-auc:0.890337
[2018]	train-logloss:0.04764	train-auc:0.957655	valid-logloss:0.150678	valid-auc:0.890333
[2019]	train-logloss:0.047639	train-auc:0.957658	valid-logloss:0.150679	valid-auc:0.890336
[2020]	train-logloss:0.047638	train-auc:0.957666	valid-logloss:0.150675	valid-auc:0.890349
[2021]	train-logloss:0.047633	train-auc:0.957672	valid-logloss:0.150668	valid-auc:0.890363
[2022]	train-logloss:0.047629	train-auc:0.95768	valid-logloss:0.150666	valid-auc:0.890393
[20

[2103]	train-logloss:0.04743	train-auc:0.958161	valid-logloss:0.15056	valid-auc:0.890747
[2104]	train-logloss:0.047429	train-auc:0.958168	valid-logloss:0.150563	valid-auc:0.890747
[2105]	train-logloss:0.047427	train-auc:0.958173	valid-logloss:0.150564	valid-auc:0.890749
[2106]	train-logloss:0.047424	train-auc:0.958178	valid-logloss:0.15057	valid-auc:0.890738
[2107]	train-logloss:0.047422	train-auc:0.958182	valid-logloss:0.150561	valid-auc:0.890733
[2108]	train-logloss:0.047419	train-auc:0.958187	valid-logloss:0.150543	valid-auc:0.890742
[2109]	train-logloss:0.047418	train-auc:0.958192	valid-logloss:0.150537	valid-auc:0.890757
[2110]	train-logloss:0.047416	train-auc:0.9582	valid-logloss:0.15054	valid-auc:0.890749
[2111]	train-logloss:0.047415	train-auc:0.958205	valid-logloss:0.150538	valid-auc:0.890771
[2112]	train-logloss:0.047414	train-auc:0.958207	valid-logloss:0.150539	valid-auc:0.890773
[2113]	train-logloss:0.047414	train-auc:0.958212	valid-logloss:0.150542	valid-auc:0.890774
[2114

[2194]	train-logloss:0.04721	train-auc:0.95867	valid-logloss:0.150346	valid-auc:0.891072
[2195]	train-logloss:0.047207	train-auc:0.958675	valid-logloss:0.150341	valid-auc:0.891085
[2196]	train-logloss:0.047205	train-auc:0.958676	valid-logloss:0.150341	valid-auc:0.891085
[2197]	train-logloss:0.047205	train-auc:0.95868	valid-logloss:0.150342	valid-auc:0.891087
[2198]	train-logloss:0.047201	train-auc:0.958687	valid-logloss:0.150352	valid-auc:0.891055
[2199]	train-logloss:0.047199	train-auc:0.958693	valid-logloss:0.150354	valid-auc:0.891057
[2200]	train-logloss:0.047197	train-auc:0.958694	valid-logloss:0.150352	valid-auc:0.891056
[2201]	train-logloss:0.047197	train-auc:0.9587	valid-logloss:0.15036	valid-auc:0.891052
[2202]	train-logloss:0.047195	train-auc:0.958702	valid-logloss:0.150357	valid-auc:0.891051
[2203]	train-logloss:0.047193	train-auc:0.958707	valid-logloss:0.150359	valid-auc:0.891042
[2204]	train-logloss:0.047188	train-auc:0.958714	valid-logloss:0.150354	valid-auc:0.891057
[2205

[2285]	train-logloss:0.046996	train-auc:0.959145	valid-logloss:0.150298	valid-auc:0.891463
[2286]	train-logloss:0.046993	train-auc:0.95915	valid-logloss:0.150299	valid-auc:0.891482
[2287]	train-logloss:0.046991	train-auc:0.959157	valid-logloss:0.150305	valid-auc:0.891464
[2288]	train-logloss:0.046987	train-auc:0.95916	valid-logloss:0.150305	valid-auc:0.891463
[2289]	train-logloss:0.046984	train-auc:0.959166	valid-logloss:0.150318	valid-auc:0.891436
[2290]	train-logloss:0.046983	train-auc:0.959173	valid-logloss:0.150336	valid-auc:0.891428
[2291]	train-logloss:0.046978	train-auc:0.95918	valid-logloss:0.150321	valid-auc:0.891453
[2292]	train-logloss:0.046974	train-auc:0.95919	valid-logloss:0.150308	valid-auc:0.891501
[2293]	train-logloss:0.046973	train-auc:0.959193	valid-logloss:0.150311	valid-auc:0.891496
[2294]	train-logloss:0.046973	train-auc:0.959195	valid-logloss:0.150319	valid-auc:0.891494
[2295]	train-logloss:0.04697	train-auc:0.959201	valid-logloss:0.150316	valid-auc:0.891509
[229

[2376]	train-logloss:0.046775	train-auc:0.959639	valid-logloss:0.150162	valid-auc:0.891803
[2377]	train-logloss:0.046772	train-auc:0.959643	valid-logloss:0.150159	valid-auc:0.891802
[2378]	train-logloss:0.04677	train-auc:0.959646	valid-logloss:0.150162	valid-auc:0.891796
[2379]	train-logloss:0.046767	train-auc:0.959652	valid-logloss:0.150168	valid-auc:0.891775
[2380]	train-logloss:0.046766	train-auc:0.959657	valid-logloss:0.150162	valid-auc:0.891803
[2381]	train-logloss:0.046765	train-auc:0.959662	valid-logloss:0.150172	valid-auc:0.891794
[2382]	train-logloss:0.046763	train-auc:0.959668	valid-logloss:0.150171	valid-auc:0.891791
[2383]	train-logloss:0.046761	train-auc:0.959671	valid-logloss:0.150169	valid-auc:0.891797
[2384]	train-logloss:0.046758	train-auc:0.959676	valid-logloss:0.150169	valid-auc:0.891802
[2385]	train-logloss:0.046757	train-auc:0.959684	valid-logloss:0.150172	valid-auc:0.891801
[2386]	train-logloss:0.046754	train-auc:0.95969	valid-logloss:0.15017	valid-auc:0.891802
[2

[2467]	train-logloss:0.046566	train-auc:0.960119	valid-logloss:0.150047	valid-auc:0.892335
[2468]	train-logloss:0.046565	train-auc:0.960126	valid-logloss:0.150045	valid-auc:0.89234
[2469]	train-logloss:0.046562	train-auc:0.960129	valid-logloss:0.150037	valid-auc:0.892344
[2470]	train-logloss:0.046559	train-auc:0.960135	valid-logloss:0.150022	valid-auc:0.892365
[2471]	train-logloss:0.046557	train-auc:0.960137	valid-logloss:0.150024	valid-auc:0.892364
[2472]	train-logloss:0.046555	train-auc:0.960143	valid-logloss:0.150028	valid-auc:0.892354
[2473]	train-logloss:0.046554	train-auc:0.960147	valid-logloss:0.15002	valid-auc:0.892361
[2474]	train-logloss:0.046553	train-auc:0.960149	valid-logloss:0.150016	valid-auc:0.892363
[2475]	train-logloss:0.046549	train-auc:0.960157	valid-logloss:0.150015	valid-auc:0.89235
[2476]	train-logloss:0.046545	train-auc:0.960161	valid-logloss:0.150015	valid-auc:0.892355
[2477]	train-logloss:0.046543	train-auc:0.960166	valid-logloss:0.150011	valid-auc:0.892352
[2

[2558]	train-logloss:0.046368	train-auc:0.960518	valid-logloss:0.149929	valid-auc:0.892583
[2559]	train-logloss:0.046367	train-auc:0.960523	valid-logloss:0.149927	valid-auc:0.892587
[2560]	train-logloss:0.046363	train-auc:0.960528	valid-logloss:0.149921	valid-auc:0.892606
[2561]	train-logloss:0.046361	train-auc:0.960535	valid-logloss:0.149918	valid-auc:0.892598
[2562]	train-logloss:0.04636	train-auc:0.960542	valid-logloss:0.149918	valid-auc:0.892601
[2563]	train-logloss:0.04636	train-auc:0.960546	valid-logloss:0.14992	valid-auc:0.892606
[2564]	train-logloss:0.046358	train-auc:0.960548	valid-logloss:0.14991	valid-auc:0.892617
[2565]	train-logloss:0.046355	train-auc:0.960552	valid-logloss:0.149907	valid-auc:0.892618
[2566]	train-logloss:0.046352	train-auc:0.960557	valid-logloss:0.149899	valid-auc:0.892639
[2567]	train-logloss:0.046349	train-auc:0.960562	valid-logloss:0.149894	valid-auc:0.892654
[2568]	train-logloss:0.046347	train-auc:0.960565	valid-logloss:0.149893	valid-auc:0.892665
[25

[2649]	train-logloss:0.046174	train-auc:0.960954	valid-logloss:0.149797	valid-auc:0.892928
[2650]	train-logloss:0.046171	train-auc:0.960962	valid-logloss:0.14979	valid-auc:0.89294
[2651]	train-logloss:0.046169	train-auc:0.960964	valid-logloss:0.149792	valid-auc:0.892929
[2652]	train-logloss:0.046167	train-auc:0.960971	valid-logloss:0.149798	valid-auc:0.892924
[2653]	train-logloss:0.046163	train-auc:0.960974	valid-logloss:0.149792	valid-auc:0.892934
[2654]	train-logloss:0.046162	train-auc:0.960978	valid-logloss:0.149792	valid-auc:0.892941
[2655]	train-logloss:0.046162	train-auc:0.960985	valid-logloss:0.149791	valid-auc:0.892946
[2656]	train-logloss:0.046158	train-auc:0.960995	valid-logloss:0.149793	valid-auc:0.892942
[2657]	train-logloss:0.046157	train-auc:0.960999	valid-logloss:0.149796	valid-auc:0.892947
[2658]	train-logloss:0.046155	train-auc:0.961001	valid-logloss:0.149794	valid-auc:0.892946
[2659]	train-logloss:0.046151	train-auc:0.961009	valid-logloss:0.149797	valid-auc:0.892945
[

[2740]	train-logloss:0.045988	train-auc:0.961387	valid-logloss:0.149749	valid-auc:0.89314
[2741]	train-logloss:0.045989	train-auc:0.961389	valid-logloss:0.149752	valid-auc:0.893139
[2742]	train-logloss:0.045988	train-auc:0.961393	valid-logloss:0.149756	valid-auc:0.893137
[2743]	train-logloss:0.045987	train-auc:0.961396	valid-logloss:0.149754	valid-auc:0.89314
[2744]	train-logloss:0.045985	train-auc:0.9614	valid-logloss:0.149749	valid-auc:0.893136
[2745]	train-logloss:0.045985	train-auc:0.961404	valid-logloss:0.149753	valid-auc:0.89314
[2746]	train-logloss:0.045983	train-auc:0.961411	valid-logloss:0.149752	valid-auc:0.893142
[2747]	train-logloss:0.045982	train-auc:0.961415	valid-logloss:0.149751	valid-auc:0.89315
[2748]	train-logloss:0.045978	train-auc:0.96142	valid-logloss:0.149742	valid-auc:0.893173
[2749]	train-logloss:0.045977	train-auc:0.961426	valid-logloss:0.149743	valid-auc:0.89318
[2750]	train-logloss:0.045973	train-auc:0.961434	valid-logloss:0.149743	valid-auc:0.893168
[2751]	

[2831]	train-logloss:0.045805	train-auc:0.961781	valid-logloss:0.149653	valid-auc:0.893444
[2832]	train-logloss:0.045801	train-auc:0.961785	valid-logloss:0.149645	valid-auc:0.893451
[2833]	train-logloss:0.0458	train-auc:0.961788	valid-logloss:0.149652	valid-auc:0.89344
[2834]	train-logloss:0.0458	train-auc:0.961791	valid-logloss:0.149655	valid-auc:0.893439
[2835]	train-logloss:0.045797	train-auc:0.961795	valid-logloss:0.149656	valid-auc:0.893439
[2836]	train-logloss:0.045795	train-auc:0.961797	valid-logloss:0.149648	valid-auc:0.893441
[2837]	train-logloss:0.045794	train-auc:0.961803	valid-logloss:0.149644	valid-auc:0.893444
[2838]	train-logloss:0.045791	train-auc:0.961806	valid-logloss:0.149636	valid-auc:0.893468
[2839]	train-logloss:0.04579	train-auc:0.96181	valid-logloss:0.149635	valid-auc:0.893475
[2840]	train-logloss:0.045788	train-auc:0.961813	valid-logloss:0.149629	valid-auc:0.893482
[2841]	train-logloss:0.045787	train-auc:0.961816	valid-logloss:0.149635	valid-auc:0.893472
[2842]

[2922]	train-logloss:0.045631	train-auc:0.962207	valid-logloss:0.149472	valid-auc:0.893827
[2923]	train-logloss:0.045628	train-auc:0.962215	valid-logloss:0.149464	valid-auc:0.893826
[2924]	train-logloss:0.045628	train-auc:0.962218	valid-logloss:0.149472	valid-auc:0.89383
[2925]	train-logloss:0.045626	train-auc:0.962221	valid-logloss:0.149471	valid-auc:0.893832
[2926]	train-logloss:0.045625	train-auc:0.962224	valid-logloss:0.149469	valid-auc:0.893837
[2927]	train-logloss:0.045624	train-auc:0.962228	valid-logloss:0.149475	valid-auc:0.893838
[2928]	train-logloss:0.04562	train-auc:0.962231	valid-logloss:0.149477	valid-auc:0.893834
[2929]	train-logloss:0.045619	train-auc:0.962235	valid-logloss:0.149476	valid-auc:0.89384
[2930]	train-logloss:0.045617	train-auc:0.962239	valid-logloss:0.149466	valid-auc:0.893846
[2931]	train-logloss:0.045614	train-auc:0.962246	valid-logloss:0.149463	valid-auc:0.89385
[2932]	train-logloss:0.045613	train-auc:0.962249	valid-logloss:0.149467	valid-auc:0.893844
[29

[3013]	train-logloss:0.045462	train-auc:0.962578	valid-logloss:0.149433	valid-auc:0.893932
[3014]	train-logloss:0.045461	train-auc:0.962584	valid-logloss:0.149432	valid-auc:0.89394
[3015]	train-logloss:0.045457	train-auc:0.962588	valid-logloss:0.14942	valid-auc:0.893954
[3016]	train-logloss:0.045456	train-auc:0.96259	valid-logloss:0.149421	valid-auc:0.893956
[3017]	train-logloss:0.045454	train-auc:0.962595	valid-logloss:0.14942	valid-auc:0.893955
[3018]	train-logloss:0.04545	train-auc:0.962599	valid-logloss:0.149415	valid-auc:0.89395
[3019]	train-logloss:0.04545	train-auc:0.962604	valid-logloss:0.149411	valid-auc:0.893973
[3020]	train-logloss:0.045448	train-auc:0.962606	valid-logloss:0.149409	valid-auc:0.89397
[3021]	train-logloss:0.045444	train-auc:0.96261	valid-logloss:0.149407	valid-auc:0.893967
[3022]	train-logloss:0.045444	train-auc:0.962616	valid-logloss:0.149411	valid-auc:0.893967
[3023]	train-logloss:0.045443	train-auc:0.962619	valid-logloss:0.149414	valid-auc:0.893969
[3024]	t

[3104]	train-logloss:0.045292	train-auc:0.962947	valid-logloss:0.149413	valid-auc:0.89415
[3105]	train-logloss:0.045288	train-auc:0.962951	valid-logloss:0.149411	valid-auc:0.894146
[3106]	train-logloss:0.045287	train-auc:0.962953	valid-logloss:0.149412	valid-auc:0.894146
[3107]	train-logloss:0.045285	train-auc:0.962954	valid-logloss:0.149437	valid-auc:0.894111
[3108]	train-logloss:0.045282	train-auc:0.962962	valid-logloss:0.14944	valid-auc:0.894099
[3109]	train-logloss:0.045279	train-auc:0.962968	valid-logloss:0.14945	valid-auc:0.894092
[3110]	train-logloss:0.045279	train-auc:0.96297	valid-logloss:0.149451	valid-auc:0.894093
[3111]	train-logloss:0.045277	train-auc:0.962973	valid-logloss:0.149448	valid-auc:0.894103
[3112]	train-logloss:0.045276	train-auc:0.962977	valid-logloss:0.149448	valid-auc:0.894108
[3113]	train-logloss:0.045274	train-auc:0.962983	valid-logloss:0.149445	valid-auc:0.894118
[3114]	train-logloss:0.045275	train-auc:0.962984	valid-logloss:0.14945	valid-auc:0.894118
[311

[3195]	train-logloss:0.045135	train-auc:0.963292	valid-logloss:0.149431	valid-auc:0.894161
[3196]	train-logloss:0.045134	train-auc:0.963297	valid-logloss:0.149448	valid-auc:0.894136
[3197]	train-logloss:0.04513	train-auc:0.963308	valid-logloss:0.14945	valid-auc:0.894144
[3198]	train-logloss:0.045127	train-auc:0.963312	valid-logloss:0.149435	valid-auc:0.894153
[3199]	train-logloss:0.045126	train-auc:0.963313	valid-logloss:0.149437	valid-auc:0.894152
[3200]	train-logloss:0.045125	train-auc:0.963315	valid-logloss:0.149433	valid-auc:0.894158
[3201]	train-logloss:0.045123	train-auc:0.96332	valid-logloss:0.149428	valid-auc:0.894163
[3202]	train-logloss:0.045121	train-auc:0.963325	valid-logloss:0.149422	valid-auc:0.894166
[3203]	train-logloss:0.045117	train-auc:0.963329	valid-logloss:0.149412	valid-auc:0.894198
[3204]	train-logloss:0.045114	train-auc:0.963335	valid-logloss:0.149418	valid-auc:0.894194
[3205]	train-logloss:0.045113	train-auc:0.963341	valid-logloss:0.149408	valid-auc:0.894241
[3

[3286]	train-logloss:0.044966	train-auc:0.96366	valid-logloss:0.149442	valid-auc:0.894164
[3287]	train-logloss:0.044964	train-auc:0.963666	valid-logloss:0.149437	valid-auc:0.894169
[3288]	train-logloss:0.044963	train-auc:0.96367	valid-logloss:0.149441	valid-auc:0.89417
[3289]	train-logloss:0.044961	train-auc:0.963672	valid-logloss:0.14944	valid-auc:0.894171
[3290]	train-logloss:0.04496	train-auc:0.963677	valid-logloss:0.149446	valid-auc:0.894167
[3291]	train-logloss:0.044957	train-auc:0.963681	valid-logloss:0.149452	valid-auc:0.894148
[3292]	train-logloss:0.044954	train-auc:0.963686	valid-logloss:0.149448	valid-auc:0.894143
[3293]	train-logloss:0.044952	train-auc:0.963688	valid-logloss:0.149452	valid-auc:0.894146
[3294]	train-logloss:0.044951	train-auc:0.963691	valid-logloss:0.149453	valid-auc:0.894149
[3295]	train-logloss:0.044946	train-auc:0.963695	valid-logloss:0.149443	valid-auc:0.89415
[3296]	train-logloss:0.044944	train-auc:0.963697	valid-logloss:0.149443	valid-auc:0.894147
[3297

[3377]	train-logloss:0.044791	train-auc:0.964028	valid-logloss:0.14947	valid-auc:0.894233
[3378]	train-logloss:0.044789	train-auc:0.964033	valid-logloss:0.149474	valid-auc:0.894231
[3379]	train-logloss:0.044788	train-auc:0.964035	valid-logloss:0.149476	valid-auc:0.894226
[3380]	train-logloss:0.044784	train-auc:0.964039	valid-logloss:0.149468	valid-auc:0.894273
[3381]	train-logloss:0.044783	train-auc:0.964043	valid-logloss:0.149465	valid-auc:0.894268
[3382]	train-logloss:0.04478	train-auc:0.964047	valid-logloss:0.149439	valid-auc:0.894333
[3383]	train-logloss:0.04478	train-auc:0.964051	valid-logloss:0.149441	valid-auc:0.894332
[3384]	train-logloss:0.044777	train-auc:0.964053	valid-logloss:0.149447	valid-auc:0.894312
[3385]	train-logloss:0.044774	train-auc:0.96406	valid-logloss:0.149449	valid-auc:0.894314
[3386]	train-logloss:0.044773	train-auc:0.964063	valid-logloss:0.149447	valid-auc:0.894322
[3387]	train-logloss:0.044771	train-auc:0.964067	valid-logloss:0.149448	valid-auc:0.89432
[338

[3468]	train-logloss:0.044631	train-auc:0.96437	valid-logloss:0.1495	valid-auc:0.894374
[3469]	train-logloss:0.044629	train-auc:0.964377	valid-logloss:0.14951	valid-auc:0.894364
[3470]	train-logloss:0.044627	train-auc:0.964381	valid-logloss:0.149527	valid-auc:0.894344
[3471]	train-logloss:0.044628	train-auc:0.964382	valid-logloss:0.149523	valid-auc:0.894348
[3472]	train-logloss:0.044625	train-auc:0.964387	valid-logloss:0.149526	valid-auc:0.894347
[3473]	train-logloss:0.044622	train-auc:0.96439	valid-logloss:0.149515	valid-auc:0.894344
[3474]	train-logloss:0.044621	train-auc:0.964395	valid-logloss:0.149506	valid-auc:0.894346
[3475]	train-logloss:0.044619	train-auc:0.964398	valid-logloss:0.149512	valid-auc:0.894342
[3476]	train-logloss:0.044616	train-auc:0.964401	valid-logloss:0.149501	valid-auc:0.894393
[3477]	train-logloss:0.044614	train-auc:0.964406	valid-logloss:0.149502	valid-auc:0.8944
[3478]	train-logloss:0.044612	train-auc:0.964411	valid-logloss:0.149504	valid-auc:0.894394
[3479]

KeyboardInterrupt: 

In [20]:
learning_rate = 0.1
num_leaves = 15
min_data_in_leaf = 200
feature_fraction = 0.6
num_boost_round = 10000
params = {"objective": "binary",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "max_bin": 256,
          "feature_fraction": feature_fraction,
          "verbosity": 0,
          "drop_rate": 0.1,
          "is_unbalance": False,
          "max_drop": 50,
          "min_child_samples": 10,
          "min_child_weight": 150,
          "min_split_gain": 0,
          "subsample": 0.9,
          #"metric": 'auc'
          }
for i in range(0,9,2):
    params['seed'] = i
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    # print(day_target)
    train = data[data.Day.isin(day_target)]
    
    dtrain = lgbm.Dataset(train.drop(['id','date','label','Day','cate'],axis=1).values, train.label.values)
    dvalid = lgbm.Dataset(valid.drop(['id','date','label','Day','cate'],axis=1).values, valid.label.values, reference=dtrain)
    bst = lgbm.train(params, dtrain, num_boost_round, valid_sets=dvalid,\
                     feval=my_score2,\
                     verbose_eval=100,
                     early_stopping_rounds=100)
    # make the predictions
#     cv_pred += bst.predict(test.drop(['id','date','Day'],axis=1).values, num_iteration=bst.best_iteration)
    valid_pred += bst.predict(valid.drop(['id','date','label','Day'],axis=1).values, num_iteration=bst.best_iteration)

valid_pred /= 5    
# cv_pred /= 5

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))

# pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0009.csv', index=False)

Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.110506	valid_0's score: 0.215695
[200]	valid_0's binary_logloss: 0.107953	valid_0's score: 0.246487
[300]	valid_0's binary_logloss: 0.10763	valid_0's score: 0.245516
Early stopping, best iteration is:
[227]	valid_0's binary_logloss: 0.107736	valid_0's score: 0.250822
Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.110789	valid_0's score: 0.213602
[200]	valid_0's binary_logloss: 0.109406	valid_0's score: 0.217638
[300]	valid_0's binary_logloss: 0.108724	valid_0's score: 0.222123
[400]	valid_0's binary_logloss: 0.108285	valid_0's score: 0.223842
[500]	valid_0's binary_logloss: 0.108128	valid_0's score: 0.221898
Early stopping, best iteration is:
[450]	valid_0's binary_logloss: 0.108121	valid_0's score: 0.22728
Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.110635	valid_0's score: 0.201644
[200]	

In [15]:
valid_pred_df = pd.DataFrame({'id': valid.id, 'score': valid_pred, 'label':  valid.label})
valid_pred_df = valid_pred_df.merge(train_cate, how='inner', on='id')
valid_pred_df.head()

Unnamed: 0,id,score,label,cate
0,e8c3a7fbd718708534a436d217a76c86ac2e0683b65ae1...,9.1e-05,0,7
1,d89c7b05241c510b3b24371fccd7be3667099f89f4a2ec...,0.755644,1,7
2,eb9115c595b131a80f9fc50676382f72badb8c26aa2584...,0.000325,0,7
3,eef20507bbf041f0231f053f3161061bcec68dae1d7ed0...,0.000518,0,7
4,2c3976f164024e349127ba8d5a84e48dc109ec7299d6ea...,0.360241,0,7


In [16]:
percent = valid_pred_df.label.values.sum()/valid_pred_df.shape[0]

threshold = valid_pred_df.score.quantile(1-percent)
valid_pred_df['pred_label'] = valid_pred_df.score.apply(lambda x: int(x>threshold))
valid_pred_df['isCorrect'] = valid_pred_df['label']==valid_pred_df['pred_label']
valid_pred_df['isCorrect'] = valid_pred_df['isCorrect'].astype('int')
valid_pred_df.head()

Unnamed: 0,id,score,label,cate,pred_label,isCorrect
0,e8c3a7fbd718708534a436d217a76c86ac2e0683b65ae1...,9.1e-05,0,7,0,1
1,d89c7b05241c510b3b24371fccd7be3667099f89f4a2ec...,0.755644,1,7,0,0
2,eb9115c595b131a80f9fc50676382f72badb8c26aa2584...,0.000325,0,7,0,1
3,eef20507bbf041f0231f053f3161061bcec68dae1d7ed0...,0.000518,0,7,0,1
4,2c3976f164024e349127ba8d5a84e48dc109ec7299d6ea...,0.360241,0,7,0,1


In [18]:
# use training data with cate==7 
print('Incorrect predictions: {}'.format(valid_pred_df[valid_pred_df['isCorrect']==0].shape[0]))
print('Total predictions: {}'.format(valid_pred_df.shape[0]))
print('False negatives: {}'.format(valid_pred_df[valid_pred_df['label']==1][valid_pred_df['isCorrect']==0].shape[0]))
print('Total positives: {}'.format(valid_pred_df[valid_pred_df['label']==1].shape[0]))

Incorrect predictions: 1374
Total predictions: 24768
False negatives: 687
Total positives: 1415


  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
# use all training data
print('Incorrect predictions: {}'.format(valid_pred_df[valid_pred_df['isCorrect']==0].shape[0]))
print('Total predictions: {}'.format(valid_pred_df.shape[0]))
print('False negatives: {}'.format(valid_pred_df[valid_pred_df['label']==1][valid_pred_df['isCorrect']==0].shape[0]))
print('Total positives: {}'.format(valid_pred_df[valid_pred_df['label']==1].shape[0]))

### Add weights to training data

In [4]:
train_cate = pd.read_csv("obj/id_cate_train.csv")
data = data.merge(train_cate, how='inner', on='id')
data['weight'] = data.cate.apply(lambda x: 2 if x==2 or x==7 else 1)

In [5]:
# use the last 20% data as validation set
valid = data.iloc[data.shape[0]*4//5:,:]
data = data.iloc[:data.shape[0]*4//5,:]

# weight_valid = valid.weight
# weight_data = data.weight

In [9]:
test_id = test.id
cv_pred = np.zeros(len(test_id))

valid_pred = np.zeros(valid.shape[0])
#valid_id = valid.id

In [10]:
learning_rate = 0.1
num_leaves = 15
min_data_in_leaf = 2000
feature_fraction = 0.6
num_boost_round = 10000
params = {"objective": "binary",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "max_bin": 256,
          "feature_fraction": feature_fraction,
          "verbosity": 0,
          "drop_rate": 0.1,
          "is_unbalance": False,
          "max_drop": 50,
          "min_child_samples": 10,
          "min_child_weight": 150,
          "min_split_gain": 0,
          "subsample": 0.8,
          #"metric": 'auc'
          }
for i in range(0,9,2):
    params['seed'] = i
    day_target = list(set([tmp+1 for tmp in range(31)])-\
                      set([10*tmp+i+1 if 10*tmp+i+1<=31 else 10*tmp+i-9 for tmp in range(4)]+\
                          [10*tmp+i+2 if 10*tmp+i+2<=31 else 10*tmp+i-8 for tmp in range(4)]))
    # print(day_target)
    train = data[data.Day.isin(day_target)]
    
    dtrain = lgbm.Dataset(train.drop(['id','date','label','Day','weight'],axis=1).values,\
                          train.label.values, weight=train.weight.values)
    dvalid = lgbm.Dataset(valid.drop(['id','date','label','Day','weight'],axis=1).values,\
                          valid.label.values, reference=dtrain, weight=valid.weight.values)
    bst = lgbm.train(params, dtrain, num_boost_round, valid_sets=dvalid,\
                     feval=my_score2,\
                     verbose_eval=100,
                     early_stopping_rounds=300)
    # make the predictions
    cv_pred += bst.predict(test.drop(['id','date','Day'],axis=1).values, num_iteration=bst.best_iteration)
    valid_pred += bst.predict(valid.drop(['id','date','label','Day'],axis=1).values, num_iteration=bst.best_iteration)

valid_pred /= 5    
cv_pred /= 5

print('Final score of validation is {}'.format(my_score1(valid.label.values, valid_pred)))

pd.DataFrame({'id': test_id, 'score': cv_pred}).to_csv('submission/test_b/0013.csv', index=False)

Training until validation scores don't improve for 300 rounds.
[100]	valid_0's binary_logloss: 0.0468065	valid_0's score: 0.412607
[200]	valid_0's binary_logloss: 0.0460782	valid_0's score: 0.414193
[300]	valid_0's binary_logloss: 0.0460244	valid_0's score: 0.40861
[400]	valid_0's binary_logloss: 0.0461325	valid_0's score: 0.405995
[500]	valid_0's binary_logloss: 0.0461557	valid_0's score: 0.404819
Early stopping, best iteration is:
[225]	valid_0's binary_logloss: 0.0460397	valid_0's score: 0.415604
Training until validation scores don't improve for 300 rounds.
[100]	valid_0's binary_logloss: 0.0472591	valid_0's score: 0.395093
[200]	valid_0's binary_logloss: 0.0464122	valid_0's score: 0.397825
[300]	valid_0's binary_logloss: 0.0465003	valid_0's score: 0.402028
[400]	valid_0's binary_logloss: 0.046374	valid_0's score: 0.401704
[500]	valid_0's binary_logloss: 0.0464695	valid_0's score: 0.399824
Early stopping, best iteration is:
[296]	valid_0's binary_logloss: 0.0465018	valid_0's score: