## 导入相关包

In [1]:
import pandas as pd
import numpy as np

## 读取数据（训练数据前10000行，测试数据前100条）

In [2]:
train_data = pd.read_csv('train_all.csv',nrows=10000)
test_data = pd.read_csv('test_all.csv',nrows=100)

## 读取全部数据

In [3]:
# train_data = pd.read_csv('train_all.csv',nrows=None)
# test_data = pd.read_csv('test_all.csv',nrows=None)

## 获取训练和测试数据

In [5]:
features_columns = [col for col in train_data.columns if col not in ['user_id','label']]
train = train_data[features_columns].values
test = test_data[features_columns].values
target =train_data['label'].values

## 切分40%数据用于线下验证

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0, n_jobs=-1)
X_train, X_test, y_train, y_test = train_test_split(train, target, test_size=0.4, random_state=0)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)  

(1200, 229) (1200,)
(800, 229) (800,)


0.9275

## 交叉验证：评估估算器性能

In [8]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0, n_jobs=-1)
scores = cross_val_score(clf, train, target, cv=5)
print(scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) 

[0.9375 0.935  0.935  0.935  0.935 ]
Accuracy: 0.94 (+/- 0.00)


## F1验证

In [9]:
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0, n_jobs=-1)
scores = cross_val_score(clf, train, target, cv=5, scoring='f1_macro')
print(scores)  
print("F1: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

[0.48387097 0.48320413 0.48320413 0.48320413 0.48320413]
F1: 0.48 (+/- 0.00)


## ShuffleSplit切分数据

In [12]:
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0, n_jobs=-1)
cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
cross_val_score(clf, train, target, cv=cv)  

array([0.93333333, 0.943     , 0.93133333, 0.93833333, 0.934     ])

## 自己写交叉验证

### KFlod切分数据

In [10]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0, n_jobs=-1)
kf = KFold(n_splits=5)
for k, (train_index, test_index) in enumerate(kf.split(train)):
    X_train, X_test, y_train, y_test = train[train_index], train[test_index], target[train_index], target[test_index]
    clf = clf.fit(X_train, y_train)
    print(k, clf.score(X_test, y_test))

0 0.9375
1 0.925
2 0.935
3 0.9325
4 0.9475


### StratifiedKFold切分数据(label均分)

In [11]:
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0, n_jobs=-1)
skf = StratifiedKFold(n_splits=5)
for k, (train_index, test_index) in enumerate(skf.split(train, target)):
    X_train, X_test, y_train, y_test = train[train_index], train[test_index], target[train_index], target[test_index]
    clf = clf.fit(X_train, y_train)
    print(k, clf.score(X_test, y_test))

0 0.9375
1 0.935
2 0.935
3 0.935
4 0.935


## 模型调参

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier


# Split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(train, target, test_size=0.5, random_state=0)

# model 
clf = RandomForestClassifier(n_jobs=-1)

# Set the parameters by cross-validation

tuned_parameters = {
                    'n_estimators': [50, 100, 200]
#                     ,'criterion': ['gini', 'entropy']
#                     ,'max_depth': [2, 5]
#                     ,'max_features': ['log2', 'sqrt', 'int']
#                     ,'bootstrap': [True, False]
#                     ,'warm_start': [True, False]
                    }

scores = ['precision']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(clf, tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'n_estimators': 100}

Grid scores on development set:

0.622 (+/-0.400) for {'n_estimators': 50}
0.639 (+/-0.423) for {'n_estimators': 100}
0.622 (+/-0.400) for {'n_estimators': 200}

Detailed classification report:

The model is trained on the full development set.
The scores are computed on the full evaluation set.

              precision    recall  f1-score   support

         0.0       0.93      0.98      0.95       928
         1.0       0.00      0.00      0.00        72

    accuracy                           0.91      1000
   macro avg       0.46      0.49      0.48      1000
weighted avg       0.86      0.91      0.89      1000




## 模糊矩阵

In [13]:
import itertools
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier

# label name
class_names = ['no-repeat', 'repeat']

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)

# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
clf = RandomForestClassifier(n_jobs=-1)
y_pred = clf.fit(X_train, y_train).predict(X_test)


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

Confusion matrix, without normalization
[[460   7]
 [ 32   1]]
Normalized confusion matrix
[[0.99 0.01]
 [0.97 0.03]]


<Figure size 640x480 with 2 Axes>

<Figure size 640x480 with 2 Axes>

In [14]:
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# label name
class_names = ['no-repeat', 'repeat']

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)

# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
clf = RandomForestClassifier(n_jobs=-1)
y_pred = clf.fit(X_train, y_train).predict(X_test)

print(classification_report(y_test, y_pred, target_names=class_names))

              precision    recall  f1-score   support

   no-repeat       0.93      0.99      0.96       467
      repeat       0.12      0.03      0.05        33

    accuracy                           0.92       500
   macro avg       0.53      0.51      0.50       500
weighted avg       0.88      0.92      0.90       500



## 不同的分类模型

### LR 模型

In [17]:
train_data.head()

Unnamed: 0,user_id,merchant_id,age_range,gender,user_cnt,seller_nunique,cat_nunique,brand_nunique,item_nunique,time_stamp_nunique,...,embeeding_93,embeeding_94,embeeding_95,embeeding_96,embeeding_97,embeeding_98,embeeding_99,lgb_clf,xgb_clf,label
0,34176,3906.0,6.0,0.0,451.0,109.0,45.0,106.0,256.0,47.0,...,-0.014218,0.026865,-0.533239,0.231678,-0.427409,0.412302,0.85033,0.928806,0.833814,0.0
1,34176,121.0,6.0,0.0,451.0,109.0,45.0,106.0,256.0,47.0,...,-0.014218,0.026865,-0.533239,0.231678,-0.427409,0.412302,0.85033,0.921832,0.879359,0.0
2,34176,4356.0,6.0,0.0,451.0,109.0,45.0,106.0,256.0,47.0,...,-0.014218,0.026865,-0.533239,0.231678,-0.427409,0.412302,0.85033,0.940439,0.93082,1.0
3,34176,2217.0,6.0,0.0,451.0,109.0,45.0,106.0,256.0,47.0,...,-0.014218,0.026865,-0.533239,0.231678,-0.427409,0.412302,0.85033,0.928806,0.834748,0.0
4,230784,4818.0,0.0,0.0,54.0,20.0,17.0,19.0,31.0,16.0,...,-0.128846,-0.287453,-0.151249,0.713761,-0.563686,-0.011947,0.135482,0.940592,0.929739,0.0


In [19]:
train_data.age_range.unique(),train_data.gender.unique()

(array([6., 0., 4., 5., 3., 2., 7., 8.]), array([0., 1., 2.]))

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

stdScaler = StandardScaler()
X = stdScaler.fit_transform(train)

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, target, random_state=0)

clf = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial').fit(X_train, y_train)
clf.score(X_test, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


0.9

### KNN 模型

In [22]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

stdScaler = StandardScaler()
X = stdScaler.fit_transform(train)

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, target, random_state=0)

clf = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
clf.score(X_test, y_test)

0.926

## GaussianNB 模型

In [23]:
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler

stdScaler = StandardScaler()
X = stdScaler.fit_transform(train)

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, target, random_state=0)

clf = GaussianNB().fit(X_train, y_train)
clf.score(X_test, y_test)

0.436

## tree树模型

In [24]:
from sklearn import tree

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)

clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.836

## bagging模型

In [25]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)
clf = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)

clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.934

## 随机森林模型

In [26]:
from sklearn.ensemble import RandomForestClassifier

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)
clf = clf = RandomForestClassifier(n_estimators=10, max_depth=None, min_samples_split=2, random_state=0)

clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.924

## ExTree模型

In [27]:
from sklearn.ensemble import ExtraTreesClassifier

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)
clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, min_samples_split=2, random_state=0)

clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.922

## AdaBoost模型

In [28]:
from sklearn.ensemble import AdaBoostClassifier

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)
clf = AdaBoostClassifier(n_estimators=100)

clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.928

## GBDT模型

In [29]:
from sklearn.ensemble import GradientBoostingClassifier

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(train, target, random_state=0)
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)

clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.926

## VOTE模型投票

In [30]:
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.preprocessing import StandardScaler

stdScaler = StandardScaler()
X = stdScaler.fit_transform(train)
y = target


clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()

eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')

for clf, label in zip([clf1, clf2, clf3, eclf], ['Logistic Regression', 'Random Forest', 'naive Bayes', 'Ensemble']):
    scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Accuracy: 0.89 (+/- 0.01) [Logistic Regression]
Accuracy: 0.93 (+/- 0.00) [Random Forest]
Accuracy: 0.47 (+/- 0.01) [naive Bayes]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Accuracy: 0.90 (+/- 0.01) [Ensemble]


## lgb 模型

In [41]:
import lightgbm

X_train, X_test, y_train, y_test = train_test_split(train, target, test_size=0.4, random_state=0)
X_test, X_valid, y_test, y_valid = train_test_split(X_test, y_test, test_size=0.5, random_state=0)

clf = lightgbm

train_matrix = clf.Dataset(X_train, label=y_train)
test_matrix = clf.Dataset(X_test, label=y_test)
params = {
          'boosting_type': 'gbdt',
          #'boosting_type': 'dart',
          'objective': 'multiclass',
          'metric': 'multi_logloss',
          'min_child_weight': 1.5,
          'num_leaves': 2**5,
          'lambda_l2': 10,
          'subsample': 0.7,
          'colsample_bytree': 0.7,
          'colsample_bylevel': 0.7,
          'learning_rate': 0.03,
          'tree_method': 'exact',
          'seed': 2017,
          "num_class": 2,
          'silent': True,
          }
num_round = 10000
early_stopping_rounds = 100
model = clf.train(params, 
                  train_matrix,
                  num_round,
                  valid_sets=test_matrix,
                  early_stopping_rounds=early_stopping_rounds)
pre= model.predict(X_valid,num_iteration=model.best_iteration)

Please use silent argument of the Dataset constructor to pass this parameter.
  .format(key))


[1]	valid_0's multi_logloss: 0.317263
Training until validation scores don't improve for 100 rounds
[2]	valid_0's multi_logloss: 0.317184
[3]	valid_0's multi_logloss: 0.317602
[4]	valid_0's multi_logloss: 0.317859
[5]	valid_0's multi_logloss: 0.318005
[6]	valid_0's multi_logloss: 0.317868
[7]	valid_0's multi_logloss: 0.31785
[8]	valid_0's multi_logloss: 0.318022
[9]	valid_0's multi_logloss: 0.318117
[10]	valid_0's multi_logloss: 0.318356
[11]	valid_0's multi_logloss: 0.318413
[12]	valid_0's multi_logloss: 0.318844
[13]	valid_0's multi_logloss: 0.319306
[14]	valid_0's multi_logloss: 0.319465
[15]	valid_0's multi_logloss: 0.319577
[16]	valid_0's multi_logloss: 0.319914
[17]	valid_0's multi_logloss: 0.319986
[18]	valid_0's multi_logloss: 0.320511
[19]	valid_0's multi_logloss: 0.320969
[20]	valid_0's multi_logloss: 0.321219
[21]	valid_0's multi_logloss: 0.321337
[22]	valid_0's multi_logloss: 0.321495
[23]	valid_0's multi_logloss: 0.321668
[24]	valid_0's multi_logloss: 0.321823
[25]	valid_0

In [32]:
print('score : ', np.mean((pre[:,1]>0.5)==y_valid))

score :  0.9475


## xgb 模型

In [46]:
import xgboost

X_train, X_test, y_train, y_test = train_test_split(train, target, test_size=0.4, random_state=0)
X_test, X_valid, y_test, y_valid = train_test_split(X_test, y_test, test_size=0.5, random_state=0)

clf = xgboost

train_matrix = clf.DMatrix(X_train, label=y_train, missing=-1)
test_matrix = clf.DMatrix(X_test, label=y_test, missing=-1)
z = clf.DMatrix(X_valid, label=y_valid, missing=-1)
params = {'booster': 'gbtree',
          'objective': 'multi:softprob',
          'eval_metric': 'mlogloss',
          'gamma': 1,
          'min_child_weight': 1.5,
          'max_depth': 5,
          'lambda': 10,
          'subsample': 0.7,
          'colsample_bytree': 0.7,
          'colsample_bylevel': 0.7,
          'eta': 0.03,
          'tree_method': 'exact',
          'seed': 2017,
          "num_class": 2
          }

num_round = 10000
early_stopping_rounds = 100
watchlist = [(train_matrix, 'train'),
             (test_matrix, 'eval')
             ]

model = clf.train(params,
                  train_matrix,
                  num_boost_round=num_round,
                  evals=watchlist,
                  early_stopping_rounds=early_stopping_rounds
                  )
pre = model.predict(z,ntree_limit=model.best_ntree_limit)

[0]	train-mlogloss:0.67031	eval-mlogloss:0.67265
Multiple eval metrics have been passed: 'eval-mlogloss' will be used for early stopping.

Will train until eval-mlogloss hasn't improved in 100 rounds.
[1]	train-mlogloss:0.64879	eval-mlogloss:0.65312
[2]	train-mlogloss:0.62844	eval-mlogloss:0.63498
[3]	train-mlogloss:0.60916	eval-mlogloss:0.61784
[4]	train-mlogloss:0.59086	eval-mlogloss:0.60183
[5]	train-mlogloss:0.57368	eval-mlogloss:0.58669
[6]	train-mlogloss:0.55731	eval-mlogloss:0.57199
[7]	train-mlogloss:0.54182	eval-mlogloss:0.55845
[8]	train-mlogloss:0.52705	eval-mlogloss:0.54562
[9]	train-mlogloss:0.51293	eval-mlogloss:0.53343
[10]	train-mlogloss:0.49945	eval-mlogloss:0.52189
[11]	train-mlogloss:0.48654	eval-mlogloss:0.51070
[12]	train-mlogloss:0.47422	eval-mlogloss:0.50013
[13]	train-mlogloss:0.46269	eval-mlogloss:0.49014
[14]	train-mlogloss:0.45145	eval-mlogloss:0.48065
[15]	train-mlogloss:0.44092	eval-mlogloss:0.47199
[16]	train-mlogloss:0.43060	eval-mlogloss:0.46339
[17]	tra

[160]	train-mlogloss:0.12293	eval-mlogloss:0.32637
[161]	train-mlogloss:0.12250	eval-mlogloss:0.32659
[162]	train-mlogloss:0.12193	eval-mlogloss:0.32677
[163]	train-mlogloss:0.12143	eval-mlogloss:0.32699
[164]	train-mlogloss:0.12089	eval-mlogloss:0.32710
[165]	train-mlogloss:0.12036	eval-mlogloss:0.32743
[166]	train-mlogloss:0.11989	eval-mlogloss:0.32788
[167]	train-mlogloss:0.11931	eval-mlogloss:0.32834
[168]	train-mlogloss:0.11888	eval-mlogloss:0.32879
[169]	train-mlogloss:0.11838	eval-mlogloss:0.32892
[170]	train-mlogloss:0.11793	eval-mlogloss:0.32911
[171]	train-mlogloss:0.11761	eval-mlogloss:0.32947
[172]	train-mlogloss:0.11726	eval-mlogloss:0.32965
[173]	train-mlogloss:0.11683	eval-mlogloss:0.32959
[174]	train-mlogloss:0.11640	eval-mlogloss:0.32982
[175]	train-mlogloss:0.11612	eval-mlogloss:0.33011
[176]	train-mlogloss:0.11567	eval-mlogloss:0.33041
[177]	train-mlogloss:0.11525	eval-mlogloss:0.33045
[178]	train-mlogloss:0.11467	eval-mlogloss:0.33045
[179]	train-mlogloss:0.11426	ev

In [65]:
print('score : ', np.mean((pre[:,1]>0.5)==y_valid))

score :  0.9435


# 自己封装模型

## Stacking,Bootstrap,Bagging技术实践

In [51]:
"""
    导入相关包
"""
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

In [52]:
class SBBTree():
    """
        SBBTree
        Stacking,Bootstap,Bagging
    """
    def __init__(
                    self, 
                    params,
                    stacking_num,
                    bagging_num,
                    bagging_test_size,
                    num_boost_round,
                    early_stopping_rounds
                ):
        """
            Initializes the SBBTree.
            Args:
              params : lgb params.
              stacking_num : k_flod stacking.
              bagging_num : bootstrap num.
              bagging_test_size : bootstrap sample rate.
              num_boost_round : boost num.
              early_stopping_rounds : early_stopping_rounds.
        """
        self.params = params
        self.stacking_num = stacking_num
        self.bagging_num = bagging_num
        self.bagging_test_size = bagging_test_size
        self.num_boost_round = num_boost_round
        self.early_stopping_rounds = early_stopping_rounds

        self.model = lgb
        self.stacking_model = []
        self.bagging_model = []

    def fit(self, X, y):
        """ fit model. """
        if self.stacking_num > 1:
            layer_train = np.zeros((X.shape[0], 2))
            self.SK = StratifiedKFold(n_splits=self.stacking_num, shuffle=True, random_state=1)
            for k,(train_index, test_index) in enumerate(self.SK.split(X, y)):
                X_train = X[train_index]
                y_train = y[train_index]
                X_test = X[test_index]
                y_test = y[test_index]

                lgb_train = lgb.Dataset(X_train, y_train)
                lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

                gbm = lgb.train(self.params,
                            lgb_train,
                            num_boost_round=self.num_boost_round,
                            valid_sets=lgb_eval,
                            early_stopping_rounds=self.early_stopping_rounds)

                self.stacking_model.append(gbm)

                pred_y = gbm.predict(X_test, num_iteration=gbm.best_iteration)
                layer_train[test_index, 1] = pred_y

            X = np.hstack((X, layer_train[:,1].reshape((-1,1)))) 
        else:
            pass
        for bn in range(self.bagging_num):
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.bagging_test_size, random_state=bn)

            lgb_train = lgb.Dataset(X_train, y_train)
            lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

            gbm = lgb.train(self.params,
                        lgb_train,
                        num_boost_round=10000,
                        valid_sets=lgb_eval,
                        early_stopping_rounds=200)

            self.bagging_model.append(gbm)

    def predict(self, X_pred):
        """ predict test data. """
        if self.stacking_num > 1:
            test_pred = np.zeros((X_pred.shape[0], self.stacking_num))
            for sn,gbm in enumerate(self.stacking_model):
                pred = gbm.predict(X_pred, num_iteration=gbm.best_iteration)
                test_pred[:, sn] = pred
            X_pred = np.hstack((X_pred, test_pred.mean(axis=1).reshape((-1,1))))  
        else:
            pass 
        for bn,gbm in enumerate(self.bagging_model):
            pred = gbm.predict(X_pred, num_iteration=gbm.best_iteration)
            if bn == 0:
                pred_out=pred
            else:
                pred_out+=pred
        return pred_out/self.bagging_num

## 测试自己封装的模型类

In [53]:
"""
    TEST CODE
"""
from sklearn.datasets import make_classification
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_gaussian_quantiles
from sklearn import metrics
from sklearn.metrics import f1_score
# X, y = make_classification(n_samples=1000, n_features=25, n_clusters_per_class=1, n_informative=15, random_state=1)
X, y = make_gaussian_quantiles(mean=None, cov=1.0, n_samples=1000, n_features=50, n_classes=2, shuffle=True, random_state=2)
# data = load_breast_cancer()
# X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'auc',
        'num_leaves': 9,
        'learning_rate': 0.03,
        'feature_fraction_seed': 2,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'min_data': 20,
        'min_hessian': 1,
        'verbose': -1,
        'silent': 0
        }
# test 1
model = SBBTree(params=params, stacking_num=2, bagging_num=1,  bagging_test_size=0.33, num_boost_round=10000, early_stopping_rounds=200)
model.fit(X,y)
X_pred = X[0].reshape((1,-1))
pred=model.predict(X_pred)
print('pred')
print(pred)
print('TEST 1 ok')


# test 1
model = SBBTree(params, stacking_num=1, bagging_num=1, bagging_test_size=0.33, num_boost_round=10000, early_stopping_rounds=200)
model.fit(X_train,y_train)
pred1=model.predict(X_test)

# test 2 
model = SBBTree(params, stacking_num=1, bagging_num=3, bagging_test_size=0.33, num_boost_round=10000, early_stopping_rounds=200)
model.fit(X_train,y_train)
pred2=model.predict(X_test)

# test 3 
model = SBBTree(params, stacking_num=5, bagging_num=1, bagging_test_size=0.33, num_boost_round=10000, early_stopping_rounds=200)
model.fit(X_train,y_train)
pred3=model.predict(X_test)

# test 4 
model = SBBTree(params, stacking_num=5, bagging_num=3, bagging_test_size=0.33, num_boost_round=10000, early_stopping_rounds=200)
model.fit(X_train,y_train)
pred4=model.predict(X_test)

fpr, tpr, thresholds = metrics.roc_curve(y_test+1, pred1, pos_label=2)
print('auc: ',metrics.auc(fpr, tpr))

fpr, tpr, thresholds = metrics.roc_curve(y_test+1, pred2, pos_label=2)
print('auc: ',metrics.auc(fpr, tpr))

fpr, tpr, thresholds = metrics.roc_curve(y_test+1, pred3, pos_label=2)
print('auc: ',metrics.auc(fpr, tpr))

fpr, tpr, thresholds = metrics.roc_curve(y_test+1, pred4, pos_label=2)
print('auc: ',metrics.auc(fpr, tpr))

Please use silent argument of the Dataset constructor to pass this parameter.
  .format(key))


[1]	valid_0's auc: 0.589432
Training until validation scores don't improve for 200 rounds
[2]	valid_0's auc: 0.59292
[3]	valid_0's auc: 0.59092
[4]	valid_0's auc: 0.61048
[5]	valid_0's auc: 0.611936
[6]	valid_0's auc: 0.610368
[7]	valid_0's auc: 0.611112
[8]	valid_0's auc: 0.613168
[9]	valid_0's auc: 0.6156
[10]	valid_0's auc: 0.620088
[11]	valid_0's auc: 0.624968
[12]	valid_0's auc: 0.625864
[13]	valid_0's auc: 0.620952
[14]	valid_0's auc: 0.622024
[15]	valid_0's auc: 0.616968
[16]	valid_0's auc: 0.615464
[17]	valid_0's auc: 0.614152
[18]	valid_0's auc: 0.611928
[19]	valid_0's auc: 0.61068
[20]	valid_0's auc: 0.614656
[21]	valid_0's auc: 0.617568
[22]	valid_0's auc: 0.622752
[23]	valid_0's auc: 0.624368
[24]	valid_0's auc: 0.630704
[25]	valid_0's auc: 0.633136
[26]	valid_0's auc: 0.632496
[27]	valid_0's auc: 0.631648
[28]	valid_0's auc: 0.634224
[29]	valid_0's auc: 0.636512
[30]	valid_0's auc: 0.635392
[31]	valid_0's auc: 0.63744
[32]	valid_0's auc: 0.639216
[33]	valid_0's auc: 0.6389

[349]	valid_0's auc: 0.733184
[350]	valid_0's auc: 0.732976
[351]	valid_0's auc: 0.733024
[352]	valid_0's auc: 0.733536
[353]	valid_0's auc: 0.733824
[354]	valid_0's auc: 0.734224
[355]	valid_0's auc: 0.73496
[356]	valid_0's auc: 0.73536
[357]	valid_0's auc: 0.735664
[358]	valid_0's auc: 0.735568
[359]	valid_0's auc: 0.736256
[360]	valid_0's auc: 0.736064
[361]	valid_0's auc: 0.736688
[362]	valid_0's auc: 0.736768
[363]	valid_0's auc: 0.737456
[364]	valid_0's auc: 0.737536
[365]	valid_0's auc: 0.737936
[366]	valid_0's auc: 0.737152
[367]	valid_0's auc: 0.736304
[368]	valid_0's auc: 0.736288
[369]	valid_0's auc: 0.735072
[370]	valid_0's auc: 0.734912
[371]	valid_0's auc: 0.735568
[372]	valid_0's auc: 0.736368
[373]	valid_0's auc: 0.736752
[374]	valid_0's auc: 0.73736
[375]	valid_0's auc: 0.737888
[376]	valid_0's auc: 0.737296
[377]	valid_0's auc: 0.737616
[378]	valid_0's auc: 0.73744
[379]	valid_0's auc: 0.737472
[380]	valid_0's auc: 0.737296
[381]	valid_0's auc: 0.738288
[382]	valid_0'

[798]	valid_0's auc: 0.74704
[799]	valid_0's auc: 0.746768
[800]	valid_0's auc: 0.746784
[801]	valid_0's auc: 0.74664
[802]	valid_0's auc: 0.746576
[803]	valid_0's auc: 0.746496
[804]	valid_0's auc: 0.746656
[805]	valid_0's auc: 0.746672
[806]	valid_0's auc: 0.746528
[807]	valid_0's auc: 0.746352
[808]	valid_0's auc: 0.745968
[809]	valid_0's auc: 0.746064
[810]	valid_0's auc: 0.746016
[811]	valid_0's auc: 0.745616
[812]	valid_0's auc: 0.745264
[813]	valid_0's auc: 0.745248
[814]	valid_0's auc: 0.745296
[815]	valid_0's auc: 0.745136
[816]	valid_0's auc: 0.745216
[817]	valid_0's auc: 0.745344
[818]	valid_0's auc: 0.74536
[819]	valid_0's auc: 0.745152
[820]	valid_0's auc: 0.745152
[821]	valid_0's auc: 0.745376
[822]	valid_0's auc: 0.745216
[823]	valid_0's auc: 0.745152
[824]	valid_0's auc: 0.745104
[825]	valid_0's auc: 0.744768
[826]	valid_0's auc: 0.744752
[827]	valid_0's auc: 0.744672
[828]	valid_0's auc: 0.744448
[829]	valid_0's auc: 0.74472
[830]	valid_0's auc: 0.744448
[831]	valid_0'

[233]	valid_0's auc: 0.75896
[234]	valid_0's auc: 0.759424
[235]	valid_0's auc: 0.76032
[236]	valid_0's auc: 0.760112
[237]	valid_0's auc: 0.759264
[238]	valid_0's auc: 0.759808
[239]	valid_0's auc: 0.759216
[240]	valid_0's auc: 0.759168
[241]	valid_0's auc: 0.760176
[242]	valid_0's auc: 0.760416
[243]	valid_0's auc: 0.760928
[244]	valid_0's auc: 0.760864
[245]	valid_0's auc: 0.760864
[246]	valid_0's auc: 0.761488
[247]	valid_0's auc: 0.761984
[248]	valid_0's auc: 0.761888
[249]	valid_0's auc: 0.762432
[250]	valid_0's auc: 0.762384
[251]	valid_0's auc: 0.763648
[252]	valid_0's auc: 0.764736
[253]	valid_0's auc: 0.764992
[254]	valid_0's auc: 0.765712
[255]	valid_0's auc: 0.766576
[256]	valid_0's auc: 0.767248
[257]	valid_0's auc: 0.766832
[258]	valid_0's auc: 0.766384
[259]	valid_0's auc: 0.766976
[260]	valid_0's auc: 0.766944
[261]	valid_0's auc: 0.767152
[262]	valid_0's auc: 0.76696
[263]	valid_0's auc: 0.766448
[264]	valid_0's auc: 0.766384
[265]	valid_0's auc: 0.766048
[266]	valid_0

[654]	valid_0's auc: 0.787664
[655]	valid_0's auc: 0.787328
[656]	valid_0's auc: 0.787472
[657]	valid_0's auc: 0.787488
[658]	valid_0's auc: 0.787424
[659]	valid_0's auc: 0.787536
[660]	valid_0's auc: 0.78736
[661]	valid_0's auc: 0.78752
[662]	valid_0's auc: 0.787632
[663]	valid_0's auc: 0.787456
[664]	valid_0's auc: 0.78776
[665]	valid_0's auc: 0.787936
[666]	valid_0's auc: 0.787584
[667]	valid_0's auc: 0.787184
[668]	valid_0's auc: 0.787456
[669]	valid_0's auc: 0.787488
[670]	valid_0's auc: 0.78752
[671]	valid_0's auc: 0.787424
[672]	valid_0's auc: 0.787648
[673]	valid_0's auc: 0.78736
[674]	valid_0's auc: 0.787232
[675]	valid_0's auc: 0.787264
[676]	valid_0's auc: 0.787216
[677]	valid_0's auc: 0.787376
[678]	valid_0's auc: 0.787616
[679]	valid_0's auc: 0.788016
[680]	valid_0's auc: 0.788112
[681]	valid_0's auc: 0.788272
[682]	valid_0's auc: 0.788432
[683]	valid_0's auc: 0.788368
[684]	valid_0's auc: 0.788576
[685]	valid_0's auc: 0.788272
[686]	valid_0's auc: 0.788032
[687]	valid_0's

[364]	valid_0's auc: 0.782278
[365]	valid_0's auc: 0.781359
[366]	valid_0's auc: 0.780845
[367]	valid_0's auc: 0.78044
[368]	valid_0's auc: 0.780367
[369]	valid_0's auc: 0.779926
[370]	valid_0's auc: 0.779301
[371]	valid_0's auc: 0.779301
[372]	valid_0's auc: 0.779852
[373]	valid_0's auc: 0.780771
[374]	valid_0's auc: 0.780404
[375]	valid_0's auc: 0.780845
[376]	valid_0's auc: 0.780918
[377]	valid_0's auc: 0.781175
[378]	valid_0's auc: 0.781065
[379]	valid_0's auc: 0.781065
[380]	valid_0's auc: 0.780698
[381]	valid_0's auc: 0.781469
[382]	valid_0's auc: 0.782057
[383]	valid_0's auc: 0.782204
[384]	valid_0's auc: 0.78316
[385]	valid_0's auc: 0.782903
[386]	valid_0's auc: 0.783564
[387]	valid_0's auc: 0.783785
[388]	valid_0's auc: 0.784336
[389]	valid_0's auc: 0.784263
[390]	valid_0's auc: 0.784299
[391]	valid_0's auc: 0.784263
[392]	valid_0's auc: 0.784226
[393]	valid_0's auc: 0.78441
[394]	valid_0's auc: 0.784557
[395]	valid_0's auc: 0.78463
[396]	valid_0's auc: 0.784667
[397]	valid_0'

[682]	valid_0's auc: 0.792716
[683]	valid_0's auc: 0.792716
[684]	valid_0's auc: 0.793046
[685]	valid_0's auc: 0.792642
[686]	valid_0's auc: 0.793083
[687]	valid_0's auc: 0.793267
[688]	valid_0's auc: 0.793524
[689]	valid_0's auc: 0.793598
[690]	valid_0's auc: 0.793818
[691]	valid_0's auc: 0.793671
[692]	valid_0's auc: 0.793745
[693]	valid_0's auc: 0.793634
[694]	valid_0's auc: 0.793304
[695]	valid_0's auc: 0.793524
[696]	valid_0's auc: 0.793414
[697]	valid_0's auc: 0.793083
[698]	valid_0's auc: 0.793157
[699]	valid_0's auc: 0.793046
[700]	valid_0's auc: 0.79301
[701]	valid_0's auc: 0.792826
[702]	valid_0's auc: 0.792679
[703]	valid_0's auc: 0.792532
[704]	valid_0's auc: 0.792495
[705]	valid_0's auc: 0.792458
[706]	valid_0's auc: 0.792311
[707]	valid_0's auc: 0.792385
[708]	valid_0's auc: 0.792091
[709]	valid_0's auc: 0.791687
[710]	valid_0's auc: 0.791576
[711]	valid_0's auc: 0.791576
[712]	valid_0's auc: 0.792275
[713]	valid_0's auc: 0.792422
[714]	valid_0's auc: 0.792532
[715]	valid

[91]	valid_0's auc: 0.704822
[92]	valid_0's auc: 0.705474
[93]	valid_0's auc: 0.704169
[94]	valid_0's auc: 0.70474
[95]	valid_0's auc: 0.705556
[96]	valid_0's auc: 0.70629
[97]	valid_0's auc: 0.708248
[98]	valid_0's auc: 0.707514
[99]	valid_0's auc: 0.708738
[100]	valid_0's auc: 0.709227
[101]	valid_0's auc: 0.710941
[102]	valid_0's auc: 0.710451
[103]	valid_0's auc: 0.710451
[104]	valid_0's auc: 0.71037
[105]	valid_0's auc: 0.711512
[106]	valid_0's auc: 0.713307
[107]	valid_0's auc: 0.711675
[108]	valid_0's auc: 0.71347
[109]	valid_0's auc: 0.714449
[110]	valid_0's auc: 0.71657
[111]	valid_0's auc: 0.718365
[112]	valid_0's auc: 0.717875
[113]	valid_0's auc: 0.719752
[114]	valid_0's auc: 0.719099
[115]	valid_0's auc: 0.719099
[116]	valid_0's auc: 0.71967
[117]	valid_0's auc: 0.719344
[118]	valid_0's auc: 0.71861
[119]	valid_0's auc: 0.719507
[120]	valid_0's auc: 0.720241
[121]	valid_0's auc: 0.719752
[122]	valid_0's auc: 0.720241
[123]	valid_0's auc: 0.720731
[124]	valid_0's auc: 0.720

[483]	valid_0's auc: 0.7572
[484]	valid_0's auc: 0.757118
[485]	valid_0's auc: 0.756303
[486]	valid_0's auc: 0.756303
[487]	valid_0's auc: 0.755079
[488]	valid_0's auc: 0.755323
[489]	valid_0's auc: 0.754997
[490]	valid_0's auc: 0.755895
[491]	valid_0's auc: 0.756629
[492]	valid_0's auc: 0.75671
[493]	valid_0's auc: 0.757363
[494]	valid_0's auc: 0.758097
[495]	valid_0's auc: 0.757853
[496]	valid_0's auc: 0.757608
[497]	valid_0's auc: 0.75875
[498]	valid_0's auc: 0.75875
[499]	valid_0's auc: 0.75875
[500]	valid_0's auc: 0.75924
[501]	valid_0's auc: 0.75924
[502]	valid_0's auc: 0.759484
[503]	valid_0's auc: 0.759158
[504]	valid_0's auc: 0.759566
[505]	valid_0's auc: 0.759811
[506]	valid_0's auc: 0.759729
[507]	valid_0's auc: 0.75924
[508]	valid_0's auc: 0.758261
[509]	valid_0's auc: 0.758179
[510]	valid_0's auc: 0.758424
[511]	valid_0's auc: 0.758261
[512]	valid_0's auc: 0.758016
[513]	valid_0's auc: 0.757934
[514]	valid_0's auc: 0.757934
[515]	valid_0's auc: 0.757934
[516]	valid_0's auc

[102]	valid_0's auc: 0.710451
[103]	valid_0's auc: 0.710451
[104]	valid_0's auc: 0.71037
[105]	valid_0's auc: 0.711512
[106]	valid_0's auc: 0.713307
[107]	valid_0's auc: 0.711675
[108]	valid_0's auc: 0.71347
[109]	valid_0's auc: 0.714449
[110]	valid_0's auc: 0.71657
[111]	valid_0's auc: 0.718365
[112]	valid_0's auc: 0.717875
[113]	valid_0's auc: 0.719752
[114]	valid_0's auc: 0.719099
[115]	valid_0's auc: 0.719099
[116]	valid_0's auc: 0.71967
[117]	valid_0's auc: 0.719344
[118]	valid_0's auc: 0.71861
[119]	valid_0's auc: 0.719507
[120]	valid_0's auc: 0.720241
[121]	valid_0's auc: 0.719752
[122]	valid_0's auc: 0.720241
[123]	valid_0's auc: 0.720731
[124]	valid_0's auc: 0.720323
[125]	valid_0's auc: 0.722444
[126]	valid_0's auc: 0.722689
[127]	valid_0's auc: 0.722526
[128]	valid_0's auc: 0.721465
[129]	valid_0's auc: 0.723587
[130]	valid_0's auc: 0.725381
[131]	valid_0's auc: 0.726197
[132]	valid_0's auc: 0.725871
[133]	valid_0's auc: 0.726442
[134]	valid_0's auc: 0.727421
[135]	valid_0's

[513]	valid_0's auc: 0.757934
[514]	valid_0's auc: 0.757934
[515]	valid_0's auc: 0.757934
[516]	valid_0's auc: 0.757526
[517]	valid_0's auc: 0.757771
[518]	valid_0's auc: 0.757445
[519]	valid_0's auc: 0.757445
[520]	valid_0's auc: 0.757037
[521]	valid_0's auc: 0.756384
[522]	valid_0's auc: 0.756384
[523]	valid_0's auc: 0.757282
[524]	valid_0's auc: 0.757282
[525]	valid_0's auc: 0.756955
[526]	valid_0's auc: 0.756139
[527]	valid_0's auc: 0.756139
[528]	valid_0's auc: 0.755976
[529]	valid_0's auc: 0.756466
[530]	valid_0's auc: 0.756058
[531]	valid_0's auc: 0.756874
[532]	valid_0's auc: 0.756874
[533]	valid_0's auc: 0.757363
[534]	valid_0's auc: 0.757118
[535]	valid_0's auc: 0.757282
[536]	valid_0's auc: 0.758261
[537]	valid_0's auc: 0.758587
[538]	valid_0's auc: 0.758913
[539]	valid_0's auc: 0.759321
[540]	valid_0's auc: 0.759484
[541]	valid_0's auc: 0.759403
[542]	valid_0's auc: 0.759158
[543]	valid_0's auc: 0.759158
[544]	valid_0's auc: 0.759648
[545]	valid_0's auc: 0.760382
[546]	vali

[166]	valid_0's auc: 0.726534
[167]	valid_0's auc: 0.727915
[168]	valid_0's auc: 0.728566
[169]	valid_0's auc: 0.728241
[170]	valid_0's auc: 0.728728
[171]	valid_0's auc: 0.727509
[172]	valid_0's auc: 0.727428
[173]	valid_0's auc: 0.725396
[174]	valid_0's auc: 0.724909
[175]	valid_0's auc: 0.723283
[176]	valid_0's auc: 0.723364
[177]	valid_0's auc: 0.724746
[178]	valid_0's auc: 0.72182
[179]	valid_0's auc: 0.723121
[180]	valid_0's auc: 0.723283
[181]	valid_0's auc: 0.722145
[182]	valid_0's auc: 0.720845
[183]	valid_0's auc: 0.721577
[184]	valid_0's auc: 0.718813
[185]	valid_0's auc: 0.718732
[186]	valid_0's auc: 0.722308
[187]	valid_0's auc: 0.721658
[188]	valid_0's auc: 0.721333
[189]	valid_0's auc: 0.72052
[190]	valid_0's auc: 0.72182
[191]	valid_0's auc: 0.72117
[192]	valid_0's auc: 0.719626
[193]	valid_0's auc: 0.71987
[194]	valid_0's auc: 0.719057
[195]	valid_0's auc: 0.720114
[196]	valid_0's auc: 0.720358
[197]	valid_0's auc: 0.720601
[198]	valid_0's auc: 0.721008
[199]	valid_0's

[582]	valid_0's auc: 0.74872
[583]	valid_0's auc: 0.748883
[584]	valid_0's auc: 0.749695
[585]	valid_0's auc: 0.75002
[586]	valid_0's auc: 0.749533
[587]	valid_0's auc: 0.749451
[588]	valid_0's auc: 0.74937
[589]	valid_0's auc: 0.749289
[590]	valid_0's auc: 0.748964
[591]	valid_0's auc: 0.748639
[592]	valid_0's auc: 0.748395
[593]	valid_0's auc: 0.74872
[594]	valid_0's auc: 0.74872
[595]	valid_0's auc: 0.748232
[596]	valid_0's auc: 0.748232
[597]	valid_0's auc: 0.74872
[598]	valid_0's auc: 0.749289
[599]	valid_0's auc: 0.749533
[600]	valid_0's auc: 0.749533
[601]	valid_0's auc: 0.749614
[602]	valid_0's auc: 0.749858
[603]	valid_0's auc: 0.749939
[604]	valid_0's auc: 0.749858
[605]	valid_0's auc: 0.750102
[606]	valid_0's auc: 0.750183
[607]	valid_0's auc: 0.749939
[608]	valid_0's auc: 0.75067
[609]	valid_0's auc: 0.75067
[610]	valid_0's auc: 0.75067
[611]	valid_0's auc: 0.75067
[612]	valid_0's auc: 0.750752
[613]	valid_0's auc: 0.750914
[614]	valid_0's auc: 0.75002
[615]	valid_0's auc: 

[887]	valid_0's auc: 0.754815
[888]	valid_0's auc: 0.755221
[889]	valid_0's auc: 0.755221
[890]	valid_0's auc: 0.755303
[891]	valid_0's auc: 0.755059
[892]	valid_0's auc: 0.755059
[893]	valid_0's auc: 0.755465
[894]	valid_0's auc: 0.755221
[895]	valid_0's auc: 0.755384
[896]	valid_0's auc: 0.755547
[897]	valid_0's auc: 0.755384
[898]	valid_0's auc: 0.755628
[899]	valid_0's auc: 0.756034
[900]	valid_0's auc: 0.75579
[901]	valid_0's auc: 0.755547
[902]	valid_0's auc: 0.755628
[903]	valid_0's auc: 0.75514
[904]	valid_0's auc: 0.754978
[905]	valid_0's auc: 0.754896
[906]	valid_0's auc: 0.754896
[907]	valid_0's auc: 0.754978
[908]	valid_0's auc: 0.754653
[909]	valid_0's auc: 0.754571
[910]	valid_0's auc: 0.75449
[911]	valid_0's auc: 0.754409
[912]	valid_0's auc: 0.754409
[913]	valid_0's auc: 0.754978
[914]	valid_0's auc: 0.755059
[915]	valid_0's auc: 0.754734
[916]	valid_0's auc: 0.75514
[917]	valid_0's auc: 0.75514
[918]	valid_0's auc: 0.754815
[919]	valid_0's auc: 0.755059
[920]	valid_0's

[1297]	valid_0's auc: 0.757172
[1298]	valid_0's auc: 0.757659
[1299]	valid_0's auc: 0.757497
[1300]	valid_0's auc: 0.757253
[1301]	valid_0's auc: 0.757334
[1302]	valid_0's auc: 0.757416
[1303]	valid_0's auc: 0.757822
[1304]	valid_0's auc: 0.758147
[1305]	valid_0's auc: 0.758066
[1306]	valid_0's auc: 0.758228
[1307]	valid_0's auc: 0.75831
[1308]	valid_0's auc: 0.758066
[1309]	valid_0's auc: 0.758147
[1310]	valid_0's auc: 0.758228
[1311]	valid_0's auc: 0.758066
[1312]	valid_0's auc: 0.758391
[1313]	valid_0's auc: 0.758472
[1314]	valid_0's auc: 0.758635
[1315]	valid_0's auc: 0.758716
[1316]	valid_0's auc: 0.75896
[1317]	valid_0's auc: 0.759204
[1318]	valid_0's auc: 0.75961
[1319]	valid_0's auc: 0.759122
[1320]	valid_0's auc: 0.759772
[1321]	valid_0's auc: 0.759529
[1322]	valid_0's auc: 0.759854
[1323]	valid_0's auc: 0.75961
[1324]	valid_0's auc: 0.75961
[1325]	valid_0's auc: 0.75961
[1326]	valid_0's auc: 0.759691
[1327]	valid_0's auc: 0.759529
[1328]	valid_0's auc: 0.759122
[1329]	valid_0

[290]	valid_0's auc: 0.773809
[291]	valid_0's auc: 0.774215
[292]	valid_0's auc: 0.774215
[293]	valid_0's auc: 0.774377
[294]	valid_0's auc: 0.773971
[295]	valid_0's auc: 0.774052
[296]	valid_0's auc: 0.774945
[297]	valid_0's auc: 0.773809
[298]	valid_0's auc: 0.774702
[299]	valid_0's auc: 0.773484
[300]	valid_0's auc: 0.773484
[301]	valid_0's auc: 0.773321
[302]	valid_0's auc: 0.773403
[303]	valid_0's auc: 0.773484
[304]	valid_0's auc: 0.772753
[305]	valid_0's auc: 0.772347
[306]	valid_0's auc: 0.773159
[307]	valid_0's auc: 0.774052
[308]	valid_0's auc: 0.773321
[309]	valid_0's auc: 0.774702
[310]	valid_0's auc: 0.774539
[311]	valid_0's auc: 0.774864
[312]	valid_0's auc: 0.775595
[313]	valid_0's auc: 0.774864
[314]	valid_0's auc: 0.776082
[315]	valid_0's auc: 0.776082
[316]	valid_0's auc: 0.776325
[317]	valid_0's auc: 0.776244
[318]	valid_0's auc: 0.776488
[319]	valid_0's auc: 0.776407
[320]	valid_0's auc: 0.776244
[321]	valid_0's auc: 0.776082
[322]	valid_0's auc: 0.776244
[323]	vali

Please use silent argument of the Dataset constructor to pass this parameter.
  .format(key))


[2]	valid_0's auc: 0.613266
[3]	valid_0's auc: 0.623746
[4]	valid_0's auc: 0.632107
[5]	valid_0's auc: 0.646711
[6]	valid_0's auc: 0.693757
[7]	valid_0's auc: 0.702007
[8]	valid_0's auc: 0.722074
[9]	valid_0's auc: 0.718172
[10]	valid_0's auc: 0.702787
[11]	valid_0's auc: 0.705017
[12]	valid_0's auc: 0.712263
[13]	valid_0's auc: 0.716388
[14]	valid_0's auc: 0.710814
[15]	valid_0's auc: 0.725975
[16]	valid_0's auc: 0.728205
[17]	valid_0's auc: 0.736232
[18]	valid_0's auc: 0.73534
[19]	valid_0's auc: 0.740914
[20]	valid_0's auc: 0.746265
[21]	valid_0's auc: 0.743813
[22]	valid_0's auc: 0.741583
[23]	valid_0's auc: 0.740691
[24]	valid_0's auc: 0.736232
[25]	valid_0's auc: 0.736009
[26]	valid_0's auc: 0.73311
[27]	valid_0's auc: 0.735117
[28]	valid_0's auc: 0.739576
[29]	valid_0's auc: 0.735786
[30]	valid_0's auc: 0.73311
[31]	valid_0's auc: 0.732664
[32]	valid_0's auc: 0.735786
[33]	valid_0's auc: 0.737793
[34]	valid_0's auc: 0.738685
[35]	valid_0's auc: 0.740468
[36]	valid_0's auc: 0.733

[379]	valid_0's auc: 0.810256
[380]	valid_0's auc: 0.810256
[381]	valid_0's auc: 0.810702
[382]	valid_0's auc: 0.810256
[383]	valid_0's auc: 0.810925
[384]	valid_0's auc: 0.810033
[385]	valid_0's auc: 0.813155
[386]	valid_0's auc: 0.814716
[387]	valid_0's auc: 0.816945
[388]	valid_0's auc: 0.817614
[389]	valid_0's auc: 0.816499
[390]	valid_0's auc: 0.817168
[391]	valid_0's auc: 0.816276
[392]	valid_0's auc: 0.816054
[393]	valid_0's auc: 0.815162
[394]	valid_0's auc: 0.815608
[395]	valid_0's auc: 0.816499
[396]	valid_0's auc: 0.817168
[397]	valid_0's auc: 0.81806
[398]	valid_0's auc: 0.818729
[399]	valid_0's auc: 0.819844
[400]	valid_0's auc: 0.819844
[401]	valid_0's auc: 0.820959
[402]	valid_0's auc: 0.820067
[403]	valid_0's auc: 0.82029
[404]	valid_0's auc: 0.820513
[405]	valid_0's auc: 0.819398
[406]	valid_0's auc: 0.819398
[407]	valid_0's auc: 0.819398
[408]	valid_0's auc: 0.819175
[409]	valid_0's auc: 0.819844
[410]	valid_0's auc: 0.821851
[411]	valid_0's auc: 0.822297
[412]	valid_

[812]	valid_0's auc: 0.836566
[813]	valid_0's auc: 0.836566
[814]	valid_0's auc: 0.837235
[815]	valid_0's auc: 0.837012
[816]	valid_0's auc: 0.835897
[817]	valid_0's auc: 0.835897
[818]	valid_0's auc: 0.835006
[819]	valid_0's auc: 0.835006
[820]	valid_0's auc: 0.834337
[821]	valid_0's auc: 0.834114
[822]	valid_0's auc: 0.834337
Early stopping, best iteration is:
[622]	valid_0's auc: 0.843701
[1]	valid_0's auc: 0.58623
Training until validation scores don't improve for 200 rounds
[2]	valid_0's auc: 0.574532
[3]	valid_0's auc: 0.54902
[4]	valid_0's auc: 0.563168
[5]	valid_0's auc: 0.572415
[6]	valid_0's auc: 0.577317
[7]	valid_0's auc: 0.588458
[8]	valid_0's auc: 0.582219
[9]	valid_0's auc: 0.588458
[10]	valid_0's auc: 0.604947
[11]	valid_0's auc: 0.607398
[12]	valid_0's auc: 0.607175
[13]	valid_0's auc: 0.613414
[14]	valid_0's auc: 0.609848
[15]	valid_0's auc: 0.615419
[16]	valid_0's auc: 0.620989
[17]	valid_0's auc: 0.622549
[18]	valid_0's auc: 0.627005
[19]	valid_0's auc: 0.631016
[20

[360]	valid_0's auc: 0.725267
[361]	valid_0's auc: 0.726381
[362]	valid_0's auc: 0.725713
[363]	valid_0's auc: 0.725936
[364]	valid_0's auc: 0.72549
[365]	valid_0's auc: 0.726159
[366]	valid_0's auc: 0.72549
[367]	valid_0's auc: 0.723708
[368]	valid_0's auc: 0.724153
[369]	valid_0's auc: 0.724376
[370]	valid_0's auc: 0.721257
[371]	valid_0's auc: 0.723708
[372]	valid_0's auc: 0.726159
[373]	valid_0's auc: 0.727273
[374]	valid_0's auc: 0.727273
[375]	valid_0's auc: 0.72861
[376]	valid_0's auc: 0.727941
[377]	valid_0's auc: 0.728832
[378]	valid_0's auc: 0.72861
[379]	valid_0's auc: 0.728387
[380]	valid_0's auc: 0.727718
[381]	valid_0's auc: 0.729055
[382]	valid_0's auc: 0.727718
[383]	valid_0's auc: 0.72861
[384]	valid_0's auc: 0.729501
[385]	valid_0's auc: 0.728832
[386]	valid_0's auc: 0.729501
[387]	valid_0's auc: 0.729055
[388]	valid_0's auc: 0.728832
[389]	valid_0's auc: 0.727496
[390]	valid_0's auc: 0.728164
[391]	valid_0's auc: 0.729055
[392]	valid_0's auc: 0.728832
[393]	valid_0's

[332]	valid_0's auc: 0.750223
[333]	valid_0's auc: 0.750446
[334]	valid_0's auc: 0.750446
[335]	valid_0's auc: 0.750668
[336]	valid_0's auc: 0.750446
[337]	valid_0's auc: 0.749332
[338]	valid_0's auc: 0.748886
[339]	valid_0's auc: 0.749109
[340]	valid_0's auc: 0.749777
[341]	valid_0's auc: 0.749777
[342]	valid_0's auc: 0.75
[343]	valid_0's auc: 0.75
[344]	valid_0's auc: 0.750891
[345]	valid_0's auc: 0.752228
[346]	valid_0's auc: 0.752228
[347]	valid_0's auc: 0.752674
[348]	valid_0's auc: 0.752005
[349]	valid_0's auc: 0.751783
[350]	valid_0's auc: 0.752005
[351]	valid_0's auc: 0.752897
[352]	valid_0's auc: 0.753565
[353]	valid_0's auc: 0.752897
[354]	valid_0's auc: 0.752228
[355]	valid_0's auc: 0.752674
[356]	valid_0's auc: 0.751783
[357]	valid_0's auc: 0.752005
[358]	valid_0's auc: 0.752674
[359]	valid_0's auc: 0.752674
[360]	valid_0's auc: 0.753565
[361]	valid_0's auc: 0.754011
[362]	valid_0's auc: 0.752674
[363]	valid_0's auc: 0.753119
[364]	valid_0's auc: 0.75156
[365]	valid_0's auc

[761]	valid_0's auc: 0.776292
[762]	valid_0's auc: 0.77607
[763]	valid_0's auc: 0.776292
[764]	valid_0's auc: 0.776738
[765]	valid_0's auc: 0.77607
[766]	valid_0's auc: 0.775847
[767]	valid_0's auc: 0.775401
[768]	valid_0's auc: 0.774733
[769]	valid_0's auc: 0.774064
[770]	valid_0's auc: 0.774733
[771]	valid_0's auc: 0.774733
[772]	valid_0's auc: 0.77451
[773]	valid_0's auc: 0.77451
[774]	valid_0's auc: 0.774955
[775]	valid_0's auc: 0.774733
[776]	valid_0's auc: 0.774955
[777]	valid_0's auc: 0.773841
[778]	valid_0's auc: 0.774064
[779]	valid_0's auc: 0.774287
[780]	valid_0's auc: 0.774064
[781]	valid_0's auc: 0.774955
[782]	valid_0's auc: 0.775847
[783]	valid_0's auc: 0.776738
[784]	valid_0's auc: 0.776292
[785]	valid_0's auc: 0.776961
[786]	valid_0's auc: 0.776515
[787]	valid_0's auc: 0.77607
[788]	valid_0's auc: 0.776515
[789]	valid_0's auc: 0.776292
[790]	valid_0's auc: 0.775624
[791]	valid_0's auc: 0.775401
[792]	valid_0's auc: 0.775847
[793]	valid_0's auc: 0.775847
[794]	valid_0's

[198]	valid_0's auc: 0.757353
[199]	valid_0's auc: 0.75869
[200]	valid_0's auc: 0.760472
[201]	valid_0's auc: 0.760695
[202]	valid_0's auc: 0.761586
[203]	valid_0's auc: 0.762478
[204]	valid_0's auc: 0.762032
[205]	valid_0's auc: 0.760918
[206]	valid_0's auc: 0.759804
[207]	valid_0's auc: 0.759581
[208]	valid_0's auc: 0.760027
[209]	valid_0's auc: 0.761586
[210]	valid_0's auc: 0.760695
[211]	valid_0's auc: 0.759581
[212]	valid_0's auc: 0.760472
[213]	valid_0's auc: 0.761586
[214]	valid_0's auc: 0.762478
[215]	valid_0's auc: 0.763146
[216]	valid_0's auc: 0.762701
[217]	valid_0's auc: 0.761586
[218]	valid_0's auc: 0.760918
[219]	valid_0's auc: 0.761586
[220]	valid_0's auc: 0.762255
[221]	valid_0's auc: 0.761141
[222]	valid_0's auc: 0.761809
[223]	valid_0's auc: 0.761364
[224]	valid_0's auc: 0.761809
[225]	valid_0's auc: 0.759581
[226]	valid_0's auc: 0.760695
[227]	valid_0's auc: 0.759804
[228]	valid_0's auc: 0.762032
[229]	valid_0's auc: 0.762701
[230]	valid_0's auc: 0.762478
[231]	valid

[576]	valid_0's auc: 0.805481
[577]	valid_0's auc: 0.804144
[578]	valid_0's auc: 0.803922
[579]	valid_0's auc: 0.80303
[580]	valid_0's auc: 0.802585
[581]	valid_0's auc: 0.803922
[582]	valid_0's auc: 0.803476
[583]	valid_0's auc: 0.804813
[584]	valid_0's auc: 0.804813
[585]	valid_0's auc: 0.804813
[586]	valid_0's auc: 0.805481
[587]	valid_0's auc: 0.80459
[588]	valid_0's auc: 0.805258
[589]	valid_0's auc: 0.805258
[590]	valid_0's auc: 0.805036
[591]	valid_0's auc: 0.805481
[592]	valid_0's auc: 0.804813
[593]	valid_0's auc: 0.805481
[594]	valid_0's auc: 0.805927
[595]	valid_0's auc: 0.806595
[596]	valid_0's auc: 0.807264
[597]	valid_0's auc: 0.807264
[598]	valid_0's auc: 0.806818
[599]	valid_0's auc: 0.80615
[600]	valid_0's auc: 0.805927
[601]	valid_0's auc: 0.806373
[602]	valid_0's auc: 0.806818
[603]	valid_0's auc: 0.806818
[604]	valid_0's auc: 0.806595
[605]	valid_0's auc: 0.805704
[606]	valid_0's auc: 0.805704
[607]	valid_0's auc: 0.805481
[608]	valid_0's auc: 0.80459
[609]	valid_0'

[100]	valid_0's auc: 0.706551
[101]	valid_0's auc: 0.708779
[102]	valid_0's auc: 0.712344
[103]	valid_0's auc: 0.714572
[104]	valid_0's auc: 0.718137
[105]	valid_0's auc: 0.726381
[106]	valid_0's auc: 0.724376
[107]	valid_0's auc: 0.723485
[108]	valid_0's auc: 0.722594
[109]	valid_0's auc: 0.722816
[110]	valid_0's auc: 0.724153
[111]	valid_0's auc: 0.725045
[112]	valid_0's auc: 0.725267
[113]	valid_0's auc: 0.724153
[114]	valid_0's auc: 0.726381
[115]	valid_0's auc: 0.724599
[116]	valid_0's auc: 0.725045
[117]	valid_0's auc: 0.725713
[118]	valid_0's auc: 0.727941
[119]	valid_0's auc: 0.728387
[120]	valid_0's auc: 0.726827
[121]	valid_0's auc: 0.730169
[122]	valid_0's auc: 0.730169
[123]	valid_0's auc: 0.730838
[124]	valid_0's auc: 0.732843
[125]	valid_0's auc: 0.73574
[126]	valid_0's auc: 0.737745
[127]	valid_0's auc: 0.737522
[128]	valid_0's auc: 0.738414
[129]	valid_0's auc: 0.737522
[130]	valid_0's auc: 0.740865
[131]	valid_0's auc: 0.743093
[132]	valid_0's auc: 0.743984
[133]	valid

[472]	valid_0's auc: 0.784759
[473]	valid_0's auc: 0.785205
[474]	valid_0's auc: 0.785205
[475]	valid_0's auc: 0.786319
[476]	valid_0's auc: 0.786096
[477]	valid_0's auc: 0.786765
[478]	valid_0's auc: 0.786542
[479]	valid_0's auc: 0.786319
[480]	valid_0's auc: 0.785873
[481]	valid_0's auc: 0.784982
[482]	valid_0's auc: 0.784982
[483]	valid_0's auc: 0.784537
[484]	valid_0's auc: 0.784091
[485]	valid_0's auc: 0.783868
[486]	valid_0's auc: 0.784314
[487]	valid_0's auc: 0.784314
[488]	valid_0's auc: 0.784759
[489]	valid_0's auc: 0.786096
[490]	valid_0's auc: 0.786319
[491]	valid_0's auc: 0.785873
[492]	valid_0's auc: 0.786319
[493]	valid_0's auc: 0.786542
[494]	valid_0's auc: 0.786988
[495]	valid_0's auc: 0.787879
[496]	valid_0's auc: 0.785873
[497]	valid_0's auc: 0.784759
[498]	valid_0's auc: 0.785205
[499]	valid_0's auc: 0.785205
[500]	valid_0's auc: 0.785205
[501]	valid_0's auc: 0.783422
[502]	valid_0's auc: 0.784314
[503]	valid_0's auc: 0.784982
[504]	valid_0's auc: 0.785651
[505]	vali

[181]	valid_0's auc: 0.741046
[182]	valid_0's auc: 0.740148
[183]	valid_0's auc: 0.740964
[184]	valid_0's auc: 0.741454
[185]	valid_0's auc: 0.741699
[186]	valid_0's auc: 0.741535
[187]	valid_0's auc: 0.741699
[188]	valid_0's auc: 0.741699
[189]	valid_0's auc: 0.741617
[190]	valid_0's auc: 0.74227
[191]	valid_0's auc: 0.74227
[192]	valid_0's auc: 0.743249
[193]	valid_0's auc: 0.743657
[194]	valid_0's auc: 0.743901
[195]	valid_0's auc: 0.743738
[196]	valid_0's auc: 0.744799
[197]	valid_0's auc: 0.74488
[198]	valid_0's auc: 0.74537
[199]	valid_0's auc: 0.745044
[200]	valid_0's auc: 0.745207
[201]	valid_0's auc: 0.74537
[202]	valid_0's auc: 0.746431
[203]	valid_0's auc: 0.746186
[204]	valid_0's auc: 0.74537
[205]	valid_0's auc: 0.745125
[206]	valid_0's auc: 0.745207
[207]	valid_0's auc: 0.745941
[208]	valid_0's auc: 0.746757
[209]	valid_0's auc: 0.74692
[210]	valid_0's auc: 0.746675
[211]	valid_0's auc: 0.747246
[212]	valid_0's auc: 0.747083
[213]	valid_0's auc: 0.747573
[214]	valid_0's a

Please use silent argument of the Dataset constructor to pass this parameter.
  .format(key))


[72]	valid_0's auc: 0.750948
[73]	valid_0's auc: 0.756299
[74]	valid_0's auc: 0.756745
[75]	valid_0's auc: 0.757414
[76]	valid_0's auc: 0.75786
[77]	valid_0's auc: 0.753177
[78]	valid_0's auc: 0.755853
[79]	valid_0's auc: 0.753846
[80]	valid_0's auc: 0.7534
[81]	valid_0's auc: 0.757191
[82]	valid_0's auc: 0.753846
[83]	valid_0's auc: 0.756522
[84]	valid_0's auc: 0.757414
[85]	valid_0's auc: 0.758974
[86]	valid_0's auc: 0.759197
[87]	valid_0's auc: 0.758305
[88]	valid_0's auc: 0.758751
[89]	valid_0's auc: 0.761873
[90]	valid_0's auc: 0.761873
[91]	valid_0's auc: 0.761204
[92]	valid_0's auc: 0.760981
[93]	valid_0's auc: 0.76165
[94]	valid_0's auc: 0.76165
[95]	valid_0's auc: 0.762319
[96]	valid_0's auc: 0.764326
[97]	valid_0's auc: 0.762988
[98]	valid_0's auc: 0.762988
[99]	valid_0's auc: 0.764771
[100]	valid_0's auc: 0.766555
[101]	valid_0's auc: 0.7699
[102]	valid_0's auc: 0.768562
[103]	valid_0's auc: 0.768116
[104]	valid_0's auc: 0.769231
[105]	valid_0's auc: 0.769454
[106]	valid_0's

[467]	valid_0's auc: 0.82631
[468]	valid_0's auc: 0.828094
[469]	valid_0's auc: 0.828317
[470]	valid_0's auc: 0.827871
[471]	valid_0's auc: 0.827425
[472]	valid_0's auc: 0.827648
[473]	valid_0's auc: 0.826756
[474]	valid_0's auc: 0.826533
[475]	valid_0's auc: 0.826533
[476]	valid_0's auc: 0.827202
[477]	valid_0's auc: 0.827871
[478]	valid_0's auc: 0.828986
[479]	valid_0's auc: 0.82854
[480]	valid_0's auc: 0.828094
[481]	valid_0's auc: 0.828986
[482]	valid_0's auc: 0.829654
[483]	valid_0's auc: 0.829654
[484]	valid_0's auc: 0.830323
[485]	valid_0's auc: 0.829208
[486]	valid_0's auc: 0.829208
[487]	valid_0's auc: 0.829877
[488]	valid_0's auc: 0.829877
[489]	valid_0's auc: 0.829877
[490]	valid_0's auc: 0.831215
[491]	valid_0's auc: 0.831884
[492]	valid_0's auc: 0.833222
[493]	valid_0's auc: 0.832776
[494]	valid_0's auc: 0.832553
[495]	valid_0's auc: 0.831215
[496]	valid_0's auc: 0.830992
[497]	valid_0's auc: 0.830323
[498]	valid_0's auc: 0.831661
[499]	valid_0's auc: 0.832999
[500]	valid_

[35]	valid_0's auc: 0.659759
[36]	valid_0's auc: 0.65107
[37]	valid_0's auc: 0.654189
[38]	valid_0's auc: 0.653743
[39]	valid_0's auc: 0.64795
[40]	valid_0's auc: 0.649064
[41]	valid_0's auc: 0.65107
[42]	valid_0's auc: 0.654412
[43]	valid_0's auc: 0.655971
[44]	valid_0's auc: 0.655526
[45]	valid_0's auc: 0.654189
[46]	valid_0's auc: 0.657308
[47]	valid_0's auc: 0.658645
[48]	valid_0's auc: 0.664884
[49]	valid_0's auc: 0.667335
[50]	valid_0's auc: 0.670455
[51]	valid_0's auc: 0.677807
[52]	valid_0's auc: 0.682932
[53]	valid_0's auc: 0.686275
[54]	valid_0's auc: 0.687611
[55]	valid_0's auc: 0.692068
[56]	valid_0's auc: 0.693627
[57]	valid_0's auc: 0.693405
[58]	valid_0's auc: 0.695187
[59]	valid_0's auc: 0.698752
[60]	valid_0's auc: 0.700535
[61]	valid_0's auc: 0.697861
[62]	valid_0's auc: 0.694742
[63]	valid_0's auc: 0.692513
[64]	valid_0's auc: 0.693405
[65]	valid_0's auc: 0.691622
[66]	valid_0's auc: 0.686943
[67]	valid_0's auc: 0.687834
[68]	valid_0's auc: 0.681818
[69]	valid_0's au

[384]	valid_0's auc: 0.729501
[385]	valid_0's auc: 0.728832
[386]	valid_0's auc: 0.729501
[387]	valid_0's auc: 0.729055
[388]	valid_0's auc: 0.728832
[389]	valid_0's auc: 0.727496
[390]	valid_0's auc: 0.728164
[391]	valid_0's auc: 0.729055
[392]	valid_0's auc: 0.728832
[393]	valid_0's auc: 0.729947
[394]	valid_0's auc: 0.729055
[395]	valid_0's auc: 0.727718
[396]	valid_0's auc: 0.730392
[397]	valid_0's auc: 0.72861
[398]	valid_0's auc: 0.729501
[399]	valid_0's auc: 0.730392
[400]	valid_0's auc: 0.730169
[401]	valid_0's auc: 0.733066
[402]	valid_0's auc: 0.732398
Early stopping, best iteration is:
[202]	valid_0's auc: 0.74287
[1]	valid_0's auc: 0.570856
Training until validation scores don't improve for 200 rounds
[2]	valid_0's auc: 0.582665
[3]	valid_0's auc: 0.595031
[4]	valid_0's auc: 0.59336
[5]	valid_0's auc: 0.574532
[6]	valid_0's auc: 0.559492
[7]	valid_0's auc: 0.564394
[8]	valid_0's auc: 0.584002
[9]	valid_0's auc: 0.584225
[10]	valid_0's auc: 0.590352
[11]	valid_0's auc: 0.594

[372]	valid_0's auc: 0.753119
[373]	valid_0's auc: 0.753119
[374]	valid_0's auc: 0.754456
[375]	valid_0's auc: 0.755125
[376]	valid_0's auc: 0.756462
[377]	valid_0's auc: 0.758021
[378]	valid_0's auc: 0.757353
[379]	valid_0's auc: 0.757799
[380]	valid_0's auc: 0.758021
[381]	valid_0's auc: 0.757799
[382]	valid_0's auc: 0.75869
[383]	valid_0's auc: 0.759581
[384]	valid_0's auc: 0.759358
[385]	valid_0's auc: 0.758467
[386]	valid_0's auc: 0.75713
[387]	valid_0's auc: 0.756016
[388]	valid_0's auc: 0.756684
[389]	valid_0's auc: 0.756239
[390]	valid_0's auc: 0.756462
[391]	valid_0's auc: 0.755793
[392]	valid_0's auc: 0.756239
[393]	valid_0's auc: 0.755125
[394]	valid_0's auc: 0.755125
[395]	valid_0's auc: 0.756239
[396]	valid_0's auc: 0.755793
[397]	valid_0's auc: 0.754011
[398]	valid_0's auc: 0.753342
[399]	valid_0's auc: 0.754234
[400]	valid_0's auc: 0.753342
[401]	valid_0's auc: 0.752451
[402]	valid_0's auc: 0.752451
[403]	valid_0's auc: 0.752674
[404]	valid_0's auc: 0.753119
[405]	valid_

[770]	valid_0's auc: 0.774733
[771]	valid_0's auc: 0.774733
[772]	valid_0's auc: 0.77451
[773]	valid_0's auc: 0.77451
[774]	valid_0's auc: 0.774955
[775]	valid_0's auc: 0.774733
[776]	valid_0's auc: 0.774955
[777]	valid_0's auc: 0.773841
[778]	valid_0's auc: 0.774064
[779]	valid_0's auc: 0.774287
[780]	valid_0's auc: 0.774064
[781]	valid_0's auc: 0.774955
[782]	valid_0's auc: 0.775847
[783]	valid_0's auc: 0.776738
[784]	valid_0's auc: 0.776292
[785]	valid_0's auc: 0.776961
[786]	valid_0's auc: 0.776515
[787]	valid_0's auc: 0.77607
[788]	valid_0's auc: 0.776515
[789]	valid_0's auc: 0.776292
[790]	valid_0's auc: 0.775624
[791]	valid_0's auc: 0.775401
[792]	valid_0's auc: 0.775847
[793]	valid_0's auc: 0.775847
[794]	valid_0's auc: 0.775847
[795]	valid_0's auc: 0.775401
[796]	valid_0's auc: 0.775178
[797]	valid_0's auc: 0.774287
[798]	valid_0's auc: 0.77295
[799]	valid_0's auc: 0.773396
[800]	valid_0's auc: 0.772504
[801]	valid_0's auc: 0.773619
[802]	valid_0's auc: 0.773619
[803]	valid_0'

[216]	valid_0's auc: 0.762701
[217]	valid_0's auc: 0.761586
[218]	valid_0's auc: 0.760918
[219]	valid_0's auc: 0.761586
[220]	valid_0's auc: 0.762255
[221]	valid_0's auc: 0.761141
[222]	valid_0's auc: 0.761809
[223]	valid_0's auc: 0.761364
[224]	valid_0's auc: 0.761809
[225]	valid_0's auc: 0.759581
[226]	valid_0's auc: 0.760695
[227]	valid_0's auc: 0.759804
[228]	valid_0's auc: 0.762032
[229]	valid_0's auc: 0.762701
[230]	valid_0's auc: 0.762478
[231]	valid_0's auc: 0.761364
[232]	valid_0's auc: 0.762701
[233]	valid_0's auc: 0.759804
[234]	valid_0's auc: 0.76025
[235]	valid_0's auc: 0.760472
[236]	valid_0's auc: 0.76025
[237]	valid_0's auc: 0.761809
[238]	valid_0's auc: 0.761586
[239]	valid_0's auc: 0.763146
[240]	valid_0's auc: 0.764483
[241]	valid_0's auc: 0.764483
[242]	valid_0's auc: 0.766043
[243]	valid_0's auc: 0.764929
[244]	valid_0's auc: 0.764706
[245]	valid_0's auc: 0.765374
[246]	valid_0's auc: 0.767157
[247]	valid_0's auc: 0.766266
[248]	valid_0's auc: 0.766488
[249]	valid_

[610]	valid_0's auc: 0.804813
[611]	valid_0's auc: 0.805258
[612]	valid_0's auc: 0.805704
[613]	valid_0's auc: 0.806373
[614]	valid_0's auc: 0.806818
[615]	valid_0's auc: 0.807041
[616]	valid_0's auc: 0.807487
[617]	valid_0's auc: 0.808155
[618]	valid_0's auc: 0.808378
[619]	valid_0's auc: 0.809046
[620]	valid_0's auc: 0.808824
[621]	valid_0's auc: 0.809046
[622]	valid_0's auc: 0.809269
[623]	valid_0's auc: 0.808378
[624]	valid_0's auc: 0.808601
[625]	valid_0's auc: 0.807932
[626]	valid_0's auc: 0.808824
[627]	valid_0's auc: 0.807932
[628]	valid_0's auc: 0.809046
[629]	valid_0's auc: 0.809715
[630]	valid_0's auc: 0.809715
[631]	valid_0's auc: 0.808378
[632]	valid_0's auc: 0.808155
[633]	valid_0's auc: 0.807932
[634]	valid_0's auc: 0.808155
[635]	valid_0's auc: 0.808155
[636]	valid_0's auc: 0.808824
[637]	valid_0's auc: 0.808824
[638]	valid_0's auc: 0.808155
[639]	valid_0's auc: 0.808155
[640]	valid_0's auc: 0.808378
[641]	valid_0's auc: 0.808155
[642]	valid_0's auc: 0.808155
[643]	vali

[128]	valid_0's auc: 0.738414
[129]	valid_0's auc: 0.737522
[130]	valid_0's auc: 0.740865
[131]	valid_0's auc: 0.743093
[132]	valid_0's auc: 0.743984
[133]	valid_0's auc: 0.744875
[134]	valid_0's auc: 0.745098
[135]	valid_0's auc: 0.745766
[136]	valid_0's auc: 0.743316
[137]	valid_0's auc: 0.743093
[138]	valid_0's auc: 0.743093
[139]	valid_0's auc: 0.74131
[140]	valid_0's auc: 0.741533
[141]	valid_0's auc: 0.744207
[142]	valid_0's auc: 0.747995
[143]	valid_0's auc: 0.748663
[144]	valid_0's auc: 0.747772
[145]	valid_0's auc: 0.747326
[146]	valid_0's auc: 0.747549
[147]	valid_0's auc: 0.749109
[148]	valid_0's auc: 0.748886
[149]	valid_0's auc: 0.751783
[150]	valid_0's auc: 0.749109
[151]	valid_0's auc: 0.750668
[152]	valid_0's auc: 0.749332
[153]	valid_0's auc: 0.750223
[154]	valid_0's auc: 0.751783
[155]	valid_0's auc: 0.750223
[156]	valid_0's auc: 0.750446
[157]	valid_0's auc: 0.752005
[158]	valid_0's auc: 0.755125
[159]	valid_0's auc: 0.755793
[160]	valid_0's auc: 0.757353
[161]	valid

[534]	valid_0's auc: 0.782308
[535]	valid_0's auc: 0.782308
[536]	valid_0's auc: 0.782531
[537]	valid_0's auc: 0.782754
[538]	valid_0's auc: 0.782308
[539]	valid_0's auc: 0.782977
[540]	valid_0's auc: 0.7832
[541]	valid_0's auc: 0.782086
[542]	valid_0's auc: 0.782086
[543]	valid_0's auc: 0.781863
[544]	valid_0's auc: 0.780749
[545]	valid_0's auc: 0.780526
[546]	valid_0's auc: 0.780749
[547]	valid_0's auc: 0.781417
[548]	valid_0's auc: 0.781194
[549]	valid_0's auc: 0.782086
[550]	valid_0's auc: 0.780971
[551]	valid_0's auc: 0.780303
[552]	valid_0's auc: 0.781194
[553]	valid_0's auc: 0.780526
[554]	valid_0's auc: 0.78008
[555]	valid_0's auc: 0.780303
[556]	valid_0's auc: 0.779857
[557]	valid_0's auc: 0.778743
[558]	valid_0's auc: 0.779412
[559]	valid_0's auc: 0.779635
[560]	valid_0's auc: 0.779189
[561]	valid_0's auc: 0.778966
[562]	valid_0's auc: 0.778966
[563]	valid_0's auc: 0.778298
[564]	valid_0's auc: 0.778298
[565]	valid_0's auc: 0.778966
[566]	valid_0's auc: 0.781194
[567]	valid_0

[230]	valid_0's auc: 0.747165
[231]	valid_0's auc: 0.747083
[232]	valid_0's auc: 0.747246
[233]	valid_0's auc: 0.747328
[234]	valid_0's auc: 0.748144
[235]	valid_0's auc: 0.74847
[236]	valid_0's auc: 0.749123
[237]	valid_0's auc: 0.749286
[238]	valid_0's auc: 0.749694
[239]	valid_0's auc: 0.749694
[240]	valid_0's auc: 0.749939
[241]	valid_0's auc: 0.750591
[242]	valid_0's auc: 0.750999
[243]	valid_0's auc: 0.750184
[244]	valid_0's auc: 0.750347
[245]	valid_0's auc: 0.750347
[246]	valid_0's auc: 0.751081
[247]	valid_0's auc: 0.750918
[248]	valid_0's auc: 0.750347
[249]	valid_0's auc: 0.751326
[250]	valid_0's auc: 0.751081
[251]	valid_0's auc: 0.751489
[252]	valid_0's auc: 0.751407
[253]	valid_0's auc: 0.751163
[254]	valid_0's auc: 0.750591
[255]	valid_0's auc: 0.750184
Early stopping, best iteration is:
[55]	valid_0's auc: 0.756139
[1]	valid_0's auc: 0.749451
Training until validation scores don't improve for 200 rounds
[2]	valid_0's auc: 0.739293
[3]	valid_0's auc: 0.738033
[4]	valid_0

[358]	valid_0's auc: 0.766843
[359]	valid_0's auc: 0.766436
[360]	valid_0's auc: 0.766761
[361]	valid_0's auc: 0.766843
[362]	valid_0's auc: 0.766843
[363]	valid_0's auc: 0.76668
[364]	valid_0's auc: 0.766436
[365]	valid_0's auc: 0.766436
[366]	valid_0's auc: 0.767087
[367]	valid_0's auc: 0.766843
[368]	valid_0's auc: 0.767087
[369]	valid_0's auc: 0.766924
[370]	valid_0's auc: 0.767087
[371]	valid_0's auc: 0.765868
[372]	valid_0's auc: 0.765949
[373]	valid_0's auc: 0.765217
[374]	valid_0's auc: 0.76473
[375]	valid_0's auc: 0.764405
[376]	valid_0's auc: 0.764892
[377]	valid_0's auc: 0.765461
[378]	valid_0's auc: 0.764892
[379]	valid_0's auc: 0.764323
[380]	valid_0's auc: 0.764405
[381]	valid_0's auc: 0.764161
[382]	valid_0's auc: 0.764323
[383]	valid_0's auc: 0.764323
[384]	valid_0's auc: 0.764974
[385]	valid_0's auc: 0.76473
[386]	valid_0's auc: 0.764486
[387]	valid_0's auc: 0.764242
[388]	valid_0's auc: 0.763998
[389]	valid_0's auc: 0.763104
[390]	valid_0's auc: 0.763673
[391]	valid_0

[132]	valid_0's auc: 0.778355
[133]	valid_0's auc: 0.779167
[134]	valid_0's auc: 0.779248
[135]	valid_0's auc: 0.780791
[136]	valid_0's auc: 0.779654
[137]	valid_0's auc: 0.779492
[138]	valid_0's auc: 0.779248
[139]	valid_0's auc: 0.780547
[140]	valid_0's auc: 0.779248
[141]	valid_0's auc: 0.779817
[142]	valid_0's auc: 0.777949
[143]	valid_0's auc: 0.778517
[144]	valid_0's auc: 0.778274
[145]	valid_0's auc: 0.777543
[146]	valid_0's auc: 0.779005
[147]	valid_0's auc: 0.778842
[148]	valid_0's auc: 0.779248
[149]	valid_0's auc: 0.780547
[150]	valid_0's auc: 0.78006
[151]	valid_0's auc: 0.779817
[152]	valid_0's auc: 0.780141
[153]	valid_0's auc: 0.780953
[154]	valid_0's auc: 0.780791
[155]	valid_0's auc: 0.780791
[156]	valid_0's auc: 0.780872
[157]	valid_0's auc: 0.780466
[158]	valid_0's auc: 0.780872
[159]	valid_0's auc: 0.780628
[160]	valid_0's auc: 0.780466
[161]	valid_0's auc: 0.782252
[162]	valid_0's auc: 0.781359
[163]	valid_0's auc: 0.782577
[164]	valid_0's auc: 0.782577
[165]	valid

[503]	valid_0's auc: 0.785662
[504]	valid_0's auc: 0.785824
[505]	valid_0's auc: 0.786068
[506]	valid_0's auc: 0.785337
[507]	valid_0's auc: 0.78485
[508]	valid_0's auc: 0.784282
[509]	valid_0's auc: 0.783714
[510]	valid_0's auc: 0.783226
[511]	valid_0's auc: 0.782902
[512]	valid_0's auc: 0.782658
[513]	valid_0's auc: 0.782902
[514]	valid_0's auc: 0.783308
[515]	valid_0's auc: 0.782983
[516]	valid_0's auc: 0.782739
[517]	valid_0's auc: 0.78282
[518]	valid_0's auc: 0.782252
[519]	valid_0's auc: 0.783551
[520]	valid_0's auc: 0.78347
[521]	valid_0's auc: 0.783632
[522]	valid_0's auc: 0.784038
[523]	valid_0's auc: 0.783795
[524]	valid_0's auc: 0.784363
[525]	valid_0's auc: 0.78347
[526]	valid_0's auc: 0.784038
[527]	valid_0's auc: 0.784444
[528]	valid_0's auc: 0.78412
[529]	valid_0's auc: 0.784444
[530]	valid_0's auc: 0.78485
[531]	valid_0's auc: 0.784688
[532]	valid_0's auc: 0.784688
[533]	valid_0's auc: 0.784688
[534]	valid_0's auc: 0.784525
[535]	valid_0's auc: 0.785013
[536]	valid_0's 

## 天猫复购场景实战

## 读取特征数据

In [54]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

train_data = pd.read_csv('train_all.csv',nrows=10000)
test_data = pd.read_csv('test_all.csv',nrows=100)

features_columns = [col for col in train_data.columns if col not in ['user_id','label']]
train = train_data[features_columns].values
test = test_data[features_columns].values
target =train_data['label'].values

## 设置模型参数

In [55]:
params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'auc',
        'num_leaves': 9,
        'learning_rate': 0.03,
        'feature_fraction_seed': 2,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'min_data': 20,
        'min_hessian': 1,
        'verbose': -1,
        'silent': 0
        }

model = SBBTree(params=params,
                stacking_num=5,
                bagging_num=3,
                bagging_test_size=0.33,
                num_boost_round=10000,
                early_stopping_rounds=200)

## 模型训练

In [56]:
model.fit(train, target)

Please use silent argument of the Dataset constructor to pass this parameter.
  .format(key))


[1]	valid_0's auc: 0.463733
Training until validation scores don't improve for 200 rounds
[2]	valid_0's auc: 0.456267
[3]	valid_0's auc: 0.5256
[4]	valid_0's auc: 0.561653
[5]	valid_0's auc: 0.55344
[6]	valid_0's auc: 0.536907
[7]	valid_0's auc: 0.51184
[8]	valid_0's auc: 0.496533
[9]	valid_0's auc: 0.49392
[10]	valid_0's auc: 0.504427
[11]	valid_0's auc: 0.483893
[12]	valid_0's auc: 0.48144
[13]	valid_0's auc: 0.47344
[14]	valid_0's auc: 0.469387
[15]	valid_0's auc: 0.47232
[16]	valid_0's auc: 0.486133
[17]	valid_0's auc: 0.490827
[18]	valid_0's auc: 0.501493
[19]	valid_0's auc: 0.502987
[20]	valid_0's auc: 0.504427
[21]	valid_0's auc: 0.48976
[22]	valid_0's auc: 0.490187
[23]	valid_0's auc: 0.48752
[24]	valid_0's auc: 0.490933
[25]	valid_0's auc: 0.493813
[26]	valid_0's auc: 0.492907
[27]	valid_0's auc: 0.493813
[28]	valid_0's auc: 0.49616
[29]	valid_0's auc: 0.49632
[30]	valid_0's auc: 0.493013
[31]	valid_0's auc: 0.49616
[32]	valid_0's auc: 0.49536
[33]	valid_0's auc: 0.49136
[34]	

[91]	valid_0's auc: 0.456191
[92]	valid_0's auc: 0.456911
[93]	valid_0's auc: 0.455368
[94]	valid_0's auc: 0.453106
[95]	valid_0's auc: 0.453003
[96]	valid_0's auc: 0.451872
[97]	valid_0's auc: 0.44745
[98]	valid_0's auc: 0.447964
[99]	valid_0's auc: 0.449506
[100]	valid_0's auc: 0.447141
[101]	valid_0's auc: 0.446421
[102]	valid_0's auc: 0.441896
[103]	valid_0's auc: 0.442822
[104]	valid_0's auc: 0.445496
[105]	valid_0's auc: 0.444159
[106]	valid_0's auc: 0.442308
[107]	valid_0's auc: 0.442205
[108]	valid_0's auc: 0.441176
[109]	valid_0's auc: 0.440251
[110]	valid_0's auc: 0.437783
[111]	valid_0's auc: 0.439737
[112]	valid_0's auc: 0.443336
[113]	valid_0's auc: 0.448478
[114]	valid_0's auc: 0.449918
[115]	valid_0's auc: 0.452489
[116]	valid_0's auc: 0.450123
[117]	valid_0's auc: 0.449815
[118]	valid_0's auc: 0.44745
[119]	valid_0's auc: 0.451563
[120]	valid_0's auc: 0.453928
[121]	valid_0's auc: 0.449918
[122]	valid_0's auc: 0.444673
[123]	valid_0's auc: 0.442616
[124]	valid_0's auc: 

[185]	valid_0's auc: 0.524476
[186]	valid_0's auc: 0.527252
[187]	valid_0's auc: 0.528692
[188]	valid_0's auc: 0.527664
[189]	valid_0's auc: 0.527149
[190]	valid_0's auc: 0.528281
[191]	valid_0's auc: 0.527664
[192]	valid_0's auc: 0.527972
[193]	valid_0's auc: 0.531263
[194]	valid_0's auc: 0.529412
[195]	valid_0's auc: 0.528795
[196]	valid_0's auc: 0.528281
[197]	valid_0's auc: 0.528795
[198]	valid_0's auc: 0.529
[199]	valid_0's auc: 0.529617
[200]	valid_0's auc: 0.52972
[201]	valid_0's auc: 0.527869
[202]	valid_0's auc: 0.530954
[203]	valid_0's auc: 0.528898
[204]	valid_0's auc: 0.529823
[205]	valid_0's auc: 0.531057
[206]	valid_0's auc: 0.530852
[207]	valid_0's auc: 0.530029
[208]	valid_0's auc: 0.527972
[209]	valid_0's auc: 0.528178
[210]	valid_0's auc: 0.526429
[211]	valid_0's auc: 0.523961
[212]	valid_0's auc: 0.523241
[213]	valid_0's auc: 0.522624
[214]	valid_0's auc: 0.523241
[215]	valid_0's auc: 0.52355
[216]	valid_0's auc: 0.526738
[217]	valid_0's auc: 0.528795
[218]	valid_0's

[74]	valid_0's auc: 0.50833
[75]	valid_0's auc: 0.505656
[76]	valid_0's auc: 0.509461
[77]	valid_0's auc: 0.510901
[78]	valid_0's auc: 0.512855
[79]	valid_0's auc: 0.511312
[80]	valid_0's auc: 0.514397
[81]	valid_0's auc: 0.516865
[82]	valid_0's auc: 0.518717
[83]	valid_0's auc: 0.520156
[84]	valid_0's auc: 0.52355
[85]	valid_0's auc: 0.520773
[86]	valid_0's auc: 0.518614
[87]	valid_0's auc: 0.517894
[88]	valid_0's auc: 0.519745
[89]	valid_0's auc: 0.524476
[90]	valid_0's auc: 0.52211
[91]	valid_0's auc: 0.520259
[92]	valid_0's auc: 0.523344
[93]	valid_0's auc: 0.52355
[94]	valid_0's auc: 0.519436
[95]	valid_0's auc: 0.522933
[96]	valid_0's auc: 0.524167
[97]	valid_0's auc: 0.523756
[98]	valid_0's auc: 0.528383
[99]	valid_0's auc: 0.533114
[100]	valid_0's auc: 0.533628
[101]	valid_0's auc: 0.538564
[102]	valid_0's auc: 0.538359
[103]	valid_0's auc: 0.541135
[104]	valid_0's auc: 0.545557
[105]	valid_0's auc: 0.545249
[106]	valid_0's auc: 0.544838
[107]	valid_0's auc: 0.543706
[108]	vali

[167]	valid_0's auc: 0.562492
[168]	valid_0's auc: 0.56259
[169]	valid_0's auc: 0.560557
[170]	valid_0's auc: 0.557639
[171]	valid_0's auc: 0.559803
[172]	valid_0's auc: 0.559902
[173]	valid_0's auc: 0.562295
[174]	valid_0's auc: 0.563016
[175]	valid_0's auc: 0.564689
[176]	valid_0's auc: 0.565672
[177]	valid_0's auc: 0.568033
[178]	valid_0's auc: 0.56941
[179]	valid_0's auc: 0.56918
[180]	valid_0's auc: 0.57318
[181]	valid_0's auc: 0.572623
[182]	valid_0's auc: 0.571574
[183]	valid_0's auc: 0.57377
[184]	valid_0's auc: 0.572689
[185]	valid_0's auc: 0.571574
[186]	valid_0's auc: 0.572
[187]	valid_0's auc: 0.572525
[188]	valid_0's auc: 0.57423
[189]	valid_0's auc: 0.575344
[190]	valid_0's auc: 0.574361
[191]	valid_0's auc: 0.577443
[192]	valid_0's auc: 0.579148
[193]	valid_0's auc: 0.578623
[194]	valid_0's auc: 0.577934
[195]	valid_0's auc: 0.579574
[196]	valid_0's auc: 0.579344
[197]	valid_0's auc: 0.579508
[198]	valid_0's auc: 0.579508
[199]	valid_0's auc: 0.578557
[200]	valid_0's auc

[457]	valid_0's auc: 0.583738
[458]	valid_0's auc: 0.583377
[459]	valid_0's auc: 0.584033
[460]	valid_0's auc: 0.583836
[461]	valid_0's auc: 0.583869
[462]	valid_0's auc: 0.582951
[463]	valid_0's auc: 0.582525
[464]	valid_0's auc: 0.582754
[465]	valid_0's auc: 0.582295
[466]	valid_0's auc: 0.58177
[467]	valid_0's auc: 0.581508
[468]	valid_0's auc: 0.58082
[469]	valid_0's auc: 0.580361
[470]	valid_0's auc: 0.580295
[471]	valid_0's auc: 0.580164
[472]	valid_0's auc: 0.580197
[473]	valid_0's auc: 0.580164
[474]	valid_0's auc: 0.579738
[475]	valid_0's auc: 0.580459
[476]	valid_0's auc: 0.579443
[477]	valid_0's auc: 0.579279
[478]	valid_0's auc: 0.578918
[479]	valid_0's auc: 0.579541
[480]	valid_0's auc: 0.578918
[481]	valid_0's auc: 0.579082
[482]	valid_0's auc: 0.579443
[483]	valid_0's auc: 0.579115
[484]	valid_0's auc: 0.579279
[485]	valid_0's auc: 0.578066
[486]	valid_0's auc: 0.577508
[487]	valid_0's auc: 0.577377
[488]	valid_0's auc: 0.576557
[489]	valid_0's auc: 0.576951
[490]	valid_

[195]	valid_0's auc: 0.521767
[196]	valid_0's auc: 0.524368
[197]	valid_0's auc: 0.523237
[198]	valid_0's auc: 0.522559
[199]	valid_0's auc: 0.522747
[200]	valid_0's auc: 0.520975
[201]	valid_0's auc: 0.521955
[202]	valid_0's auc: 0.521579
Early stopping, best iteration is:
[2]	valid_0's auc: 0.548038
[1]	valid_0's auc: 0.593117
Training until validation scores don't improve for 200 rounds
[2]	valid_0's auc: 0.595021
[3]	valid_0's auc: 0.600335
[4]	valid_0's auc: 0.587803
[5]	valid_0's auc: 0.563624
[6]	valid_0's auc: 0.599808
[7]	valid_0's auc: 0.591798
[8]	valid_0's auc: 0.596321
[9]	valid_0's auc: 0.611945
[10]	valid_0's auc: 0.608929
[11]	valid_0's auc: 0.608232
[12]	valid_0's auc: 0.594908
[13]	valid_0's auc: 0.603709
[14]	valid_0's auc: 0.604745
[15]	valid_0's auc: 0.61937
[16]	valid_0's auc: 0.610663
[17]	valid_0's auc: 0.602484
[18]	valid_0's auc: 0.597358
[19]	valid_0's auc: 0.594606
[20]	valid_0's auc: 0.591384
[21]	valid_0's auc: 0.595869
[22]	valid_0's auc: 0.597245
[23]	va

## 预测结果

In [57]:
pred = model.predict(test)
df_out = pd.DataFrame()
df_out['user_id'] = test_data['user_id']
df_out['predict_prob'] = pred
df_out.head()

Unnamed: 0,user_id,predict_prob
0,,0.041774
1,,0.04176
2,,0.041778
3,,0.041757
4,,0.047263


## 保存结果

In [59]:
"""
    保留数据头，不保存index
"""
df_out.to_csv('df_out.csv',header=True,index=False)
print('save OK!')

save OK!
