In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import feather
import datetime
from matplotlib_venn import venn2
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgbm
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
pd.set_option('display.max_Columns', 100)

In [2]:
# Load the raw inputs: cart event log, product master, per-session metadata and
# user master (feather files), plus the test session list and the
# display/action id lookup table (CSV files).
cartlog = pd.read_feather('../inputs/cartlog.f')
product_master = pd.read_feather('../inputs/product_master.f')
meta = pd.read_feather('../inputs/meta.f')
user_master = pd.read_feather('../inputs/user_master.f')
test = pd.read_csv('../inputs/test.csv')
display_action_id = pd.read_csv('../inputs/display_action_id.csv')

# JAN is cast to str because it is later merged against the cart log's
# `value_1` column (renamed to "JAN") -- presumably that column holds string codes.
product_master['JAN'] = product_master['JAN'].astype(str)

In [3]:
# Distinct session ids listed in the test file, and the cart-log rows that belong to them.
test_sessions = test["session_id"].unique()
print(len(test_sessions))
test_input_log = cartlog[cartlog["session_id"].isin(test_sessions)]

56486


In [4]:
# Product category ids used as prediction targets (one binary target per category).
target_category = [
    38,  # ice cream / novelty
    110,  # snacks & candy / gum
    113,  # snacks & candy / cereal
    114,  # snacks & candy / snacks
    134,  # chocolate & biscuits-crackers / chocolate
    171,  # beer-type / RTD (ready-to-drink)
    172,  # beer-type / non-alcoholic
    173,  # beer-type / beer-type
    376,  # Japanese confectionery / rice crackers
    435,  # large PET bottle / unsweetened tea (large PET)
    467,  # small PET bottle / coffee (small PET)
    537,  # water & sparkling water / large PET (sparkling water)
    539,  # water & sparkling water / small PET (sparkling water)
    629,  # canned drinks / coffee (can)
    768,  # noodles / cup noodles
]

In [5]:
# Restrict the train data to sessions dated before 2020-08-01 (strict `<`) that
# contain a purchase and have at least one log row at 10 minutes or later.
tmp_sessions = meta[meta['date'] < '2020-08-01']['session_id'].unique()
tmp_log = cartlog[cartlog["session_id"].isin(tmp_sessions)]
print('2020-08-01以前: ', len(tmp_sessions))

# Sessions in which a purchase occurred (any row with is_payment == 1)
payment_sessions = set(tmp_log[tmp_log['is_payment']==1]['session_id'].unique())
print('購買が発生: ', len(payment_sessions))
# Sessions of 10 minutes or longer (any row with spend_time >= 600)
over10min_sessions = set(tmp_log[tmp_log['spend_time']>=600]['session_id'].unique())
print('10分以上: ', len(over10min_sessions))
# Intersection of the two session sets
all_train_sessions = payment_sessions & over10min_sessions
print('積集合: ', len(all_train_sessions))

# Every log row of the selected train sessions
all_train_log = tmp_log[tmp_log["session_id"].isin(all_train_sessions)]
print('全trainのログ: ', len(all_train_log))

2020-08-01以前:  663708
購買が発生:  618462
10分以上:  404825
積集合:  391383
全trainのログ:  10826062


In [6]:
class RetailDataset:
    """Split the train cart log into model input and prediction target at a time threshold.

    For every session a cut-off ``time_elapsed_sec`` is derived from ``meta``:
    ``time_elapsed * 60`` where ``time_elapsed`` is present, otherwise ``thres_sec``.
    Log rows with ``spend_time <= time_elapsed_sec`` form the input log; rows
    after the cut-off are used to build the targets.

    Reads the notebook-level globals ``all_train_log``, ``product_master`` and
    ``target_category``.
    """

    def __init__(self, thres_sec, meta):
        """
        Args:
            thres_sec: cut-off in seconds applied to sessions whose
                ``time_elapsed`` is null.
            meta: session metadata with ``session_id`` and ``time_elapsed``
                columns; it is copied, not modified.
        """
        self.thres_sec = thres_sec
        self.meta = meta.copy()
        self.meta['time_elapsed_sec'] = self.meta['time_elapsed'] * 60
        self.meta.loc[self.meta['time_elapsed_sec'].isnull(), 'time_elapsed_sec'] = thres_sec

        # Keep only the rows of all_train_log at or before each session's cut-off
        # -> public_train_log, train_sessions
        merge_train = self._merge_elapsed()
        self.public_train_log = merge_train[merge_train['spend_time'] <= merge_train['time_elapsed_sec']]
        self.train_sessions = self.public_train_log["session_id"].unique()

    def _merge_elapsed(self) -> pd.DataFrame:
        """Return all_train_log with each session's ``time_elapsed_sec`` attached."""
        return pd.merge(all_train_log, self.meta[["session_id", "time_elapsed_sec"]], on=["session_id"], how="inner")

    def get_train_input_log(self) -> pd.DataFrame:
        """Return the log rows at or before the cut-off (the model input)."""
        return self.public_train_log

    def agg_payment(self, cartlog) -> pd.DataFrame:
        """Sum the purchased item count per session and per product.

        Only rows with ``kind_1 == "商品"`` are counted; ``value_1`` (the JAN
        code) is renamed to ``JAN`` in the result.
        """
        agg = cartlog.loc[cartlog["kind_1"] == "商品"].groupby(["session_id", "value_1"])["n_items"].sum().reset_index()
        return agg.rename(columns={"value_1": "JAN"})

    def get_train_target(self) -> pd.DataFrame:
        """Build the binary targets for the training sessions.

        Returns:
            DataFrame with a ``session_id`` column followed by one 0/1 column
            per entry of ``target_category`` (in that order): 1 when the
            session bought at least one item of the category after the cut-off.
        """
        train_target = pd.DataFrame(
            index=self.train_sessions,
        )
        train_target.index.name = "session_id"

        # Aggregate purchases from the rows strictly after the cut-off
        merge_train = self._merge_elapsed()
        after_elapsed_log = merge_train[merge_train['spend_time'] > merge_train['time_elapsed_sec']]

        train_item_num = self.agg_payment(after_elapsed_log)
        train_item_num_cate = pd.merge(train_item_num, product_master[["JAN", "category_id"]], on="JAN", how="inner")
        train_item_num_cate = train_item_num_cate[train_item_num_cate['category_id'].isin(target_category)]
        purchased = train_item_num_cate.groupby(["session_id", "category_id"])["n_items"].sum().unstack().fillna(0).astype(int)
        train_target_pos = (purchased > 0).astype(int)
        # Guarantee one column per target category even when a category was
        # never purchased in this slice; otherwise the target frame's width
        # would depend on the data.
        train_target_pos = train_target_pos.reindex(columns=target_category, fill_value=0)

        return train_target.join(train_target_pos).fillna(0).reset_index()

In [7]:
def get_train_log(elapsed_min):
    """Build the input log and the targets for a cut-off given in minutes.

    Returns:
        (train_input_log, y_train) as produced by RetailDataset for a
        threshold of ``elapsed_min * 60`` seconds.
    """
    threshold_sec = elapsed_min*60
    retail = RetailDataset(threshold_sec, meta)
    input_log = retail.get_train_input_log()
    targets = retail.get_train_target()
    print('train_session', targets.shape)
    return input_log, targets

In [8]:
# Elapsed-time cut-offs (minutes); a separate train set and set of models is built for each.
ELAPSED_MIN = [0, 3, 5, 10]
# ELAPSED_MIN = [5]

In [9]:
def save_train_log():
    """Build the train log/target pair for every cut-off in ELAPSED_MIN and cache it as feather files."""
    for elap_min in ELAPSED_MIN:
        log_df, target_df = get_train_log(elap_min)
        log_df = log_df.reset_index(drop=True)
        target_df = target_df.reset_index(drop=True)
        # Column labels are stringified before writing the targets to feather.
        target_df.columns = list(map(str, target_df.columns))
        log_df.to_feather('../inputs/train2_log_{}.f'.format(elap_min))
        target_df.to_feather('../inputs/train2_y_{}.f'.format(elap_min))

In [10]:
def load_train_log():
    """Read the cached train logs and targets written by save_train_log.

    Returns:
        (train_log, train_y): two dicts keyed by the cut-off in minutes.
    """
    logs, targets = {}, {}
    for minutes in ELAPSED_MIN:
        logs[minutes] = pd.read_feather('../inputs/train2_log_{}.f'.format(minutes))
        targets[minutes] = pd.read_feather('../inputs/train2_y_{}.f'.format(minutes))
    return logs, targets

In [11]:
# save_train_log()

- 0, train_session (378594, 16)
- 3, train_session (389649, 16)
- 5, train_session (390621, 16)
- 10, train_session (391074, 16)

### ユーザ情報

In [12]:
# Attach the user attributes to every session; the left join keeps sessions
# whose user has no row in user_master.
user_features = pd.merge(meta[["session_id", "user_id"]], user_master, on="user_id", how="left").drop(columns=["user_id"])
# Treat out-of-range values as missing: ages outside [10, 80) and gender codes above 1.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.0.
user_features.loc[(user_features['age'] >= 80) | (user_features['age'] < 10), 'age'] = np.nan
user_features.loc[user_features['gender'] > 1, 'gender'] = np.nan

### メタ情報

In [13]:
# Per-session calendar features derived from `date`, plus a label-encoded user id.
# The raw columns they were derived from (and the other unused columns) are dropped.
le = preprocessing.LabelEncoder()
meta_features = meta.assign(
    year=meta['date'].dt.year,
    month=meta['date'].dt.month,
    day=meta['date'].dt.day,
    dow=meta['date'].dt.dayofweek,
    doy=meta['date'].dt.dayofyear,
    userid=le.fit_transform(meta['user_id']),
).drop(columns=['user_id', 'date', 'time_elapsed', 'date_str'])

### ディスプレイアクション

In [14]:
# Map every distinct display name / action name in the lookup table to a
# positional feature column name (disp_cnt_<i> / act_cnt_<i>).
disp_name_dic = {
    disp_name: 'disp_cnt_{}'.format(pos)
    for pos, disp_name in enumerate(display_action_id['display_name'].unique())
}

act_name_dic = {
    action_name: 'act_cnt_{}'.format(pos)
    for pos, action_name in enumerate(display_action_id['action_name'].unique())
}

In [15]:
def get_display_name_feature(input_log):
    """Count, per session, how many log rows fall on each display name.

    Returns:
        DataFrame with ``session_id`` followed by one column per value of
        ``disp_name_dic`` (in dict order); displays never seen in
        ``input_log`` are filled with 0.
    """
    with_names = pd.merge(input_log, display_action_id, on='display_action_id', how='left')
    per_display = (
        with_names.groupby(['session_id', 'display_name'])
        .size()
        .reset_index()
        .rename(columns={0:'disp_name_count'})
    )
    wide = per_display.pivot_table(index='session_id', columns='display_name', values='disp_name_count', aggfunc='sum')
    wide = wide.reset_index().fillna(0).rename(columns=disp_name_dic)

    # Start from an all-zero frame so the column set and order never depend on the data.
    disp_out = wide[['session_id']].copy()
    for feature_name in disp_name_dic.values():
        disp_out[feature_name] = 0

    # Overwrite the zeros with the counts that were actually observed.
    for observed in wide.columns:
        if observed != 'session_id':
            disp_out[observed] = wide[observed]

    return disp_out

In [16]:
def get_action_name_feature(input_log):
    """Count, per session, how many log rows fall on each action name.

    Returns:
        DataFrame with ``session_id`` followed by one column per value of
        ``act_name_dic`` (in dict order); actions never seen in ``input_log``
        are filled with 0.
    """
    with_names = pd.merge(input_log, display_action_id, on='display_action_id', how='left')
    per_action = (
        with_names.groupby(['session_id', 'action_name'])
        .size()
        .reset_index()
        .rename(columns={0:'act_name_count'})
    )
    wide = per_action.pivot_table(index='session_id', columns='action_name', values='act_name_count', aggfunc='sum')
    wide = wide.reset_index().fillna(0).rename(columns=act_name_dic)

    # Start from an all-zero frame so the column set and order never depend on the data.
    act_out = wide[['session_id']].copy()
    for feature_name in act_name_dic.values():
        act_out[feature_name] = 0

    # Overwrite the zeros with the counts that were actually observed.
    for observed in wide.columns:
        if observed != 'session_id':
            act_out[observed] = wide[observed]

    return act_out

### セッション単位の特徴量

In [17]:
def get_pre_payment_item(input_log):
    """Count, per session, the items of each target category already present in the input log.

    Only rows with ``kind_1 == "商品"`` are counted; ``value_1`` is treated as
    the JAN code and mapped to a category through ``product_master``.

    Returns:
        DataFrame with ``session_id`` (one row per distinct session of
        ``input_log``, in order of first appearance) followed by one float
        column ``pre_target_<category_id>`` per entry of ``target_category``;
        sessions/categories without items are 0.
    """
    session_unique = input_log['session_id'].unique()
    agg = input_log.loc[input_log["kind_1"] == "商品"].groupby(["session_id", "value_1"])["n_items"].sum().reset_index()
    agg = agg.rename(columns={"value_1": "JAN"})
    agg = pd.merge(agg, product_master[["JAN", "category_id"]], on="JAN", how="inner")
    agg = agg[agg['category_id'].isin(target_category)]

    if agg.empty:
        # No target-category item in the log at all: every count is zero.
        counts = pd.DataFrame(0.0, index=session_unique, columns=target_category)
    else:
        counts = agg.groupby(["session_id", "category_id"])["n_items"].sum().unstack()
        # Reindexing forces the full session x category grid. This replaces the
        # earlier trick of appending dummy rows with session_id 0.0, which mixed
        # floats into the id column and skewed the counts of a real session
        # whose id equals 0.
        counts = counts.reindex(index=session_unique, columns=target_category).fillna(0).astype(float)

    counts.columns = ['pre_target_{}'.format(c) for c in target_category]
    counts.index.name = "session_id"
    return counts.reset_index()

In [18]:
def get_session_kind_group(input_log):
    """Count, per session, the log rows of each ``kind_1`` value.

    Returns:
        DataFrame with ``session_id`` and one ``group_count_<name>`` column
        for the item kind ("商品" -> ``item``) and for every kind listed below;
        a session without rows of a kind gets NaN in that column.
    """
    kind_to_suffix = {
        'クーポン': 'coupon',
        '会計': 'kaikei',
        'キー': 'key',
        'カテゴリ': 'categry',
        'バーコードスキャン': 'barcode',
        'UUID': 'uuid',
        '使用ポイント': 'usedpoint',
        '確認': 'confirm',
        'ブランドスイッチ': 'bland',
        'レシピ': 'recipe',
        'スマホスキャン': 'smartphone',
        '磁気スキャン': 'magnetic',
        'レコメンド': 'recommend',
        '倍率ポイント': 'point',
    }

    def count_rows(kind, suffix):
        # Number of rows of the given kind per session, named after the output column.
        rows = input_log[input_log['kind_1'] == kind]
        return rows.groupby(["session_id"]).size().rename('group_count_' + suffix)

    counts = count_rows('商品', 'item')
    for kind, suffix in kind_to_suffix.items():
        counts = pd.concat([counts, count_rows(kind, suffix)], axis=1)

    return counts.reset_index()

In [19]:
def get_session_info(input_log):
    """Return, per session, the number of log rows (``n_actions``) and the mean ``spend_time``."""
    by_session = input_log.groupby(["session_id"])
    session_features = pd.concat(
        [
            by_session.size().rename("n_actions"),  # number of actions
            by_session["spend_time"].mean(),        # mean elapsed time
        ],
        axis=1,
    )
    return session_features.reset_index()

### セッション単位で集計

In [20]:
def get_session_features(input_log):
    """Compute every session-level feature block and outer-join them on ``session_id``."""
    feature_builders = (
        get_session_info,
        get_session_kind_group,
        get_display_name_feature,
        get_action_name_feature,
        get_pre_payment_item,
    )
    blocks = [build(input_log) for build in feature_builders]

    # Outer joins keep a session that appears in any of the blocks.
    df_ses = pd.DataFrame(columns=['session_id'])
    for block in blocks:
        df_ses = df_ses.merge(block, on='session_id', how='outer')

    return df_ses

### 特徴量を集約する

In [21]:
def merge_features(input_log, session):
    """Assemble the model feature table for the given sessions.

    Session-level, user and meta features are left-joined onto ``session``,
    so the result has one row per entry of ``session`` in the same order.
    """
    session_feat = get_session_features(input_log)

    out = pd.DataFrame({"session_id": session})
    for feat in (session_feat, user_features, meta_features):
        out = out.merge(feat, on="session_id", how="left")

    # A row-count change would mean one of the feature tables has duplicate session ids.
    assert len(session) == len(out)
    return out

In [22]:
def get_train_all_features(elapsed_min, train_log_list, train_y_list):
    """Return (features, targets) of the train set cached for the ``elapsed_min`` cut-off."""
    y_train = train_y_list[elapsed_min]
    train_features = merge_features(train_log_list[elapsed_min], y_train['session_id'])
    print('train_features', train_features.shape)
    return train_features, y_train

In [23]:
def get_test_all_feature(elapsed_min):
    """Build the feature table for the test sessions whose ``time_elapsed`` equals ``elapsed_min``.

    Returns:
        DataFrame with one row per matching test session, as produced by
        ``merge_features``.
    """
    test_meta = meta[meta['session_id'].isin(test_sessions)]
    test_meta = test_meta[test_meta['time_elapsed'] == elapsed_min]
    # Keep only the log rows of the sessions in this bucket. The previous
    # left merge against a session_id-only frame filtered nothing, so the log
    # of every test session was aggregated for each bucket.
    in_bucket = test_input_log['session_id'].isin(test_meta['session_id'])
    test_input_elapsed = test_input_log[in_bucket].reset_index(drop=True)

    test_features = merge_features(test_input_elapsed, test_meta['session_id'])
    print('test_features', test_features.shape)
    return test_features

In [24]:
# Number of stratified cross-validation folds used by train_lgbm.
n_fold = 4

In [25]:
# LightGBM parameters; used as the default `params` argument of train_lgbm,
# which passes them to lgbm.train. One binary classifier is trained per
# target column and evaluated by AUC.
lgbm_param = {
    'objective' : 'binary',
    'boosting_type': 'gbdt',
    'metric': 'auc',
    'seed' : 0,
    'learning_rate':  0.1,
#   'max_depth': 6,
    'feature_fraction': 0.6,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': -1
}

In [26]:
def train_lgbm(X, y, params=lgbm_param):
    """Train one binary LightGBM model per stratified CV fold.

    Args:
        X: feature DataFrame; rows are selected positionally with ``.iloc``.
        y: binary target Series aligned row-for-row with ``X``.
        params: LightGBM parameter dict passed to ``lgbm.train``.

    Returns:
        (oof_pred, models, score): the out-of-fold predictions for every row
        of ``X``, the list of ``n_fold`` trained boosters, and the ROC AUC of
        ``oof_pred`` against ``y``.
    """
    fold = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=0)

    models = []
    # The builtin float replaces `np.float`, an alias for it that was removed
    # in NumPy 1.24.
    oof_pred = np.zeros_like(y, dtype=float)

    cat_feat = ['age', 'gender', 'dow']

    for idx_train, idx_valid in fold.split(X, y):
        x_train, y_train = X.iloc[idx_train], y.iloc[idx_train]
        x_valid, y_valid = X.iloc[idx_valid], y.iloc[idx_valid]

        lgbm_train = lgbm.Dataset(x_train, y_train, categorical_feature = cat_feat)
        lgbm_eval = lgbm.Dataset(x_valid, y_valid, reference=lgbm_train, categorical_feature = cat_feat)

        # NOTE(review): `early_stopping_rounds` / `verbose_eval` are keyword
        # arguments of older LightGBM releases; newer releases expect the
        # equivalent callbacks instead -- confirm against the installed version.
        lgbm_model = lgbm.train(params,
                                lgbm_train,
                                valid_sets=lgbm_eval,
                                categorical_feature = cat_feat,
                                num_boost_round=10000,
                                early_stopping_rounds=100,
                                verbose_eval=-1)
        y_pred = lgbm_model.predict(x_valid, num_iteration=lgbm_model.best_iteration)

        oof_pred[idx_valid] = y_pred
        models.append(lgbm_model)

    score = roc_auc_score(y, oof_pred)
    # The backslash is escaped explicitly; the printed text is unchanged.
    print('--- FINISHED \\ whole score: {:.4f} ---'.format(score))
    return oof_pred, models, score

In [27]:
def predict(models, feature):
    """Average the predictions of all fold models, each at its best iteration."""
    fold_preds = [
        model.predict(feature, num_iteration = model.best_iteration)
        for model in models
    ]
    return np.mean(fold_preds, axis=0)

In [28]:
# Load the cached per-cut-off train logs and targets. The feather files must
# already exist: the save_train_log() call in the earlier cell is commented out.
train_log_list, train_y_list = load_train_log()

In [29]:
%%time
# Accumulators: test predictions stacked over all cut-offs, the out-of-fold
# AUC per (cut-off, target), and the trained fold models.
df_pred_all = pd.DataFrame()
df_score_all = pd.DataFrame(index=ELAPSED_MIN)
models_list_list = []

# One pass per elapsed-time cut-off: build train/test features, then train
# and predict one model set per target column.
for elapsed_min in ELAPSED_MIN:
    print(f'===== {elapsed_min} =====')
    train_features, y_train = get_train_all_features(elapsed_min, train_log_list, train_y_list)
    test_features = get_test_all_feature(elapsed_min)

    # Keep the test session ids as the prediction index, then drop the id
    # column so it is not used as a model feature.
    df_pred = pd.DataFrame(index=test_features['session_id'])
    train_features.drop(columns=['session_id'], inplace=True)
    test_features.drop(columns=['session_id'], inplace=True)
    
    models_list = []
    for target in y_train.columns:
        # Every column of y_train except the id is a binary target.
        if target == 'session_id':
            continue
        
        print(f"---- id = {target} -----")
        oof, models, score = train_lgbm(train_features, y_train[target])
        models_list.append(models)

        # Test prediction = mean over the fold models.
        pred = predict(models, test_features)
        df_pred[target] = pred
        df_score_all.loc[elapsed_min, target] = score
        
    models_list_list.append(models_list)
    df_pred_all = pd.concat([df_pred_all, df_pred])
    # Running total of predicted test sessions.
    print(len(df_pred_all))

===== 0 =====
train_features (378594, 89)
test_features (14277, 89)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[968]	valid_0's auc: 0.676063
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1193]	valid_0's auc: 0.679391
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[954]	valid_0's auc: 0.683738
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1052]	valid_0's auc: 0.686359
--- FINISHED \ whole score: 0.6813 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[73]	valid_0's auc: 0.637212
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[332]	valid_0's auc: 0.63639
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[214]	valid_0's auc: 0.663651
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[85]	valid_0's auc: 0.638034
--- FINISHED \ whole score: 0.6429 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[410]	valid_0's auc: 0.645349
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[335]	valid_0's auc: 0.635873
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[68]	valid_0's auc: 0.635054
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[315]	valid_0's auc: 0.642069
--- FINISHED \ whole score: 0.6390 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[883]	valid_0's auc: 0.63732
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[450]	valid_0's auc: 0.632694
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[330]	valid_0's auc: 0.631684
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[625]	valid_0's auc: 0.635073
--- FINISHED \ whole score: 0.6342 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[329]	valid_0's auc: 0.621511
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1117]	valid_0's auc: 0.620175
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[266]	valid_0's auc: 0.620765
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[434]	valid_0's auc: 0.620862
--- FINISHED \ whole score: 0.6205 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1515]	valid_0's auc: 0.704697
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1278]	valid_0's auc: 0.705795
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1114]	valid_0's auc: 0.707157
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[969]	valid_0's auc: 0.705157
--- FINISHED \ whole score: 0.7056 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[409]	valid_0's auc: 0.686995
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[421]	valid_0's auc: 0.70139
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1025]	valid_0's auc: 0.700821
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[819]	valid_0's auc: 0.703903
--- FINISHED \ whole score: 0.6973 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1552]	valid_0's auc: 0.700856
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1331]	valid_0's auc: 0.694084
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[978]	valid_0's auc: 0.692847
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1473]	valid_0's auc: 0.694392
--- FINISHED \ whole score: 0.6955 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[412]	valid_0's auc: 0.612573
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[661]	valid_0's auc: 0.612875
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[411]	valid_0's auc: 0.614071
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[448]	valid_0's auc: 0.615176
--- FINISHED \ whole score: 0.6136 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[448]	valid_0's auc: 0.690251
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[951]	valid_0's auc: 0.705059
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1032]	valid_0's auc: 0.695492
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1139]	valid_0's auc: 0.699916
--- FINISHED \ whole score: 0.6975 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[767]	valid_0's auc: 0.705046
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[763]	valid_0's auc: 0.693296
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[608]	valid_0's auc: 0.691156
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[318]	valid_0's auc: 0.693347
--- FINISHED \ whole score: 0.6953 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[911]	valid_0's auc: 0.749319
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[452]	valid_0's auc: 0.732456
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[992]	valid_0's auc: 0.749999
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[861]	valid_0's auc: 0.742445
--- FINISHED \ whole score: 0.7434 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1037]	valid_0's auc: 0.69472
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[919]	valid_0's auc: 0.691345
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[789]	valid_0's auc: 0.684954
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[374]	valid_0's auc: 0.689887
--- FINISHED \ whole score: 0.6896 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[914]	valid_0's auc: 0.761934
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[557]	valid_0's auc: 0.767622
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[300]	valid_0's auc: 0.764016
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[528]	valid_0's auc: 0.768166
--- FINISHED \ whole score: 0.7638 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[911]	valid_0's auc: 0.611875
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[416]	valid_0's auc: 0.614841
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[419]	valid_0's auc: 0.615568
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[421]	valid_0's auc: 0.615812
--- FINISHED \ whole score: 0.6143 ---
14277
===== 3 =====
train_features (389649, 89)
test_features (11304, 89)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[947]	valid_0's auc: 0.662173
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[502]	valid_0's auc: 0.661707
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[296]	valid_0's auc: 0.659923
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[846]	valid_0's auc: 0.66791
--- FINISHED \ whole score: 0.6628 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[306]	valid_0's auc: 0.622052
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[160]	valid_0's auc: 0.620522
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[74]	valid_0's auc: 0.625621
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[65]	valid_0's auc: 0.632704
--- FINISHED \ whole score: 0.6227 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[144]	valid_0's auc: 0.630584
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[154]	valid_0's auc: 0.635739
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[68]	valid_0's auc: 0.626471
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[148]	valid_0's auc: 0.625453
--- FINISHED \ whole score: 0.6295 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[496]	valid_0's auc: 0.630684
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[287]	valid_0's auc: 0.632636
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[698]	valid_0's auc: 0.631179
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[683]	valid_0's auc: 0.631392
--- FINISHED \ whole score: 0.6314 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[191]	valid_0's auc: 0.612605
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[487]	valid_0's auc: 0.615966
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[290]	valid_0's auc: 0.611055
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[314]	valid_0's auc: 0.615013
--- FINISHED \ whole score: 0.6136 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[763]	valid_0's auc: 0.686013
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[799]	valid_0's auc: 0.684532
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[912]	valid_0's auc: 0.690484
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[910]	valid_0's auc: 0.686371
--- FINISHED \ whole score: 0.6868 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[351]	valid_0's auc: 0.665003
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[241]	valid_0's auc: 0.669464
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[267]	valid_0's auc: 0.673134
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[641]	valid_0's auc: 0.680421
--- FINISHED \ whole score: 0.6714 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1491]	valid_0's auc: 0.677296
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[943]	valid_0's auc: 0.66935
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[858]	valid_0's auc: 0.666116
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1072]	valid_0's auc: 0.672186
--- FINISHED \ whole score: 0.6712 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[171]	valid_0's auc: 0.608476
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[300]	valid_0's auc: 0.609262
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[435]	valid_0's auc: 0.604403
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[209]	valid_0's auc: 0.605027
--- FINISHED \ whole score: 0.6066 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[574]	valid_0's auc: 0.667484
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[333]	valid_0's auc: 0.668288
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[298]	valid_0's auc: 0.661259
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[914]	valid_0's auc: 0.671784
--- FINISHED \ whole score: 0.6670 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[293]	valid_0's auc: 0.668738
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[127]	valid_0's auc: 0.683471
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[381]	valid_0's auc: 0.666608
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[551]	valid_0's auc: 0.678288
--- FINISHED \ whole score: 0.6732 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1220]	valid_0's auc: 0.720917
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[567]	valid_0's auc: 0.704376
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[380]	valid_0's auc: 0.722437
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[889]	valid_0's auc: 0.730565
--- FINISHED \ whole score: 0.7188 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[625]	valid_0's auc: 0.666705
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[292]	valid_0's auc: 0.66105
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[201]	valid_0's auc: 0.648844
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[571]	valid_0's auc: 0.674176
--- FINISHED \ whole score: 0.6627 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[254]	valid_0's auc: 0.718853
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[221]	valid_0's auc: 0.726626
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[213]	valid_0's auc: 0.727974
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[485]	valid_0's auc: 0.73253
--- FINISHED \ whole score: 0.7259 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[368]	valid_0's auc: 0.603825
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[543]	valid_0's auc: 0.604359
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[368]	valid_0's auc: 0.601758
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[389]	valid_0's auc: 0.606049
--- FINISHED \ whole score: 0.6039 ---
25581
===== 5 =====
train_features (390621, 89)
test_features (14072, 89)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[377]	valid_0's auc: 0.661781
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[451]	valid_0's auc: 0.661574
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[388]	valid_0's auc: 0.663694
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[893]	valid_0's auc: 0.666267
--- FINISHED \ whole score: 0.6632 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[136]	valid_0's auc: 0.631814
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[122]	valid_0's auc: 0.638094
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[384]	valid_0's auc: 0.608624
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[97]	valid_0's auc: 0.618623
--- FINISHED \ whole score: 0.6222 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[141]	valid_0's auc: 0.634251
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[69]	valid_0's auc: 0.631071
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[365]	valid_0's auc: 0.61767
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[128]	valid_0's auc: 0.625562
--- FINISHED \ whole score: 0.6260 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[254]	valid_0's auc: 0.626263
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[646]	valid_0's auc: 0.63046
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[239]	valid_0's auc: 0.628576
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[644]	valid_0's auc: 0.63058
--- FINISHED \ whole score: 0.6289 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[311]	valid_0's auc: 0.613461
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[304]	valid_0's auc: 0.61665
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[390]	valid_0's auc: 0.612666
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[296]	valid_0's auc: 0.610007
--- FINISHED \ whole score: 0.6132 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1004]	valid_0's auc: 0.677846
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[942]	valid_0's auc: 0.681373
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[848]	valid_0's auc: 0.681259
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[861]	valid_0's auc: 0.681209
--- FINISHED \ whole score: 0.6804 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[209]	valid_0's auc: 0.67147
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[295]	valid_0's auc: 0.661545
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[507]	valid_0's auc: 0.666615
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[186]	valid_0's auc: 0.668282
--- FINISHED \ whole score: 0.6661 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[905]	valid_0's auc: 0.666251
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1273]	valid_0's auc: 0.67033
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1060]	valid_0's auc: 0.669985
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1502]	valid_0's auc: 0.669252
--- FINISHED \ whole score: 0.6688 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[298]	valid_0's auc: 0.610135
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[133]	valid_0's auc: 0.60539
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[378]	valid_0's auc: 0.607859
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[246]	valid_0's auc: 0.602778
--- FINISHED \ whole score: 0.6064 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[194]	valid_0's auc: 0.656942
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[366]	valid_0's auc: 0.653393
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[366]	valid_0's auc: 0.659222
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[487]	valid_0's auc: 0.656515
--- FINISHED \ whole score: 0.6563 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[143]	valid_0's auc: 0.670534
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[184]	valid_0's auc: 0.664456
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[132]	valid_0's auc: 0.662587
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[164]	valid_0's auc: 0.663562
--- FINISHED \ whole score: 0.6652 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[561]	valid_0's auc: 0.706829
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[259]	valid_0's auc: 0.700266
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[253]	valid_0's auc: 0.717145
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[419]	valid_0's auc: 0.705911
--- FINISHED \ whole score: 0.7071 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[425]	valid_0's auc: 0.657979
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[560]	valid_0's auc: 0.656553
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[898]	valid_0's auc: 0.656504
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[472]	valid_0's auc: 0.658674
--- FINISHED \ whole score: 0.6572 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[387]	valid_0's auc: 0.717837
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[457]	valid_0's auc: 0.725127
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[298]	valid_0's auc: 0.711806
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[218]	valid_0's auc: 0.705875
--- FINISHED \ whole score: 0.7152 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[366]	valid_0's auc: 0.59842
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[238]	valid_0's auc: 0.605338
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[572]	valid_0's auc: 0.602731
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[133]	valid_0's auc: 0.590407
--- FINISHED \ whole score: 0.5992 ---
39653
===== 10 =====
train_features (391074, 89)
test_features (16833, 89)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[573]	valid_0's auc: 0.673127
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[235]	valid_0's auc: 0.665606
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[432]	valid_0's auc: 0.661655
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[547]	valid_0's auc: 0.671042
--- FINISHED \ whole score: 0.6678 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[74]	valid_0's auc: 0.628656
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[74]	valid_0's auc: 0.641289
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[89]	valid_0's auc: 0.642019
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[120]	valid_0's auc: 0.63545
--- FINISHED \ whole score: 0.6365 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[102]	valid_0's auc: 0.64418
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[112]	valid_0's auc: 0.64154
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[111]	valid_0's auc: 0.661529
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[90]	valid_0's auc: 0.651957
--- FINISHED \ whole score: 0.6497 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[308]	valid_0's auc: 0.641197
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[409]	valid_0's auc: 0.643963
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[456]	valid_0's auc: 0.649987
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[399]	valid_0's auc: 0.64324
--- FINISHED \ whole score: 0.6446 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[179]	valid_0's auc: 0.631783
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[386]	valid_0's auc: 0.637219
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[184]	valid_0's auc: 0.632958
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[233]	valid_0's auc: 0.633533
--- FINISHED \ whole score: 0.6338 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[426]	valid_0's auc: 0.682854
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[183]	valid_0's auc: 0.667802
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[392]	valid_0's auc: 0.678698
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[516]	valid_0's auc: 0.680577
--- FINISHED \ whole score: 0.6775 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[74]	valid_0's auc: 0.655914
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[177]	valid_0's auc: 0.659841
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[126]	valid_0's auc: 0.673036
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[129]	valid_0's auc: 0.660952
--- FINISHED \ whole score: 0.6622 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[620]	valid_0's auc: 0.682305
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[634]	valid_0's auc: 0.677226
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[314]	valid_0's auc: 0.678859
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[338]	valid_0's auc: 0.682432
--- FINISHED \ whole score: 0.6800 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[131]	valid_0's auc: 0.631844
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[233]	valid_0's auc: 0.629037
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[191]	valid_0's auc: 0.628836
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[423]	valid_0's auc: 0.627268
--- FINISHED \ whole score: 0.6290 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[206]	valid_0's auc: 0.662454
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[512]	valid_0's auc: 0.673357
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[749]	valid_0's auc: 0.663769
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[546]	valid_0's auc: 0.6671
--- FINISHED \ whole score: 0.6661 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[69]	valid_0's auc: 0.652
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[67]	valid_0's auc: 0.663882
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[162]	valid_0's auc: 0.659984
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[258]	valid_0's auc: 0.656323
--- FINISHED \ whole score: 0.6566 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[522]	valid_0's auc: 0.716939
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[534]	valid_0's auc: 0.714845
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[162]	valid_0's auc: 0.696102
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[134]	valid_0's auc: 0.718301
--- FINISHED \ whole score: 0.7105 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[278]	valid_0's auc: 0.664526
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[95]	valid_0's auc: 0.650936
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[331]	valid_0's auc: 0.660115
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[156]	valid_0's auc: 0.664401
--- FINISHED \ whole score: 0.6598 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[99]	valid_0's auc: 0.689304
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[125]	valid_0's auc: 0.653519
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[58]	valid_0's auc: 0.6666
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[110]	valid_0's auc: 0.672668
--- FINISHED \ whole score: 0.6697 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[158]	valid_0's auc: 0.60642
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[201]	valid_0's auc: 0.610676
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[236]	valid_0's auc: 0.605801
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[349]	valid_0's auc: 0.611563
--- FINISHED \ whole score: 0.6086 ---
56486
CPU times: user 10h 11min 31s, sys: 15min 56s, total: 10h 27min 27s
Wall time: 37min 47s


In [30]:
# Preview the first five rows of the prediction table: it is indexed by
# session_id and has one column of predicted scores per target category id.
df_pred_all.head(n=5)

Unnamed: 0_level_0,38,110,113,114,134,171,172,173,376,435,467,537,539,629,768
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
663721,0.066165,0.012845,0.050452,0.362526,0.120522,0.09672,0.019461,0.061983,0.194036,0.042899,0.027678,0.061412,0.052344,0.017825,0.079523
663761,0.160473,0.021593,0.087512,0.434458,0.178808,0.127535,0.084279,0.145566,0.196981,0.074895,0.025429,0.201324,0.146833,0.004251,0.223977
663763,0.109619,0.018317,0.079689,0.36043,0.151949,0.286579,0.06736,0.102522,0.135448,0.133712,0.033788,0.03854,0.115384,0.00416,0.183605
663775,0.124562,0.019226,0.058562,0.262837,0.110575,0.079874,0.083307,0.167078,0.12302,0.099812,0.033824,0.141324,0.091846,0.00375,0.119449
663778,0.129514,0.027705,0.092895,0.242339,0.124953,0.090289,0.020049,0.083249,0.139984,0.060662,0.030979,0.109579,0.111933,0.010191,0.153139


In [31]:
# Show the out-of-fold AUC ("whole score" in the training log) for every
# target category (columns) under each run label (rows: 0, 3, 5, 10).
# NOTE(review): the row labels presumably are elapsed minutes of the session
# used to build the features — confirm against the training loop.
df_score_all

Unnamed: 0,38,110,113,114,134,171,172,173,376,435,467,537,539,629,768
0,0.681338,0.642867,0.639008,0.634153,0.620534,0.70559,0.697278,0.695533,0.613601,0.697475,0.695277,0.74336,0.689649,0.763795,0.614263
3,0.66276,0.622692,0.629465,0.6314,0.613645,0.686841,0.671369,0.671232,0.606639,0.666966,0.673227,0.718846,0.662657,0.725948,0.60394
5,0.663178,0.622222,0.62601,0.628929,0.613172,0.680401,0.666058,0.668828,0.606445,0.656312,0.665171,0.707058,0.657165,0.715158,0.599159
10,0.667823,0.636495,0.649682,0.644614,0.633838,0.677519,0.662234,0.679982,0.629027,0.666118,0.656583,0.710462,0.659762,0.669723,0.608591


In [32]:
# Collapse the per-category AUC columns into one mean AUC per row of
# df_score_all, i.e. one value per run label.
cv = df_score_all.mean(axis='columns')
print(cv)

# Unweighted mean of the per-row means: the single CV figure that is
# recorded in the experiment log at the bottom of the notebook.
print('- cv =', cv.mean())

0     0.675581
3     0.656509
5     0.651684
10    0.656830
dtype: float64
- cv = 0.6601511436803562


In [33]:
# Sanity check: the prediction table must have exactly one row per row of test.
assert df_pred_all.shape[0] == test.shape[0]

In [34]:
# Re-order the predictions so that they follow the row order of `test`.
# The inner join drops any test session without a prediction, while
# validate='one_to_one' makes pandas raise a MergeError if session_id is
# duplicated on either side. Combined with the length check below this
# guarantees exactly one prediction row per test row; a length check alone
# could pass by coincidence when duplicated and missing keys cancel out.
submission = pd.merge(
    test[['session_id']],
    df_pred_all.reset_index(),
    on='session_id',
    how='inner',
    validate='one_to_one',
)
assert len(submission) == len(test), (
    f'submission has {len(submission)} rows but test has {len(test)}; '
    'some test sessions have no prediction'
)

In [35]:
# Write the submission file: only the per-category prediction columns are
# exported (the session_id key is dropped), without the DataFrame index.
(
    submission
    .drop('session_id', axis=1)
    .to_csv('../outputs/submission.csv', index=False)
)

### baseline_6
- feat = 89
- Wall time: 37min 47s
- cv = 0.66015
- LB = 0.6247

### baseline_5
- feat = 74
- Wall time: 39min 18s
- cv = 0.66859
- LB = 0.6230

### baseline_4_1
- 5minのみ
- cv=0.655407
- Wall time: 13min 51s

### baseline_4
- 5minのみ
- cv=0.655407
- Wall time: 11min 56s