In [1]:
import gc
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import feather
import datetime
from matplotlib_venn import venn2
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgbm
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import LatentDirichletAllocation as LDA
pd.set_option('display.max_Columns', 100)

In [2]:
# Load the raw competition inputs (feather / CSV files under ../inputs).
cartlog = pd.read_feather('../inputs/cartlog.f')
product_master = pd.read_feather('../inputs/product_master.f')
meta = pd.read_feather('../inputs/meta.f')
user_master = pd.read_feather('../inputs/user_master.f')
test = pd.read_csv('../inputs/test.csv')
display_action_id = pd.read_csv('../inputs/display_action_id.csv')

# JAN is cast to str because it is later used as a merge key against the cart
# log's value_1 column (see agg_payment) -- presumably value_1 is stored as text.
product_master['JAN'] = product_master['JAN'].astype(str)

In [3]:
# Distinct session ids listed in test.csv, and the cart-log rows that belong to them.
test_sessions = test["session_id"].unique()
print(len(test_sessions))
test_input_log = cartlog[cartlog["session_id"].isin(test_sessions)]

56486


In [4]:
# category_id values that are used as prediction targets (one binary label each).
target_category = [
    38,  # ice cream / novelty
    110,  # snacks & candy / gum
    113,  # snacks & candy / cereal
    114,  # snacks & candy / snacks
    134,  # chocolate & biscuits-crackers / chocolate
    171,  # beer family / RTD (ready-to-drink)
    172,  # beer family / non-alcoholic
    173,  # beer family / beer
    376,  # Japanese sweets / rice crackers
    435,  # large PET bottle / unsweetened tea (large PET)
    467,  # small PET bottle / coffee (small PET)
    537,  # water & sparkling water / large PET (sparkling water)
    539,  # water & sparkling water / small PET (sparkling water)
    629,  # canned drinks / coffee (can)
    768,  # noodles / cup noodles
]

In [5]:
# Restrict the training data to sessions dated before 2020-08-01 that contain a
# payment and have at least one log row with spend_time >= 600
# (10 minutes, assuming spend_time is measured in seconds).
tmp_sessions = meta.loc[meta['date'] < '2020-08-01', 'session_id'].unique()
tmp_log = cartlog[cartlog["session_id"].isin(tmp_sessions)]
print('2020-08-01以前: ', len(tmp_sessions))

# Sessions in which a payment happened
payment_sessions = set(tmp_log.loc[tmp_log['is_payment'] == 1, 'session_id'].unique())
print('購買が発生: ', len(payment_sessions))
# Sessions that reached the 10-minute mark
over10min_sessions = set(tmp_log.loc[tmp_log['spend_time'] >= 600, 'session_id'].unique())
print('10分以上: ', len(over10min_sessions))
# Intersection of the two conditions
all_train_sessions = payment_sessions.intersection(over10min_sessions)
print('積集合: ', len(all_train_sessions))

# Every log row belonging to the selected training sessions
all_train_log = tmp_log[tmp_log["session_id"].isin(all_train_sessions)]
print('全trainのログ: ', len(all_train_log))

2020-08-01以前:  663708
購買が発生:  618462
10分以上:  404825
積集合:  391383
全trainのログ:  10826062


In [6]:
def agg_payment(cartlog) -> pd.DataFrame:
    """Sum purchased item counts per session and product category.

    Only rows whose ``kind_1`` is "商品" (product) are used. ``n_items`` is
    summed per (session_id, value_1), ``value_1`` is treated as the JAN code and
    inner-joined against ``product_master`` to attach ``category_id``.

    Returns:
        DataFrame with columns ``session_id``, ``n_items`` and ``category_id``
        (one row per session/JAN pair; the JAN column itself is dropped).
    """
    is_product = cartlog["kind_1"] == "商品"
    per_item = (
        cartlog.loc[is_product]
        .groupby(["session_id", "value_1"])["n_items"]
        .sum()
        .reset_index()
        .rename(columns={"value_1": "JAN"})
    )
    with_category = pd.merge(per_item, product_master[["JAN", "category_id"]], on="JAN", how="inner")
    return with_category.drop(columns=['JAN'])

In [7]:
class RetailDataset:
    """Split the training log into model input and target at a per-session cutoff.

    The cutoff (``time_elapsed_sec``) is ``meta['time_elapsed'] * 60``; sessions
    whose ``time_elapsed`` is null use ``thres_sec`` instead. Log rows with
    ``spend_time`` at or before the cutoff are the input, later rows are used to
    build the purchase targets. Reads the module-level ``all_train_log``.
    """

    def __init__(self, thres_sec, meta):
        self.thres_sec = thres_sec

        session_meta = meta.copy()
        session_meta['time_elapsed_sec'] = session_meta['time_elapsed'] * 60
        missing_cutoff = session_meta['time_elapsed_sec'].isnull()
        session_meta.loc[missing_cutoff, 'time_elapsed_sec'] = thres_sec
        self.meta = session_meta

        # Keep only the rows logged up to (and including) the cutoff.
        log_with_cutoff = self._train_log_with_cutoff()
        is_input = log_with_cutoff['spend_time'] <= log_with_cutoff['time_elapsed_sec']
        self.public_train_log = log_with_cutoff[is_input]
        self.train_sessions = self.public_train_log["session_id"].unique()

    def _train_log_with_cutoff(self) -> pd.DataFrame:
        """Attach each session's cutoff (time_elapsed_sec) to the training log rows."""
        return pd.merge(all_train_log, self.meta[["session_id", "time_elapsed_sec"]], on=["session_id"], how="inner")

    def get_train_input_log(self) -> pd.DataFrame:
        """Return the log rows at or before each session's cutoff."""
        return self.public_train_log

    def get_train_target(self) -> pd.DataFrame:
        """Build the 0/1 purchase targets for the training sessions.

        A target is 1 when the summed ``n_items`` of that category after the
        cutoff is positive. Sessions without any such purchase get 0 everywhere.
        """
        # Purchases are counted only from the rows after the cutoff.
        log_with_cutoff = self._train_log_with_cutoff()
        is_future = log_with_cutoff['spend_time'] > log_with_cutoff['time_elapsed_sec']
        purchased = agg_payment(log_with_cutoff[is_future])
        purchased = purchased[purchased['category_id'].isin(target_category)]

        counts = purchased.groupby(["session_id", "category_id"])["n_items"].sum().unstack().fillna(0).astype(int)
        bought_flags = (counts > 0).astype(int)

        skeleton = pd.DataFrame(index=self.train_sessions)
        skeleton.index.name = "session_id"
        return skeleton.join(bought_flags).fillna(0).reset_index()

In [8]:
def get_train_log(elapsed_min):
    """Build the training input log and targets for a cutoff given in minutes.

    Returns:
        (train_input_log, y_train) as produced by ``RetailDataset``.
    """
    cutoff_sec = elapsed_min * 60
    retail = RetailDataset(cutoff_sec, meta)
    input_log = retail.get_train_input_log()
    targets = retail.get_train_target()
    print('train_session', targets.shape)
    return input_log, targets

In [9]:
# Cutoffs in minutes; each value is passed to get_train_log and matched against
# meta['time_elapsed'] when selecting test sessions.
ELAPSED_MIN = [0, 3, 5, 10]
# ELAPSED_MIN = [5]

In [10]:
def save_train_log():
    """Build the training log/targets for every cutoff in ELAPSED_MIN and cache them as feather files."""
    for minutes in ELAPSED_MIN:
        log_df, target_df = get_train_log(minutes)

        # Feather needs a default index and string column names.
        log_df = log_df.reset_index(drop=True)
        target_df = target_df.reset_index(drop=True)
        target_df.columns = [str(name) for name in target_df.columns]

        log_df.to_feather('../inputs/train2_log_{}.f'.format(minutes))
        target_df.to_feather('../inputs/train2_y_{}.f'.format(minutes))

In [11]:
def load_train_log():
    """Load the cached training logs and targets written by save_train_log.

    Returns:
        (train_log, train_y): two dicts keyed by the cutoff in minutes.
    """
    logs_by_min, targets_by_min = {}, {}
    for minutes in ELAPSED_MIN:
        logs_by_min[minutes] = pd.read_feather('../inputs/train2_log_{}.f'.format(minutes))
        targets_by_min[minutes] = pd.read_feather('../inputs/train2_y_{}.f'.format(minutes))
    return logs_by_min, targets_by_min

In [12]:
# save_train_log()

- 0, train_session (378594, 16)
- 3, train_session (389649, 16)
- 5, train_session (390621, 16)
- 10, train_session (391074, 16)

### ユーザ情報

In [13]:
# Per-session user attributes: attach user_master columns to every session.
user_features = pd.merge(meta[["session_id", "user_id"]], user_master, on="user_id", how="left")
# Treat out-of-range values as missing (age outside [10, 80), gender codes above 1).
# np.nan is used instead of np.NaN because that alias was removed in NumPy 2.0.
user_features.loc[user_features['age'] >= 80, 'age'] = np.nan
user_features.loc[user_features['age'] < 10, 'age'] = np.nan
user_features.loc[user_features['gender'] > 1, 'gender'] = np.nan

In [14]:
def get_user_item():
    """Build per-user purchase-count features from the training log.

    Steps:
      1. Sum ``n_items`` per (user_id, category_id) over ``all_train_log``.
      2. Keep categories whose total over all users is greater than 5000.
      3. Left-join onto every user in ``user_master``; users absent from the
         training data get the per-category mean of the training users.
      4. Drop the target categories, cast to float32 and clip negatives to 0.

    Returns:
        DataFrame with ``user_id`` plus one ``user_pay_<category_id>`` column per
        retained non-target category.
    """
    # Items purchased in the training sessions
    payed_item = agg_payment(all_train_log)
    user_payed_item = pd.merge(payed_item, meta[['session_id', 'user_id']], on='session_id', how='left')
    group_user_item = user_payed_item.groupby(['user_id', 'category_id'])[['n_items']].sum().reset_index()
    pivot_user_item = group_user_item.pivot_table(index='user_id', columns='category_id', values='n_items')

    # Keep only categories whose total purchase count exceeds 5000
    sum_user_item = pivot_user_item.sum()
    category_cols = sum_user_item[sum_user_item > 5000].index
    pivot_user_item = pivot_user_item[category_cols].fillna(0)

    # Fallback values for users that never appear in the training data.
    # The mean is taken while user_id is still the index, so the id column can
    # neither break the reduction (string ids) nor end up in the result (numeric ids).
    category_mean = pivot_user_item.mean()

    # Join onto the full user list
    all_user_item = pd.merge(user_master[['user_id']], pivot_user_item.reset_index(), on='user_id', how='left')

    # Target categories are excluded from the features
    feature_cols = [col for col in category_cols if col not in target_category]
    features = (
        all_user_item[feature_cols]
        .fillna(category_mean)
        .astype('float32')
        .clip(lower=0)
    )
    features.columns = ['user_pay_{}'.format(col) for col in feature_cols]

    return pd.concat([all_user_item[['user_id']], features], axis=1)

In [15]:
# Per-user purchase-count features (one row per user in user_master).
all_user_item = get_user_item()
print(all_user_item.shape)

(40350, 231)


In [16]:
def LDA_topic(df_input, n_components=10, random_state=0):
    """Fit LDA on the per-user feature matrix and return the topic weights.

    Args:
        df_input: DataFrame with a ``user_id`` column; every other column is
            used as a (non-negative) feature for the topic model.
        n_components: number of topics (defaults to the previously hard-coded 10).
        random_state: seed for the LDA fit so that the topics are reproducible
            between runs.

    Returns:
        DataFrame with ``user_id`` and columns ``LDA_0`` .. ``LDA_<n_components-1>``.
    """
    df_cp = df_input.set_index('user_id')
    lda = LDA(n_components=n_components, random_state=random_state)
    lda_out = pd.DataFrame(lda.fit_transform(df_cp), index=df_cp.index).add_prefix('LDA_')
    return lda_out.reset_index()

In [17]:
# %%time
# user_lda = LDA_topic(all_user_item)
# user_lda.to_feather('../inputs/user_lda.f')

In [18]:
# Load the cached LDA topic weights (written by the commented-out cell that calls
# LDA_topic) and append them to the per-user features.
user_lda = pd.read_feather('../inputs/user_lda.f')
all_user_item = pd.merge(all_user_item, user_lda, on='user_id', how='left')

In [19]:
# Shape check after the LDA columns were merged in.
print(all_user_item.shape)

(40350, 241)


### メタ情報

In [20]:
# Calendar features derived from the session date plus a label-encoded user id.
# The raw user_id/date/time_elapsed/date_str columns are removed afterwards.
le = preprocessing.LabelEncoder()
meta_features = meta.assign(
    year=meta['date'].dt.year,
    month=meta['date'].dt.month,
    day=meta['date'].dt.day,
    dow=meta['date'].dt.dayofweek,
    doy=meta['date'].dt.dayofyear,
    userid=le.fit_transform(meta['user_id']),
).drop(columns=['user_id', 'date', 'time_elapsed', 'date_str'])

### ディスプレイアクション

In [21]:
# Map every distinct display name / action name to a stable feature column name
# (disp_cnt_<i> / act_cnt_<i>, numbered in order of first appearance).
disp_name_dic = {
    disp: 'disp_cnt_{}'.format(i)
    for i, disp in enumerate(display_action_id['display_name'].unique())
}

act_name_dic = {
    action: 'act_cnt_{}'.format(i)
    for i, action in enumerate(display_action_id['action_name'].unique())
}

In [22]:
def get_display_name_feature(input_log):
    """Count log rows per session and display name.

    Returns:
        DataFrame with ``session_id`` and one ``disp_cnt_<i>`` column for every
        entry of ``disp_name_dic``; display names that never occur in
        ``input_log`` are 0 for every session.
    """
    joined = pd.merge(input_log, display_action_id, on='display_action_id', how='left')
    counts = (
        joined.groupby(['session_id', 'display_name'])
        .size()
        .reset_index(name='disp_name_count')
    )
    wide = counts.pivot_table(
        index='session_id', columns='display_name', values='disp_name_count', aggfunc='sum'
    )
    wide = wide.reset_index().fillna(0).rename(columns=disp_name_dic)

    # Start from an all-zero frame so that the column set does not depend on the input.
    disp_out = wide[['session_id']].copy()
    for feature_name in disp_name_dic.values():
        disp_out[feature_name] = 0

    observed = [name for name in wide.columns if name != 'session_id']
    for name in observed:
        disp_out[name] = wide[name]

    return disp_out

In [23]:
def get_action_name_feature(input_log):
    """Count log rows per session and action name.

    Returns:
        DataFrame with ``session_id`` and one ``act_cnt_<i>`` column for every
        entry of ``act_name_dic``; action names that never occur in
        ``input_log`` are 0 for every session.
    """
    joined = pd.merge(input_log, display_action_id, on='display_action_id', how='left')
    counts = (
        joined.groupby(['session_id', 'action_name'])
        .size()
        .reset_index(name='act_name_count')
    )
    wide = counts.pivot_table(
        index='session_id', columns='action_name', values='act_name_count', aggfunc='sum'
    )
    wide = wide.reset_index().fillna(0).rename(columns=act_name_dic)

    # Start from an all-zero frame so that the column set does not depend on the input.
    act_out = wide[['session_id']].copy()
    for feature_name in act_name_dic.values():
        act_out[feature_name] = 0

    observed = [name for name in wide.columns if name != 'session_id']
    for name in observed:
        act_out[name] = wide[name]

    return act_out

### セッション単位の特徴量

In [24]:
def get_pre_payment_item(input_log):
    """Count, per session, the target-category items already present in the input log.

    Returns:
        DataFrame with ``session_id`` and one ``pre_target_<category_id>`` column
        per entry of ``target_category`` (0 where the session has no such item).
    """
    sessions = input_log['session_id'].unique()

    product_rows = input_log.loc[input_log["kind_1"] == "商品"]
    per_item = (
        product_rows.groupby(["session_id", "value_1"])["n_items"]
        .sum()
        .reset_index()
        .rename(columns={"value_1": "JAN"})
    )
    per_item = pd.merge(per_item, product_master[["JAN", "category_id"]], on="JAN", how="inner")
    per_item = per_item[per_item['category_id'].isin(target_category)]
    per_category = per_item.groupby(["session_id", "category_id"])["n_items"].sum().reset_index()

    # Padding rows (session_id 0, n_items 0), one per target category, so that
    # the pivot below always produces a column for every target category.
    zeros = np.zeros(len(target_category))
    padding = pd.DataFrame({
        'session_id': zeros,
        'category_id': list(target_category),
        'n_items': zeros,
    })
    per_category = pd.concat([per_category, padding])

    wide = per_category.pivot_table(index='session_id', columns='category_id', values='n_items').fillna(0)
    src_columns = ['x_{}'.format(category) for category in wide.columns]
    wide.columns = src_columns

    # Left-join onto every session of the input log; the padding session drops
    # out here unless it is also a real session id.
    out_columns = ['pre_target_{}'.format(category) for category in target_category]
    df_out = pd.DataFrame(index=sessions, columns=out_columns)
    df_out.index.name = "session_id"
    df_out = df_out.join(wide)
    for category in target_category:
        df_out['pre_target_{}'.format(category)] = df_out['x_{}'.format(category)]

    return df_out.drop(columns=src_columns).fillna(0).reset_index()

In [25]:
def get_session_kind_group(input_log):
    """Count log rows per session for each ``kind_1`` value.

    Returns:
        DataFrame with ``session_id`` and one ``group_count_<name>`` column per
        kind (``item`` for "商品" first, then the kinds listed below). A session
        that has no row of a given kind holds NaN in that column.
    """
    # kind_1 value -> suffix of the output column name
    kind_name = {
        'クーポン': 'coupon',
        '会計': 'kaikei',
        'キー': 'key',
        'カテゴリ': 'categry',
        'バーコードスキャン': 'barcode',
        'UUID': 'uuid',
        '使用ポイント': 'usedpoint',
        '確認': 'confirm',
        'ブランドスイッチ': 'bland',
        'レシピ': 'recipe',
        'スマホスキャン': 'smartphone',
        '磁気スキャン': 'magnetic',
        'レコメンド': 'recommend',
        '倍率ポイント': 'point',
    }

    def _count_rows(kind, suffix):
        """Number of rows of the given kind per session, named group_count_<suffix>."""
        rows = input_log[input_log['kind_1'] == kind]
        return rows.groupby(["session_id"]).size().rename('group_count_' + suffix)

    counts = _count_rows('商品', 'item')
    for kind, suffix in kind_name.items():
        counts = pd.concat([counts, _count_rows(kind, suffix)], axis=1)

    return counts.reset_index()

In [26]:
def get_session_info(input_log):
    """Basic per-session statistics of the input log.

    Returns:
        DataFrame with ``session_id``, ``n_actions`` (number of log rows) and
        ``spend_time`` (mean of the rows' spend_time).
    """
    by_session = input_log.groupby("session_id")
    stats = pd.concat(
        [
            by_session.size().rename("n_actions"),  # number of actions
            by_session["spend_time"].mean(),        # mean elapsed time
        ],
        axis=1,
    )
    return stats.reset_index()

### セッション単位で集計

In [27]:
def get_session_features(input_log):
    """Outer-join all session-level feature tables on ``session_id``."""
    builders = (
        get_session_info,
        get_session_kind_group,
        get_display_name_feature,
        get_action_name_feature,
        get_pre_payment_item,
    )

    merged = pd.DataFrame(columns=['session_id'])
    for build in builders:
        merged = pd.merge(merged, build(input_log), on='session_id', how='outer')

    return merged

### 特徴量を集約する

In [28]:
def merge_features(input_log, session):
    """Assemble the model input: one row per entry of ``session``.

    Session-level, user-attribute and meta features are left-joined on
    ``session_id``; the per-user purchase features are then joined on
    ``user_id``, which is dropped from the result.
    """
    out = pd.DataFrame({"session_id": session})
    per_session_tables = (
        get_session_features(input_log),
        user_features,
        meta_features,
    )
    for table in per_session_tables:
        out = pd.merge(out, table, on="session_id", how="left")

    # Per-user features
    out = pd.merge(out, all_user_item, on='user_id', how='left').drop(columns='user_id')

    # The joins must not add or remove rows.
    assert len(session) == len(out)
    return out

In [29]:
def get_train_all_features(elapsed_min, train_log_list, train_y_list):
    """Build the training feature matrix for one cutoff.

    Args:
        elapsed_min: key into the two dicts (cutoff in minutes).
        train_log_list: dict of input logs keyed by cutoff.
        train_y_list: dict of target frames keyed by cutoff.

    Returns:
        (train_features, y_train); rows follow ``y_train['session_id']``.
    """
    y_train = train_y_list[elapsed_min]
    features = merge_features(train_log_list[elapsed_min], y_train['session_id'])
    print('train_features', features.shape)
    return features, y_train

In [30]:
def get_test_all_feature(elapsed_min):
    """Build the feature matrix for the test sessions of one cutoff.

    Args:
        elapsed_min: value of ``meta['time_elapsed']`` that selects the sessions.

    Returns:
        Feature frame with one row per selected test session.
    """
    test_meta = meta[meta['session_id'].isin(test_sessions)]
    test_meta = test_meta[test_meta['time_elapsed'] == elapsed_min]

    # Keep only the log rows of the selected sessions. The former left merge kept
    # every test session, so features were computed for all of them and then
    # discarded by the left join inside merge_features.
    test_input_elapsed = test_input_log[test_input_log['session_id'].isin(test_meta['session_id'])]

    test_features = merge_features(test_input_elapsed, test_meta['session_id'])
    print('test_features', test_features.shape)
    return test_features

In [31]:
# Number of StratifiedKFold splits used by train_lgbm.
n_fold = 4

In [32]:
# Default LightGBM parameters for train_lgbm: binary objective evaluated by AUC.
lgbm_param = {
    'objective' : 'binary',
    'boosting_type': 'gbdt',
    'metric': 'auc',
    'seed' : 0,
    'learning_rate':  0.1,
#   'max_depth': 6,
    'feature_fraction': 0.6,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': -1
}

In [33]:
def train_lgbm(X, y, params=lgbm_param):
    """Train one LightGBM binary model per stratified fold.

    Args:
        X: feature DataFrame; must contain the columns 'age', 'gender' and 'dow',
            which are passed to LightGBM as categorical features.
        y: binary target Series aligned with ``X``.
        params: LightGBM parameter dict.

    Returns:
        (oof_pred, models, score): out-of-fold predictions for every row of
        ``X``, the list of fitted boosters (one per fold) and the ROC AUC of the
        out-of-fold predictions.
    """
    fold = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=0)

    models = []
    # Builtin float (float64) replaces np.float, an alias removed in NumPy 1.24.
    oof_pred = np.zeros_like(y, dtype=float)

    cat_feat = ['age', 'gender', 'dow']

    for idx_train, idx_valid in fold.split(X, y):
        x_train, y_train = X.iloc[idx_train], y.iloc[idx_train]
        x_valid, y_valid = X.iloc[idx_valid], y.iloc[idx_valid]

        lgbm_train = lgbm.Dataset(x_train, y_train, categorical_feature=cat_feat)
        lgbm_eval = lgbm.Dataset(x_valid, y_valid, reference=lgbm_train, categorical_feature=cat_feat)

        # NOTE(review): early_stopping_rounds / verbose_eval are keyword arguments
        # of the LightGBM 3.x train() API; confirm the installed version before upgrading.
        lgbm_model = lgbm.train(params,
                                lgbm_train,
                                valid_sets=lgbm_eval,
                                categorical_feature=cat_feat,
                                num_boost_round=10000,
                                early_stopping_rounds=100,
                                verbose_eval=-1)
        y_pred = lgbm_model.predict(x_valid, num_iteration=lgbm_model.best_iteration)

        # Each row is predicted by the model that did not see it during training.
        oof_pred[idx_valid] = y_pred
        models.append(lgbm_model)

    score = roc_auc_score(y, oof_pred)
    # The backslash is escaped explicitly; the printed text is unchanged.
    print('--- FINISHED \\ whole score: {:.4f} ---'.format(score))
    return oof_pred, models, score

In [34]:
def predict(models, feature):
    """Average the predictions of several fitted models.

    Each model is asked to predict ``feature`` at its own ``best_iteration``;
    the element-wise mean over the models is returned.
    """
    per_model = [
        booster.predict(feature, num_iteration=booster.best_iteration)
        for booster in models
    ]
    return np.mean(per_model, axis=0)

In [35]:
# Load the cached per-cutoff training logs and targets (dicts keyed by minutes).
train_log_list, train_y_list = load_train_log()

In [36]:
# Run a garbage-collection pass before the training loop.
gc.collect()

74

In [37]:
%%time
# Train and predict separately for every cutoff in ELAPSED_MIN.
# df_pred_all: test predictions of all cutoffs stacked row-wise (index = session_id).
# df_score_all: out-of-fold AUC per cutoff (rows) and target category (columns).
# models_list_list: fitted fold models, indexed [cutoff][target][fold].
df_pred_all = pd.DataFrame()
df_score_all = pd.DataFrame(index=ELAPSED_MIN)
models_list_list = []

for elapsed_min in ELAPSED_MIN:
    print(f'===== {elapsed_min} =====')
    train_features, y_train = get_train_all_features(elapsed_min, train_log_list, train_y_list)
    test_features = get_test_all_feature(elapsed_min)

    # session_id becomes the index of the prediction frame and is removed from
    # the feature matrices so that it is not used as a model input.
    df_pred = pd.DataFrame(index=test_features['session_id'])
    train_features.drop(columns=['session_id'], inplace=True)
    test_features.drop(columns=['session_id'], inplace=True)
    
    models_list = []
    # One independent binary model (with its own CV) per target column.
    for target in y_train.columns:
        if target == 'session_id':
            continue
        
        print(f"---- id = {target} -----")
        oof, models, score = train_lgbm(train_features, y_train[target])
        models_list.append(models)

        # Test prediction = mean over the fold models.
        pred = predict(models, test_features)
        df_pred[target] = pred
        df_score_all.loc[elapsed_min, target] = score
        
    models_list_list.append(models_list)
    df_pred_all = pd.concat([df_pred_all, df_pred])
    # Running total of predicted test sessions.
    print(len(df_pred_all))

===== 0 =====
train_features (378594, 329)
test_features (14277, 329)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[869]	valid_0's auc: 0.824963
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[870]	valid_0's auc: 0.828872
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[782]	valid_0's auc: 0.830242
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1010]	valid_0's auc: 0.829603
--- FINISHED \ whole score: 0.8284 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[279]	valid_0's auc: 0.799629
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[360]	valid_0's auc: 0.798896
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[208]	valid_0's auc: 0.800357
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[274]	valid_0's auc: 0.802714
--- FINISHED \ whole score: 0.7998 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[513]	valid_0's auc: 0.806866
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[724]	valid_0's auc: 0.800949
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[407]	valid_0's auc: 0.80435
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[670]	valid_0's auc: 0.805409
--- FINISHED \ whole score: 0.8041 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1098]	valid_0's auc: 0.759229
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[862]	valid_0's auc: 0.760785
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1125]	valid_0's auc: 0.75905
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1304]	valid_0's auc: 0.76089
--- FINISHED \ whole score: 0.7600 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[549]	valid_0's auc: 0.745844
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[517]	valid_0's auc: 0.746746
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[623]	valid_0's auc: 0.748349
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[764]	valid_0's auc: 0.745315
--- FINISHED \ whole score: 0.7465 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1336]	valid_0's auc: 0.869831
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1549]	valid_0's auc: 0.870365
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1322]	valid_0's auc: 0.870185
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1575]	valid_0's auc: 0.874697
--- FINISHED \ whole score: 0.8713 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[614]	valid_0's auc: 0.874764
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[585]	valid_0's auc: 0.873178
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[532]	valid_0's auc: 0.878885
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[554]	valid_0's auc: 0.871779
--- FINISHED \ whole score: 0.8746 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1035]	valid_0's auc: 0.867356
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1049]	valid_0's auc: 0.865925
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1294]	valid_0's auc: 0.866839
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1442]	valid_0's auc: 0.865498
--- FINISHED \ whole score: 0.8663 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[618]	valid_0's auc: 0.754065
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[697]	valid_0's auc: 0.763436
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[608]	valid_0's auc: 0.755073
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[538]	valid_0's auc: 0.755471
--- FINISHED \ whole score: 0.7570 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[982]	valid_0's auc: 0.866158
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1005]	valid_0's auc: 0.870717
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[945]	valid_0's auc: 0.871102
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1070]	valid_0's auc: 0.871622
--- FINISHED \ whole score: 0.8699 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[462]	valid_0's auc: 0.871655
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[532]	valid_0's auc: 0.871621
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[867]	valid_0's auc: 0.871905
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[662]	valid_0's auc: 0.876145
--- FINISHED \ whole score: 0.8723 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[681]	valid_0's auc: 0.912126
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[581]	valid_0's auc: 0.905116
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[823]	valid_0's auc: 0.903634
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[733]	valid_0's auc: 0.90947
--- FINISHED \ whole score: 0.9074 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[679]	valid_0's auc: 0.857123
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[478]	valid_0's auc: 0.860684
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[728]	valid_0's auc: 0.861215
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[984]	valid_0's auc: 0.862415
--- FINISHED \ whole score: 0.8600 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[359]	valid_0's auc: 0.917618
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[360]	valid_0's auc: 0.921065
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[254]	valid_0's auc: 0.928412
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[429]	valid_0's auc: 0.920392
--- FINISHED \ whole score: 0.9212 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[865]	valid_0's auc: 0.74791
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[673]	valid_0's auc: 0.747596
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[823]	valid_0's auc: 0.747437
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[788]	valid_0's auc: 0.749704
--- FINISHED \ whole score: 0.7481 ---
14277
===== 3 =====
train_features (389649, 329)
test_features (11304, 329)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[708]	valid_0's auc: 0.826316
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[988]	valid_0's auc: 0.825698
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[601]	valid_0's auc: 0.82266
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[819]	valid_0's auc: 0.828085
--- FINISHED \ whole score: 0.8256 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[234]	valid_0's auc: 0.788704
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[347]	valid_0's auc: 0.791268
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[209]	valid_0's auc: 0.794076
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[231]	valid_0's auc: 0.790036
--- FINISHED \ whole score: 0.7904 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[596]	valid_0's auc: 0.801925
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[475]	valid_0's auc: 0.794358
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[635]	valid_0's auc: 0.796121
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[677]	valid_0's auc: 0.799125
--- FINISHED \ whole score: 0.7978 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1036]	valid_0's auc: 0.754792
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[798]	valid_0's auc: 0.756128
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[877]	valid_0's auc: 0.753401
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[985]	valid_0's auc: 0.752969
--- FINISHED \ whole score: 0.7543 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[595]	valid_0's auc: 0.740039
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[735]	valid_0's auc: 0.742175
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[593]	valid_0's auc: 0.74053
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[549]	valid_0's auc: 0.741165
--- FINISHED \ whole score: 0.7410 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1314]	valid_0's auc: 0.862513
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1167]	valid_0's auc: 0.860731
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1622]	valid_0's auc: 0.863974
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1213]	valid_0's auc: 0.865229
--- FINISHED \ whole score: 0.8630 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[369]	valid_0's auc: 0.865064
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[418]	valid_0's auc: 0.862504
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[533]	valid_0's auc: 0.866471
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[267]	valid_0's auc: 0.862148
--- FINISHED \ whole score: 0.8635 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[925]	valid_0's auc: 0.860131
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[795]	valid_0's auc: 0.856907
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1164]	valid_0's auc: 0.860409
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1268]	valid_0's auc: 0.860702
--- FINISHED \ whole score: 0.8595 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[437]	valid_0's auc: 0.748072
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[635]	valid_0's auc: 0.754772
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[656]	valid_0's auc: 0.752954
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[673]	valid_0's auc: 0.753003
--- FINISHED \ whole score: 0.7522 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[951]	valid_0's auc: 0.858726
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1248]	valid_0's auc: 0.855839
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[926]	valid_0's auc: 0.855739
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1230]	valid_0's auc: 0.857924
--- FINISHED \ whole score: 0.8569 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[566]	valid_0's auc: 0.861127
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[408]	valid_0's auc: 0.868759
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[786]	valid_0's auc: 0.864866
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[426]	valid_0's auc: 0.861214
--- FINISHED \ whole score: 0.8633 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[657]	valid_0's auc: 0.898947
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[607]	valid_0's auc: 0.896202
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1124]	valid_0's auc: 0.904691
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[884]	valid_0's auc: 0.901656
--- FINISHED \ whole score: 0.8998 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[789]	valid_0's auc: 0.851622
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[773]	valid_0's auc: 0.849704
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[865]	valid_0's auc: 0.851081
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[873]	valid_0's auc: 0.847259
--- FINISHED \ whole score: 0.8499 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[296]	valid_0's auc: 0.90409
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[262]	valid_0's auc: 0.911428
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[340]	valid_0's auc: 0.911745
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[332]	valid_0's auc: 0.915961
--- FINISHED \ whole score: 0.9107 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[539]	valid_0's auc: 0.736171
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[474]	valid_0's auc: 0.738647
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[613]	valid_0's auc: 0.735453
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[627]	valid_0's auc: 0.737042
--- FINISHED \ whole score: 0.7368 ---
25581
===== 5 =====
train_features (390621, 329)
test_features (14072, 329)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[726]	valid_0's auc: 0.827581
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[864]	valid_0's auc: 0.821218
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[926]	valid_0's auc: 0.825905
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[611]	valid_0's auc: 0.825649
--- FINISHED \ whole score: 0.8250 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[207]	valid_0's auc: 0.789496
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[134]	valid_0's auc: 0.783486
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[254]	valid_0's auc: 0.773838
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[138]	valid_0's auc: 0.77174
--- FINISHED \ whole score: 0.7791 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[605]	valid_0's auc: 0.793255
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[386]	valid_0's auc: 0.79534
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[617]	valid_0's auc: 0.789266
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[916]	valid_0's auc: 0.789452
--- FINISHED \ whole score: 0.7909 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[831]	valid_0's auc: 0.750995
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[933]	valid_0's auc: 0.751162
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[893]	valid_0's auc: 0.748238
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[785]	valid_0's auc: 0.749304
--- FINISHED \ whole score: 0.7499 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[775]	valid_0's auc: 0.735125
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[697]	valid_0's auc: 0.737013
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[619]	valid_0's auc: 0.737561
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[503]	valid_0's auc: 0.739411
--- FINISHED \ whole score: 0.7372 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1125]	valid_0's auc: 0.85583
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1334]	valid_0's auc: 0.858566
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[886]	valid_0's auc: 0.853965
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1203]	valid_0's auc: 0.858686
--- FINISHED \ whole score: 0.8567 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[630]	valid_0's auc: 0.8616
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[574]	valid_0's auc: 0.862442
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[789]	valid_0's auc: 0.859985
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[596]	valid_0's auc: 0.863974
--- FINISHED \ whole score: 0.8617 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[939]	valid_0's auc: 0.855268
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[801]	valid_0's auc: 0.85427
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1215]	valid_0's auc: 0.855297
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1112]	valid_0's auc: 0.853836
--- FINISHED \ whole score: 0.8546 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[519]	valid_0's auc: 0.748162
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[606]	valid_0's auc: 0.748698
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[521]	valid_0's auc: 0.750173
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[520]	valid_0's auc: 0.746705
--- FINISHED \ whole score: 0.7484 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[782]	valid_0's auc: 0.851346
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[896]	valid_0's auc: 0.847891
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[530]	valid_0's auc: 0.847839
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[963]	valid_0's auc: 0.854698
--- FINISHED \ whole score: 0.8502 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[441]	valid_0's auc: 0.858318
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[540]	valid_0's auc: 0.860392
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[518]	valid_0's auc: 0.855928
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[706]	valid_0's auc: 0.857297
--- FINISHED \ whole score: 0.8576 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[884]	valid_0's auc: 0.891387
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[765]	valid_0's auc: 0.895804
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[706]	valid_0's auc: 0.899032
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[396]	valid_0's auc: 0.892787
--- FINISHED \ whole score: 0.8938 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[703]	valid_0's auc: 0.848798
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[724]	valid_0's auc: 0.834849
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[614]	valid_0's auc: 0.846127
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[816]	valid_0's auc: 0.846651
--- FINISHED \ whole score: 0.8439 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[514]	valid_0's auc: 0.908951
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[135]	valid_0's auc: 0.901193
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[137]	valid_0's auc: 0.904445
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[202]	valid_0's auc: 0.894133
--- FINISHED \ whole score: 0.8986 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[701]	valid_0's auc: 0.736743
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[887]	valid_0's auc: 0.731018
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[510]	valid_0's auc: 0.733634
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[754]	valid_0's auc: 0.727508
--- FINISHED \ whole score: 0.7321 ---
39653
===== 10 =====
train_features (391074, 329)
test_features (16833, 329)
---- id = 38 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[708]	valid_0's auc: 0.825021
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[686]	valid_0's auc: 0.820719
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[512]	valid_0's auc: 0.822822
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[585]	valid_0's auc: 0.820761
--- FINISHED \ whole score: 0.8223 ---
---- id = 110 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[186]	valid_0's auc: 0.772275
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[272]	valid_0's auc: 0.7833
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[115]	valid_0's auc: 0.783172
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[294]	valid_0's auc: 0.772353
--- FINISHED \ whole score: 0.7750 ---
---- id = 113 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[472]	valid_0's auc: 0.780627
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[673]	valid_0's auc: 0.790214
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[410]	valid_0's auc: 0.792776
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[410]	valid_0's auc: 0.787372
--- FINISHED \ whole score: 0.7872 ---
---- id = 114 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[715]	valid_0's auc: 0.753241
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[898]	valid_0's auc: 0.756608
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[800]	valid_0's auc: 0.755543
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[910]	valid_0's auc: 0.755395
--- FINISHED \ whole score: 0.7552 ---
---- id = 134 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[453]	valid_0's auc: 0.739835
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[579]	valid_0's auc: 0.745539
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[489]	valid_0's auc: 0.744047
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[691]	valid_0's auc: 0.74527
--- FINISHED \ whole score: 0.7436 ---
---- id = 171 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[902]	valid_0's auc: 0.846597
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[960]	valid_0's auc: 0.844016
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[755]	valid_0's auc: 0.841612
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1027]	valid_0's auc: 0.845405
--- FINISHED \ whole score: 0.8444 ---
---- id = 172 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[307]	valid_0's auc: 0.8386
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[611]	valid_0's auc: 0.822854
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[635]	valid_0's auc: 0.849676
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[426]	valid_0's auc: 0.83632
--- FINISHED \ whole score: 0.8351 ---
---- id = 173 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1064]	valid_0's auc: 0.846746
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[875]	valid_0's auc: 0.847869
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[709]	valid_0's auc: 0.848987
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[805]	valid_0's auc: 0.848414
--- FINISHED \ whole score: 0.8478 ---
---- id = 376 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[453]	valid_0's auc: 0.757982
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[486]	valid_0's auc: 0.754688
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[535]	valid_0's auc: 0.762526
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[495]	valid_0's auc: 0.754621
--- FINISHED \ whole score: 0.7574 ---
---- id = 435 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[621]	valid_0's auc: 0.838749
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[457]	valid_0's auc: 0.838829
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[688]	valid_0's auc: 0.844334
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[595]	valid_0's auc: 0.835889
--- FINISHED \ whole score: 0.8393 ---
---- id = 467 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[169]	valid_0's auc: 0.831389
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[362]	valid_0's auc: 0.831854
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[332]	valid_0's auc: 0.852983
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[433]	valid_0's auc: 0.842309
--- FINISHED \ whole score: 0.8388 ---
---- id = 537 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1067]	valid_0's auc: 0.881698
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[610]	valid_0's auc: 0.878905
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[969]	valid_0's auc: 0.885079
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[504]	valid_0's auc: 0.878357
--- FINISHED \ whole score: 0.8789 ---
---- id = 539 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[639]	valid_0's auc: 0.834444
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[416]	valid_0's auc: 0.823852
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[914]	valid_0's auc: 0.835916
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[468]	valid_0's auc: 0.837789
--- FINISHED \ whole score: 0.8321 ---
---- id = 629 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[258]	valid_0's auc: 0.857836
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[249]	valid_0's auc: 0.873442
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[172]	valid_0's auc: 0.876243
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[298]	valid_0's auc: 0.880048
--- FINISHED \ whole score: 0.8708 ---
---- id = 768 -----




Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[448]	valid_0's auc: 0.729568
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[660]	valid_0's auc: 0.730339
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[365]	valid_0's auc: 0.724727
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[453]	valid_0's auc: 0.735765
--- FINISHED \ whole score: 0.7299 ---
56486
CPU times: user 1d 18h 1min 38s, sys: 44min 1s, total: 1d 18h 45min 40s
Wall time: 1h 59min 16s


In [38]:
# Preview the first five rows of the prediction table: it is indexed by
# session_id and has one column of predicted scores per target category id.
df_pred_all.head()

Unnamed: 0_level_0,38,110,113,114,134,171,172,173,376,435,467,537,539,629,768
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
663721,0.08483,0.015274,0.016923,0.273338,0.059053,0.093039,0.001064,0.071332,0.143537,0.018601,0.015905,0.001158,0.050283,0.075182,0.022762
663761,0.225642,0.086428,0.360441,0.494684,0.276311,0.156338,0.399049,0.026057,0.303417,0.005537,0.00768,0.007498,0.280894,0.000838,0.507436
663763,0.271042,0.054439,0.119405,0.86511,0.448861,0.008613,0.003984,0.002733,0.215836,0.02814,0.057002,0.000732,0.017465,0.013638,0.051168
663775,0.041786,0.015483,0.020866,0.150245,0.257015,0.131807,0.014734,0.649858,0.045159,0.200413,0.022921,0.002874,0.072442,0.007965,0.224136
663778,0.245837,0.008837,0.152508,0.201549,0.125345,0.072845,0.009683,0.078135,0.044527,0.064755,0.003676,0.016154,0.040702,0.001603,0.067167


In [39]:
# Show the validation AUC table: one column per target category id and one row
# per run label printed as `===== N =====` in the training log (0, 3, 5, 10).
# The values correspond to the "whole score" lines logged for each category.
df_score_all

Unnamed: 0,38,110,113,114,134,171,172,173,376,435,467,537,539,629,768
0,0.828399,0.799798,0.804072,0.75996,0.746501,0.87127,0.874591,0.866333,0.757023,0.869878,0.87232,0.90739,0.860034,0.921245,0.748148
3,0.82564,0.790394,0.797844,0.754308,0.740964,0.863026,0.863471,0.859453,0.752209,0.856908,0.863328,0.89979,0.849869,0.910716,0.736769
5,0.824971,0.779091,0.790944,0.749923,0.737195,0.856731,0.861659,0.854557,0.748434,0.850206,0.857643,0.893833,0.843935,0.898571,0.732062
10,0.822274,0.775022,0.787172,0.755187,0.743642,0.84437,0.835091,0.847778,0.757447,0.839289,0.83882,0.878927,0.83212,0.870808,0.729946


In [40]:
# Average the AUC across the category columns for every row of df_score_all,
# then report the mean of those row averages as the overall CV score.
cv = df_score_all.mean(axis='columns')
print(cv)
print('- cv =', cv.mean())

0     0.832464
3     0.824312
5     0.818650
10    0.810526
dtype: float64
- cv = 0.8214882660187509


In [41]:
# Sanity check: the prediction table must have exactly as many rows as the test set.
assert df_pred_all.shape[0] == test.shape[0]

In [42]:
# Attach the predictions to the test sessions. An inner merge preserves the
# order of the left keys, so the submission rows follow the row order of `test`.
# validate='one_to_one' makes pandas raise MergeError when session_id is
# duplicated on either side; without it, a dropped session and a duplicated
# one could cancel out and still satisfy the length check below.
submission = pd.merge(
    test[['session_id']],
    df_pred_all.reset_index(),
    on='session_id',
    how='inner',
    validate='one_to_one',
)
# With unique keys on both sides, a shorter result can only mean that some
# test sessions have no prediction row.
assert len(submission) == len(test), (
    f'{len(test) - len(submission)} test sessions have no prediction'
)

In [43]:
# Write only the prediction columns: session_id is dropped and the row index
# is not written to the file.
submission.drop('session_id', axis='columns').to_csv('../outputs/submission.csv', index=False)

### baseline_7: userごとのカテゴリの過去の購買実績
- feat = 329
- Wall time: 1h 59min 16s
- cv = 0.82148
- LB = 0.7671

### baseline_6: session内のターゲットの購買
- feat = 89
- Wall time: 37min 47s
- cv = 0.66015
- LB = 0.6247

### baseline_5
- feat = 74
- Wall time: 39min 18s
- cv = 0.66859
- LB = 0.6230