In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn import preprocessing
# import lightgbm as lgbm
import optuna.integration.lightgbm as lgbm

In [2]:
INPUT_DIR = '../input/'
OUTPUT_DIR = '../output/'

In [3]:
def read_feather(filename):
    df = pd.read_feather(os.path.join(INPUT_DIR, filename))
    print(filename, df.shape)
    return df

In [4]:
train_df = read_feather('train.f')
test_df = read_feather('test.f')

train.f (1997595, 35)
test.f (390095, 30)


In [5]:
campaign_df = read_feather('campaign.f')
map_gv_df = read_feather('map_game_feed_native_video_assets.f')
ad_video_df = read_feather('advertiser_video.f')
ad_cvideo_df = read_feather('advertiser_converted_video.f')

campaign.f (14627, 4)
map_game_feed_native_video_assets.f (2796, 3)
advertiser_video.f (11707, 6)
advertiser_converted_video.f (198622, 8)


In [6]:
ad_cvideo_df = ad_cvideo_df.drop_duplicates(
    subset=['mst_advertiser_video_id', 
                   'mst_game_feed_id', 
                    'mst_video_template_id'], keep='last')

le = preprocessing.LabelEncoder()
le.fit(['vertical', 'horizontal'])
ad_cvideo_df['rectangle_type_id'] = le.transform(ad_cvideo_df['rectangle_type'])
ad_cvideo_df.drop(columns=['rectangle_type'], inplace=True)
print('ad_cvideo_df', ad_cvideo_df.shape)

ad_cvideo_df (107493, 8)


#### 連続変数の特徴量

In [7]:
def create_continuous_features(input_df):
    use_columns = [
        # 連続変数
        'first_login_interval',
        'max_login_interval', 
        'frequency', 
        'login_frequency', 
        'last_login_interval',
        'from_click',
    ]
    return input_df[use_columns].copy()

#### Category系の特徴量

In [8]:
def create_category_features(input_df):
    use_columns = [
        # category 系の id. label-encoding として使う
        'adnw_id',
        'adspot_id',
        'adspot_video_format_id',
        'game_feed_asset_type_id',
        'auction_type_id',
        'category_id',
        'header_bidding',
        'is_interstitial',
        'os',
#  os_version',
        'pos',
        'user_type_id'
    ]
    return input_df[use_columns].copy()

#### country_code

In [9]:
def create_countrycode(input_df):
    le = preprocessing.LabelEncoder()
    le.fit(['None', 'JP', 'US', 'KR'])
    return pd.DataFrame(le.transform(input_df['country_code'].fillna('None')), columns=['country'])

#### date系

In [10]:
def create_date_features(input_df):
    date_df = pd.DataFrame(pd.to_datetime(input_df['imp_at'], utc=True))
    date_df['imp_at'] = date_df['imp_at'].dt.tz_convert('Asia/Tokyo')
    date_df['day'] = date_df['imp_at'].dt.day
    date_df['hour'] = date_df['imp_at'].dt.hour
    date_df['total_minute'] = date_df['imp_at'].dt.hour*60+date_df['imp_at'].dt.minute
    date_df['dayofweek'] = date_df['imp_at'].dt.dayofweek
    date_df.drop(columns=['imp_at'], inplace=True)
    return date_df

#### campaign

In [11]:
def create_campaign_features(input_df):
    campaign = pd.merge(input_df[['campaign_id']], campaign_df, left_on='campaign_id', right_on='id', how='left')
    campaign.drop(columns=['campaign_id', 'id', 'mst_advertiser_id', 'mst_advertiser_order_id'], inplace=True)
    return campaign

#### map_game_feed_native_video_assets

In [12]:
def create_gamefeed_features(input_df):
    input_merge = pd.merge(input_df[['game_feed_id', 'advertiser_id', 'video_template_id']], map_gv_df, 
                           left_on='game_feed_id', right_on='mst_game_feed_id', how='left').drop(columns=['mst_game_feed_id'])
    
    horizontal = ad_video_df.copy()
    left_keys = ['horizontal_mst_advertiser_video_id', 'advertiser_id']
    right_keys = ['id', 'mst_advertiser_id']
    horizontal.columns = [f'horizontal_{c}' if c not in right_keys else c for c in horizontal.columns]
    input_merge = pd.merge(input_merge, horizontal, left_on=left_keys, right_on=right_keys, how='left').drop(columns=right_keys) 
    
    vertical = ad_video_df.copy()
    left_keys = ['vertical_mst_advertiser_video_id', 'advertiser_id']
    right_keys = ['id', 'mst_advertiser_id']
    vertical.columns = [f'vertical_{c}' if c not in right_keys else c for c in vertical.columns]
    input_merge = pd.merge(input_merge, vertical, left_on=left_keys, right_on=right_keys, how='left').drop(columns=right_keys)
    
    left_keys = [
        "horizontal_mst_advertiser_video_id",
        "game_feed_id",
        "video_template_id",
    ]
    right_keys = [
        "mst_advertiser_video_id",
        "mst_game_feed_id",
        "mst_video_template_id",
    ]
    horizontal = ad_cvideo_df.copy()
    horizontal.columns = [f"horizontal_converted_{c}" if c not in right_keys else c for c in horizontal.columns]
    input_merge = pd.merge(input_merge, horizontal, left_on=left_keys, right_on=right_keys, how='left').drop(columns=right_keys) 
    
    left_keys = [
        "vertical_mst_advertiser_video_id",
        "game_feed_id",
        "video_template_id",
    ]
    right_keys = [
        "mst_advertiser_video_id",
        "mst_game_feed_id",
        "mst_video_template_id",
    ]
    vertical = ad_cvideo_df.copy()
    vertical.columns = [f"vertical_converted_{c}" if c not in right_keys else c for c in vertical.columns]
    input_merge = pd.merge(input_merge, vertical, left_on=left_keys, right_on=right_keys, how='left').drop(columns=right_keys)
    
    input_merge.drop(columns=['game_feed_id', 'advertiser_id', 'video_template_id', 
                              'horizontal_mst_advertiser_video_id', 'vertical_mst_advertiser_video_id'], inplace=True)
    return input_merge

In [13]:
processors = [
    create_continuous_features,
    create_category_features,
    create_countrycode,
    create_date_features,
    create_campaign_features,
    create_gamefeed_features
]

In [14]:
def to_feature(input_df):
    out_df = pd.DataFrame()
    for func in processors:
        _df = func(input_df)
        assert len(_df) == len(input_df), func.__name__
        out_df = pd.concat([out_df, _df], axis=1)
    return out_df

In [15]:
train_feat_df = to_feature(train_df)
test_feat_df = to_feature(test_df)
y = train_df['target']

In [16]:
assert len(train_feat_df) == len(train_df)
assert len(test_feat_df) == len(test_df)

In [17]:
feature_count = len(train_feat_df.columns)

In [18]:
#del train_df, test_df

#### LightGBM による学習

In [19]:
from sklearn.metrics import average_precision_score

def pr_auc(y_pred, y_true):
    """lightGBM の round ごとに PR-AUC を計算する用"""
    score = average_precision_score(y_true.get_label(), y_pred)
    return "pr_auc", score, True

In [20]:
lgbm_param = {
    'objective' : 'binary',
    'boosting_type': 'gbdt',
    'seed' : 0,
    'learning_rate':  0.1
}

In [21]:
def train_lgbm(X, y, cv, params: dict, verbose=100):

    models = []
    # training data の target と同じだけのゼロ配列を用意
    # float にしないと悲しい事件が起こるのでそこだけ注意
    oof_pred = np.zeros_like(y, dtype=np.float)

    for i, (idx_train, idx_valid) in enumerate(cv): 
        # この部分が交差検証のところです。データセットを cv instance によって分割します
        # training data を trian/valid に分割
        x_train, y_train = X.iloc[idx_train], y.iloc[idx_train]
        x_valid, y_valid = X.iloc[idx_valid], y.iloc[idx_valid]
        
        lgbm_train = lgbm.Dataset(x_train, y_train)
        lgbm_eval = lgbm.Dataset(x_valid, y_valid, reference=lgbm_train)
        
        lgbm_model = lgbm.train(params, 
                                                    lgbm_train, 
                                                    valid_sets=lgbm_eval,
                                                    num_boost_round=1000,
                                                    early_stopping_rounds=verbose,
                                                    feval=pr_auc,
                                                    verbose_eval=verbose)
        y_pred = lgbm_model.predict(x_valid, num_iteration=lgbm_model.best_iteration)
        
        oof_pred[idx_valid] = y_pred
        models.append(lgbm_model)

        print(f'Fold {i} PR-AUC: {average_precision_score(y_valid, y_pred):.4f}')

    score = average_precision_score(y, oof_pred)
    print('FINISHED \ whole score: {:.4f}'.format(score))
    return oof_pred, models, score

In [22]:
from sklearn.model_selection import StratifiedKFold

fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
cv = list(fold.split(train_feat_df, y)) # もともとが generator なため明示的に list に変換する

In [23]:
def tuning_lgbm(X, y, cv, params, verbose=100):
    idx_train, idx_valid = cv[0]
    x_train, y_train = X.iloc[idx_train], y.iloc[idx_train]
    x_valid, y_valid = X.iloc[idx_valid], y.iloc[idx_valid]
    lgbm_train = lgbm.Dataset(x_train, y_train)
    lgbm_eval = lgbm.Dataset(x_valid, y_valid, reference=lgbm_train)
    
    best_params, tuning_history = dict(), list()
    best = lgbm.train(params,
                                  lgbm_train,
                                  valid_sets=lgbm_eval,
                                  num_boost_round=1000,
                                  early_stopping_rounds=verbose,
                                  feval=pr_auc,
                                  verbose_eval=0)
    print('Best Params:', best.params)
    print('Best Iteration:', best.best_iteration)
    print('Best Score:', best.best_score)

In [24]:
%%time
tuning_lgbm(train_feat_df, y, cv, params=lgbm_param)

[32m[I 2020-11-15 15:19:48,617][0m A new study created in memory with name: no-name-5ee51138-5f2f-4e99-b96e-0a86d0c365c3[0m
feature_fraction, val_score: inf:   0%|          | 0/7 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction, val_score: 0.108841:  14%|#4        | 1/7 [02:01<12:08, 121.34s/it][32m[I 2020-11-15 15:21:49,957][0m Trial 0 finished with value: 0.10884063224250155 and parameters: {'feature_fraction': 0.4}. Best is trial 0 with value: 0.10884063224250155.[0m
feature_fraction, val_score: 0.108841:  14%|#4        | 1/7 [02:01<12:08, 121.34s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction, val_score: 0.108841:  29%|##8       | 2/7 [03:29<09:16, 111.33s/it][32m[I 2020-11-15 15:23:17,940][0m Trial 1 finished with value: 0.10896368191275302 and parameters: {'feature_fraction': 0.8}. Best is trial 0 with value: 0.10884063224250155.[0m
feature_fraction, val_score: 0.108841:  29%|##8       | 2/7 [03:29<09:16, 111.33s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction, val_score: 0.108841:  43%|####2     | 3/7 [04:58<06:58, 104.65s/it][32m[I 2020-11-15 15:24:46,998][0m Trial 2 finished with value: 0.10906982907424413 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.10884063224250155.[0m
feature_fraction, val_score: 0.108841:  43%|####2     | 3/7 [04:58<06:58, 104.65s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction, val_score: 0.108841:  57%|#####7    | 4/7 [06:46<05:16, 105.56s/it][32m[I 2020-11-15 15:26:34,671][0m Trial 3 finished with value: 0.10889813299098247 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 0.10884063224250155.[0m
feature_fraction, val_score: 0.108841:  57%|#####7    | 4/7 [06:46<05:16, 105.56s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction, val_score: 0.108841:  71%|#######1  | 5/7 [08:01<03:13, 96.65s/it] [32m[I 2020-11-15 15:27:50,555][0m Trial 4 finished with value: 0.1090050342141429 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 0.10884063224250155.[0m
feature_fraction, val_score: 0.108841:  71%|#######1  | 5/7 [08:01<03:13, 96.65s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction, val_score: 0.108841:  86%|########5 | 6/7 [09:06<01:27, 87.09s/it][32m[I 2020-11-15 15:28:55,341][0m Trial 5 finished with value: 0.10928357410445236 and parameters: {'feature_fraction': 0.6}. Best is trial 0 with value: 0.10884063224250155.[0m
feature_fraction, val_score: 0.108841:  86%|########5 | 6/7 [09:06<01:27, 87.09s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction, val_score: 0.108841: 100%|##########| 7/7 [10:08<00:00, 79.53s/it][32m[I 2020-11-15 15:29:57,218][0m Trial 6 finished with value: 0.10931850732774431 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 0.10884063224250155.[0m
feature_fraction, val_score: 0.108841: 100%|##########| 7/7 [10:08<00:00, 86.94s/it]
num_leaves, val_score: 0.108841:   0%|          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108368:   5%|5         | 1/20 [00:42<13:20, 42.14s/it][32m[I 2020-11-15 15:30:39,366][0m Trial 7 finished with value: 0.10836800878177427 and parameters: {'num_leaves': 236}. Best is trial 7 with value: 0.10836800878177427.[0m
num_leaves, val_score: 0.108368:   5%|5         | 1/20 [00:42<13:20, 42.14s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108368:  10%|#         | 2/20 [01:47<14:42, 49.01s/it][32m[I 2020-11-15 15:31:44,408][0m Trial 8 finished with value: 0.10889962046907202 and parameters: {'num_leaves': 61}. Best is trial 7 with value: 0.10836800878177427.[0m
num_leaves, val_score: 0.108368:  10%|#         | 2/20 [01:47<14:42, 49.01s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108368:  15%|#5        | 3/20 [02:29<13:17, 46.91s/it][32m[I 2020-11-15 15:32:26,406][0m Trial 9 finished with value: 0.1084047821483553 and parameters: {'num_leaves': 179}. Best is trial 7 with value: 0.10836800878177427.[0m
num_leaves, val_score: 0.108368:  15%|#5        | 3/20 [02:29<13:17, 46.91s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108368:  20%|##        | 4/20 [03:07<11:48, 44.25s/it][32m[I 2020-11-15 15:33:04,455][0m Trial 10 finished with value: 0.1084235108872279 and parameters: {'num_leaves': 243}. Best is trial 7 with value: 0.10836800878177427.[0m
num_leaves, val_score: 0.108368:  20%|##        | 4/20 [03:07<11:48, 44.25s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108368:  25%|##5       | 5/20 [03:45<10:35, 42.34s/it][32m[I 2020-11-15 15:33:42,325][0m Trial 11 finished with value: 0.10837881519882647 and parameters: {'num_leaves': 215}. Best is trial 7 with value: 0.10836800878177427.[0m
num_leaves, val_score: 0.108368:  25%|##5       | 5/20 [03:45<10:35, 42.34s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108368:  30%|###       | 6/20 [04:26<09:49, 42.08s/it][32m[I 2020-11-15 15:34:23,793][0m Trial 12 finished with value: 0.10840055266534117 and parameters: {'num_leaves': 256}. Best is trial 7 with value: 0.10836800878177427.[0m
num_leaves, val_score: 0.108368:  30%|###       | 6/20 [04:26<09:49, 42.08s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108368:  35%|###5      | 7/20 [05:04<08:49, 40.69s/it][32m[I 2020-11-15 15:35:01,263][0m Trial 13 finished with value: 0.1084596480380913 and parameters: {'num_leaves': 184}. Best is trial 7 with value: 0.10836800878177427.[0m
num_leaves, val_score: 0.108368:  35%|###5      | 7/20 [05:04<08:49, 40.69s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108365:  40%|####      | 8/20 [05:44<08:06, 40.52s/it][32m[I 2020-11-15 15:35:41,374][0m Trial 14 finished with value: 0.10836487983490546 and parameters: {'num_leaves': 213}. Best is trial 14 with value: 0.10836487983490546.[0m
num_leaves, val_score: 0.108365:  40%|####      | 8/20 [05:44<08:06, 40.52s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108365:  45%|####5     | 9/20 [06:30<07:43, 42.12s/it][32m[I 2020-11-15 15:36:27,226][0m Trial 15 finished with value: 0.10851297835265393 and parameters: {'num_leaves': 120}. Best is trial 14 with value: 0.10836487983490546.[0m
num_leaves, val_score: 0.108365:  45%|####5     | 9/20 [06:30<07:43, 42.12s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108365:  50%|#####     | 10/20 [07:17<07:17, 43.79s/it][32m[I 2020-11-15 15:37:14,923][0m Trial 16 finished with value: 0.10846690119341607 and parameters: {'num_leaves': 147}. Best is trial 14 with value: 0.10836487983490546.[0m
num_leaves, val_score: 0.108365:  50%|#####     | 10/20 [07:17<07:17, 43.79s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108365:  55%|#####5    | 11/20 [07:56<06:20, 42.27s/it][32m[I 2020-11-15 15:37:53,626][0m Trial 17 finished with value: 0.10840932394698112 and parameters: {'num_leaves': 255}. Best is trial 14 with value: 0.10836487983490546.[0m
num_leaves, val_score: 0.108365:  55%|#####5    | 11/20 [07:56<06:20, 42.27s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108365:  60%|######    | 12/20 [08:38<05:36, 42.12s/it][32m[I 2020-11-15 15:38:35,411][0m Trial 18 finished with value: 0.10837487509074133 and parameters: {'num_leaves': 211}. Best is trial 14 with value: 0.10836487983490546.[0m
num_leaves, val_score: 0.108365:  60%|######    | 12/20 [08:38<05:36, 42.12s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108365:  65%|######5   | 13/20 [09:27<05:10, 44.38s/it][32m[I 2020-11-15 15:39:25,077][0m Trial 19 finished with value: 0.10876795113077965 and parameters: {'num_leaves': 92}. Best is trial 14 with value: 0.10836487983490546.[0m
num_leaves, val_score: 0.108365:  65%|######5   | 13/20 [09:27<05:10, 44.38s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108365:  70%|#######   | 14/20 [10:06<04:15, 42.60s/it][32m[I 2020-11-15 15:40:03,502][0m Trial 20 finished with value: 0.10837881519882649 and parameters: {'num_leaves': 215}. Best is trial 14 with value: 0.10836487983490546.[0m
num_leaves, val_score: 0.108365:  70%|#######   | 14/20 [10:06<04:15, 42.60s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108332:  75%|#######5  | 15/20 [10:44<03:25, 41.16s/it][32m[I 2020-11-15 15:40:41,312][0m Trial 21 finished with value: 0.10833206010522908 and parameters: {'num_leaves': 212}. Best is trial 21 with value: 0.10833206010522908.[0m
num_leaves, val_score: 0.108332:  75%|#######5  | 15/20 [10:44<03:25, 41.16s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108332:  80%|########  | 16/20 [11:24<02:43, 40.81s/it][32m[I 2020-11-15 15:41:21,312][0m Trial 22 finished with value: 0.10843115959041695 and parameters: {'num_leaves': 177}. Best is trial 21 with value: 0.10833206010522908.[0m
num_leaves, val_score: 0.108332:  80%|########  | 16/20 [11:24<02:43, 40.81s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108332:  85%|########5 | 17/20 [12:02<02:00, 40.04s/it][32m[I 2020-11-15 15:41:59,549][0m Trial 23 finished with value: 0.10839777466525109 and parameters: {'num_leaves': 231}. Best is trial 21 with value: 0.10833206010522908.[0m
num_leaves, val_score: 0.108332:  85%|########5 | 17/20 [12:02<02:00, 40.04s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108332:  90%|######### | 18/20 [12:39<01:18, 39.31s/it][32m[I 2020-11-15 15:42:37,142][0m Trial 24 finished with value: 0.10839816245600614 and parameters: {'num_leaves': 196}. Best is trial 21 with value: 0.10833206010522908.[0m
num_leaves, val_score: 0.108332:  90%|######### | 18/20 [12:39<01:18, 39.31s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108332:  95%|#########5| 19/20 [13:27<00:41, 41.68s/it][32m[I 2020-11-15 15:43:24,363][0m Trial 25 finished with value: 0.10846690119341607 and parameters: {'num_leaves': 147}. Best is trial 21 with value: 0.10833206010522908.[0m
num_leaves, val_score: 0.108332:  95%|#########5| 19/20 [13:27<00:41, 41.68s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


num_leaves, val_score: 0.108332: 100%|##########| 20/20 [14:05<00:00, 40.81s/it][32m[I 2020-11-15 15:44:03,130][0m Trial 26 finished with value: 0.10836320625259825 and parameters: {'num_leaves': 253}. Best is trial 21 with value: 0.10833206010522908.[0m
num_leaves, val_score: 0.108332: 100%|##########| 20/20 [14:05<00:00, 42.30s/it]
bagging, val_score: 0.108332:   0%|          | 0/10 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108320:  10%|#         | 1/10 [00:45<06:47, 45.22s/it][32m[I 2020-11-15 15:44:48,358][0m Trial 27 finished with value: 0.10831961174942051 and parameters: {'bagging_fraction': 0.9603647089936341, 'bagging_freq': 2}. Best is trial 27 with value: 0.10831961174942051.[0m
bagging, val_score: 0.108320:  10%|#         | 1/10 [00:45<06:47, 45.22s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108320:  20%|##        | 2/10 [01:27<05:55, 44.48s/it][32m[I 2020-11-15 15:45:31,093][0m Trial 28 finished with value: 0.10833470438089653 and parameters: {'bagging_fraction': 0.9957020474040958, 'bagging_freq': 2}. Best is trial 27 with value: 0.10831961174942051.[0m
bagging, val_score: 0.108320:  20%|##        | 2/10 [01:27<05:55, 44.48s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108320:  30%|###       | 3/10 [02:13<05:14, 44.92s/it][32m[I 2020-11-15 15:46:17,040][0m Trial 29 finished with value: 0.10834436556867914 and parameters: {'bagging_fraction': 0.9923505068723085, 'bagging_freq': 2}. Best is trial 27 with value: 0.10831961174942051.[0m
bagging, val_score: 0.108320:  30%|###       | 3/10 [02:13<05:14, 44.92s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108320:  40%|####      | 4/10 [02:56<04:25, 44.23s/it][32m[I 2020-11-15 15:46:59,669][0m Trial 30 finished with value: 0.10839232708250224 and parameters: {'bagging_fraction': 0.9973009445509088, 'bagging_freq': 2}. Best is trial 27 with value: 0.10831961174942051.[0m
bagging, val_score: 0.108320:  40%|####      | 4/10 [02:56<04:25, 44.23s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108315:  50%|#####     | 5/10 [03:38<03:37, 43.56s/it][32m[I 2020-11-15 15:47:41,677][0m Trial 31 finished with value: 0.10831489858348657 and parameters: {'bagging_fraction': 0.9907178796872467, 'bagging_freq': 2}. Best is trial 31 with value: 0.10831489858348657.[0m
bagging, val_score: 0.108315:  50%|#####     | 5/10 [03:38<03:37, 43.56s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108315:  60%|######    | 6/10 [04:23<02:56, 44.08s/it][32m[I 2020-11-15 15:48:26,951][0m Trial 32 finished with value: 0.10832404116901027 and parameters: {'bagging_fraction': 0.9952667407888507, 'bagging_freq': 2}. Best is trial 31 with value: 0.10831489858348657.[0m
bagging, val_score: 0.108315:  60%|######    | 6/10 [04:23<02:56, 44.08s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108315:  70%|#######   | 7/10 [05:04<02:09, 43.12s/it][32m[I 2020-11-15 15:49:07,849][0m Trial 33 finished with value: 0.10861200430250233 and parameters: {'bagging_fraction': 0.7660883363989718, 'bagging_freq': 5}. Best is trial 31 with value: 0.10831489858348657.[0m
bagging, val_score: 0.108315:  70%|#######   | 7/10 [05:04<02:09, 43.12s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108315:  80%|########  | 8/10 [05:43<01:23, 41.85s/it][32m[I 2020-11-15 15:49:46,737][0m Trial 34 finished with value: 0.10853354161149514 and parameters: {'bagging_fraction': 0.8551177105177565, 'bagging_freq': 1}. Best is trial 31 with value: 0.10831489858348657.[0m
bagging, val_score: 0.108315:  80%|########  | 8/10 [05:43<01:23, 41.85s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108315:  90%|######### | 9/10 [06:19<00:39, 39.94s/it][32m[I 2020-11-15 15:50:22,202][0m Trial 35 finished with value: 0.1096773634667118 and parameters: {'bagging_fraction': 0.43587200626867945, 'bagging_freq': 3}. Best is trial 31 with value: 0.10831489858348657.[0m
bagging, val_score: 0.108315:  90%|######### | 9/10 [06:19<00:39, 39.94s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


bagging, val_score: 0.108315: 100%|##########| 10/10 [06:58<00:00, 39.92s/it][32m[I 2020-11-15 15:51:02,091][0m Trial 36 finished with value: 0.10846864279362962 and parameters: {'bagging_fraction': 0.8757596763983945, 'bagging_freq': 7}. Best is trial 31 with value: 0.10831489858348657.[0m
bagging, val_score: 0.108315: 100%|##########| 10/10 [06:58<00:00, 41.90s/it]
feature_fraction_stage2, val_score: 0.108315:   0%|          | 0/3 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction_stage2, val_score: 0.108315:  33%|###3      | 1/3 [00:38<01:17, 38.86s/it][32m[I 2020-11-15 15:51:40,956][0m Trial 37 finished with value: 0.1087282416917308 and parameters: {'feature_fraction': 0.48000000000000004}. Best is trial 37 with value: 0.1087282416917308.[0m
feature_fraction_stage2, val_score: 0.108315:  33%|###3      | 1/3 [00:38<01:17, 38.86s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction_stage2, val_score: 0.108315:  67%|######6   | 2/3 [01:35<00:44, 44.23s/it][32m[I 2020-11-15 15:52:37,709][0m Trial 38 finished with value: 0.10849075580990233 and parameters: {'feature_fraction': 0.44800000000000006}. Best is trial 38 with value: 0.10849075580990233.[0m
feature_fraction_stage2, val_score: 0.108315:  67%|######6   | 2/3 [01:35<00:44, 44.23s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


feature_fraction_stage2, val_score: 0.108315: 100%|##########| 3/3 [02:25<00:00, 46.02s/it][32m[I 2020-11-15 15:53:27,900][0m Trial 39 finished with value: 0.10837581696749339 and parameters: {'feature_fraction': 0.41600000000000004}. Best is trial 39 with value: 0.10837581696749339.[0m
feature_fraction_stage2, val_score: 0.108315: 100%|##########| 3/3 [02:25<00:00, 48.60s/it]
regularization_factors, val_score: 0.108315:   0%|          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.108207:   5%|5         | 1/20 [00:42<13:35, 42.94s/it][32m[I 2020-11-15 15:54:10,847][0m Trial 40 finished with value: 0.1082069124098612 and parameters: {'lambda_l1': 4.0152301341547e-05, 'lambda_l2': 0.25017130099418683}. Best is trial 40 with value: 0.1082069124098612.[0m
regularization_factors, val_score: 0.108207:   5%|5         | 1/20 [00:42<13:35, 42.94s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107808:  10%|#         | 2/20 [01:39<14:07, 47.09s/it][32m[I 2020-11-15 15:55:07,602][0m Trial 41 finished with value: 0.10780814269073244 and parameters: {'lambda_l1': 2.459187443737713e-05, 'lambda_l2': 1.3149695665065615}. Best is trial 41 with value: 0.10780814269073244.[0m
regularization_factors, val_score: 0.107808:  10%|#         | 2/20 [01:39<14:07, 47.09s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107808:  15%|#5        | 3/20 [02:29<13:36, 48.04s/it][32m[I 2020-11-15 15:55:57,866][0m Trial 42 finished with value: 0.10791031140831576 and parameters: {'lambda_l1': 2.630569498709477e-05, 'lambda_l2': 1.005765340214611}. Best is trial 41 with value: 0.10780814269073244.[0m
regularization_factors, val_score: 0.107808:  15%|#5        | 3/20 [02:29<13:36, 48.04s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107771:  20%|##        | 4/20 [03:41<14:40, 55.05s/it][32m[I 2020-11-15 15:57:09,259][0m Trial 43 finished with value: 0.10777061291165119 and parameters: {'lambda_l1': 3.872526084233591e-05, 'lambda_l2': 1.7882410541825418}. Best is trial 43 with value: 0.10777061291165119.[0m
regularization_factors, val_score: 0.107771:  20%|##        | 4/20 [03:41<14:40, 55.05s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107771:  25%|##5       | 5/20 [04:42<14:11, 56.78s/it][32m[I 2020-11-15 15:58:10,082][0m Trial 44 finished with value: 0.10779738735201992 and parameters: {'lambda_l1': 3.962166326872457e-05, 'lambda_l2': 1.708415453776413}. Best is trial 43 with value: 0.10777061291165119.[0m
regularization_factors, val_score: 0.107771:  25%|##5       | 5/20 [04:42<14:11, 56.78s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107771:  30%|###       | 6/20 [05:42<13:29, 57.85s/it][32m[I 2020-11-15 15:59:10,423][0m Trial 45 finished with value: 0.10780306168358028 and parameters: {'lambda_l1': 2.969285017267949e-05, 'lambda_l2': 1.5910847702055195}. Best is trial 43 with value: 0.10777061291165119.[0m
regularization_factors, val_score: 0.107771:  30%|###       | 6/20 [05:42<13:29, 57.85s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  35%|###5      | 7/20 [07:05<14:09, 65.33s/it][32m[I 2020-11-15 16:00:33,218][0m Trial 46 finished with value: 0.1077258764303843 and parameters: {'lambda_l1': 2.7522712678013458e-05, 'lambda_l2': 1.759568963823917}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  35%|###5      | 7/20 [07:05<14:09, 65.33s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  40%|####      | 8/20 [08:07<12:52, 64.40s/it][32m[I 2020-11-15 16:01:35,430][0m Trial 47 finished with value: 0.10792545081200847 and parameters: {'lambda_l1': 3.415856902884164e-05, 'lambda_l2': 2.527083257723957}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  40%|####      | 8/20 [08:07<12:52, 64.40s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  45%|####5     | 9/20 [09:07<11:34, 63.17s/it][32m[I 2020-11-15 16:02:35,736][0m Trial 48 finished with value: 0.10788197204910518 and parameters: {'lambda_l1': 3.552605864136303e-05, 'lambda_l2': 2.287320541342843}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  45%|####5     | 9/20 [09:07<11:34, 63.17s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  50%|#####     | 10/20 [10:04<10:12, 61.24s/it][32m[I 2020-11-15 16:03:32,474][0m Trial 49 finished with value: 0.10789476214394983 and parameters: {'lambda_l1': 2.6787532964052956e-05, 'lambda_l2': 1.814523827214241}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  50%|#####     | 10/20 [10:04<10:12, 61.24s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  55%|#####5    | 11/20 [10:46<08:19, 55.55s/it][32m[I 2020-11-15 16:04:14,742][0m Trial 50 finished with value: 0.10831536238791686 and parameters: {'lambda_l1': 6.0938098710117e-08, 'lambda_l2': 4.222455725210905e-07}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  55%|#####5    | 11/20 [10:46<08:19, 55.55s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  60%|######    | 12/20 [11:49<07:41, 57.66s/it][32m[I 2020-11-15 16:05:17,329][0m Trial 51 finished with value: 0.10776636446349928 and parameters: {'lambda_l1': 3.911879249047325e-05, 'lambda_l2': 2.2993210902286445}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  60%|######    | 12/20 [11:49<07:41, 57.66s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  65%|######5   | 13/20 [12:40<06:30, 55.79s/it][32m[I 2020-11-15 16:06:08,742][0m Trial 52 finished with value: 0.10784007681853965 and parameters: {'lambda_l1': 3.546137592387273e-05, 'lambda_l2': 4.262839731383408}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  65%|######5   | 13/20 [12:40<06:30, 55.79s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107726:  70%|#######   | 14/20 [13:24<05:13, 52.21s/it][32m[I 2020-11-15 16:06:52,618][0m Trial 53 finished with value: 0.10846417305666063 and parameters: {'lambda_l1': 0.000589446748418633, 'lambda_l2': 0.023799813021111074}. Best is trial 46 with value: 0.1077258764303843.[0m
regularization_factors, val_score: 0.107726:  70%|#######   | 14/20 [13:24<05:13, 52.21s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107624:  75%|#######5  | 15/20 [14:47<05:06, 61.25s/it][32m[I 2020-11-15 16:08:14,964][0m Trial 54 finished with value: 0.10762392338783457 and parameters: {'lambda_l1': 6.419345380049121e-07, 'lambda_l2': 8.432801302426078}. Best is trial 54 with value: 0.10762392338783457.[0m
regularization_factors, val_score: 0.107624:  75%|#######5  | 15/20 [14:47<05:06, 61.25s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107624:  80%|########  | 16/20 [15:30<03:43, 55.90s/it][32m[I 2020-11-15 16:08:58,384][0m Trial 55 finished with value: 0.10842263424546927 and parameters: {'lambda_l1': 2.2505398796172587e-07, 'lambda_l2': 0.03561142290431853}. Best is trial 54 with value: 0.10762392338783457.[0m
regularization_factors, val_score: 0.107624:  80%|########  | 16/20 [15:30<03:43, 55.90s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107624:  85%|########5 | 17/20 [16:42<03:02, 60.78s/it][32m[I 2020-11-15 16:10:10,535][0m Trial 56 finished with value: 0.10767361464497754 and parameters: {'lambda_l1': 1.1632728982276379e-06, 'lambda_l2': 9.647119414648337}. Best is trial 54 with value: 0.10762392338783457.[0m
regularization_factors, val_score: 0.107624:  85%|########5 | 17/20 [16:42<03:02, 60.78s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107624:  90%|######### | 18/20 [17:39<01:59, 59.52s/it][32m[I 2020-11-15 16:11:07,107][0m Trial 57 finished with value: 0.10776000340642172 and parameters: {'lambda_l1': 5.16448256056151e-07, 'lambda_l2': 7.9195104665729605}. Best is trial 54 with value: 0.10762392338783457.[0m
regularization_factors, val_score: 0.107624:  90%|######### | 18/20 [17:39<01:59, 59.52s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107624:  95%|#########5| 19/20 [18:45<01:01, 61.47s/it][32m[I 2020-11-15 16:12:13,152][0m Trial 58 finished with value: 0.10767316029265367 and parameters: {'lambda_l1': 5.875791200273285e-07, 'lambda_l2': 9.465904293035546}. Best is trial 54 with value: 0.10762392338783457.[0m
regularization_factors, val_score: 0.107624:  95%|#########5| 19/20 [18:45<01:01, 61.47s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


regularization_factors, val_score: 0.107624: 100%|##########| 20/20 [19:46<00:00, 61.55s/it][32m[I 2020-11-15 16:13:14,885][0m Trial 59 finished with value: 0.10779734107162874 and parameters: {'lambda_l1': 4.2181274695494245e-07, 'lambda_l2': 8.05718000414992}. Best is trial 54 with value: 0.10762392338783457.[0m
regularization_factors, val_score: 0.107624: 100%|##########| 20/20 [19:46<00:00, 59.35s/it]
min_data_in_leaf, val_score: 0.107624:   0%|          | 0/5 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


min_data_in_leaf, val_score: 0.107624:  20%|##        | 1/5 [01:07<04:29, 67.46s/it][32m[I 2020-11-15 16:14:22,352][0m Trial 60 finished with value: 0.1076843212154368 and parameters: {'min_child_samples': 50}. Best is trial 60 with value: 0.1076843212154368.[0m
min_data_in_leaf, val_score: 0.107624:  20%|##        | 1/5 [01:07<04:29, 67.46s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


min_data_in_leaf, val_score: 0.107529:  40%|####      | 2/5 [02:23<03:30, 70.15s/it][32m[I 2020-11-15 16:15:38,781][0m Trial 61 finished with value: 0.10752893954228449 and parameters: {'min_child_samples': 100}. Best is trial 61 with value: 0.10752893954228449.[0m
min_data_in_leaf, val_score: 0.107529:  40%|####      | 2/5 [02:23<03:30, 70.15s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


min_data_in_leaf, val_score: 0.107529:  60%|######    | 3/5 [03:33<02:19, 69.97s/it][32m[I 2020-11-15 16:16:48,322][0m Trial 62 finished with value: 0.10764034451977081 and parameters: {'min_child_samples': 25}. Best is trial 61 with value: 0.10752893954228449.[0m
min_data_in_leaf, val_score: 0.107529:  60%|######    | 3/5 [03:33<02:19, 69.97s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


min_data_in_leaf, val_score: 0.107529:  80%|########  | 4/5 [04:55<01:13, 73.57s/it][32m[I 2020-11-15 16:18:10,294][0m Trial 63 finished with value: 0.1076631854372158 and parameters: {'min_child_samples': 5}. Best is trial 61 with value: 0.10752893954228449.[0m
min_data_in_leaf, val_score: 0.107529:  80%|########  | 4/5 [04:55<01:13, 73.57s/it]

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1863
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468


min_data_in_leaf, val_score: 0.107529: 100%|##########| 5/5 [06:18<00:00, 76.55s/it][32m[I 2020-11-15 16:19:33,813][0m Trial 64 finished with value: 0.10763984588945337 and parameters: {'min_child_samples': 10}. Best is trial 61 with value: 0.10752893954228449.[0m
min_data_in_leaf, val_score: 0.107529: 100%|##########| 5/5 [06:18<00:00, 75.79s/it]

Best Params: {'objective': 'binary', 'boosting_type': 'gbdt', 'seed': 0, 'learning_rate': 0.1, 'feature_pre_filter': False, 'lambda_l1': 6.419345380049121e-07, 'lambda_l2': 8.432801302426078, 'num_leaves': 212, 'feature_fraction': 0.4, 'bagging_fraction': 0.9907178796872467, 'bagging_freq': 2, 'min_child_samples': 100, 'num_iterations': 1000, 'early_stopping_round': 100}
Best Iteration: 245
Best Score: defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('binary_logloss', 0.10752893954228449), ('pr_auc', 0.22382995580267329)])})
CPU times: user 6h 57min 57s, sys: 6min 31s, total: 7h 4min 28s
Wall time: 59min 45s





In [47]:
%%time
oof, models, score = train_lgbm(train_feat_df, y, cv, params=lgbm_param)

[LightGBM] [Info] Number of positive: 52392, number of negative: 1545684
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1853
[LightGBM] [Info] Number of data points in the train set: 1598076, number of used features: 36
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032784 -> initscore=-3.384468
[LightGBM] [Info] Start training from score -3.384468
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.111217	valid_0's pr_auc: 0.201038
[200]	valid_0's binary_logloss: 0.109942	valid_0's pr_auc: 0.206783
[300]	valid_0's binary_logloss: 0.109278	valid_0's pr_auc: 0.210614
[400]	valid_0's binary_logloss: 0.108899	valid_0's pr_auc: 0.21246
[500]	valid_0's binary_logloss: 0.108661	valid_0's pr_auc: 0.213817
[600]	valid_0's binary_logloss: 0.108505	valid_0's pr_auc: 0.214109
Early stopping, best iteration is:
[595]	valid_0's binary_logloss: 0.1084

[100]	valid_0's binary_logloss: 0.111355	valid_0's pr_auc: 0.194843
[200]	valid_0's binary_logloss: 0.110184	valid_0's pr_auc: 0.200396
[300]	valid_0's binary_logloss: 0.109556	valid_0's pr_auc: 0.203279
[400]	valid_0's binary_logloss: 0.109172	valid_0's pr_auc: 0.204732
[500]	valid_0's binary_logloss: 0.108885	valid_0's pr_auc: 0.205386
[600]	valid_0's binary_logloss: 0.108763	valid_0's pr_auc: 0.206126
[700]	valid_0's binary_logloss: 0.108554	valid_0's pr_auc: 0.206588
[800]	valid_0's binary_logloss: 0.108462	valid_0's pr_auc: 0.207238
[900]	valid_0's binary_logloss: 0.108382	valid_0's pr_auc: 0.207689
[1000]	valid_0's binary_logloss: 0.10834	valid_0's pr_auc: 0.208322
Did not meet early stopping. Best iteration is:
[997]	valid_0's binary_logloss: 0.108338	valid_0's pr_auc: 0.208325
Fold 4 PR-AUC: 0.2083
FINISHED \ whole score: 0.2151
CPU times: user 58min 43s, sys: 23.1 s, total: 59min 6s
Wall time: 12min 33s


In [48]:
def feat_imp(model):
    fi = model.feature_importance()
    fn = model.feature_name()
    df_feature_importance = pd.DataFrame({'name':fn, 'imp':fi})
    df_feature_importance.sort_values('imp', inplace=True)
    return df_feature_importance

def feature_importance(models):
    fi = pd.DataFrame(columns=['name'])
    for i, model in enumerate(models):
        fi_tmp = feat_imp(model)
        colname = 'imp_{}'.format(i)
        fi_tmp.rename(columns={'imp': colname}, inplace=True)
        fi = pd.merge(fi, fi_tmp, on=['name'], how='outer')
    fi['sum'] = fi.sum(axis=1)
    return fi.sort_values(['sum'], ascending=False)

In [49]:
feature_importance(models)

Unnamed: 0,name,imp_0,imp_1,imp_2,imp_3,imp_4,sum
40,max_login_interval,2070,3516,3641,3492,3616,16335
39,last_login_interval,1976,3137,3358,3445,3460,15376
37,first_login_interval,1745,3117,3174,3250,3109,14395
38,horizontal_file_size,1922,2921,3096,2990,3136,14065
36,total_minute,1344,2368,2594,2609,2574,11489
34,horizontal_converted_file_size,1110,1881,2058,2045,2041,9135
35,day,1175,1935,2005,1946,1948,9009
33,horizontal_duration,1025,1250,1398,1356,1364,6393
31,login_frequency,670,1031,1108,1068,1147,5024
32,mst_user_type_id,812,1018,1031,1045,1035,4941


In [50]:
%%time
pred_list = []
for model in models:
    print('best_iteration', model.best_iteration)
    pred = model.predict(test_feat_df, num_iteration = model.best_iteration)
    pred_list.append(pred)
    
pred = np.mean(pred_list, axis=0)

best_iteration 595
best_iteration 946
best_iteration 1000
best_iteration 988
best_iteration 997
CPU times: user 3min 27s, sys: 430 ms, total: 3min 27s
Wall time: 11.5 s


In [51]:
assert len(pred) == len(test_df)

NameError: name 'test_df' is not defined

In [None]:
sub_df = pd.DataFrame({ 'target': pred })
sub_df.to_csv(os.path.join(OUTPUT_DIR, 'submission.csv'), index=False)

In [None]:
print('- feature={}'.format(feature_count))
print('- score={:.4f}'.format(score))

#### simple_2
- Wall time: 47min 46s
- feature= 41
- score= 0.214588
- publicLB= 0.1907

#### tuning
```
Best Params: {
    'objective': 'binary', 
    'boosting_type': 'gbdt', 
    'seed': 0, 
    'learning_rate': 0.1, 
    'feature_pre_filter': False, 
    'lambda_l1': 6.419345380049121e-07, 
    'lambda_l2': 8.432801302426078, 
    'num_leaves': 212, 
    'feature_fraction': 0.4, 
    'bagging_fraction': 0.9907178796872467, 
    'bagging_freq': 2, 
    'min_child_samples': 100, 
    'num_iterations': 1000, 
    'early_stopping_round': 100
}
Best Iteration: 245
Best Score: 'pr_auc', 0.22382995580267329
```