In [1]:
import datetime
import numpy as np
import pandas as pd
from glob import glob

import warnings
warnings.simplefilter("ignore")

In [53]:
# List the raw input files available under ./data2.
# NOTE(review): the execution counter (53) shows this cell was last run after
# the rest of the notebook, not in top-to-bottom order.
glob('./data2/*')

['./data2/login.csv',
 './data2/purchase_detail.csv',
 './data2/submission.csv',
 './data2/user_info.csv',
 './data2/user_label_train.csv']

# load data

In [3]:
# Read the five raw tables, in a fixed order, into their own DataFrames.
(
    df_login,
    df_user_info,
    df_submission,
    df_user_label_train,
    df_purchase_detail,
) = map(
    pd.read_csv,
    (
        './data2/login.csv',
        './data2/user_info.csv',
        './data2/submission.csv',
        './data2/user_label_train.csv',
        './data2/purchase_detail.csv',
    ),
)

In [4]:
def reduce_mem_usage(df, verbose=True, use_float16=True):
    """Downcast the numeric columns of ``df`` in place to smaller dtypes.

    Integer columns are moved to the narrowest of int8/int16/int32 whose range
    contains the column's min and max (bounds inclusive); float columns are
    moved to float16/float32 on the same range test. A column is never widened:
    if the first fitting dtype is not smaller than the current one, the column
    is left as it is. Columns whose min/max are NaN (all-missing or empty) are
    left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    verbose : bool, default True
        Print the resulting memory footprint and the percentage saved.
    use_float16 : bool, default True
        Allow float columns to be stored as float16. The test is on value
        *range* only; half precision keeps roughly three significant decimal
        digits, so pass ``False`` when fractional precision matters.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_targets = (np.int8, np.int16, np.int32)
    float_targets = (np.float16, np.float32) if use_float16 else (np.float32,)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            targets, limits_of = int_targets, np.iinfo
        else:
            targets, limits_of = float_targets, np.finfo
        for target in targets:
            limits = limits_of(target)
            # Inclusive bounds: a value equal to the dtype's min/max still fits.
            if c_min >= limits.min and c_max <= limits.max:
                # Only ever shrink; the first fitting dtype may already be the current one.
                if np.dtype(target).itemsize < col_type.itemsize:
                    df[col] = df[col].astype(target)
                break
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the percentage against a zero-byte starting footprint.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [5]:
# Shrink every loaded table. reduce_mem_usage assigns the downcast columns back
# onto the frame it receives, so its return value does not need to be kept.
for _df in (df_login, df_user_info, df_submission, df_user_label_train, df_purchase_detail):
    reduce_mem_usage(_df)

Mem. usage decreased to 628.64 Mb (45.8% reduction)
Mem. usage decreased to  8.14 Mb (57.5% reduction)
Mem. usage decreased to  0.29 Mb (50.0% reduction)
Mem. usage decreased to  2.04 Mb (68.7% reduction)
Mem. usage decreased to 141.21 Mb (52.5% reduction)


# feature engineering

## df_user_info

In [6]:
# Account age in whole days as of the 2020-07-31 reference date.
# `.dt.days` replaces `astype('timedelta64[D]')`, which pandas >= 2.0 rejects
# (only s/ms/us/ns timedelta resolutions are supported). The float cast keeps
# the float column the older call produced.
df_user_info['lifetime'] = (
    pd.to_datetime('2020-07-31') - pd.to_datetime(df_user_info['enroll_time'])
).dt.days.astype('float64')
# Age in years relative to 2020; stays NaN where birth_year is missing.
df_user_info['age'] = 2020 - df_user_info['birth_year']

## login

In [7]:
# Per-user login statistics: the distribution of login_times plus the number of
# `date` rows recorded for the user.
df_login_feature = df_login.groupby('userid').agg({
    'login_times': ['sum', 'min', 'max', 'std', 'mean', 'median'],
    'date': ['count'],
})
# Flatten the (column, statistic) pairs into names such as "login_times_sum".
df_login_feature.columns = [
    f'{column}_{stat}' for column, stat in df_login_feature.columns
]

In [8]:
# Start the modelling table from user attributes plus login statistics.
# NOTE(review): the inner join keeps only users present in both tables, so a
# user with no login rows is dropped here -- confirm this is intended.
df = df_user_info.merge(df_login_feature, how='inner', on='userid')

##  purchase_detail

In [9]:
# Parse the purchase date, then order rows chronologically within each user
# (both sort keys ascending).
df_purchase_detail['grass_date'] = pd.to_datetime(df_purchase_detail['grass_date'])
df_purchase_detail = df_purchase_detail.sort_values(
    by=['userid', 'grass_date'],
    ascending=True,
)

In [10]:
# Purchase date expressed as seconds elapsed since 1970-01-01.
df_purchase_detail['epochtime'] = (
    df_purchase_detail['grass_date']
    .sub(pd.Timestamp(1970, 1, 1))
    .dt.total_seconds()
)

In [11]:
# One row per (user, purchase day); the frame is already sorted by user and date.
df_dt = df_purchase_detail[['userid', 'grass_date']].drop_duplicates()
# Days since the user's previous purchase day (NaN for each user's first day).
# `.dt.days` replaces `astype('timedelta64[D]')`, which pandas >= 2.0 rejects
# (only s/ms/us/ns timedelta resolutions are supported).
df_dt['dt_diff'] = df_dt.groupby('userid')['grass_date'].diff().dt.days

In [12]:
# Per-user summary of the gaps (in days) between consecutive purchase days.
df_purchase_dt_diff = df_dt.groupby('userid').agg(
    {'dt_diff': ['sum', 'min', 'max', 'std', 'mean']}
)
# Flatten the (column, statistic) pairs into names such as "dt_diff_mean".
df_purchase_dt_diff.columns = [
    f'{column}_{stat}' for column, stat in df_purchase_dt_diff.columns
]

In [13]:
# Frequency-encode the purchase category: cat_map holds each category's share
# of all purchase rows (value_counts with normalize=True), and cat_encode
# replaces the category id on every row with that share.
cat_map = df_purchase_detail['category_encoded'].value_counts(normalize=True)
df_purchase_detail['cat_encode'] = df_purchase_detail['category_encoded'].map(cat_map)

In [14]:
# User x category table of summed order_count; combinations that never occur
# are filled with 0.
df_purchase_detail_ordersum = pd.crosstab(
    df_purchase_detail['userid'],
    df_purchase_detail['category_encoded'],
    values=df_purchase_detail['order_count'],
    aggfunc='sum',
).fillna(0)

# Prefix each category id so the columns stay unique after merging.
df_purchase_detail_ordersum.columns = [
    'ordercount_' + str(category) for category in df_purchase_detail_ordersum.columns
]

In [15]:
# User x category table of summed total_amount; combinations that never occur
# are filled with 0.
df_purchase_detail_totalrsum = pd.crosstab(
    df_purchase_detail['userid'],
    df_purchase_detail['category_encoded'],
    values=df_purchase_detail['total_amount'],
    aggfunc='sum',
).fillna(0)

# Prefix each category id so the columns stay unique after merging.
df_purchase_detail_totalrsum.columns = [
    'totalcount_' + str(category) for category in df_purchase_detail_totalrsum.columns
]

In [16]:
# Average amount per order on each purchase row.
# NOTE(review): a row with order_count == 0 would produce inf/NaN here and
# propagate into the per-user statistics -- confirm order_count is always > 0.
df_purchase_detail['amount_per_order'] = df_purchase_detail['total_amount'] / df_purchase_detail['order_count']

In [17]:
# Per-user summary statistics over all of the user's purchase rows.
df_purchase_detail_peruserid = df_purchase_detail.groupby('userid').agg(dict(
    order_count=['count', 'mean', 'std', 'median', 'sum', 'max', 'min'],
    total_amount=['count', 'mean', 'std', 'median', 'sum', 'max', 'min'],
    cat_encode=['sum', 'mean', 'std', 'max', 'min'],
    amount_per_order=['sum', 'min', 'max', 'std', 'mean'],
    epochtime=['count', 'mean', 'std', 'median', 'sum', 'max', 'min'],
))
# Flatten to names such as "purchase_detail_order_count_mean".
df_purchase_detail_peruserid.columns = [
    'purchase_detail_' + '_'.join(pair) for pair in df_purchase_detail_peruserid.columns
]

In [18]:
def normalization(_df):
    """Z-score ``_df``: subtract its mean, then divide by its standard deviation.

    For a DataFrame both statistics are computed per column. The input is not
    modified; a new object is returned.
    """
    centred = _df.sub(_df.mean())
    return centred.div(_df.std())

In [19]:
# Standardise every per-user purchase statistic column (z-score) and rebind the
# name to the scaled frame; the unscaled aggregates are no longer available
# after this cell.
df_purchase_detail_peruserid = normalization(df_purchase_detail_peruserid)

In [20]:
# User x category table counting purchase rows (non-null order_count values);
# combinations that never occur are filled with 0.
df_purchase_detail_peruserid_count = pd.crosstab(
    df_purchase_detail['userid'],
    df_purchase_detail['category_encoded'],
    values=df_purchase_detail['order_count'],
    aggfunc='count',
).fillna(0)

# Prefix each category id so the columns stay unique after merging.
df_purchase_detail_peruserid_count.columns = [
    'count_' + str(category) for category in df_purchase_detail_peruserid_count.columns
]

In [21]:
# Attach every purchase-derived feature table to the modelling frame.
# NOTE(review): each join is an inner join, so users missing from any of these
# tables (for example users with no purchase rows) are dropped -- confirm this
# is intended.
df = (
    df
    .merge(df_purchase_dt_diff, on='userid', how='inner')
    .merge(df_purchase_detail_ordersum, on='userid', how='inner')
    .merge(df_purchase_detail_totalrsum, on='userid', how='inner')
    .merge(df_purchase_detail_peruserid, on='userid', how='inner')
    .merge(df_purchase_detail_peruserid_count, on='userid', how='inner')
)

In [22]:
# Bring in the training labels. The outer join keeps users from both sides:
# users without a label get NaN in `label`, and labelled users that are absent
# from the feature table get NaN features.
df = df.merge(df_user_label_train, how='outer', on='userid')

In [23]:
# Display the assembled feature table (pandas truncates the rendering).
df

Unnamed: 0,userid,gender,is_seller,birth_year,enroll_time,lifetime,age,login_times_sum,login_times_min,login_times_max,...,count_15,count_16,count_17,count_18,count_19,count_20,count_21,count_22,count_23,label
0,1,2.0,1,1985.0,2015-05-27,1892.0,35.0,50.0,1,4,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,1.0,1,1980.0,2015-05-27,1892.0,40.0,487.0,1,6,...,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,3,2.0,1,2014.0,2015-05-27,1892.0,6.0,770.0,2,8,...,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0
3,4,1.0,1,1978.0,2015-05-27,1892.0,42.0,426.0,1,5,...,1.0,0.0,0.0,2.0,0.0,0.0,0.0,6.0,2.0,0.0
4,5,1.0,1,1978.0,2015-05-27,1892.0,42.0,824.0,2,10,...,3.0,3.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502152,521605,2.0,0,2004.0,2020-02-01,181.0,16.0,63.0,1,4,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
502153,521606,2.0,0,1990.0,2020-02-01,181.0,30.0,97.0,1,2,...,1.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,2.0,0.0
502154,521607,1.0,0,,2020-02-01,181.0,,201.0,1,3,...,1.0,0.0,2.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
502155,521608,2.0,0,2005.0,2020-02-01,181.0,15.0,113.0,1,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
#df.to_pickle('a1.pkl')

In [25]:
#df = pd.read_pickle('a1.pkl')

# model

In [26]:
# Target column, and the feature columns: everything except the identifier,
# the raw birth/enrolment fields and the target itself.
y = 'label'
x = [col for col in df.columns if col not in ('userid', 'birth_year', 'enroll_time', y)]

In [27]:
from sklearn.model_selection import GridSearchCV,cross_val_score,train_test_split

# Missing gender / is_seller values become the sentinel -1 so both columns can
# be stored as integers.
df['gender'] = df['gender'].fillna(-1).astype('int')
df['is_seller'] = df['is_seller'].fillna(-1).astype('int')

# Hold out 20% of the labelled rows for validation.
X_train, X_valid, y_train, y_valid = train_test_split(
    df.loc[df.label.notnull(), x],
    df.loc[df.label.notnull(), y],
    test_size=0.2,
    random_state=2048,
)

In [28]:
# Rows whose label is missing form the prediction set.
X_test = df.loc[df.label.isnull(), x]
y_test = df.loc[df.label.isnull(), y]

In [29]:
# Number of feature columns fed to the models.
len(x)

116

## xgb

In [30]:
import gc
from bayes_opt import BayesianOptimization
from xgboost import XGBClassifier

n_folds = 5


def xgb_eval(gamma, max_depth, min_child_weight, colsample_bytree, subsample):
    """Score one XGBoost hyper-parameter set for the Bayesian optimiser.

    Builds a GPU ``XGBClassifier`` from the sampled values (``max_depth`` is
    truncated to an int), evaluates it with ``n_folds``-fold cross-validation
    on ``X_train``/``y_train`` and returns the mean ROC-AUC across folds.
    Every fold is fitted with early stopping (100 rounds) monitored on the
    same ``X_valid``/``y_valid`` hold-out.

    NOTE(review): passing ``fit_params`` to ``cross_val_score`` and
    ``early_stopping_rounds`` through ``fit`` depends on the installed
    scikit-learn / xgboost versions -- confirm against the pinned environment.
    """
    early_stopping = dict(
        early_stopping_rounds=100,
        verbose=False,
        eval_set=[[X_valid, y_valid]],
    )
    candidate = XGBClassifier(
        tree_method='gpu_hist',
        gpu_id='1',
        n_estimators=1000,
        learning_rate=0.03,
        gamma=gamma,
        max_depth=int(max_depth),
        min_child_weight=min_child_weight,
        colsample_bytree=colsample_bytree,
        subsample=subsample,
        n_jobs=10,
        seed=666,
    )
    fold_scores = cross_val_score(
        candidate,
        X_train,
        y_train,
        scoring='roc_auc',
        cv=n_folds,
        fit_params=early_stopping,
        verbose=0,
        n_jobs=1,
    )
    return fold_scores.mean()

# Search space for the XGBoost objective; each entry is a (lower, upper) bound.
xgbBO = BayesianOptimization(
    f=xgb_eval,
    pbounds=dict(
        gamma=(0, 1),
        max_depth=(3, 20),
        min_child_weight=(2, 20),
        colsample_bytree=(0.3, 0.9),
        subsample=(0.3, 0.9),
    ),
    random_state=0,
)

In [31]:
# Run the search: 5 random initial points followed by 20 guided iterations,
# then print the best (target, params) record found.
print('Bayesian Optimization Start')
xgbBO.maximize(init_points=5 , n_iter=20)
print('Bayesian Optimization End')
print(xgbBO.max)

Bayesian Optimization Start
|   iter    |  target   | colsam... |   gamma   | max_depth | min_ch... | subsample |
-------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.697   [0m | [0m 0.6293  [0m | [0m 0.7152  [0m | [0m 13.25   [0m | [0m 11.81   [0m | [0m 0.5542  [0m |
| [0m 2       [0m | [0m 0.6955  [0m | [0m 0.6875  [0m | [0m 0.4376  [0m | [0m 18.16   [0m | [0m 19.35   [0m | [0m 0.5301  [0m |
| [95m 3       [0m | [95m 0.6976  [0m | [95m 0.775   [0m | [95m 0.5289  [0m | [95m 12.66   [0m | [95m 18.66   [0m | [95m 0.3426  [0m |
| [0m 4       [0m | [0m 0.6958  [0m | [0m 0.3523  [0m | [0m 0.02022 [0m | [0m 17.15   [0m | [0m 16.01   [0m | [0m 0.822   [0m |
| [95m 5       [0m | [95m 0.6986  [0m | [95m 0.8872  [0m | [95m 0.7992  [0m | [95m 10.85   [0m | [95m 16.05   [0m | [95m 0.371   [0m |
| [95m 6       [0m | [95m 0.6993  [0m | [95m 0.9     [0m | [95m 0.956

In [32]:
# Final XGBoost model built from the best parameters found by the search.
# max_depth is truncated to an int; the other tuned values are passed as found.
# NOTE(review): unlike the search objective, no `seed` is set here -- confirm
# whether that is intended.
model = XGBClassifier(
    tree_method='gpu_hist',
    gpu_id='1',
    n_estimators=1000,
    learning_rate=0.03,
    n_jobs=10,
    max_depth=int(xgbBO.max['params']['max_depth']),
    **{
        name: xgbBO.max['params'][name]
        for name in ('gamma', 'min_child_weight', 'subsample', 'colsample_bytree')
    }
)

In [33]:
# Fit the final XGBoost model on every labelled row (train + validation).
model.fit(df.loc[df.label.notnull(), x], df.loc[df.label.notnull(), y])

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.3, gamma=1.0, gpu_id='1',
              importance_type='gain', interaction_constraints='',
              learning_rate=0.03, max_delta_step=0, max_depth=6,
              min_child_weight=6.672319828362536, missing=nan,
              monotone_constraints='(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)',
              n_estimators=1000, n_jobs=10, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=0.9,
              tree_method='gpu_hist', validate_parameters=1, verbosity=None)

## lgb

In [34]:
import lightgbm as lgb
# Cross-validation settings and the LightGBM dataset built from all labelled rows.
n_folds, random_seed = 5, 666
lgb_data = lgb.Dataset(
    data=df.loc[df.label.notnull(), x].values,
    label=df.loc[df.label.notnull(), y].values,
)

def lgb_eval(num_leaves, feature_fraction, bagging_fraction, max_depth,
             lambda_l1, lambda_l2, min_split_gain, min_child_weight):
    """Score one LightGBM hyper-parameter set for the Bayesian optimiser.

    ``num_leaves`` and ``max_depth`` are truncated to ints, the two fraction
    parameters are clipped to [0, 1] and the L1/L2 penalties are floored at 0.
    The configuration is evaluated with stratified ``n_folds``-fold ``lgb.cv``
    on ``lgb_data`` (early stopping after 100 rounds) and the best mean AUC
    over the boosting rounds is returned.

    NOTE(review): LightGBM documents that ``bagging_fraction`` only takes
    effect when ``bagging_freq`` is non-zero, and no ``bagging_freq`` is set
    here -- confirm whether bagging is really being tuned.
    NOTE(review): the ``early_stopping_rounds`` argument of ``lgb.cv`` and the
    ``'auc-mean'`` result key depend on the installed LightGBM version.
    """
    def _clip_unit(value):
        # Clamp a sampled fraction into [0, 1].
        return max(min(value, 1), 0)

    params = {
        'objective': 'binary',
        'num_iterations': 1000,
        'learning_rate': 0.03,
        'metric': 'auc',
        'n_jobs': 70,
        'num_leaves': int(num_leaves),
        'feature_fraction': _clip_unit(feature_fraction),
        'bagging_fraction': _clip_unit(bagging_fraction),
        'max_depth': int(max_depth),
        'lambda_l1': max(lambda_l1, 0),
        'lambda_l2': max(lambda_l2, 0),
        'min_split_gain': min_split_gain,
        'min_child_weight': min_child_weight,
    }
    history = lgb.cv(
        params,
        lgb_data,
        nfold=n_folds,
        early_stopping_rounds=100,
        seed=random_seed,
        stratified=True,
    )
    return max(history['auc-mean'])

# Search space for the LightGBM objective; each entry is a (lower, upper) bound.
lgbBO = BayesianOptimization(
    f=lgb_eval,
    pbounds=dict(
        num_leaves=(24, 60),
        feature_fraction=(0.1, 0.9),
        bagging_fraction=(0.5, 1),
        max_depth=(4, 13),
        lambda_l1=(0, 1),
        lambda_l2=(0, 1),
        min_split_gain=(0.001, 0.5),
        min_child_weight=(5, 50),
    ),
    random_state=0,
)

In [35]:
# Run the LightGBM search: 5 random initial points, then 20 guided iterations.
print('Bayesian Optimzation Start')
lgbBO.maximize(init_points=5, n_iter=20)
print('Bayesian Optimzation End')

Bayesian Optimzation Start
|   iter    |  target   | baggin... | featur... | lambda_l1 | lambda_l2 | max_depth | min_ch... | min_sp... | num_le... |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.701   [0m | [0m 0.7744  [0m | [0m 0.6722  [0m | [0m 0.6028  [0m | [0m 0.5449  [0m | [0m 7.813   [0m | [0m 34.07   [0m | [0m 0.2194  [0m | [0m 56.1    [0m |
| [95m 2       [0m | [95m 0.7012  [0m | [95m 0.9818  [0m | [95m 0.4068  [0m | [95m 0.7917  [0m | [95m 0.5289  [0m | [95m 9.112   [0m | [95m 46.65   [0m | [95m 0.03645 [0m | [95m 27.14   [0m |
| [0m 3       [0m | [0m 0.701   [0m | [0m 0.5101  [0m | [0m 0.7661  [0m | [0m 0.7782  [0m | [0m 0.87    [0m | [0m 12.81   [0m | [0m 40.96   [0m | [0m 0.2313  [0m | [0m 52.1    [0m |
| [0m 4       [0m | [0m 0.7011  [0m | [0m 0.5591  [0m | [0m 0.6119  [0m | [0m 0.1434  [0m | [0m 0.9447

In [36]:
from lightgbm import LGBMClassifier

# Final LightGBM model built from the best parameters found by the search.
# num_leaves and max_depth are truncated to ints; the rest are passed as found.
# NOTE(review): LightGBM documents that bagging_fraction only takes effect when
# bagging_freq is non-zero, and none is set here -- confirm.
lgb_model = LGBMClassifier(
    objective='binary',
    learning_rate=0.03,
    num_iterations=1000,
    n_jobs=50,
    seed=666,
    num_leaves=int(lgbBO.max['params']['num_leaves']),
    max_depth=int(lgbBO.max['params']['max_depth']),
    **{
        name: lgbBO.max['params'][name]
        for name in (
            'bagging_fraction',
            'feature_fraction',
            'lambda_l1',
            'lambda_l2',
            'min_child_weight',
            'min_split_gain',
        )
    }
)

In [37]:
# Fit the final LightGBM model on every labelled row, passed as plain arrays.
lgb_model.fit(
    df.loc[df.label.notnull(), x].values,
    df.loc[df.label.notnull(), y].values,
)

LGBMClassifier(bagging_fraction=0.686983201146895,
               feature_fraction=0.33772719177497934,
               lambda_l1=0.4127431879546162, lambda_l2=0.13758642774581364,
               learning_rate=0.03, max_depth=7,
               min_child_weight=23.01026378937573,
               min_split_gain=0.4485182718412468, n_jobs=50,
               num_iterations=1000, num_leaves=54, objective='binary',
               seed=666)

## cat

In [38]:
from catboost import CatBoostClassifier
# Positions of the columns CatBoost should treat as categorical.
# NOTE(review): given the column order of `x` these are presumably gender and
# is_seller -- confirm.
cat_idx = [0, 1]
# NOTE(review): scale_pos_weight is presumably the negative/positive class
# ratio of the training labels -- confirm where 280944 and 145888 come from.
cat_model = CatBoostClassifier(
    task_type="GPU",
    devices="1",
    random_seed=42,
    eval_metric='AUC',
    iterations=1500,
    learning_rate=0.02,
    depth=12,
    l2_leaf_reg=40,
    bootstrap_type='Bernoulli',
    subsample=0.8,
    scale_pos_weight=280944/145888,
)
# Validation run: train on the training split and track AUC on the hold-out.
cat_model.fit(
    X_train,
    y_train,
    cat_features=cat_idx,
    eval_set=(X_valid, y_valid),
)

0:	learn: 0.6371061	test: 0.6365038	best: 0.6365038 (0)	total: 214ms	remaining: 5m 20s
1:	learn: 0.6423149	test: 0.6423554	best: 0.6423554 (1)	total: 241ms	remaining: 3m
2:	learn: 0.6422953	test: 0.6423467	best: 0.6423554 (1)	total: 434ms	remaining: 3m 36s
3:	learn: 0.6447704	test: 0.6444827	best: 0.6444827 (3)	total: 627ms	remaining: 3m 54s
4:	learn: 0.6455306	test: 0.6453711	best: 0.6453711 (4)	total: 740ms	remaining: 3m 41s
5:	learn: 0.6455061	test: 0.6453030	best: 0.6453711 (4)	total: 854ms	remaining: 3m 32s
6:	learn: 0.6455128	test: 0.6453034	best: 0.6453711 (4)	total: 1.05s	remaining: 3m 43s
7:	learn: 0.6466119	test: 0.6463639	best: 0.6463639 (7)	total: 1.24s	remaining: 3m 51s
8:	learn: 0.6466781	test: 0.6464500	best: 0.6464500 (8)	total: 1.43s	remaining: 3m 57s
9:	learn: 0.6492580	test: 0.6490575	best: 0.6490575 (9)	total: 1.64s	remaining: 4m 3s
10:	learn: 0.6502423	test: 0.6500187	best: 0.6500187 (10)	total: 1.66s	remaining: 3m 44s
11:	learn: 0.6518463	test: 0.6516728	best: 0.6

<catboost.core.CatBoostClassifier at 0x7fc7b9c5b2e8>

In [39]:
# Refit CatBoost on every labelled row for the final predictions.
# The frame is passed as a DataFrame (not `.values`) together with
# `cat_features=cat_idx`, so the refit uses the same categorical handling as
# the validated run above; a float `.values` matrix without `cat_features`
# would silently treat those columns as numeric.
cat_model.fit(
    df[df.label.notnull()][x],
    df[df.label.notnull()][y],
    cat_features=cat_idx,
    verbose_eval=False,
)

<catboost.core.CatBoostClassifier at 0x7fc7b9c5b2e8>

## stack

In [40]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
# Base learners for the stack, as (name, estimator) pairs.
estimators = list(zip(('xgb', 'lgb', 'cat'), (model, lgb_model, cat_model)))
# Logistic regression is trained on 5-fold out-of-fold predictions of the base learners.
clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    cv=5,
)

In [41]:
# Fit the stacked ensemble on every labelled row, passed as plain arrays.
clf.fit(
    df.loc[df.label.notnull(), x].values,
    df.loc[df.label.notnull(), y].values,
)

0:	learn: 0.6841466	total: 62.8ms	remaining: 1m 34s
1:	learn: 0.6857036	total: 105ms	remaining: 1m 18s
2:	learn: 0.6885526	total: 167ms	remaining: 1m 23s
3:	learn: 0.6891016	total: 230ms	remaining: 1m 26s
4:	learn: 0.6894420	total: 292ms	remaining: 1m 27s
5:	learn: 0.6893832	total: 353ms	remaining: 1m 27s
6:	learn: 0.6893469	total: 418ms	remaining: 1m 29s
7:	learn: 0.6894933	total: 480ms	remaining: 1m 29s
8:	learn: 0.6898310	total: 541ms	remaining: 1m 29s
9:	learn: 0.6902256	total: 604ms	remaining: 1m 29s
10:	learn: 0.6902955	total: 665ms	remaining: 1m 29s
11:	learn: 0.6900221	total: 683ms	remaining: 1m 24s
12:	learn: 0.6903705	total: 745ms	remaining: 1m 25s
13:	learn: 0.6907502	total: 807ms	remaining: 1m 25s
14:	learn: 0.6909386	total: 869ms	remaining: 1m 26s
15:	learn: 0.6911591	total: 931ms	remaining: 1m 26s
16:	learn: 0.6912127	total: 993ms	remaining: 1m 26s
17:	learn: 0.6912799	total: 1.05s	remaining: 1m 26s
18:	learn: 0.6915154	total: 1.12s	remaining: 1m 27s
19:	learn: 0.6916591	

StackingClassifier(cv=5,
                   estimators=[('xgb',
                                XGBClassifier(base_score=0.5, booster='gbtree',
                                              colsample_bylevel=1,
                                              colsample_bynode=1,
                                              colsample_bytree=0.3, gamma=1.0,
                                              gpu_id='1',
                                              importance_type='gain',
                                              interaction_constraints='',
                                              learning_rate=0.03,
                                              max_delta_step=0, max_depth=6,
                                              min_child_weight=6.672319828362536,
                                              missing=nan,
                                              monotone_constraints='(0,0,0,0,0,0,...
                                               feature_fraction=0.3377271

# mixture

In [42]:
# XGBoost probability of the second class (column 1) for each unlabelled row, as a flat array.
xgb_pred = model.predict_proba(X_test)[:,1].reshape(-1)

In [43]:
# LightGBM probability of the second class (column 1) for each unlabelled row, as a flat array.
lgb_pred = lgb_model.predict_proba(X_test)[:,1].reshape(-1)

In [44]:
# CatBoost probability of the second class (column 1) for each unlabelled row, as a flat array.
cat_pred = cat_model.predict_proba(X_test)[:,1].reshape(-1)

In [45]:
# Stacked-ensemble probability of the positive class for each unlabelled row.
# Column 1 is used, matching the xgb/lgb/cat predictions; column 0 is the
# probability of the first class, which would invert the score and corrupt any
# average taken with the other models.
stack_pred = clf.predict_proba(df[df.label.isnull()][x].values)[:,1].reshape(-1)

In [46]:
def output_generate(y_pred):
    """Build a submission frame for one vector of predictions.

    Parameters
    ----------
    y_pred : array-like
        One score per unlabelled row of ``df``, in the row order of
        ``df[df.label.isnull()]``.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df_submission`` joined with their predicted ``label``.
        The join is made explicitly on ``userid``. Any ``label`` column already
        present in the template is dropped first, because ``merge`` without
        ``on`` joins on every shared column and a placeholder ``label`` would
        then become part of the join key and discard nearly every row.
    """
    submit1 = pd.DataFrame({
        'userid': df[df.label.isnull()]['userid'].values,
        'label': y_pred
    })
    template = df_submission.drop(columns=['label'], errors='ignore')
    final = template.merge(submit1, on='userid')
    return final
# Reload the submission template from disk.
df_submission = pd.read_csv('./data2/submission.csv')

In [47]:
# Write the LightGBM submission without the row index.
output_generate(lgb_pred).to_csv('output_lgb5.csv', index=False)

In [48]:
# Write the XGBoost submission without the row index.
output_generate(xgb_pred).to_csv('output_xgb5.csv', index=False)

In [49]:
# Write the stacked-ensemble submission without the row index.
output_generate(stack_pred).to_csv('output_stack.csv', index=False)

In [50]:
# Write the CatBoost submission without the row index.
output_generate(cat_pred).to_csv('output_cat1.csv', index=False)

In [51]:
# Equal-weight average of the four prediction vectors (xgb, lgb, stack, cat).
mix = (xgb_pred+lgb_pred+stack_pred+cat_pred)/4

In [52]:
# Write the averaged-ensemble submission without the row index.
output_generate(mix).to_csv('output_mix7.csv', index=False)