Позаимствовали агрегированные фичи у kdimon15. Спасибо! <br>
https://github.com/kdimon15/data-fusion-2024-baseline/

In [1]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier, CatBoostRegressor, Pool
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import random
import warnings
warnings.simplefilter('ignore')

pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

random.seed(42)
np.random.seed(42)

In [2]:
main = pd.read_csv('data/train.csv')

sample = pd.read_csv('data/sample_submit_naive.csv').drop('predict', axis=1)
sample['target'] = -1

main = pd.concat([main, sample])

In [4]:
clients = pd.read_csv('data/clients.csv')
report_dates = pd.read_csv('data/report_dates.csv', parse_dates=['report_dt'])

transactions = pd.read_csv('data/transactions.csv', parse_dates=['transaction_dttm'])
transactions = transactions.sort_values('transaction_dttm').reset_index(drop=True)

In [5]:
# Добавим информацию о клиенте, а также закодируем employee_count_nm

main = main.merge(clients, how='left', on='user_id')
main['employee_count_nm'] = LabelEncoder().fit_transform(main['employee_count_nm'].fillna('unknown'))

In [6]:
good_codes = transactions['mcc_code'].value_counts()
good_codes = good_codes[good_codes >= 10]

mcc_info = transactions[transactions.mcc_code.isin(good_codes)].pivot_table(
    index = 'user_id',
    values=['transaction_amt'],
    columns=['mcc_code'],
    aggfunc=['count', 'median', 'sum']
).fillna(0)
mcc_info.columns = ['main_' + '_'.join(map(str, x)) for x in mcc_info.columns]

count_cols = [x for x in mcc_info.columns if 'count' in x]
mcc_info['sum'] = mcc_info[count_cols].sum(axis=1)
for col in count_cols:
    mcc_info[f'{col}_norm'] = mcc_info[col] / mcc_info['sum']
mcc_info.drop('sum', axis=1, inplace=True)

main = main.merge(mcc_info, how='left', left_on='user_id', right_index=True)

In [8]:
df_more = transactions.merge(clients[['user_id', 'report']], how='left', on='user_id')
df_more = df_more.merge(report_dates, how='left', on='report')
df_more['days_to_report'] = (df_more['report_dt'] - df_more['transaction_dttm']).dt.days


for day_diff in [30, 1000]:

    # Информация о размерах транзакций в различных валютах
    currency_pivot = df_more[df_more['days_to_report'] < day_diff + 100].pivot_table(
        index='user_id',
        columns='currency_rk',
        values='transaction_amt',
        aggfunc=['sum', 'mean', 'median', 'count']
    ).fillna(0)
    currency_pivot.columns = [f'currency_daydiff_{day_diff}_{x[0]}_{x[1]}' for x in currency_pivot.columns]

    currency_pivot['sum'] = currency_pivot[[x for x in currency_pivot.columns if 'count' in x]].sum(axis=1)
    for x in range(4):
        currency_pivot[f'currency_daydiff_{day_diff}_count_{x}_norm'] = currency_pivot[f'currency_daydiff_{day_diff}_count_{x}'] / currency_pivot['sum']
    currency_pivot.drop('sum', axis=1, inplace=True)

    main = main.merge(currency_pivot, how='left', left_on='user_id', right_index=True)


    general_trans_info = df_more[df_more['days_to_report'] < day_diff + 100].groupby('user_id')['transaction_amt'].agg(['sum', 'count', 'median'])
    general_trans_info[['sum', 'count']] = general_trans_info[['sum', 'count']].fillna(0)
    general_trans_info.columns = [f'general_trans_info_{day_diff}_{x}' for x in general_trans_info]
    main = main.merge(general_trans_info, how='left', left_on='user_id', right_index=True)

    general_trans_info = df_more[(df_more['days_to_report']<day_diff + 100)&(df_more['transaction_amt']>0)].groupby('user_id')['transaction_amt'].agg(['sum', 'count', 'median'])
    general_trans_info[['sum', 'count']] = general_trans_info[['sum', 'count']].fillna(0)
    general_trans_info.columns = [f'positive_general_trans_info_{day_diff}_{x}' for x in general_trans_info]
    main = main.merge(general_trans_info, how='left', left_on='user_id', right_index=True)

    general_trans_info = df_more[(df_more['days_to_report']<day_diff + 100)&(df_more['transaction_amt']<0)].groupby('user_id')['transaction_amt'].agg(['sum', 'count', 'median'])
    general_trans_info[['sum', 'count']] = general_trans_info[['sum', 'count']].fillna(0)
    general_trans_info.columns = [f'negative_general_trans_info_{day_diff}_{x}' for x in general_trans_info]
    main = main.merge(general_trans_info, how='left', left_on='user_id', right_index=True)


# Анализируем кол-во транзакций в последние n дней / кол-во транзакций до последних n дней
for x in [5, 30]:
    prev = df_more[df_more['days_to_report'] > x + 100].groupby('user_id')['report'].agg(['count']).reset_index().rename({'count': f'num_transaction_before_{x}_days'}, axis=1)
    last = df_more[df_more['days_to_report'] <= x + 100].groupby('user_id')['report'].agg(['count']).reset_index().rename({'count': f'num_transaction_last_{x}_days'}, axis=1)

    main = main.merge(prev, how='left', on='user_id')
    main = main.merge(last, how='left', on='user_id')
    main[f'num_transaction_last_{x}_days'].fillna(0, inplace=True)
    main[f'num_transaction_before_{x}_days'].fillna(0, inplace=True)
    main[f'percent_last_{x}'] = main[f'num_transaction_last_{x}_days'] / main[f'num_transaction_before_{x}_days']

In [9]:
# Кол-во уникальных MCC кодов, валют, а также уникальных дней, в которые были транзакции
main = main.merge(df_more.groupby('user_id')['days_to_report'].nunique(), how='left', on='user_id').rename({'days_to_report': 'nunique_days'}, axis=1)
main = main.merge(df_more.groupby('user_id')['mcc_code'].nunique(), how='left', on='user_id').rename({'mcc_code': 'nunique_mcc_codes'}, axis=1)
main = main.merge(df_more.groupby('user_id')['currency_rk'].nunique(), how='left', on='user_id').rename({'currency_rk': 'nunique_currency'}, axis=1)

main = main.fillna(0)

In [10]:
tmp = transactions.copy()
tmp['hour'] = tmp['transaction_dttm'].dt.hour
pivot_table = tmp.pivot_table(
    index='user_id',
    columns='hour',
    values='transaction_amt',
    aggfunc=['count', 'median']
).fillna(0)
pivot_table.columns = [f'hour_{x[0]}_{x[1]}' for x in pivot_table.columns]

count_cols = [x for x in pivot_table.columns if 'count' in x]
pivot_table['sum'] = pivot_table[count_cols].sum(axis=1)
for col in count_cols:
    pivot_table[f'{col}_norm'] = pivot_table[col] / pivot_table['sum']
pivot_table.drop('sum', axis=1, inplace=True)

main = main.merge(pivot_table, how='left', left_on='user_id', right_index=True)

In [11]:
cur = transactions.groupby('user_id')['transaction_dttm'].agg(['min', 'max']).reset_index()
cur = cur.merge(clients[['user_id', 'report']], how='left', on='user_id')
cur = cur.merge(report_dates, how='left', on='report')

cur['min_diff_dttm'] = (cur['report_dt'] - cur['min']).dt.days
cur['days_to_report'] = (cur['report_dt'] - cur['max']).dt.days
cur['max_min_diff_dttm'] = cur['days_to_report'] - cur['min_diff_dttm']

main = main.merge(cur[['user_id', 'min_diff_dttm','days_to_report','max_min_diff_dttm']], how='left', on='user_id')

In [12]:
main['trx_density'] = main['max_min_diff_dttm'] / main['general_trans_info_1000_count']
main['days_density'] = (main['max_min_diff_dttm'] + 1) / main['nunique_days']

In [64]:
cat_cols = ['customer_age', 'employee_count_nm', 'report']
main[cat_cols] = main[cat_cols].astype(str)
main = main.fillna(0)
main = main.sort_values('user_id').reset_index(drop=True)
train = main[main.target != -1]
test = main[main.target == -1]

In [None]:
test_ids = pd.read_csv('data/train.csv')
train_, test_ = train.loc[~train['user_id'].isin(test_ids['user_id'])], train.loc[train['user_id'].isin(test_ids['user_id'])]

In [65]:
# import os
#os.environ["CUDA_VISIBLE_DEVICES"] = '0'

In [None]:
model = CatBoostClassifier(
    iterations = 1400,
    depth=4,
    learning_rate=0.03,
    task_type="GPU",
    eval_metric='AUC',
    cat_features = cat_cols,
    thread_count=6,
    l2_leaf_reg=5,
    early_stopping_rounds=200,
)
model.fit(train_.drop(['user_id', 'target', 'time'], axis=1), train_['target'], verbose=100)


df_imp = pd.DataFrame({
    'name': train_.drop(['user_id', 'target', 'time'], axis=1).columns,
    'imp': model.get_feature_importance()
}).sort_values('imp', ascending=False)

df_imp = df_imp[df_imp['imp'] > 0.15]
good_cols = df_imp['name'].tolist()

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 30.1ms	remaining: 42.1s
100:	total: 2.92s	remaining: 37.5s
200:	total: 5.82s	remaining: 34.7s
300:	total: 8.68s	remaining: 31.7s
400:	total: 11.6s	remaining: 28.8s
500:	total: 14.4s	remaining: 25.9s
600:	total: 17.3s	remaining: 23s
700:	total: 20.2s	remaining: 20.1s
800:	total: 23.1s	remaining: 17.3s
900:	total: 25s	remaining: 13.9s
1000:	total: 27.9s	remaining: 11.1s
1100:	total: 30.8s	remaining: 8.36s


#### Обучаемся на train_ выборке с сидом 42, получаем предикты для стекинга и отложенной выборки

In [70]:
strat_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

X, y = train_.drop(['time', 'target'], axis=1), train_['target']
scores = []
frames_for_metamodel = []
models = []
for train_index, valid_index in strat_kfold.split(train, train['target']):
    
    X_train, X_val = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_val = y.iloc[train_index], y.iloc[valid_index]
    
    model = CatBoostClassifier(
        iterations = 2500,
        depth=4,
        learning_rate=0.03,
        eval_metric='AUC',
        task_type="GPU",
        cat_features = cat_cols,
        early_stopping_rounds=400,
        random_seed=42,
    )

    model.fit(Pool(X_train[good_cols], y_train, cat_features=cat_cols),
              eval_set=Pool(X_val[good_cols], y_val, cat_features=cat_cols),
              verbose=100)
    models.append(model)
    
    pred = model.predict_proba(X_val[good_cols])[:, 1]
    frames_for_metamodel.append(pd.DataFrame({'user_id': X_val.user_id.values, 'pred_agg': pred}))
    scores.append(metrics.roc_auc_score(y_val, pred))

print(np.mean(scores))
metadata = pd.concat(frames_for_metamodel, axis=0).reset_index(drop=True)

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.5996201	best: 0.5996201 (0)	total: 79.6ms	remaining: 3m 19s
100:	test: 0.7439937	best: 0.7442905 (91)	total: 6.53s	remaining: 2m 35s
200:	test: 0.7531515	best: 0.7532193 (199)	total: 13s	remaining: 2m 28s
300:	test: 0.7569328	best: 0.7569328 (300)	total: 19.5s	remaining: 2m 22s
400:	test: 0.7597520	best: 0.7597520 (400)	total: 25.9s	remaining: 2m 15s
500:	test: 0.7616559	best: 0.7616559 (500)	total: 32.3s	remaining: 2m 8s
600:	test: 0.7635614	best: 0.7635645 (597)	total: 38.2s	remaining: 2m
700:	test: 0.7646601	best: 0.7646900 (698)	total: 44.4s	remaining: 1m 53s
800:	test: 0.7660500	best: 0.7660500 (800)	total: 50.8s	remaining: 1m 47s
900:	test: 0.7669390	best: 0.7669390 (900)	total: 57s	remaining: 1m 41s
1000:	test: 0.7674038	best: 0.7675301 (980)	total: 1m 3s	remaining: 1m 35s
1100:	test: 0.7676142	best: 0.7676142 (1100)	total: 1m 9s	remaining: 1m 28s
1200:	test: 0.7674363	best: 0.7678154 (1127)	total: 1m 16s	remaining: 1m 22s
1300:	test: 0.7679921	best: 0.7680578 (1264)	

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.5991083	best: 0.5991083 (0)	total: 64.4ms	remaining: 2m 40s
100:	test: 0.7394903	best: 0.7395813 (99)	total: 6.57s	remaining: 2m 36s
200:	test: 0.7475601	best: 0.7475701 (199)	total: 12.2s	remaining: 2m 19s
300:	test: 0.7506356	best: 0.7506765 (299)	total: 18.7s	remaining: 2m 16s
400:	test: 0.7527723	best: 0.7527723 (400)	total: 25.1s	remaining: 2m 11s
500:	test: 0.7544979	best: 0.7544979 (500)	total: 31.5s	remaining: 2m 5s
600:	test: 0.7555419	best: 0.7555481 (599)	total: 37.9s	remaining: 1m 59s
700:	test: 0.7557786	best: 0.7559286 (678)	total: 44.2s	remaining: 1m 53s
800:	test: 0.7558749	best: 0.7559798 (741)	total: 50.6s	remaining: 1m 47s
900:	test: 0.7561170	best: 0.7561458 (835)	total: 57s	remaining: 1m 41s
1000:	test: 0.7563711	best: 0.7563852 (994)	total: 1m 3s	remaining: 1m 34s
1100:	test: 0.7570429	best: 0.7571322 (1091)	total: 1m 9s	remaining: 1m 28s
1200:	test: 0.7571303	best: 0.7572464 (1153)	total: 1m 16s	remaining: 1m 22s
1300:	test: 0.7573792	best: 0.7574903 (

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.5965842	best: 0.5965842 (0)	total: 28.5ms	remaining: 1m 11s
100:	test: 0.7193838	best: 0.7193838 (100)	total: 2.69s	remaining: 1m 3s
200:	test: 0.7277556	best: 0.7277556 (200)	total: 5.5s	remaining: 1m 2s
300:	test: 0.7326946	best: 0.7326946 (300)	total: 8.28s	remaining: 1m
400:	test: 0.7358306	best: 0.7358306 (400)	total: 11s	remaining: 57.7s
500:	test: 0.7371940	best: 0.7371950 (499)	total: 13.8s	remaining: 55.1s
600:	test: 0.7388123	best: 0.7388203 (599)	total: 16.5s	remaining: 52.1s
700:	test: 0.7397516	best: 0.7397802 (681)	total: 19.3s	remaining: 49.5s
800:	test: 0.7405707	best: 0.7405707 (800)	total: 22s	remaining: 46.8s
900:	test: 0.7405994	best: 0.7407613 (877)	total: 24.9s	remaining: 44.1s
1000:	test: 0.7407959	best: 0.7408883 (978)	total: 27.8s	remaining: 41.6s
1100:	test: 0.7411637	best: 0.7412173 (1088)	total: 30.6s	remaining: 38.9s
1200:	test: 0.7419925	best: 0.7420052 (1199)	total: 33.4s	remaining: 36.1s
1300:	test: 0.7422323	best: 0.7423124 (1263)	total: 36.2

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.5918868	best: 0.5918868 (0)	total: 27.2ms	remaining: 1m 7s
100:	test: 0.7450096	best: 0.7450677 (99)	total: 2.73s	remaining: 1m 4s
200:	test: 0.7519636	best: 0.7519636 (200)	total: 5.51s	remaining: 1m 3s
300:	test: 0.7546951	best: 0.7547918 (287)	total: 8.25s	remaining: 1m
400:	test: 0.7558557	best: 0.7558671 (396)	total: 11s	remaining: 57.6s
500:	test: 0.7562451	best: 0.7562585 (495)	total: 13.7s	remaining: 54.6s
600:	test: 0.7568250	best: 0.7568250 (600)	total: 16.3s	remaining: 51.5s
700:	test: 0.7571424	best: 0.7572095 (679)	total: 19s	remaining: 48.7s
800:	test: 0.7580054	best: 0.7580489 (787)	total: 21.7s	remaining: 45.9s
900:	test: 0.7574648	best: 0.7581587 (820)	total: 24.3s	remaining: 43.1s
1000:	test: 0.7578517	best: 0.7581587 (820)	total: 27s	remaining: 40.4s
1100:	test: 0.7581577	best: 0.7582395 (1077)	total: 29.6s	remaining: 37.6s
1200:	test: 0.7583718	best: 0.7585480 (1176)	total: 32.3s	remaining: 34.9s
1300:	test: 0.7586141	best: 0.7586141 (1300)	total: 34.9s	r

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.6059755	best: 0.6059755 (0)	total: 28.2ms	remaining: 1m 10s
100:	test: 0.7455274	best: 0.7455274 (100)	total: 2.62s	remaining: 1m 2s
200:	test: 0.7528267	best: 0.7528880 (199)	total: 5.28s	remaining: 1m
300:	test: 0.7565646	best: 0.7565665 (298)	total: 7.89s	remaining: 57.6s
400:	test: 0.7585683	best: 0.7585683 (400)	total: 10.5s	remaining: 55s
500:	test: 0.7591588	best: 0.7591999 (498)	total: 13.2s	remaining: 52.5s
600:	test: 0.7596386	best: 0.7597714 (563)	total: 15.9s	remaining: 50.2s
700:	test: 0.7598216	best: 0.7598812 (696)	total: 18.6s	remaining: 47.7s
800:	test: 0.7599555	best: 0.7599555 (799)	total: 21.3s	remaining: 45.2s
900:	test: 0.7602803	best: 0.7604053 (864)	total: 24s	remaining: 42.7s
1000:	test: 0.7601245	best: 0.7604053 (864)	total: 26.7s	remaining: 40s
1100:	test: 0.7595581	best: 0.7604053 (864)	total: 29.4s	remaining: 37.3s
1200:	test: 0.7595074	best: 0.7604053 (864)	total: 32.1s	remaining: 34.7s
bestTest = 0.760405302
bestIteration = 864
Shrink model to 

In [None]:
predict = np.zeros(len(test_))
for i in range(len(models)):
    predict += models[i].predict_proba(test_.drop(['target'], axis=1))[:, 1]
test_pred = {'user_id': test.user_id.values, 'pred_agg': predict}
print(metrics.roc_auc_score(test_['target'], predict))

#### Обучаемся на всей выборке, получаем финальные предикты

In [21]:
strat_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

X, y = train.drop(['time', 'target'], axis=1), train['target']
scores = []

models = []
for train_index, valid_index in strat_kfold.split(train, train['target']):
    
    X_train, X_val = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_val = y.iloc[train_index], y.iloc[valid_index]
    
    model = CatBoostClassifier(
        iterations = 2500,
        depth=4,
        learning_rate=0.03,
        eval_metric='AUC',
        task_type="GPU",
        cat_features = cat_cols,
        early_stopping_rounds=400,
        random_seed=42,
    )

    model.fit(Pool(X_train[good_cols], y_train, cat_features=cat_cols),
              eval_set=Pool(X_val[good_cols], y_val, cat_features=cat_cols),
              verbose=100)
    models.append(model)
    
    pred = model.predict_proba(X_val[good_cols])[:, 1]
    scores.append(metrics.roc_auc_score(y_val, pred))

print(np.mean(scores))

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.6601486	best: 0.6601486 (0)	total: 70.1ms	remaining: 2m 55s
100:	test: 0.7511503	best: 0.7511503 (100)	total: 7.96s	remaining: 3m 9s
200:	test: 0.7588906	best: 0.7589512 (199)	total: 15.8s	remaining: 3m
300:	test: 0.7626331	best: 0.7626331 (300)	total: 23.6s	remaining: 2m 52s
400:	test: 0.7649797	best: 0.7649797 (400)	total: 31.1s	remaining: 2m 42s
500:	test: 0.7667466	best: 0.7667974 (498)	total: 38.8s	remaining: 2m 34s
600:	test: 0.7674488	best: 0.7674748 (588)	total: 46.4s	remaining: 2m 26s
700:	test: 0.7683206	best: 0.7683636 (698)	total: 54.2s	remaining: 2m 18s
800:	test: 0.7688682	best: 0.7688698 (798)	total: 1m 1s	remaining: 2m 11s
900:	test: 0.7693982	best: 0.7693982 (900)	total: 1m 9s	remaining: 2m 3s
1000:	test: 0.7703170	best: 0.7703170 (999)	total: 1m 17s	remaining: 1m 55s
1100:	test: 0.7703819	best: 0.7704440 (1091)	total: 1m 25s	remaining: 1m 48s
1200:	test: 0.7707353	best: 0.7708883 (1179)	total: 1m 33s	remaining: 1m 40s
1300:	test: 0.7714486	best: 0.7714486 (

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.6558930	best: 0.6558930 (0)	total: 27.6ms	remaining: 1m 9s
100:	test: 0.7438236	best: 0.7438236 (100)	total: 2.55s	remaining: 1m
200:	test: 0.7496982	best: 0.7496982 (200)	total: 5.05s	remaining: 57.7s
300:	test: 0.7521293	best: 0.7521410 (299)	total: 7.54s	remaining: 55.1s
400:	test: 0.7536395	best: 0.7537355 (380)	total: 9.98s	remaining: 52.3s
500:	test: 0.7551668	best: 0.7552093 (496)	total: 12.4s	remaining: 49.6s
600:	test: 0.7554194	best: 0.7555002 (597)	total: 14.9s	remaining: 47.2s
700:	test: 0.7561477	best: 0.7562600 (678)	total: 17.4s	remaining: 44.7s
800:	test: 0.7566285	best: 0.7566368 (790)	total: 19.9s	remaining: 42.2s
900:	test: 0.7569321	best: 0.7571386 (880)	total: 22.3s	remaining: 39.6s
1000:	test: 0.7570937	best: 0.7571710 (988)	total: 24.8s	remaining: 37.2s
1100:	test: 0.7571592	best: 0.7572577 (1098)	total: 27.3s	remaining: 34.7s
1200:	test: 0.7572744	best: 0.7573255 (1172)	total: 29.7s	remaining: 32.1s
1300:	test: 0.7577220	best: 0.7577263 (1299)	total: 

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.6359078	best: 0.6359078 (0)	total: 25.9ms	remaining: 1m 4s
100:	test: 0.7237568	best: 0.7237568 (100)	total: 2.52s	remaining: 60s
200:	test: 0.7304130	best: 0.7305022 (196)	total: 5.03s	remaining: 57.5s
300:	test: 0.7332883	best: 0.7332883 (300)	total: 7.53s	remaining: 55s
400:	test: 0.7355776	best: 0.7355776 (400)	total: 9.99s	remaining: 52.3s
500:	test: 0.7373537	best: 0.7373537 (500)	total: 12.5s	remaining: 49.8s
600:	test: 0.7385002	best: 0.7385470 (598)	total: 15s	remaining: 47.3s
700:	test: 0.7385830	best: 0.7388240 (674)	total: 17.4s	remaining: 44.8s
800:	test: 0.7393076	best: 0.7393076 (800)	total: 19.9s	remaining: 42.2s
900:	test: 0.7396082	best: 0.7396200 (899)	total: 22.4s	remaining: 39.8s
1000:	test: 0.7396098	best: 0.7396611 (997)	total: 25.1s	remaining: 37.6s
1100:	test: 0.7402741	best: 0.7402741 (1100)	total: 27.6s	remaining: 35s
1200:	test: 0.7406537	best: 0.7407002 (1165)	total: 30.1s	remaining: 32.5s
1300:	test: 0.7407123	best: 0.7407664 (1294)	total: 32.6s

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.6526109	best: 0.6526109 (0)	total: 30.2ms	remaining: 1m 15s
100:	test: 0.7464590	best: 0.7464590 (100)	total: 2.83s	remaining: 1m 7s
200:	test: 0.7522999	best: 0.7522999 (200)	total: 5.44s	remaining: 1m 2s
300:	test: 0.7543421	best: 0.7543421 (300)	total: 8.22s	remaining: 1m
400:	test: 0.7550912	best: 0.7551700 (393)	total: 11s	remaining: 57.5s
500:	test: 0.7554735	best: 0.7554947 (478)	total: 13.7s	remaining: 54.6s
600:	test: 0.7553039	best: 0.7556047 (586)	total: 16.4s	remaining: 51.7s
700:	test: 0.7555910	best: 0.7557474 (672)	total: 19.2s	remaining: 49.2s
800:	test: 0.7560585	best: 0.7560597 (796)	total: 21.9s	remaining: 46.5s
900:	test: 0.7561322	best: 0.7562135 (849)	total: 24.7s	remaining: 43.9s
1000:	test: 0.7565295	best: 0.7566628 (990)	total: 27.5s	remaining: 41.2s
1100:	test: 0.7566333	best: 0.7566956 (1087)	total: 30.3s	remaining: 38.6s
1200:	test: 0.7566116	best: 0.7567443 (1160)	total: 33.3s	remaining: 36.1s
1300:	test: 0.7564768	best: 0.7567443 (1160)	total: 3

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.6372829	best: 0.6372829 (0)	total: 28ms	remaining: 1m 9s
100:	test: 0.7480826	best: 0.7481413 (97)	total: 2.75s	remaining: 1m 5s
200:	test: 0.7538459	best: 0.7538677 (199)	total: 5.51s	remaining: 1m 3s
300:	test: 0.7560039	best: 0.7560073 (299)	total: 8.23s	remaining: 1m
400:	test: 0.7565473	best: 0.7566472 (397)	total: 10.9s	remaining: 57.2s
500:	test: 0.7576328	best: 0.7577152 (497)	total: 13.7s	remaining: 54.8s
600:	test: 0.7579629	best: 0.7580968 (582)	total: 16.4s	remaining: 51.9s
700:	test: 0.7580413	best: 0.7580968 (582)	total: 19.1s	remaining: 49s
800:	test: 0.7585921	best: 0.7586914 (796)	total: 21.8s	remaining: 46.1s
900:	test: 0.7590770	best: 0.7591980 (864)	total: 24.5s	remaining: 43.5s
1000:	test: 0.7587047	best: 0.7592618 (920)	total: 27.2s	remaining: 40.7s
1100:	test: 0.7588639	best: 0.7592618 (920)	total: 29.8s	remaining: 37.8s
1200:	test: 0.7590678	best: 0.7592618 (920)	total: 32.4s	remaining: 35s
1300:	test: 0.7590432	best: 0.7592618 (920)	total: 35s	remain

In [None]:
predict = np.zeros(len(test))
for i in range(len(models)):
    predict += models[i].predict_proba(test[good_cols])[:, 1]
real_pred = {'user_id': test.user_id.values, 'pred_agg': predict}

In [None]:
metadata = pd.concat([metadata, test_pred, real_pred])
metadata['pred_time'] = metadata['pred_time']/5
metadata.to_csv('../predict/time_pred_meta.csv', index=False)