In [1]:
import pandas as pd
from catboost import CatBoostClassifier, Pool
import numpy as np
import gc

In [2]:
# Column name -> dtype string, passed as ``dtype=`` to pd.read_csv when the
# feature files are loaded. Built up one dtype at a time so that columns
# sharing a storage type sit next to each other.
dtypes = {}

# uint16 columns.
dtypes.update(dict.fromkeys([
    'app',
    'device',
    'os',
    'channel',
], 'uint16'))

# uint8 columns ('is_attributed' is the default target of train_catboost).
dtypes.update(dict.fromkeys([
    'hour',
    'is_attributed',
], 'uint8'))

# uint32 columns: the *_count, *_countfromfuture and *_countfrompast features.
dtypes.update(dict.fromkeys([
    'ip_day_hour_count',
    'ip_os_day_hour_count',
    'ip_app_day_hour_count',
    'ip_app_os_day_hour_count',
    'app_day_hour_count',
    'ip_device_os_count',
    'ip_app_device_os_count',
    'ip_device_os_countfromfuture',
    'ip_app_device_os_countfromfuture',
    'ip_app_device_countfromfuture',
    'ip_device_os_countfrompast',
    'ip_app_device_os_countfrompast',
    'ip_app_device_countfrompast',
], 'uint32'))

# float16 columns: the *_mean, matrixFact_* and *_regression features plus
# attributed_timediff.
dtypes.update(dict.fromkeys([
    'ip_device_os_mean',
    'ip_app_device_os_mean',
    'ip_app_device_mean',
    'app_device_os_mean',
    'matrixFact_user_iposdeviceapp_item_app',
    'matrixFact_user_ip_item_appdeviceos',
    'matrixFact_user_ipchannel_item_appdeviceos',
    'matrixFact_user_ipappdeviceos_item_channel',
    'ip_device_os_regression',
    'ip_app_device_os_regression',
    'ip_app_device_regression',
    'ip_app_device_os_channel_regression',
    'attributed_timediff',
], 'float16'))

# int32 columns: the click-time-difference features and the attributed
# timediff min/max.
dtypes.update(dict.fromkeys([
    'ip_device_os_time2nextclick',
    'ip_app_device_os_time2nextclick',
    'ip_app_device_time2nextclick',
    'ip_device_os_time2previousclick',
    'ip_app_device_os_time2previousclick',
    'ip_app_device_time2previousclick',
    'ip_device_os_lasttimediff',
    'ip_app_device_os_lasttimediff',
    'ip_app_device_lasttimediff',
    'ip_device_os_firsttimediff',
    'ip_app_device_os_firsttimediff',
    'ip_app_device_firsttimediff',
    'attributed_timediffmax',
    'attributed_timediffmin',
], 'int32'))

# Load Data

In [3]:
# Directory holding the per-split feature CSVs, and the file-name template
# that is filled in with the split name.
load_path = '/home/kai/data/kaggle/talkingdata/wl/data/equalhour/'
file_format = '{}_features_equalhour_supplementV3_feature42.csv'
day_list = ['day7', 'day8', 'day9']

# Read the three training days and the test split into df_dict (keyed by
# split name), echoing each file name once its frame has been loaded.
df_dict = {}
for split_name in ('day7', 'day8', 'day9', 'test'):
    csv_name = file_format.format(split_name)
    df_dict[split_name] = pd.read_csv(load_path + csv_name, dtype=dtypes)
    print(csv_name)


# Only prints the formatted name; no 'test_equalhour' file is read here.
print(file_format.format('test_equalhour'))
    


day7_features_equalhour_supplementV3_feature42.csv
day8_features_equalhour_supplementV3_feature42.csv
day9_features_equalhour_supplementV3_feature42.csv
test_features_equalhour_supplementV3_feature42.csv
test_equalhour_features_equalhour_supplementV3_feature42.csv


# Define ROC AUC eval metric

In [4]:
from sklearn.metrics import roc_auc_score

class ROCMetric(object):
    """ROC AUC metric object exposing is_max_optimal / get_final_error / evaluate.

    NOTE(review): presumably written for CatBoost's custom ``eval_metric``
    hook (a commented-out ``'eval_metric': ROCMetric()`` entry exists in the
    params cell) -- confirm against the CatBoost docs before enabling it.
    """

    def is_max_optimal(self):
        """Report that larger metric values are better."""
        return True

    def get_final_error(self, error, weight):
        """Return ``error`` unchanged; ``weight`` is ignored."""
        return error

    def evaluate(self, approxes, target, weight):
        """Score ``approxes[0]`` against ``target`` with ``roc_auc_score``.

        Every step is best-effort: a failure prints a message and substitutes
        a fallback value instead of raising.

        Parameters
        ----------
        approxes : indexable
            Only the first element is scored.
        target : array-like
            Ground-truth labels.
        weight : array-like or None
            Summed into the returned weight; it is not passed to
            ``roc_auc_score``.

        Returns
        -------
        tuple
            ``(roc, w)`` where ``roc`` is the AUC (0 if it could not be
            computed) and ``w`` is ``np.sum(weight)`` (1 if ``weight`` is
            None or could not be summed).
        """
        scores = approxes[0]

        # Labels -> ndarray, falling back to a dummy single-element array.
        try:
            labels_arr = np.array(target)
        except Exception:
            print('building target array error')
            print(len(target))
            labels_arr = np.array([0])

        # Scores -> ndarray, same fallback.
        try:
            scores_arr = np.array(scores)
        except Exception:
            print('building aaprox array error')
            scores_arr = np.array([0])

        # Any scoring failure is reported as an AUC of 0.
        try:
            auc_value = roc_auc_score(labels_arr, scores_arr)
        except Exception:
            auc_value = 0
            print('evalution metrix, roc calculation error. now give roc = 0')

        # Total weight defaults to 1 unless weights are given and summable.
        total_weight = 1
        if weight is not None:
            try:
                total_weight = np.sum(weight)
            except Exception:
                total_weight = 1
                print('evaluation metrix, w error, now give w=1')

        return auc_value, total_weight

In [9]:
# Label column name.
target = 'is_attributed'

day_list = ['day7', 'day8', 'day9']

combine = 0

# CPU configuration; the training cell passes this dict to train_catboost,
# which expands it into CatBoostClassifier(**param).
# Commented-out alternative for the first entry: 'eval_metric': ROCMetric()
params = dict(
    eval_metric='Logloss',
    learning_rate=0.1,
    od_type='Iter',
    od_wait=40,
    loss_function='Logloss',
    depth=6,
    thread_count=18,
    iterations=5000,
    scale_pos_weight=398,
    l2_leaf_reg=6,
    leaf_estimation_method='Gradient',
)

# GPU configuration (not referenced by the training cell shown in this notebook).
# Disabled entries kept for reference:
#   'pinned_memory_size': int(30 * (1024 **3))
#   'gpu_ram_part': 0.95
paramsgpu = dict(
    task_type='GPU',
    gpu_cat_features_storage='CpuPinnedMemory',
    max_ctr_complexity=2,
    learning_rate=0.1,
    od_type='Iter',
    od_wait=70,
    loss_function='Logloss',
    depth=6,
    thread_count=8,
    iterations=5000,
    scale_pos_weight=398,
)

# Every column of the test frame is used as a model feature; the categorical
# ones are handed to CatBoost by their position in that column list.
categorical_col = ['app', 'device', 'os', 'channel', 'hour']
feature_cols = df_dict['test'].columns.tolist()

category_index = list(map(feature_cols.index, categorical_col))

In [10]:
def train_catboost(x_train, x_val, feature_cols, category_index, params, best_round = None, target='is_attributed'):
    """Fit a CatBoostClassifier on explicit ``Pool`` objects built from two frames.

    NOTE(review): a later cell in this notebook defines ``train_catboost``
    again with the same signature (passing the frames straight to ``fit``).
    Whichever cell ran last is the definition in effect.

    Parameters
    ----------
    x_train, x_val : indexable by column name / list of column names
        Training and validation data; each must contain ``feature_cols`` and
        ``target``.
    feature_cols : list
        Columns used as model inputs.
    category_index : list
        Passed to ``Pool`` as ``cat_features``.
    params : dict
        Keyword arguments for ``CatBoostClassifier``. Copied before being
        modified, so the caller's dict is left untouched.
    best_round : int or None
        When given, overrides ``iterations`` and removes the overfitting
        detector settings (``od_type`` / ``od_wait``) from the copy.
    target : str
        Name of the label column.

    Returns
    -------
    The fitted ``CatBoostClassifier``.
    """
    param = params.copy()
    print('Building pool...')
    train_pool = Pool(x_train[feature_cols], x_train[target], cat_features=category_index)
    print('train pool done!')
    val_pool = Pool(x_val[feature_cols], x_val[target], cat_features=category_index)
    print('val pool done!')
    if best_round is not None:
        param['iterations'] = best_round
        # pop() with a default instead of del: params that never configured
        # the overfitting detector must not raise KeyError here.
        param.pop('od_type', None)
        param.pop('od_wait', None)
    print('start training')

    print('Get train pool and val pool')
    model = CatBoostClassifier(**param)
    # use_best_model=True is passed unconditionally, including when
    # best_round fixes the iteration count.
    model.fit(train_pool,  eval_set=val_pool, use_best_model=True, verbose_eval=1 )
    return model

In [11]:
def train_catboost(x_train, x_val, feature_cols, category_index, params, best_round = None, target='is_attributed'):
    """Fit a CatBoostClassifier directly on column selections of two frames.

    NOTE(review): an earlier cell defines a ``Pool``-based function with the
    same name and signature; when the cells run in order this definition
    replaces it.

    Parameters
    ----------
    x_train, x_val : indexable by column name / list of column names
        Training and validation data; each must contain ``feature_cols`` and
        ``target``.
    feature_cols : list
        Columns used as model inputs.
    category_index : list
        Passed to ``fit`` as ``cat_features``.
    params : dict
        Keyword arguments for ``CatBoostClassifier``. Copied before being
        modified, so the caller's dict is left untouched.
    best_round : int or None
        When given, overrides ``iterations`` and removes the overfitting
        detector settings (``od_type`` / ``od_wait``) from the copy.
    target : str
        Name of the label column.

    Returns
    -------
    The fitted ``CatBoostClassifier``.
    """
    param = params.copy()
    if best_round is not None:
        param['iterations'] = best_round
        # pop() with a default instead of del: params that never configured
        # the overfitting detector must not raise KeyError here.
        param.pop('od_type', None)
        param.pop('od_wait', None)

    print('Start training')
    model = CatBoostClassifier(**param)
    # use_best_model=True is passed unconditionally, including when
    # best_round fixes the iteration count.
    model.fit(x_train[feature_cols],x_train[target],
              eval_set=(x_val[feature_cols], x_val[target]),
              cat_features=category_index,
              use_best_model=True,
              verbose_eval=1 )
    return model

# Train CatBoost

In [12]:
%env JOBLIB_TEMP_FOLDER=/tmp
for day in day_list:
    for val_day in list(set(day_list)-set([day])):
        file_name = 'trainday_{}_valday_{}_equalhour_V3_feature43_depth6'.format(day, val_day)
        npy = file_name+'.npy'
        gz = file_name+'.csv.gz'
        print(file_name)
        model = train_catboost(df_dict[day], df_dict[val_day], feature_cols, category_index, params)
        print('training done! start predicting...')
        pred = model.predict_proba(df_dict['test'][feature_cols])[:,1]
        
        np.save(load_path+'catboost/'+npy, pred)
        
    
        # prediction
        df_test_raw = pd.read_csv('/home/kai/data/kaggle/talkingdata/data/test.csv')
        print('loading file done!')
        df_sub = pd.DataFrame()
        df_sub['click_id'] = df_test_raw['click_id']
        df_sub['is_attributed'] = pred
        print('predicting file done!')
        df_sub.to_csv('/home/kai/data/kaggle/talkingdata/wl/data/submission/catboost/'+gz, compression='gzip', index=False)

    

env: JOBLIB_TEMP_FOLDER=/tmp
trainday_day7_valday_day8_equalhour_V3_feature43_depth6
Start training
0:	learn: 0.6315266	test: 0.6312712	best: 0.6312712 (0)	total: 9.74s	remaining: 13h 31m 14s
1:	learn: 0.5786272	test: 0.5777827	best: 0.5777827 (1)	total: 18.2s	remaining: 12h 39m 53s
2:	learn: 0.5327548	test: 0.5315573	best: 0.5315573 (2)	total: 27.8s	remaining: 12h 52m 9s
3:	learn: 0.4927743	test: 0.4911776	best: 0.4911776 (3)	total: 37.1s	remaining: 12h 51m 22s
4:	learn: 0.4574762	test: 0.4556387	best: 0.4556387 (4)	total: 45.4s	remaining: 12h 36m 18s
5:	learn: 0.4264528	test: 0.4242766	best: 0.4242766 (5)	total: 51.4s	remaining: 11h 53m 11s
6:	learn: 0.3996715	test: 0.3975794	best: 0.3975794 (6)	total: 1m	remaining: 12h 3m 49s
7:	learn: 0.3764484	test: 0.3740254	best: 0.3740254 (7)	total: 1m 9s	remaining: 12h 5m 20s
8:	learn: 0.3564439	test: 0.3539365	best: 0.3539365 (8)	total: 1m 19s	remaining: 12h 11m 36s
9:	learn: 0.3385374	test: 0.3361261	best: 0.3361261 (9)	total: 1m 28s	remaini

86:	learn: 0.1637427	test: 0.1613517	best: 0.1613517 (86)	total: 13m 3s	remaining: 12h 17m 40s
87:	learn: 0.1635490	test: 0.1612056	best: 0.1612056 (87)	total: 13m 13s	remaining: 12h 18m 2s
88:	learn: 0.1632738	test: 0.1610184	best: 0.1610184 (88)	total: 13m 22s	remaining: 12h 18m 15s
89:	learn: 0.1630779	test: 0.1608471	best: 0.1608471 (89)	total: 13m 31s	remaining: 12h 18m 3s
90:	learn: 0.1629121	test: 0.1606882	best: 0.1606882 (90)	total: 13m 40s	remaining: 12h 18m 1s
91:	learn: 0.1627733	test: 0.1605668	best: 0.1605668 (91)	total: 13m 50s	remaining: 12h 18m 33s
92:	learn: 0.1624829	test: 0.1603497	best: 0.1603497 (92)	total: 13m 59s	remaining: 12h 18m 26s
93:	learn: 0.1622652	test: 0.1601390	best: 0.1601390 (93)	total: 14m 8s	remaining: 12h 18m 2s
94:	learn: 0.1620801	test: 0.1599910	best: 0.1599910 (94)	total: 14m 17s	remaining: 12h 17m 48s
95:	learn: 0.1618416	test: 0.1598090	best: 0.1598090 (95)	total: 14m 26s	remaining: 12h 17m 56s
96:	learn: 0.1616209	test: 0.1596023	best: 0.1

171:	learn: 0.1520928	test: 0.1525472	best: 0.1525472 (171)	total: 26m 9s	remaining: 12h 14m 19s
172:	learn: 0.1520203	test: 0.1524841	best: 0.1524841 (172)	total: 26m 19s	remaining: 12h 14m 26s
173:	learn: 0.1519554	test: 0.1524368	best: 0.1524368 (173)	total: 26m 28s	remaining: 12h 14m 9s
174:	learn: 0.1518892	test: 0.1524015	best: 0.1524015 (174)	total: 26m 37s	remaining: 12h 14m 12s
175:	learn: 0.1517253	test: 0.1523021	best: 0.1523021 (175)	total: 26m 47s	remaining: 12h 14m 8s
176:	learn: 0.1516509	test: 0.1522433	best: 0.1522433 (176)	total: 26m 57s	remaining: 12h 14m 25s
177:	learn: 0.1516055	test: 0.1522172	best: 0.1522172 (177)	total: 27m 7s	remaining: 12h 14m 42s
178:	learn: 0.1514926	test: 0.1521309	best: 0.1521309 (178)	total: 27m 15s	remaining: 12h 14m 17s
179:	learn: 0.1514536	test: 0.1520942	best: 0.1520942 (179)	total: 27m 24s	remaining: 12h 14m 5s
180:	learn: 0.1513949	test: 0.1520708	best: 0.1520708 (180)	total: 27m 33s	remaining: 12h 13m 52s
181:	learn: 0.1513126	tes

256:	learn: 0.1473863	test: 0.1499325	best: 0.1499325 (256)	total: 39m 15s	remaining: 12h 4m 28s
257:	learn: 0.1473452	test: 0.1499096	best: 0.1499096 (257)	total: 39m 24s	remaining: 12h 4m 19s
258:	learn: 0.1473015	test: 0.1498940	best: 0.1498940 (258)	total: 39m 33s	remaining: 12h 4m 5s
259:	learn: 0.1472639	test: 0.1498742	best: 0.1498742 (259)	total: 39m 43s	remaining: 12h 4m 7s
260:	learn: 0.1472219	test: 0.1498458	best: 0.1498458 (260)	total: 39m 52s	remaining: 12h 3m 56s
261:	learn: 0.1471994	test: 0.1498451	best: 0.1498451 (261)	total: 40m 1s	remaining: 12h 3m 39s
262:	learn: 0.1471767	test: 0.1498308	best: 0.1498308 (262)	total: 40m 10s	remaining: 12h 3m 29s
263:	learn: 0.1471143	test: 0.1497868	best: 0.1497868 (263)	total: 40m 19s	remaining: 12h 3m 24s
264:	learn: 0.1470703	test: 0.1497517	best: 0.1497517 (264)	total: 40m 29s	remaining: 12h 3m 22s
265:	learn: 0.1470341	test: 0.1497374	best: 0.1497374 (265)	total: 40m 38s	remaining: 12h 3m 19s
266:	learn: 0.1469712	test: 0.149

341:	learn: 0.1444832	test: 0.1487269	best: 0.1487269 (341)	total: 52m 10s	remaining: 11h 50m 40s
342:	learn: 0.1444604	test: 0.1487190	best: 0.1487190 (342)	total: 52m 20s	remaining: 11h 50m 34s
343:	learn: 0.1444442	test: 0.1487165	best: 0.1487165 (343)	total: 52m 29s	remaining: 11h 50m 34s
344:	learn: 0.1444186	test: 0.1486973	best: 0.1486973 (344)	total: 52m 39s	remaining: 11h 50m 23s
345:	learn: 0.1444038	test: 0.1486941	best: 0.1486941 (345)	total: 52m 47s	remaining: 11h 50m 12s
346:	learn: 0.1443729	test: 0.1486817	best: 0.1486817 (346)	total: 52m 56s	remaining: 11h 49m 59s
347:	learn: 0.1443466	test: 0.1486735	best: 0.1486735 (347)	total: 53m 6s	remaining: 11h 49m 54s
348:	learn: 0.1443267	test: 0.1486565	best: 0.1486565 (348)	total: 53m 14s	remaining: 11h 49m 30s
349:	learn: 0.1443096	test: 0.1486511	best: 0.1486511 (349)	total: 53m 23s	remaining: 11h 49m 25s
350:	learn: 0.1442893	test: 0.1486422	best: 0.1486422 (350)	total: 53m 32s	remaining: 11h 49m 11s
351:	learn: 0.1442746

425:	learn: 0.1422286	test: 0.1479617	best: 0.1479617 (425)	total: 1h 4m 59s	remaining: 11h 37m 54s
426:	learn: 0.1422123	test: 0.1479597	best: 0.1479597 (426)	total: 1h 5m 8s	remaining: 11h 37m 42s
427:	learn: 0.1421884	test: 0.1479627	best: 0.1479597 (426)	total: 1h 5m 18s	remaining: 11h 37m 33s
428:	learn: 0.1421636	test: 0.1479574	best: 0.1479574 (428)	total: 1h 5m 27s	remaining: 11h 37m 26s
429:	learn: 0.1421374	test: 0.1479491	best: 0.1479491 (429)	total: 1h 5m 37s	remaining: 11h 37m 24s
430:	learn: 0.1421219	test: 0.1479395	best: 0.1479395 (430)	total: 1h 5m 45s	remaining: 11h 37m 10s
431:	learn: 0.1420800	test: 0.1479383	best: 0.1479383 (431)	total: 1h 5m 54s	remaining: 11h 36m 58s
432:	learn: 0.1420493	test: 0.1479390	best: 0.1479383 (431)	total: 1h 6m 4s	remaining: 11h 36m 50s
433:	learn: 0.1420315	test: 0.1479362	best: 0.1479362 (433)	total: 1h 6m 13s	remaining: 11h 36m 47s
434:	learn: 0.1420063	test: 0.1479374	best: 0.1479362 (433)	total: 1h 6m 22s	remaining: 11h 36m 35s
43

507:	learn: 0.1401863	test: 0.1475525	best: 0.1475525 (507)	total: 1h 17m 32s	remaining: 11h 25m 41s
508:	learn: 0.1401720	test: 0.1475517	best: 0.1475517 (508)	total: 1h 17m 42s	remaining: 11h 25m 37s
509:	learn: 0.1401405	test: 0.1475388	best: 0.1475388 (509)	total: 1h 17m 52s	remaining: 11h 25m 33s
510:	learn: 0.1401142	test: 0.1475437	best: 0.1475388 (509)	total: 1h 18m	remaining: 11h 25m 21s
511:	learn: 0.1400960	test: 0.1475435	best: 0.1475388 (509)	total: 1h 18m 10s	remaining: 11h 25m 13s
512:	learn: 0.1400777	test: 0.1475360	best: 0.1475360 (512)	total: 1h 18m 19s	remaining: 11h 25m 8s
513:	learn: 0.1400589	test: 0.1475344	best: 0.1475344 (513)	total: 1h 18m 28s	remaining: 11h 24m 58s
514:	learn: 0.1400363	test: 0.1475274	best: 0.1475274 (514)	total: 1h 18m 37s	remaining: 11h 24m 46s
515:	learn: 0.1400239	test: 0.1475281	best: 0.1475274 (514)	total: 1h 18m 47s	remaining: 11h 24m 44s
516:	learn: 0.1399942	test: 0.1475177	best: 0.1475177 (516)	total: 1h 18m 56s	remaining: 11h 24m

589:	learn: 0.1383088	test: 0.1471551	best: 0.1471535 (587)	total: 1h 30m 7s	remaining: 11h 13m 42s
590:	learn: 0.1382972	test: 0.1471496	best: 0.1471496 (590)	total: 1h 30m 17s	remaining: 11h 13m 32s
591:	learn: 0.1382723	test: 0.1471417	best: 0.1471417 (591)	total: 1h 30m 26s	remaining: 11h 13m 27s
592:	learn: 0.1382555	test: 0.1471364	best: 0.1471364 (592)	total: 1h 30m 36s	remaining: 11h 13m 19s
593:	learn: 0.1382378	test: 0.1471285	best: 0.1471285 (593)	total: 1h 30m 45s	remaining: 11h 13m 9s
594:	learn: 0.1382094	test: 0.1471268	best: 0.1471268 (594)	total: 1h 30m 54s	remaining: 11h 13m
595:	learn: 0.1381971	test: 0.1471187	best: 0.1471187 (595)	total: 1h 31m 4s	remaining: 11h 12m 55s
596:	learn: 0.1381791	test: 0.1471147	best: 0.1471147 (596)	total: 1h 31m 13s	remaining: 11h 12m 51s
597:	learn: 0.1381565	test: 0.1471205	best: 0.1471147 (596)	total: 1h 31m 23s	remaining: 11h 12m 41s
598:	learn: 0.1381429	test: 0.1471205	best: 0.1471147 (596)	total: 1h 31m 32s	remaining: 11h 12m 3

672:	learn: 0.1367976	test: 0.1469270	best: 0.1469218 (670)	total: 1h 42m 59s	remaining: 11h 2m 8s
673:	learn: 0.1367861	test: 0.1469295	best: 0.1469218 (670)	total: 1h 43m 7s	remaining: 11h 1m 56s
674:	learn: 0.1367663	test: 0.1469347	best: 0.1469218 (670)	total: 1h 43m 17s	remaining: 11h 1m 48s
675:	learn: 0.1367571	test: 0.1469339	best: 0.1469218 (670)	total: 1h 43m 26s	remaining: 11h 1m 37s
676:	learn: 0.1367423	test: 0.1469390	best: 0.1469218 (670)	total: 1h 43m 35s	remaining: 11h 1m 31s
677:	learn: 0.1367279	test: 0.1469357	best: 0.1469218 (670)	total: 1h 43m 44s	remaining: 11h 1m 19s
678:	learn: 0.1367100	test: 0.1469302	best: 0.1469218 (670)	total: 1h 43m 53s	remaining: 11h 1m 9s
679:	learn: 0.1366808	test: 0.1469177	best: 0.1469177 (679)	total: 1h 44m 1s	remaining: 11h 51s
680:	learn: 0.1366653	test: 0.1469205	best: 0.1469177 (679)	total: 1h 44m 11s	remaining: 11h 47s
681:	learn: 0.1366482	test: 0.1469219	best: 0.1469177 (679)	total: 1h 44m 20s	remaining: 11h 37s
682:	learn: 0

754:	learn: 0.1353165	test: 0.1468230	best: 0.1468223 (752)	total: 1h 55m 39s	remaining: 10h 50m 20s
755:	learn: 0.1353044	test: 0.1468221	best: 0.1468221 (755)	total: 1h 55m 49s	remaining: 10h 50m 12s
756:	learn: 0.1352742	test: 0.1468177	best: 0.1468177 (756)	total: 1h 55m 58s	remaining: 10h 50m 3s
757:	learn: 0.1352645	test: 0.1468144	best: 0.1468144 (757)	total: 1h 56m 7s	remaining: 10h 49m 54s
758:	learn: 0.1352504	test: 0.1468126	best: 0.1468126 (758)	total: 1h 56m 17s	remaining: 10h 49m 47s
759:	learn: 0.1352321	test: 0.1468162	best: 0.1468126 (758)	total: 1h 56m 26s	remaining: 10h 49m 38s
760:	learn: 0.1352127	test: 0.1468100	best: 0.1468100 (760)	total: 1h 56m 35s	remaining: 10h 49m 29s
761:	learn: 0.1351978	test: 0.1468096	best: 0.1468096 (761)	total: 1h 56m 45s	remaining: 10h 49m 21s
762:	learn: 0.1351801	test: 0.1468062	best: 0.1468062 (762)	total: 1h 56m 55s	remaining: 10h 49m 15s
763:	learn: 0.1351622	test: 0.1468040	best: 0.1468040 (763)	total: 1h 57m 4s	remaining: 10h 4

837:	learn: 0.1337640	test: 0.1465912	best: 0.1465912 (837)	total: 2h 8m 28s	remaining: 10h 38m 2s
838:	learn: 0.1337470	test: 0.1465864	best: 0.1465864 (838)	total: 2h 8m 37s	remaining: 10h 37m 53s
839:	learn: 0.1337209	test: 0.1465834	best: 0.1465834 (839)	total: 2h 8m 45s	remaining: 10h 37m 41s
840:	learn: 0.1337019	test: 0.1465838	best: 0.1465834 (839)	total: 2h 8m 54s	remaining: 10h 37m 31s
841:	learn: 0.1336915	test: 0.1465846	best: 0.1465834 (839)	total: 2h 9m 4s	remaining: 10h 37m 23s
842:	learn: 0.1336730	test: 0.1465916	best: 0.1465834 (839)	total: 2h 9m 14s	remaining: 10h 37m 16s
843:	learn: 0.1336575	test: 0.1465889	best: 0.1465834 (839)	total: 2h 9m 23s	remaining: 10h 37m 6s
844:	learn: 0.1336344	test: 0.1465849	best: 0.1465834 (839)	total: 2h 9m 32s	remaining: 10h 36m 59s
845:	learn: 0.1336169	test: 0.1465833	best: 0.1465833 (845)	total: 2h 9m 41s	remaining: 10h 36m 49s
846:	learn: 0.1336004	test: 0.1465827	best: 0.1465827 (846)	total: 2h 9m 51s	remaining: 10h 36m 40s
847

919:	learn: 0.1324057	test: 0.1465425	best: 0.1465186 (893)	total: 2h 21m 4s	remaining: 10h 25m 36s
920:	learn: 0.1323903	test: 0.1465484	best: 0.1465186 (893)	total: 2h 21m 12s	remaining: 10h 25m 25s
921:	learn: 0.1323684	test: 0.1465493	best: 0.1465186 (893)	total: 2h 21m 21s	remaining: 10h 25m 15s
922:	learn: 0.1323444	test: 0.1465430	best: 0.1465186 (893)	total: 2h 21m 31s	remaining: 10h 25m 7s
923:	learn: 0.1323303	test: 0.1465453	best: 0.1465186 (893)	total: 2h 21m 42s	remaining: 10h 25m 4s
924:	learn: 0.1323207	test: 0.1465413	best: 0.1465186 (893)	total: 2h 21m 51s	remaining: 10h 24m 56s
925:	learn: 0.1323076	test: 0.1465407	best: 0.1465186 (893)	total: 2h 22m	remaining: 10h 24m 47s
926:	learn: 0.1322942	test: 0.1465419	best: 0.1465186 (893)	total: 2h 22m 10s	remaining: 10h 24m 40s
927:	learn: 0.1322792	test: 0.1465409	best: 0.1465186 (893)	total: 2h 22m 19s	remaining: 10h 24m 32s
928:	learn: 0.1322648	test: 0.1465380	best: 0.1465186 (893)	total: 2h 22m 29s	remaining: 10h 24m 2

67:	learn: 0.1685738	test: 0.1886775	best: 0.1886775 (67)	total: 10m 11s	remaining: 12h 19m 44s
68:	learn: 0.1681564	test: 0.1882059	best: 0.1882059 (68)	total: 10m 21s	remaining: 12h 20m 38s
69:	learn: 0.1677898	test: 0.1878349	best: 0.1878349 (69)	total: 10m 31s	remaining: 12h 21m 35s
70:	learn: 0.1673765	test: 0.1874148	best: 0.1874148 (70)	total: 10m 40s	remaining: 12h 20m 50s
71:	learn: 0.1671023	test: 0.1872425	best: 0.1872425 (71)	total: 10m 50s	remaining: 12h 21m 55s
72:	learn: 0.1668487	test: 0.1870047	best: 0.1870047 (72)	total: 10m 59s	remaining: 12h 22m 7s
73:	learn: 0.1665515	test: 0.1867483	best: 0.1867483 (73)	total: 11m 8s	remaining: 12h 21m 28s
74:	learn: 0.1663193	test: 0.1865954	best: 0.1865954 (74)	total: 11m 18s	remaining: 12h 22m 3s
75:	learn: 0.1660371	test: 0.1863473	best: 0.1863473 (75)	total: 11m 27s	remaining: 12h 22m 27s
76:	learn: 0.1656487	test: 0.1859030	best: 0.1859030 (76)	total: 11m 36s	remaining: 12h 22m 10s
77:	learn: 0.1652880	test: 0.1857730	best: 

KeyboardInterrupt: 

In [1]:
# Displays the `model` variable assigned by the training cell. It only works
# once that cell has run in the current kernel; the recorded output below is
# the NameError raised when `model` was not yet defined.
model

NameError: name 'model' is not defined

# Blend predictions (average over all train-day / validation-day models)

In [None]:
# Average the saved test-set predictions of every (train day, validation day)
# model and write the blended submission file.
preds_list = []
gz = 'blend_equalhour_V3_feature43_depth6.csv.gz'
for day in day_list:
    # Filter day_list instead of taking a set difference so the files are
    # always loaded (and therefore summed) in the same order; set iteration
    # order for strings can change between kernel sessions.
    for val_day in [d for d in day_list if d != day]:
        file_name = 'trainday_{}_valday_{}_equalhour_V3_feature43_depth6'.format(day, val_day)
        npy = file_name+'.npy'
        # Raises if a model's .npy is missing, e.g. because training was
        # interrupted before that pair was saved.
        pred = np.load(load_path+'catboost/'+npy)
        preds_list.append(pred)


# Element-wise mean across models.
pred = np.mean(preds_list, axis=0)
print('get pred done!')

# prediction
# Only click_id is used from the raw test file, so skip parsing the other columns.
df_test_raw = pd.read_csv('/home/kai/data/kaggle/talkingdata/data/test.csv', usecols=['click_id'])
print('loading file done!')
df_sub = pd.DataFrame({'click_id': df_test_raw['click_id'], 'is_attributed': pred})
print('predicting file done!')
df_sub.to_csv('/home/kai/data/kaggle/talkingdata/wl/data/submission/catboost/'+gz, compression='gzip', index=False)
