# Restaurant Visitor Forecasting by GooseLearning

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
from sklearn import metrics
from sklearn.model_selection import train_test_split
import catboost

pd.options.display.max_columns = 1000

%matplotlib inline

In [2]:
data = {
    'stores':  pd.read_csv('result/stores.csv'),
    'reserve': pd.read_csv('result/reserve.csv'),
    'dates':   pd.read_csv('result/dates.csv'),

    'train': pd.read_csv('dataset/air_visit_data.csv'),
    'test':  pd.read_csv('dataset/sample_submission.csv'),
}

## Обработка данных

### Обрабатываем `train` и `test`

Приводим к единому формату

In [3]:
data['train']['id'] = data['train']['air_store_id'] + '_' + data['train']['visit_date']

data['test']['air_store_id'] = [id[:20] for id in data['test']['id']]
data['test']['visit_date']   = [id[21:] for id in data['test']['id']]

Добавляем дополнительные колонки даты

In [4]:
data['train'] = pd.merge(data['train'], data['dates'], how='left', on=['visit_date'])
data['test']  = pd.merge(data['test'],  data['dates'], how='left', on=['visit_date'])

In [5]:
print('train', data['train'].shape)
print('test ', data['test'].shape)

data['train'].head()

train (252108, 12)
test  (32019, 12)


Unnamed: 0,air_store_id,visit_date,visitors,id,visit_holiday,visit_day,visit_month,visit_year,visit_dow,visit_work,visit_weekend,visit_date_int
0,air_ba937bf13d40fb24,2016-01-13,25,air_ba937bf13d40fb24_2016-01-13,0,13,1,2016,2,True,False,20160113
1,air_ba937bf13d40fb24,2016-01-14,32,air_ba937bf13d40fb24_2016-01-14,0,14,1,2016,3,True,False,20160114
2,air_ba937bf13d40fb24,2016-01-15,29,air_ba937bf13d40fb24_2016-01-15,0,15,1,2016,4,True,False,20160115
3,air_ba937bf13d40fb24,2016-01-16,22,air_ba937bf13d40fb24_2016-01-16,0,16,1,2016,5,False,True,20160116
4,air_ba937bf13d40fb24,2016-01-18,6,air_ba937bf13d40fb24_2016-01-18,0,18,1,2016,0,True,False,20160118


### Обрабатываем `reserve`

Добавляем `reserve` в `train` / `test` 

In [6]:
for ds in ['train', 'test']:
    data[ds] = pd.merge(data[ds], data['reserve'], how='left', on=['air_store_id', 'visit_date'])

In [7]:
print('train', data['train'].shape)
print('test ', data['test'].shape)

data['train'].head()

train (252108, 14)
test  (32019, 14)


Unnamed: 0,air_store_id,visit_date,visitors,id,visit_holiday,visit_day,visit_month,visit_year,visit_dow,visit_work,visit_weekend,visit_date_int,reserve_visitors,reserve_visitors_competitor
0,air_ba937bf13d40fb24,2016-01-13,25,air_ba937bf13d40fb24_2016-01-13,0,13,1,2016,2,True,False,20160113,0,432.0
1,air_ba937bf13d40fb24,2016-01-14,32,air_ba937bf13d40fb24_2016-01-14,0,14,1,2016,3,True,False,20160114,0,376.0
2,air_ba937bf13d40fb24,2016-01-15,29,air_ba937bf13d40fb24_2016-01-15,0,15,1,2016,4,True,False,20160115,0,598.0
3,air_ba937bf13d40fb24,2016-01-16,22,air_ba937bf13d40fb24_2016-01-16,0,16,1,2016,5,False,True,20160116,0,696.0
4,air_ba937bf13d40fb24,2016-01-18,6,air_ba937bf13d40fb24_2016-01-18,0,18,1,2016,0,True,False,20160118,0,292.0


### Обрабатываем `stores`

Добавляем информацию о посещениях

In [8]:
for op_type in ['sum', 'mean', 'median', 'min', 'max', 'count']:
    tmp = getattr(
        data['train']
            .groupby(['air_store_id', 'visit_dow'], as_index=False)
            [['visitors']],
        op_type,
    )().rename(columns={
        'visitors': 'visitors_' + op_type,
    })
    
    data['stores'] = pd.merge(data['stores'], tmp, how='left', on=['air_store_id', 'visit_dow'])

In [9]:
print('stores', data['stores'].shape)

data['stores'].head()

stores (5803, 13)


Unnamed: 0,air_store_id,visit_dow,air_genre_name,air_area_name,latitude,longitude,city,visitors_sum,visitors_mean,visitors_median,visitors_min,visitors_max,visitors_count
0,air_0f0cdeee6c9bf3d7,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Osaka,609.0,16.916667,15.0,2.0,39.0,36.0
1,air_7cc17a324ae5c7dc,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Osaka,219.0,9.125,7.0,1.0,34.0,24.0
2,air_fee8dcf4d619598e,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Osaka,813.0,20.325,18.5,7.0,39.0,40.0
3,air_a17f0778617c76e2,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Osaka,158.0,26.333333,27.5,19.0,31.0,6.0
4,air_83db5aff8f50478e,0,Italian/French,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,202.0,6.516129,6.0,1.0,13.0,31.0


Добавляем `stores` в `train` / `test`

In [10]:
for ds in ['train', 'test']:
    data[ds] = pd.merge(data[ds], data['stores'], how='left', on=['air_store_id', 'visit_dow'])

In [11]:
print('train', data['train'].shape)
print('test ', data['test'].shape)

data['train'].head()

train (252108, 25)
test  (32019, 25)


Unnamed: 0,air_store_id,visit_date,visitors,id,visit_holiday,visit_day,visit_month,visit_year,visit_dow,visit_work,visit_weekend,visit_date_int,reserve_visitors,reserve_visitors_competitor,air_genre_name,air_area_name,latitude,longitude,city,visitors_sum,visitors_mean,visitors_median,visitors_min,visitors_max,visitors_count
0,air_ba937bf13d40fb24,2016-01-13,25,air_ba937bf13d40fb24_2016-01-13,0,13,1,2016,2,True,False,20160113,0,432.0,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,1526.0,23.84375,25.0,7.0,57.0,64.0
1,air_ba937bf13d40fb24,2016-01-14,32,air_ba937bf13d40fb24_2016-01-14,0,14,1,2016,3,True,False,20160114,0,376.0,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,1319.0,20.292308,21.0,2.0,54.0,65.0
2,air_ba937bf13d40fb24,2016-01-15,29,air_ba937bf13d40fb24_2016-01-15,0,15,1,2016,4,True,False,20160115,0,598.0,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,2258.0,34.738462,35.0,4.0,61.0,65.0
3,air_ba937bf13d40fb24,2016-01-16,22,air_ba937bf13d40fb24_2016-01-16,0,16,1,2016,5,False,True,20160116,0,696.0,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,1825.0,27.651515,27.0,6.0,53.0,66.0
4,air_ba937bf13d40fb24,2016-01-18,6,air_ba937bf13d40fb24_2016-01-18,0,18,1,2016,0,True,False,20160118,0,292.0,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,784.0,13.754386,12.0,2.0,34.0,57.0


## Обучение

In [12]:
columns = pd.Series([col for col in data['train'] if col not in ['id', 'visit_date', 'visitors']])
cat_features = columns[columns.isin([
    'air_store_id',
    'visit_holiday',
    'visit_day',
    'visit_month',
    'visit_year',
    'visit_dow',
    'visit_work',
    'visit_weekend',
    'air_genre_name',
    'air_area_name',
    'city',
])].index

In [13]:
X_all = data['train'][columns]
y_all = np.log1p(data['train']['visitors'])

stratify = np.clip(data['train']['visitors'], a_min=0, a_max=100)

X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all,
    test_size=0.2, 
    random_state=1234,
    stratify=stratify,
)

train_pool = catboost.Pool(
    X_train, y_train,
    cat_features=cat_features.tolist(),
)

test_pool = catboost.Pool(
    X_test, y_test,
    cat_features=cat_features.tolist(),
)

In [19]:
model = catboost.CatBoostRegressor(
     iterations=2000,
)

model.fit(
    train_pool,
    eval_set=test_pool,
)

0:	learn: 2.8324260	test: 2.8321754	best: 2.8321754 (0)	total: 135ms	remaining: 4m 30s
1:	learn: 2.7505718	test: 2.7503011	best: 2.7503011 (1)	total: 285ms	remaining: 4m 44s
2:	learn: 2.6712519	test: 2.6709378	best: 2.6709378 (2)	total: 440ms	remaining: 4m 52s
3:	learn: 2.5944026	test: 2.5940389	best: 2.5940389 (3)	total: 589ms	remaining: 4m 54s
4:	learn: 2.5199426	test: 2.5195186	best: 2.5195186 (4)	total: 707ms	remaining: 4m 41s
5:	learn: 2.4477888	test: 2.4473280	best: 2.4473280 (5)	total: 830ms	remaining: 4m 35s
6:	learn: 2.3779009	test: 2.3773700	best: 2.3773700 (6)	total: 1.01s	remaining: 4m 48s
7:	learn: 2.3101854	test: 2.3096167	best: 2.3096167 (7)	total: 1.17s	remaining: 4m 51s
8:	learn: 2.2446003	test: 2.2439774	best: 2.2439774 (8)	total: 1.3s	remaining: 4m 48s
9:	learn: 2.1811005	test: 2.1804145	best: 2.1804145 (9)	total: 1.43s	remaining: 4m 44s
10:	learn: 2.1195961	test: 2.1188816	best: 2.1188816 (10)	total: 1.57s	remaining: 4m 44s
11:	learn: 2.0600395	test: 2.0592779	best:

94:	learn: 0.5356013	test: 0.5341112	best: 0.5341112 (94)	total: 13.1s	remaining: 4m 23s
95:	learn: 0.5340856	test: 0.5326188	best: 0.5326188 (95)	total: 13.3s	remaining: 4m 23s
96:	learn: 0.5326450	test: 0.5312011	best: 0.5312011 (96)	total: 13.4s	remaining: 4m 23s
97:	learn: 0.5313008	test: 0.5298689	best: 0.5298689 (97)	total: 13.6s	remaining: 4m 23s
98:	learn: 0.5300420	test: 0.5286316	best: 0.5286316 (98)	total: 13.7s	remaining: 4m 23s
99:	learn: 0.5288307	test: 0.5274493	best: 0.5274493 (99)	total: 13.9s	remaining: 4m 24s
100:	learn: 0.5276901	test: 0.5263188	best: 0.5263188 (100)	total: 14s	remaining: 4m 23s
101:	learn: 0.5266057	test: 0.5252449	best: 0.5252449 (101)	total: 14.2s	remaining: 4m 23s
102:	learn: 0.5255782	test: 0.5242332	best: 0.5242332 (102)	total: 14.3s	remaining: 4m 23s
103:	learn: 0.5245976	test: 0.5232818	best: 0.5232818 (103)	total: 14.5s	remaining: 4m 23s
104:	learn: 0.5236656	test: 0.5223640	best: 0.5223640 (104)	total: 14.6s	remaining: 4m 23s
105:	learn: 0

186:	learn: 0.5049425	test: 0.5044167	best: 0.5044167 (186)	total: 27.5s	remaining: 4m 26s
187:	learn: 0.5048951	test: 0.5043840	best: 0.5043840 (187)	total: 27.6s	remaining: 4m 26s
188:	learn: 0.5048551	test: 0.5043512	best: 0.5043512 (188)	total: 27.8s	remaining: 4m 26s
189:	learn: 0.5048014	test: 0.5042968	best: 0.5042968 (189)	total: 28s	remaining: 4m 26s
190:	learn: 0.5047352	test: 0.5042292	best: 0.5042292 (190)	total: 28.1s	remaining: 4m 26s
191:	learn: 0.5046963	test: 0.5041957	best: 0.5041957 (191)	total: 28.3s	remaining: 4m 26s
192:	learn: 0.5046436	test: 0.5041467	best: 0.5041467 (192)	total: 28.4s	remaining: 4m 26s
193:	learn: 0.5046205	test: 0.5041310	best: 0.5041310 (193)	total: 28.6s	remaining: 4m 26s
194:	learn: 0.5045023	test: 0.5040433	best: 0.5040433 (194)	total: 28.7s	remaining: 4m 25s
195:	learn: 0.5044469	test: 0.5039925	best: 0.5039925 (195)	total: 28.9s	remaining: 4m 26s
196:	learn: 0.5043554	test: 0.5039023	best: 0.5039023 (196)	total: 29.1s	remaining: 4m 26s
1

278:	learn: 0.4995488	test: 0.4993976	best: 0.4993976 (278)	total: 42.5s	remaining: 4m 22s
279:	learn: 0.4995264	test: 0.4993797	best: 0.4993797 (279)	total: 42.7s	remaining: 4m 22s
280:	learn: 0.4995070	test: 0.4993635	best: 0.4993635 (280)	total: 42.9s	remaining: 4m 22s
281:	learn: 0.4994844	test: 0.4993531	best: 0.4993531 (281)	total: 43.1s	remaining: 4m 22s
282:	learn: 0.4994657	test: 0.4993316	best: 0.4993316 (282)	total: 43.2s	remaining: 4m 22s
283:	learn: 0.4994249	test: 0.4993029	best: 0.4993029 (283)	total: 43.4s	remaining: 4m 22s
284:	learn: 0.4993722	test: 0.4992575	best: 0.4992575 (284)	total: 43.6s	remaining: 4m 22s
285:	learn: 0.4993608	test: 0.4992537	best: 0.4992537 (285)	total: 43.8s	remaining: 4m 22s
286:	learn: 0.4993394	test: 0.4992369	best: 0.4992369 (286)	total: 44s	remaining: 4m 22s
287:	learn: 0.4993247	test: 0.4992272	best: 0.4992272 (287)	total: 44.2s	remaining: 4m 22s
288:	learn: 0.4992621	test: 0.4991878	best: 0.4991878 (288)	total: 44.4s	remaining: 4m 22s
2

370:	learn: 0.4960161	test: 0.4960002	best: 0.4960002 (370)	total: 57.8s	remaining: 4m 13s
371:	learn: 0.4959856	test: 0.4959796	best: 0.4959796 (371)	total: 57.9s	remaining: 4m 13s
372:	learn: 0.4959733	test: 0.4959738	best: 0.4959738 (372)	total: 58.1s	remaining: 4m 13s
373:	learn: 0.4959555	test: 0.4959644	best: 0.4959644 (373)	total: 58.3s	remaining: 4m 13s
374:	learn: 0.4958856	test: 0.4959021	best: 0.4959021 (374)	total: 58.4s	remaining: 4m 13s
375:	learn: 0.4958479	test: 0.4958542	best: 0.4958542 (375)	total: 58.6s	remaining: 4m 13s
376:	learn: 0.4958180	test: 0.4958262	best: 0.4958262 (376)	total: 58.8s	remaining: 4m 13s
377:	learn: 0.4957964	test: 0.4958071	best: 0.4958071 (377)	total: 59s	remaining: 4m 13s
378:	learn: 0.4957735	test: 0.4957910	best: 0.4957910 (378)	total: 59.1s	remaining: 4m 12s
379:	learn: 0.4957408	test: 0.4957447	best: 0.4957447 (379)	total: 59.3s	remaining: 4m 12s
380:	learn: 0.4957093	test: 0.4957155	best: 0.4957155 (380)	total: 59.4s	remaining: 4m 12s
3

462:	learn: 0.4936134	test: 0.4938359	best: 0.4938359 (462)	total: 1m 13s	remaining: 4m 2s
463:	learn: 0.4935832	test: 0.4937996	best: 0.4937996 (463)	total: 1m 13s	remaining: 4m 2s
464:	learn: 0.4935567	test: 0.4937743	best: 0.4937743 (464)	total: 1m 13s	remaining: 4m 2s
465:	learn: 0.4935440	test: 0.4937643	best: 0.4937643 (465)	total: 1m 13s	remaining: 4m 2s
466:	learn: 0.4935233	test: 0.4937354	best: 0.4937354 (466)	total: 1m 13s	remaining: 4m 2s
467:	learn: 0.4935063	test: 0.4937214	best: 0.4937214 (467)	total: 1m 13s	remaining: 4m 1s
468:	learn: 0.4934629	test: 0.4936698	best: 0.4936698 (468)	total: 1m 14s	remaining: 4m 1s
469:	learn: 0.4934377	test: 0.4936430	best: 0.4936430 (469)	total: 1m 14s	remaining: 4m 1s
470:	learn: 0.4933753	test: 0.4935667	best: 0.4935667 (470)	total: 1m 14s	remaining: 4m 1s
471:	learn: 0.4933571	test: 0.4935523	best: 0.4935523 (471)	total: 1m 14s	remaining: 4m 1s
472:	learn: 0.4933374	test: 0.4935367	best: 0.4935367 (472)	total: 1m 14s	remaining: 4m 1s

552:	learn: 0.4917975	test: 0.4922125	best: 0.4922125 (552)	total: 1m 28s	remaining: 3m 50s
553:	learn: 0.4917895	test: 0.4922045	best: 0.4922045 (553)	total: 1m 28s	remaining: 3m 50s
554:	learn: 0.4917711	test: 0.4921991	best: 0.4921991 (554)	total: 1m 28s	remaining: 3m 50s
555:	learn: 0.4917526	test: 0.4921891	best: 0.4921891 (555)	total: 1m 28s	remaining: 3m 50s
556:	learn: 0.4917150	test: 0.4921590	best: 0.4921590 (556)	total: 1m 28s	remaining: 3m 50s
557:	learn: 0.4917056	test: 0.4921565	best: 0.4921565 (557)	total: 1m 29s	remaining: 3m 50s
558:	learn: 0.4916962	test: 0.4921498	best: 0.4921498 (558)	total: 1m 29s	remaining: 3m 50s
559:	learn: 0.4916913	test: 0.4921444	best: 0.4921444 (559)	total: 1m 29s	remaining: 3m 50s
560:	learn: 0.4916751	test: 0.4921323	best: 0.4921323 (560)	total: 1m 29s	remaining: 3m 49s
561:	learn: 0.4916517	test: 0.4921059	best: 0.4921059 (561)	total: 1m 29s	remaining: 3m 49s
562:	learn: 0.4916358	test: 0.4920945	best: 0.4920945 (562)	total: 1m 29s	remain

643:	learn: 0.4902449	test: 0.4909570	best: 0.4909570 (643)	total: 1m 43s	remaining: 3m 38s
644:	learn: 0.4902403	test: 0.4909576	best: 0.4909570 (643)	total: 1m 43s	remaining: 3m 38s
645:	learn: 0.4902292	test: 0.4909490	best: 0.4909490 (645)	total: 1m 44s	remaining: 3m 38s
646:	learn: 0.4902236	test: 0.4909473	best: 0.4909473 (646)	total: 1m 44s	remaining: 3m 38s
647:	learn: 0.4901905	test: 0.4909214	best: 0.4909214 (647)	total: 1m 44s	remaining: 3m 37s
648:	learn: 0.4901820	test: 0.4909194	best: 0.4909194 (648)	total: 1m 44s	remaining: 3m 37s
649:	learn: 0.4901783	test: 0.4909174	best: 0.4909174 (649)	total: 1m 44s	remaining: 3m 37s
650:	learn: 0.4901170	test: 0.4908532	best: 0.4908532 (650)	total: 1m 44s	remaining: 3m 37s
651:	learn: 0.4901024	test: 0.4908481	best: 0.4908481 (651)	total: 1m 45s	remaining: 3m 37s
652:	learn: 0.4900962	test: 0.4908424	best: 0.4908424 (652)	total: 1m 45s	remaining: 3m 37s
653:	learn: 0.4900709	test: 0.4908167	best: 0.4908167 (653)	total: 1m 45s	remain

733:	learn: 0.4887631	test: 0.4897824	best: 0.4897824 (733)	total: 1m 58s	remaining: 3m 24s
734:	learn: 0.4887606	test: 0.4897796	best: 0.4897796 (734)	total: 1m 58s	remaining: 3m 24s
735:	learn: 0.4887555	test: 0.4897802	best: 0.4897796 (734)	total: 1m 58s	remaining: 3m 23s
736:	learn: 0.4887453	test: 0.4897735	best: 0.4897735 (736)	total: 1m 58s	remaining: 3m 23s
737:	learn: 0.4887090	test: 0.4897388	best: 0.4897388 (737)	total: 1m 59s	remaining: 3m 23s
738:	learn: 0.4886972	test: 0.4897281	best: 0.4897281 (738)	total: 1m 59s	remaining: 3m 23s
739:	learn: 0.4886916	test: 0.4897284	best: 0.4897281 (738)	total: 1m 59s	remaining: 3m 23s
740:	learn: 0.4886740	test: 0.4897181	best: 0.4897181 (740)	total: 1m 59s	remaining: 3m 23s
741:	learn: 0.4886714	test: 0.4897166	best: 0.4897166 (741)	total: 1m 59s	remaining: 3m 22s
742:	learn: 0.4886580	test: 0.4897060	best: 0.4897060 (742)	total: 1m 59s	remaining: 3m 22s
743:	learn: 0.4886213	test: 0.4896744	best: 0.4896744 (743)	total: 1m 59s	remain

824:	learn: 0.4875546	test: 0.4888470	best: 0.4888470 (824)	total: 2m 13s	remaining: 3m 10s
825:	learn: 0.4875476	test: 0.4888444	best: 0.4888444 (825)	total: 2m 13s	remaining: 3m 10s
826:	learn: 0.4875339	test: 0.4888316	best: 0.4888316 (826)	total: 2m 14s	remaining: 3m 10s
827:	learn: 0.4875152	test: 0.4888135	best: 0.4888135 (827)	total: 2m 14s	remaining: 3m 10s
828:	learn: 0.4875048	test: 0.4888095	best: 0.4888095 (828)	total: 2m 14s	remaining: 3m 9s
829:	learn: 0.4875004	test: 0.4888056	best: 0.4888056 (829)	total: 2m 14s	remaining: 3m 9s
830:	learn: 0.4874688	test: 0.4887687	best: 0.4887687 (830)	total: 2m 14s	remaining: 3m 9s
831:	learn: 0.4874630	test: 0.4887697	best: 0.4887687 (830)	total: 2m 14s	remaining: 3m 9s
832:	learn: 0.4874580	test: 0.4887658	best: 0.4887658 (832)	total: 2m 15s	remaining: 3m 9s
833:	learn: 0.4874497	test: 0.4887552	best: 0.4887552 (833)	total: 2m 15s	remaining: 3m 9s
834:	learn: 0.4874448	test: 0.4887525	best: 0.4887525 (834)	total: 2m 15s	remaining: 3

916:	learn: 0.4865141	test: 0.4880202	best: 0.4880202 (916)	total: 2m 30s	remaining: 2m 57s
917:	learn: 0.4865079	test: 0.4880156	best: 0.4880156 (917)	total: 2m 30s	remaining: 2m 57s
918:	learn: 0.4865020	test: 0.4880145	best: 0.4880145 (918)	total: 2m 30s	remaining: 2m 56s
919:	learn: 0.4864920	test: 0.4880010	best: 0.4880010 (919)	total: 2m 30s	remaining: 2m 56s
920:	learn: 0.4864811	test: 0.4879961	best: 0.4879961 (920)	total: 2m 30s	remaining: 2m 56s
921:	learn: 0.4864765	test: 0.4879933	best: 0.4879933 (921)	total: 2m 30s	remaining: 2m 56s
922:	learn: 0.4864734	test: 0.4879907	best: 0.4879907 (922)	total: 2m 31s	remaining: 2m 56s
923:	learn: 0.4864711	test: 0.4879895	best: 0.4879895 (923)	total: 2m 31s	remaining: 2m 56s
924:	learn: 0.4864648	test: 0.4879863	best: 0.4879863 (924)	total: 2m 31s	remaining: 2m 55s
925:	learn: 0.4864438	test: 0.4879693	best: 0.4879693 (925)	total: 2m 31s	remaining: 2m 55s
926:	learn: 0.4864393	test: 0.4879682	best: 0.4879682 (926)	total: 2m 31s	remain

1005:	learn: 0.4856924	test: 0.4874704	best: 0.4874704 (1005)	total: 2m 45s	remaining: 2m 43s
1006:	learn: 0.4856832	test: 0.4874619	best: 0.4874619 (1006)	total: 2m 45s	remaining: 2m 43s
1007:	learn: 0.4856808	test: 0.4874617	best: 0.4874617 (1007)	total: 2m 45s	remaining: 2m 42s
1008:	learn: 0.4856723	test: 0.4874586	best: 0.4874586 (1008)	total: 2m 45s	remaining: 2m 42s
1009:	learn: 0.4856686	test: 0.4874570	best: 0.4874570 (1009)	total: 2m 45s	remaining: 2m 42s
1010:	learn: 0.4856600	test: 0.4874545	best: 0.4874545 (1010)	total: 2m 46s	remaining: 2m 42s
1011:	learn: 0.4856413	test: 0.4874427	best: 0.4874427 (1011)	total: 2m 46s	remaining: 2m 42s
1012:	learn: 0.4856400	test: 0.4874428	best: 0.4874427 (1011)	total: 2m 46s	remaining: 2m 42s
1013:	learn: 0.4856380	test: 0.4874419	best: 0.4874419 (1013)	total: 2m 46s	remaining: 2m 42s
1014:	learn: 0.4856201	test: 0.4874159	best: 0.4874159 (1014)	total: 2m 46s	remaining: 2m 42s
1015:	learn: 0.4856165	test: 0.4874147	best: 0.4874147 (1015

1093:	learn: 0.4849849	test: 0.4869822	best: 0.4869822 (1093)	total: 3m	remaining: 2m 29s
1094:	learn: 0.4849570	test: 0.4869531	best: 0.4869531 (1094)	total: 3m	remaining: 2m 29s
1095:	learn: 0.4849515	test: 0.4869488	best: 0.4869488 (1095)	total: 3m 1s	remaining: 2m 29s
1096:	learn: 0.4849391	test: 0.4869429	best: 0.4869429 (1096)	total: 3m 1s	remaining: 2m 29s
1097:	learn: 0.4849320	test: 0.4869402	best: 0.4869402 (1097)	total: 3m 1s	remaining: 2m 29s
1098:	learn: 0.4849281	test: 0.4869398	best: 0.4869398 (1098)	total: 3m 1s	remaining: 2m 28s
1099:	learn: 0.4849210	test: 0.4869349	best: 0.4869349 (1099)	total: 3m 1s	remaining: 2m 28s
1100:	learn: 0.4849179	test: 0.4869339	best: 0.4869339 (1100)	total: 3m 2s	remaining: 2m 28s
1101:	learn: 0.4849013	test: 0.4869213	best: 0.4869213 (1101)	total: 3m 2s	remaining: 2m 28s
1102:	learn: 0.4848929	test: 0.4869185	best: 0.4869185 (1102)	total: 3m 2s	remaining: 2m 28s
1103:	learn: 0.4848822	test: 0.4869125	best: 0.4869125 (1103)	total: 3m 2s	r

1182:	learn: 0.4842608	test: 0.4865141	best: 0.4865141 (1182)	total: 3m 16s	remaining: 2m 15s
1183:	learn: 0.4842580	test: 0.4865125	best: 0.4865125 (1183)	total: 3m 16s	remaining: 2m 15s
1184:	learn: 0.4842482	test: 0.4865045	best: 0.4865045 (1184)	total: 3m 16s	remaining: 2m 15s
1185:	learn: 0.4842360	test: 0.4864920	best: 0.4864920 (1185)	total: 3m 17s	remaining: 2m 15s
1186:	learn: 0.4842056	test: 0.4864524	best: 0.4864524 (1186)	total: 3m 17s	remaining: 2m 15s
1187:	learn: 0.4841962	test: 0.4864468	best: 0.4864468 (1187)	total: 3m 17s	remaining: 2m 14s
1188:	learn: 0.4841873	test: 0.4864365	best: 0.4864365 (1188)	total: 3m 17s	remaining: 2m 14s
1189:	learn: 0.4841820	test: 0.4864352	best: 0.4864352 (1189)	total: 3m 17s	remaining: 2m 14s
1190:	learn: 0.4841451	test: 0.4863971	best: 0.4863971 (1190)	total: 3m 17s	remaining: 2m 14s
1191:	learn: 0.4841406	test: 0.4863986	best: 0.4863971 (1190)	total: 3m 18s	remaining: 2m 14s
1192:	learn: 0.4841396	test: 0.4863982	best: 0.4863971 (1190

1271:	learn: 0.4834350	test: 0.4858993	best: 0.4858993 (1271)	total: 3m 31s	remaining: 2m
1272:	learn: 0.4834311	test: 0.4858985	best: 0.4858985 (1272)	total: 3m 31s	remaining: 2m
1273:	learn: 0.4834150	test: 0.4858858	best: 0.4858858 (1273)	total: 3m 31s	remaining: 2m
1274:	learn: 0.4834006	test: 0.4858738	best: 0.4858738 (1274)	total: 3m 31s	remaining: 2m
1275:	learn: 0.4833973	test: 0.4858728	best: 0.4858728 (1275)	total: 3m 31s	remaining: 2m
1276:	learn: 0.4833753	test: 0.4858550	best: 0.4858550 (1276)	total: 3m 31s	remaining: 1m 59s
1277:	learn: 0.4833681	test: 0.4858526	best: 0.4858526 (1277)	total: 3m 32s	remaining: 1m 59s
1278:	learn: 0.4833604	test: 0.4858502	best: 0.4858502 (1278)	total: 3m 32s	remaining: 1m 59s
1279:	learn: 0.4833583	test: 0.4858495	best: 0.4858495 (1279)	total: 3m 32s	remaining: 1m 59s
1280:	learn: 0.4833555	test: 0.4858477	best: 0.4858477 (1280)	total: 3m 32s	remaining: 1m 59s
1281:	learn: 0.4833496	test: 0.4858485	best: 0.4858477 (1280)	total: 3m 32s	rema

1360:	learn: 0.4828158	test: 0.4855603	best: 0.4855603 (1360)	total: 3m 45s	remaining: 1m 45s
1361:	learn: 0.4828088	test: 0.4855552	best: 0.4855552 (1361)	total: 3m 45s	remaining: 1m 45s
1362:	learn: 0.4827880	test: 0.4855345	best: 0.4855345 (1362)	total: 3m 45s	remaining: 1m 45s
1363:	learn: 0.4827862	test: 0.4855343	best: 0.4855343 (1363)	total: 3m 45s	remaining: 1m 45s
1364:	learn: 0.4827727	test: 0.4855270	best: 0.4855270 (1364)	total: 3m 46s	remaining: 1m 45s
1365:	learn: 0.4827715	test: 0.4855269	best: 0.4855269 (1365)	total: 3m 46s	remaining: 1m 44s
1366:	learn: 0.4827679	test: 0.4855231	best: 0.4855231 (1366)	total: 3m 46s	remaining: 1m 44s
1367:	learn: 0.4827620	test: 0.4855211	best: 0.4855211 (1367)	total: 3m 46s	remaining: 1m 44s
1368:	learn: 0.4827509	test: 0.4855115	best: 0.4855115 (1368)	total: 3m 46s	remaining: 1m 44s
1369:	learn: 0.4827459	test: 0.4855108	best: 0.4855108 (1369)	total: 3m 46s	remaining: 1m 44s
1370:	learn: 0.4827301	test: 0.4855054	best: 0.4855054 (1370

1448:	learn: 0.4821887	test: 0.4851931	best: 0.4851931 (1448)	total: 3m 59s	remaining: 1m 31s
1449:	learn: 0.4821876	test: 0.4851927	best: 0.4851927 (1449)	total: 3m 59s	remaining: 1m 30s
1450:	learn: 0.4821778	test: 0.4851878	best: 0.4851878 (1450)	total: 3m 59s	remaining: 1m 30s
1451:	learn: 0.4821690	test: 0.4851824	best: 0.4851824 (1451)	total: 3m 59s	remaining: 1m 30s
1452:	learn: 0.4821569	test: 0.4851774	best: 0.4851774 (1452)	total: 4m	remaining: 1m 30s
1453:	learn: 0.4821514	test: 0.4851767	best: 0.4851767 (1453)	total: 4m	remaining: 1m 30s
1454:	learn: 0.4821404	test: 0.4851727	best: 0.4851727 (1454)	total: 4m	remaining: 1m 30s
1455:	learn: 0.4821250	test: 0.4851593	best: 0.4851593 (1455)	total: 4m	remaining: 1m 29s
1456:	learn: 0.4821153	test: 0.4851545	best: 0.4851545 (1456)	total: 4m	remaining: 1m 29s
1457:	learn: 0.4821140	test: 0.4851543	best: 0.4851543 (1457)	total: 4m	remaining: 1m 29s
1458:	learn: 0.4821087	test: 0.4851511	best: 0.4851511 (1458)	total: 4m 1s	remaining

1537:	learn: 0.4815893	test: 0.4849066	best: 0.4849021 (1533)	total: 4m 13s	remaining: 1m 16s
1538:	learn: 0.4815852	test: 0.4849083	best: 0.4849021 (1533)	total: 4m 13s	remaining: 1m 15s
1539:	learn: 0.4815780	test: 0.4849017	best: 0.4849017 (1539)	total: 4m 13s	remaining: 1m 15s
1540:	learn: 0.4815731	test: 0.4849010	best: 0.4849010 (1540)	total: 4m 13s	remaining: 1m 15s
1541:	learn: 0.4815654	test: 0.4848987	best: 0.4848987 (1541)	total: 4m 13s	remaining: 1m 15s
1542:	learn: 0.4815615	test: 0.4848977	best: 0.4848977 (1542)	total: 4m 14s	remaining: 1m 15s
1543:	learn: 0.4815537	test: 0.4848919	best: 0.4848919 (1543)	total: 4m 14s	remaining: 1m 15s
1544:	learn: 0.4815495	test: 0.4848912	best: 0.4848912 (1544)	total: 4m 14s	remaining: 1m 14s
1545:	learn: 0.4815481	test: 0.4848912	best: 0.4848912 (1545)	total: 4m 14s	remaining: 1m 14s
1546:	learn: 0.4815424	test: 0.4848930	best: 0.4848912 (1545)	total: 4m 14s	remaining: 1m 14s
1547:	learn: 0.4815388	test: 0.4848926	best: 0.4848912 (1545

1625:	learn: 0.4809680	test: 0.4845837	best: 0.4845837 (1625)	total: 4m 29s	remaining: 1m 1s
1626:	learn: 0.4809556	test: 0.4845802	best: 0.4845802 (1626)	total: 4m 29s	remaining: 1m 1s
1627:	learn: 0.4809466	test: 0.4845734	best: 0.4845734 (1627)	total: 4m 29s	remaining: 1m 1s
1628:	learn: 0.4809422	test: 0.4845724	best: 0.4845724 (1628)	total: 4m 29s	remaining: 1m 1s
1629:	learn: 0.4809294	test: 0.4845602	best: 0.4845602 (1629)	total: 4m 30s	remaining: 1m 1s
1630:	learn: 0.4809235	test: 0.4845573	best: 0.4845573 (1630)	total: 4m 30s	remaining: 1m 1s
1631:	learn: 0.4809175	test: 0.4845532	best: 0.4845532 (1631)	total: 4m 30s	remaining: 1m
1632:	learn: 0.4809081	test: 0.4845439	best: 0.4845439 (1632)	total: 4m 30s	remaining: 1m
1633:	learn: 0.4808984	test: 0.4845439	best: 0.4845439 (1633)	total: 4m 30s	remaining: 1m
1634:	learn: 0.4808921	test: 0.4845442	best: 0.4845439 (1633)	total: 4m 30s	remaining: 1m
1635:	learn: 0.4808860	test: 0.4845447	best: 0.4845439 (1633)	total: 4m 31s	remain

1714:	learn: 0.4804761	test: 0.4843469	best: 0.4843469 (1714)	total: 4m 45s	remaining: 47.4s
1715:	learn: 0.4804672	test: 0.4843414	best: 0.4843414 (1715)	total: 4m 45s	remaining: 47.2s
1716:	learn: 0.4804612	test: 0.4843373	best: 0.4843373 (1716)	total: 4m 45s	remaining: 47s
1717:	learn: 0.4804587	test: 0.4843362	best: 0.4843362 (1717)	total: 4m 45s	remaining: 46.9s
1718:	learn: 0.4804474	test: 0.4843302	best: 0.4843302 (1718)	total: 4m 45s	remaining: 46.7s
1719:	learn: 0.4804392	test: 0.4843269	best: 0.4843269 (1719)	total: 4m 45s	remaining: 46.5s
1720:	learn: 0.4804348	test: 0.4843286	best: 0.4843269 (1719)	total: 4m 46s	remaining: 46.4s
1721:	learn: 0.4804284	test: 0.4843287	best: 0.4843269 (1719)	total: 4m 46s	remaining: 46.2s
1722:	learn: 0.4804206	test: 0.4843274	best: 0.4843269 (1719)	total: 4m 46s	remaining: 46s
1723:	learn: 0.4804165	test: 0.4843247	best: 0.4843247 (1723)	total: 4m 46s	remaining: 45.9s
1724:	learn: 0.4804141	test: 0.4843256	best: 0.4843247 (1723)	total: 4m 46

1803:	learn: 0.4799075	test: 0.4840287	best: 0.4840287 (1803)	total: 4m 59s	remaining: 32.6s
1804:	learn: 0.4799033	test: 0.4840245	best: 0.4840245 (1804)	total: 4m 59s	remaining: 32.4s
1805:	learn: 0.4798981	test: 0.4840207	best: 0.4840207 (1805)	total: 5m	remaining: 32.2s
1806:	learn: 0.4798865	test: 0.4840106	best: 0.4840106 (1806)	total: 5m	remaining: 32.1s
1807:	learn: 0.4798838	test: 0.4840099	best: 0.4840099 (1807)	total: 5m	remaining: 31.9s
1808:	learn: 0.4798630	test: 0.4839922	best: 0.4839922 (1808)	total: 5m	remaining: 31.7s
1809:	learn: 0.4798575	test: 0.4839896	best: 0.4839896 (1809)	total: 5m	remaining: 31.6s
1810:	learn: 0.4798540	test: 0.4839869	best: 0.4839869 (1810)	total: 5m	remaining: 31.4s
1811:	learn: 0.4798512	test: 0.4839864	best: 0.4839864 (1811)	total: 5m 1s	remaining: 31.2s
1812:	learn: 0.4798471	test: 0.4839863	best: 0.4839863 (1812)	total: 5m 1s	remaining: 31.1s
1813:	learn: 0.4798424	test: 0.4839818	best: 0.4839818 (1813)	total: 5m 1s	remaining: 30.9s
1814

1894:	learn: 0.4793842	test: 0.4837394	best: 0.4837376 (1892)	total: 5m 15s	remaining: 17.5s
1895:	learn: 0.4793805	test: 0.4837388	best: 0.4837376 (1892)	total: 5m 15s	remaining: 17.3s
1896:	learn: 0.4793755	test: 0.4837367	best: 0.4837367 (1896)	total: 5m 15s	remaining: 17.1s
1897:	learn: 0.4793675	test: 0.4837314	best: 0.4837314 (1897)	total: 5m 15s	remaining: 17s
1898:	learn: 0.4793587	test: 0.4837290	best: 0.4837290 (1898)	total: 5m 16s	remaining: 16.8s
1899:	learn: 0.4793564	test: 0.4837294	best: 0.4837290 (1898)	total: 5m 16s	remaining: 16.6s
1900:	learn: 0.4793553	test: 0.4837292	best: 0.4837290 (1898)	total: 5m 16s	remaining: 16.5s
1901:	learn: 0.4793468	test: 0.4837241	best: 0.4837241 (1901)	total: 5m 16s	remaining: 16.3s
1902:	learn: 0.4793396	test: 0.4837215	best: 0.4837215 (1902)	total: 5m 16s	remaining: 16.1s
1903:	learn: 0.4793378	test: 0.4837210	best: 0.4837210 (1903)	total: 5m 16s	remaining: 16s
1904:	learn: 0.4793356	test: 0.4837222	best: 0.4837210 (1903)	total: 5m 17

1984:	learn: 0.4788904	test: 0.4834876	best: 0.4834876 (1984)	total: 5m 30s	remaining: 2.5s
1985:	learn: 0.4788855	test: 0.4834813	best: 0.4834813 (1985)	total: 5m 31s	remaining: 2.33s
1986:	learn: 0.4788821	test: 0.4834806	best: 0.4834806 (1986)	total: 5m 31s	remaining: 2.17s
1987:	learn: 0.4788641	test: 0.4834673	best: 0.4834673 (1987)	total: 5m 31s	remaining: 2s
1988:	learn: 0.4788451	test: 0.4834531	best: 0.4834531 (1988)	total: 5m 31s	remaining: 1.83s
1989:	learn: 0.4788375	test: 0.4834511	best: 0.4834511 (1989)	total: 5m 31s	remaining: 1.67s
1990:	learn: 0.4788343	test: 0.4834500	best: 0.4834500 (1990)	total: 5m 32s	remaining: 1.5s
1991:	learn: 0.4788304	test: 0.4834496	best: 0.4834496 (1991)	total: 5m 32s	remaining: 1.33s
1992:	learn: 0.4788211	test: 0.4834480	best: 0.4834480 (1992)	total: 5m 32s	remaining: 1.17s
1993:	learn: 0.4788080	test: 0.4834406	best: 0.4834406 (1993)	total: 5m 32s	remaining: 1s
1994:	learn: 0.4788032	test: 0.4834384	best: 0.4834384 (1994)	total: 5m 32s	re

<catboost.core.CatBoostRegressor at 0x7f5b1e0490b8>

In [20]:
test_preds = model.predict(data['test'][columns])
data['test']['visitors'] = np.clip(np.expm1(test_preds), a_min=0, a_max=10000)

In [21]:
test = data['test'][['id', 'visitors']]
test = test.groupby('id', as_index=False).mean()

In [22]:
test.to_csv('result/submission.csv', index=False)