# Restaurant Visitor Forecasting by GooseLearning

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
from sklearn import metrics
from sklearn.model_selection import train_test_split
import catboost

pd.options.display.max_columns = 1000

%matplotlib inline

In [3]:
data = {
    'stores':  pd.read_csv('result/stores.csv'),
    'reserve': pd.read_csv('result/reserve.csv'),
    'dates':   pd.read_csv('result/dates.csv'),

    'train': pd.read_csv('dataset/air_visit_data.csv'),
    'test':  pd.read_csv('dataset/sample_submission.csv'),
}

## Обработка данных

### Обрабатываем `train` и `test`

Приводим к единому формату

In [4]:
data['train']['id'] = data['train']['air_store_id'] + '_' + data['train']['visit_date']

data['test']['air_store_id'] = [id[:20] for id in data['test']['id']]
data['test']['visit_date']   = [id[21:] for id in data['test']['id']]

Добавляем дополнительные колонки даты

In [5]:
data['train'] = pd.merge(data['train'], data['dates'], how='left', on=['visit_date'])
data['test']  = pd.merge(data['test'],  data['dates'], how='left', on=['visit_date'])

In [6]:
print('train', data['train'].shape)
print('test ', data['test'].shape)

data['train'].head()

train (252108, 12)
test  (32019, 12)


Unnamed: 0,air_store_id,visit_date,visitors,id,visit_holiday,visit_day,visit_month,visit_year,visit_dow,visit_work,visit_weekend,visit_date_int
0,air_ba937bf13d40fb24,2016-01-13,25,air_ba937bf13d40fb24_2016-01-13,0,13,1,2016,2,True,False,20160113
1,air_ba937bf13d40fb24,2016-01-14,32,air_ba937bf13d40fb24_2016-01-14,0,14,1,2016,3,True,False,20160114
2,air_ba937bf13d40fb24,2016-01-15,29,air_ba937bf13d40fb24_2016-01-15,0,15,1,2016,4,True,False,20160115
3,air_ba937bf13d40fb24,2016-01-16,22,air_ba937bf13d40fb24_2016-01-16,0,16,1,2016,5,False,True,20160116
4,air_ba937bf13d40fb24,2016-01-18,6,air_ba937bf13d40fb24_2016-01-18,0,18,1,2016,0,True,False,20160118


### Обрабатываем `reserve`

Добавляем `reserve` в `train` / `test` 

In [7]:
for ds in ['train', 'test']:
    data[ds] = pd.merge(data[ds], data['reserve'], how='left', on=['air_store_id', 'visit_date'])

In [8]:
print('train', data['train'].shape)
print('test ', data['test'].shape)

data['train'].head()

train (252108, 14)
test  (32019, 14)


Unnamed: 0,air_store_id,visit_date,visitors,id,visit_holiday,visit_day,visit_month,visit_year,visit_dow,visit_work,visit_weekend,visit_date_int,reserve_visitors,reserve_visitors_competitor
0,air_ba937bf13d40fb24,2016-01-13,25,air_ba937bf13d40fb24_2016-01-13,0,13,1,2016,2,True,False,20160113,0,7438
1,air_ba937bf13d40fb24,2016-01-14,32,air_ba937bf13d40fb24_2016-01-14,0,14,1,2016,3,True,False,20160114,0,8052
2,air_ba937bf13d40fb24,2016-01-15,29,air_ba937bf13d40fb24_2016-01-15,0,15,1,2016,4,True,False,20160115,0,21468
3,air_ba937bf13d40fb24,2016-01-16,22,air_ba937bf13d40fb24_2016-01-16,0,16,1,2016,5,False,True,20160116,0,19082
4,air_ba937bf13d40fb24,2016-01-18,6,air_ba937bf13d40fb24_2016-01-18,0,18,1,2016,0,True,False,20160118,0,6035


### Обрабатываем `stores`

Добавляем информацию о посещениях

In [9]:
for op_type in ['sum', 'mean', 'median', 'min', 'max', 'count']:
    tmp = getattr(
        data['train']
            .groupby(['air_store_id', 'visit_dow'], as_index=False)
            [['visitors']],
        op_type,
    )().rename(columns={
        'visitors': 'visitors_' + op_type,
    })
    
    data['stores'] = pd.merge(data['stores'], tmp, how='left', on=['air_store_id', 'visit_dow'])

In [10]:
print('stores', data['stores'].shape)

data['stores'].head()

stores (5803, 13)


Unnamed: 0,air_store_id,visit_dow,air_genre_name,air_area_name,latitude,longitude,city,visitors_sum,visitors_mean,visitors_median,visitors_min,visitors_max,visitors_count
0,air_0f0cdeee6c9bf3d7,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Tokyo,609.0,16.916667,15.0,2.0,39.0,36.0
1,air_7cc17a324ae5c7dc,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Tokyo,219.0,9.125,7.0,1.0,34.0,24.0
2,air_fee8dcf4d619598e,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Tokyo,813.0,20.325,18.5,7.0,39.0,40.0
3,air_a17f0778617c76e2,0,Italian/French,Hyōgo-ken Kōbe-shi Kumoidōri,34.695124,135.197852,Tokyo,158.0,26.333333,27.5,19.0,31.0,6.0
4,air_83db5aff8f50478e,0,Italian/French,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,202.0,6.516129,6.0,1.0,13.0,31.0


Объединияем `train` / `test` и `stores`

In [11]:
for ds in ['train', 'test']:
    data[ds] = pd.merge(data[ds], data['stores'], how='left', on=['air_store_id', 'visit_dow'])

In [12]:
print('train', data['train'].shape)
print('test ', data['test'].shape)

data['train'].head()

train (252108, 25)
test  (32019, 25)


Unnamed: 0,air_store_id,visit_date,visitors,id,visit_holiday,visit_day,visit_month,visit_year,visit_dow,visit_work,visit_weekend,visit_date_int,reserve_visitors,reserve_visitors_competitor,air_genre_name,air_area_name,latitude,longitude,city,visitors_sum,visitors_mean,visitors_median,visitors_min,visitors_max,visitors_count
0,air_ba937bf13d40fb24,2016-01-13,25,air_ba937bf13d40fb24_2016-01-13,0,13,1,2016,2,True,False,20160113,0,7438,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,1526.0,23.84375,25.0,7.0,57.0,64.0
1,air_ba937bf13d40fb24,2016-01-14,32,air_ba937bf13d40fb24_2016-01-14,0,14,1,2016,3,True,False,20160114,0,8052,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,1319.0,20.292308,21.0,2.0,54.0,65.0
2,air_ba937bf13d40fb24,2016-01-15,29,air_ba937bf13d40fb24_2016-01-15,0,15,1,2016,4,True,False,20160115,0,21468,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,2258.0,34.738462,35.0,4.0,61.0,65.0
3,air_ba937bf13d40fb24,2016-01-16,22,air_ba937bf13d40fb24_2016-01-16,0,16,1,2016,5,False,True,20160116,0,19082,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,1825.0,27.651515,27.0,6.0,53.0,66.0
4,air_ba937bf13d40fb24,2016-01-18,6,air_ba937bf13d40fb24_2016-01-18,0,18,1,2016,0,True,False,20160118,0,6035,Dining bar,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599,Tokyo,784.0,13.754386,12.0,2.0,34.0,57.0


## Обучение

In [13]:
columns = pd.Series([col for col in data['train'] if col not in ['id', 'visit_date', 'visitors']])
cat_features = columns[columns.isin([
    'air_store_id',
    'visit_holiday',
    'visit_day',
    'visit_month',
    'visit_year',
    'visit_dow',
    'visit_work',
    'visit_weekend',
    'air_genre_name',
    'air_area_name',
    'city',
])].index

In [14]:
X_all = data['train'][columns]
y_all = np.log1p(data['train']['visitors'])

stratify = np.clip(data['train']['visitors'], a_min=0, a_max=100)

X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all,
    test_size=0.2, 
    random_state=1234,
    stratify=stratify,
)

train_pool = catboost.Pool(
    X_train, y_train,
    cat_features=cat_features.tolist(),
)

test_pool = catboost.Pool(
    X_test, y_test,
    cat_features=cat_features.tolist(),
)

In [15]:
model = catboost.CatBoostRegressor(
     iterations=1000,
)

model.fit(
    train_pool,
    eval_set=test_pool,
)

0:	learn: 2.8324276	test: 2.8321517	best: 2.8321517 (0)	total: 687ms	remaining: 11m 25s
1:	learn: 2.7505736	test: 2.7502500	best: 2.7502500 (1)	total: 945ms	remaining: 7m 51s
2:	learn: 2.6712556	test: 2.6708664	best: 2.6708664 (2)	total: 1.09s	remaining: 6m 4s
3:	learn: 2.5944002	test: 2.5939702	best: 2.5939702 (3)	total: 1.26s	remaining: 5m 14s
4:	learn: 2.5199395	test: 2.5194777	best: 2.5194777 (4)	total: 1.45s	remaining: 4m 49s
5:	learn: 2.4478082	test: 2.4473130	best: 2.4473130 (5)	total: 1.65s	remaining: 4m 33s
6:	learn: 2.3779177	test: 2.3773667	best: 2.3773667 (6)	total: 1.82s	remaining: 4m 18s
7:	learn: 2.3102042	test: 2.3096067	best: 2.3096067 (7)	total: 1.99s	remaining: 4m 6s
8:	learn: 2.2446429	test: 2.2439989	best: 2.2439989 (8)	total: 2.13s	remaining: 3m 54s
9:	learn: 2.1811280	test: 2.1804612	best: 2.1804612 (9)	total: 2.29s	remaining: 3m 46s
10:	learn: 2.1196239	test: 2.1189220	best: 2.1189220 (10)	total: 2.45s	remaining: 3m 39s
11:	learn: 2.0600591	test: 2.0592968	best:

93:	learn: 0.5369003	test: 0.5352475	best: 0.5352475 (93)	total: 22.6s	remaining: 3m 38s
94:	learn: 0.5352931	test: 0.5336655	best: 0.5336655 (94)	total: 22.8s	remaining: 3m 37s
95:	learn: 0.5337865	test: 0.5321621	best: 0.5321621 (95)	total: 23s	remaining: 3m 36s
96:	learn: 0.5323609	test: 0.5307554	best: 0.5307554 (96)	total: 23.2s	remaining: 3m 35s
97:	learn: 0.5310181	test: 0.5294497	best: 0.5294497 (97)	total: 23.3s	remaining: 3m 34s
98:	learn: 0.5297311	test: 0.5281900	best: 0.5281900 (98)	total: 23.5s	remaining: 3m 33s
99:	learn: 0.5285261	test: 0.5270097	best: 0.5270097 (99)	total: 23.7s	remaining: 3m 32s
100:	learn: 0.5273650	test: 0.5258736	best: 0.5258736 (100)	total: 23.8s	remaining: 3m 32s
101:	learn: 0.5262849	test: 0.5248059	best: 0.5248059 (101)	total: 24s	remaining: 3m 31s
102:	learn: 0.5252677	test: 0.5238149	best: 0.5238149 (102)	total: 24.2s	remaining: 3m 30s
103:	learn: 0.5242702	test: 0.5228575	best: 0.5228575 (103)	total: 24.3s	remaining: 3m 29s
104:	learn: 0.523

185:	learn: 0.5041641	test: 0.5037039	best: 0.5037039 (185)	total: 43.5s	remaining: 3m 10s
186:	learn: 0.5041138	test: 0.5036588	best: 0.5036588 (186)	total: 43.8s	remaining: 3m 10s
187:	learn: 0.5040445	test: 0.5035935	best: 0.5035935 (187)	total: 44.1s	remaining: 3m 10s
188:	learn: 0.5040023	test: 0.5035599	best: 0.5035599 (188)	total: 44.3s	remaining: 3m 10s
189:	learn: 0.5039511	test: 0.5035225	best: 0.5035225 (189)	total: 44.7s	remaining: 3m 10s
190:	learn: 0.5039130	test: 0.5034957	best: 0.5034957 (190)	total: 45s	remaining: 3m 10s
191:	learn: 0.5038700	test: 0.5034570	best: 0.5034570 (191)	total: 45.3s	remaining: 3m 10s
192:	learn: 0.5038259	test: 0.5034310	best: 0.5034310 (192)	total: 45.6s	remaining: 3m 10s
193:	learn: 0.5037756	test: 0.5033932	best: 0.5033932 (193)	total: 45.9s	remaining: 3m 10s
194:	learn: 0.5036949	test: 0.5033129	best: 0.5033129 (194)	total: 46.2s	remaining: 3m 10s
195:	learn: 0.5036087	test: 0.5032385	best: 0.5032385 (195)	total: 46.5s	remaining: 3m 10s
1

277:	learn: 0.4994996	test: 0.4996439	best: 0.4996439 (277)	total: 1m 9s	remaining: 2m 59s
278:	learn: 0.4994428	test: 0.4995668	best: 0.4995668 (278)	total: 1m 9s	remaining: 2m 59s
279:	learn: 0.4993801	test: 0.4994996	best: 0.4994996 (279)	total: 1m 9s	remaining: 2m 58s
280:	learn: 0.4993652	test: 0.4994959	best: 0.4994959 (280)	total: 1m 9s	remaining: 2m 58s
281:	learn: 0.4993477	test: 0.4994866	best: 0.4994866 (281)	total: 1m 9s	remaining: 2m 57s
282:	learn: 0.4993043	test: 0.4994681	best: 0.4994681 (282)	total: 1m 10s	remaining: 2m 57s
283:	learn: 0.4992656	test: 0.4994190	best: 0.4994190 (283)	total: 1m 10s	remaining: 2m 57s
284:	learn: 0.4992202	test: 0.4993779	best: 0.4993779 (284)	total: 1m 10s	remaining: 2m 56s
285:	learn: 0.4991896	test: 0.4993501	best: 0.4993501 (285)	total: 1m 10s	remaining: 2m 56s
286:	learn: 0.4991473	test: 0.4993075	best: 0.4993075 (286)	total: 1m 10s	remaining: 2m 55s
287:	learn: 0.4990705	test: 0.4992219	best: 0.4992219 (287)	total: 1m 10s	remaining: 

368:	learn: 0.4961729	test: 0.4965429	best: 0.4965429 (368)	total: 1m 26s	remaining: 2m 27s
369:	learn: 0.4961296	test: 0.4965023	best: 0.4965023 (369)	total: 1m 26s	remaining: 2m 27s
370:	learn: 0.4960928	test: 0.4964498	best: 0.4964498 (370)	total: 1m 26s	remaining: 2m 27s
371:	learn: 0.4960742	test: 0.4964387	best: 0.4964387 (371)	total: 1m 26s	remaining: 2m 26s
372:	learn: 0.4960263	test: 0.4963773	best: 0.4963773 (372)	total: 1m 27s	remaining: 2m 26s
373:	learn: 0.4960102	test: 0.4963667	best: 0.4963667 (373)	total: 1m 27s	remaining: 2m 26s
374:	learn: 0.4959773	test: 0.4963295	best: 0.4963295 (374)	total: 1m 27s	remaining: 2m 26s
375:	learn: 0.4958778	test: 0.4962379	best: 0.4962379 (375)	total: 1m 27s	remaining: 2m 25s
376:	learn: 0.4957819	test: 0.4961372	best: 0.4961372 (376)	total: 1m 27s	remaining: 2m 25s
377:	learn: 0.4957718	test: 0.4961297	best: 0.4961297 (377)	total: 1m 28s	remaining: 2m 25s
378:	learn: 0.4957628	test: 0.4961267	best: 0.4961267 (378)	total: 1m 28s	remain

458:	learn: 0.4932390	test: 0.4937764	best: 0.4937764 (458)	total: 1m 48s	remaining: 2m 7s
459:	learn: 0.4932063	test: 0.4937556	best: 0.4937556 (459)	total: 1m 48s	remaining: 2m 7s
460:	learn: 0.4931868	test: 0.4937319	best: 0.4937319 (460)	total: 1m 48s	remaining: 2m 7s
461:	learn: 0.4931558	test: 0.4937104	best: 0.4937104 (461)	total: 1m 49s	remaining: 2m 7s
462:	learn: 0.4931071	test: 0.4936627	best: 0.4936627 (462)	total: 1m 49s	remaining: 2m 6s
463:	learn: 0.4930594	test: 0.4936032	best: 0.4936032 (463)	total: 1m 49s	remaining: 2m 6s
464:	learn: 0.4930532	test: 0.4936046	best: 0.4936032 (463)	total: 1m 49s	remaining: 2m 6s
465:	learn: 0.4929699	test: 0.4935284	best: 0.4935284 (465)	total: 1m 49s	remaining: 2m 5s
466:	learn: 0.4929637	test: 0.4935253	best: 0.4935253 (466)	total: 1m 50s	remaining: 2m 5s
467:	learn: 0.4929544	test: 0.4935204	best: 0.4935204 (467)	total: 1m 50s	remaining: 2m 5s
468:	learn: 0.4929211	test: 0.4934847	best: 0.4934847 (468)	total: 1m 50s	remaining: 2m 5s

549:	learn: 0.4912939	test: 0.4919015	best: 0.4919015 (549)	total: 2m 7s	remaining: 1m 44s
550:	learn: 0.4912775	test: 0.4918851	best: 0.4918851 (550)	total: 2m 8s	remaining: 1m 44s
551:	learn: 0.4912567	test: 0.4918692	best: 0.4918692 (551)	total: 2m 8s	remaining: 1m 44s
552:	learn: 0.4912445	test: 0.4918619	best: 0.4918619 (552)	total: 2m 8s	remaining: 1m 44s
553:	learn: 0.4912371	test: 0.4918615	best: 0.4918615 (553)	total: 2m 9s	remaining: 1m 43s
554:	learn: 0.4912193	test: 0.4918383	best: 0.4918383 (554)	total: 2m 9s	remaining: 1m 43s
555:	learn: 0.4912040	test: 0.4918178	best: 0.4918178 (555)	total: 2m 9s	remaining: 1m 43s
556:	learn: 0.4911891	test: 0.4918045	best: 0.4918045 (556)	total: 2m 9s	remaining: 1m 43s
557:	learn: 0.4911803	test: 0.4918031	best: 0.4918031 (557)	total: 2m 9s	remaining: 1m 42s
558:	learn: 0.4911629	test: 0.4917935	best: 0.4917935 (558)	total: 2m 10s	remaining: 1m 42s
559:	learn: 0.4911340	test: 0.4917731	best: 0.4917731 (559)	total: 2m 10s	remaining: 1m 4

639:	learn: 0.4896413	test: 0.4904932	best: 0.4904932 (639)	total: 2m 30s	remaining: 1m 24s
640:	learn: 0.4895768	test: 0.4904244	best: 0.4904244 (640)	total: 2m 30s	remaining: 1m 24s
641:	learn: 0.4895712	test: 0.4904219	best: 0.4904219 (641)	total: 2m 30s	remaining: 1m 23s
642:	learn: 0.4895427	test: 0.4904004	best: 0.4904004 (642)	total: 2m 30s	remaining: 1m 23s
643:	learn: 0.4895174	test: 0.4903771	best: 0.4903771 (643)	total: 2m 31s	remaining: 1m 23s
644:	learn: 0.4895116	test: 0.4903736	best: 0.4903736 (644)	total: 2m 31s	remaining: 1m 23s
645:	learn: 0.4895021	test: 0.4903701	best: 0.4903701 (645)	total: 2m 31s	remaining: 1m 23s
646:	learn: 0.4894929	test: 0.4903655	best: 0.4903655 (646)	total: 2m 31s	remaining: 1m 22s
647:	learn: 0.4894506	test: 0.4903225	best: 0.4903225 (647)	total: 2m 32s	remaining: 1m 22s
648:	learn: 0.4894394	test: 0.4903209	best: 0.4903209 (648)	total: 2m 32s	remaining: 1m 22s
649:	learn: 0.4894210	test: 0.4903053	best: 0.4903053 (649)	total: 2m 32s	remain

729:	learn: 0.4882970	test: 0.4893718	best: 0.4893718 (729)	total: 2m 50s	remaining: 1m 3s
730:	learn: 0.4882938	test: 0.4893686	best: 0.4893686 (730)	total: 2m 50s	remaining: 1m 2s
731:	learn: 0.4882850	test: 0.4893653	best: 0.4893653 (731)	total: 2m 50s	remaining: 1m 2s
732:	learn: 0.4882607	test: 0.4893430	best: 0.4893430 (732)	total: 2m 51s	remaining: 1m 2s
733:	learn: 0.4882589	test: 0.4893444	best: 0.4893430 (732)	total: 2m 51s	remaining: 1m 2s
734:	learn: 0.4882481	test: 0.4893366	best: 0.4893366 (734)	total: 2m 51s	remaining: 1m 1s
735:	learn: 0.4882233	test: 0.4893174	best: 0.4893174 (735)	total: 2m 51s	remaining: 1m 1s
736:	learn: 0.4882139	test: 0.4893128	best: 0.4893128 (736)	total: 2m 52s	remaining: 1m 1s
737:	learn: 0.4882100	test: 0.4893123	best: 0.4893123 (737)	total: 2m 52s	remaining: 1m 1s
738:	learn: 0.4882017	test: 0.4893058	best: 0.4893058 (738)	total: 2m 52s	remaining: 1m
739:	learn: 0.4881989	test: 0.4893070	best: 0.4893058 (738)	total: 2m 52s	remaining: 1m
740:	

822:	learn: 0.4870784	test: 0.4883616	best: 0.4883616 (822)	total: 3m 11s	remaining: 41.2s
823:	learn: 0.4870680	test: 0.4883554	best: 0.4883554 (823)	total: 3m 11s	remaining: 41s
824:	learn: 0.4870626	test: 0.4883541	best: 0.4883541 (824)	total: 3m 12s	remaining: 40.8s
825:	learn: 0.4870497	test: 0.4883458	best: 0.4883458 (825)	total: 3m 12s	remaining: 40.5s
826:	learn: 0.4870383	test: 0.4883318	best: 0.4883318 (826)	total: 3m 12s	remaining: 40.3s
827:	learn: 0.4870344	test: 0.4883292	best: 0.4883292 (827)	total: 3m 12s	remaining: 40s
828:	learn: 0.4870311	test: 0.4883296	best: 0.4883292 (827)	total: 3m 12s	remaining: 39.8s
829:	learn: 0.4870294	test: 0.4883288	best: 0.4883288 (829)	total: 3m 13s	remaining: 39.6s
830:	learn: 0.4870257	test: 0.4883268	best: 0.4883268 (830)	total: 3m 13s	remaining: 39.4s
831:	learn: 0.4870115	test: 0.4883188	best: 0.4883188 (831)	total: 3m 13s	remaining: 39.2s
832:	learn: 0.4869986	test: 0.4883124	best: 0.4883124 (832)	total: 3m 14s	remaining: 38.9s
833

913:	learn: 0.4861454	test: 0.4877026	best: 0.4877026 (913)	total: 3m 35s	remaining: 20.3s
914:	learn: 0.4861112	test: 0.4876736	best: 0.4876736 (914)	total: 3m 35s	remaining: 20s
915:	learn: 0.4860929	test: 0.4876543	best: 0.4876543 (915)	total: 3m 36s	remaining: 19.8s
916:	learn: 0.4860711	test: 0.4876328	best: 0.4876328 (916)	total: 3m 36s	remaining: 19.6s
917:	learn: 0.4860643	test: 0.4876304	best: 0.4876304 (917)	total: 3m 36s	remaining: 19.4s
918:	learn: 0.4860511	test: 0.4876191	best: 0.4876191 (918)	total: 3m 36s	remaining: 19.1s
919:	learn: 0.4860483	test: 0.4876191	best: 0.4876191 (918)	total: 3m 37s	remaining: 18.9s
920:	learn: 0.4860421	test: 0.4876198	best: 0.4876191 (918)	total: 3m 37s	remaining: 18.7s
921:	learn: 0.4860399	test: 0.4876186	best: 0.4876186 (921)	total: 3m 37s	remaining: 18.4s
922:	learn: 0.4860259	test: 0.4876106	best: 0.4876106 (922)	total: 3m 38s	remaining: 18.2s
923:	learn: 0.4860195	test: 0.4876065	best: 0.4876065 (923)	total: 3m 38s	remaining: 18s
924

<catboost.core.CatBoostRegressor at 0x7faac5228a58>

In [19]:
test_preds = model.predict(data['test'][columns])
data['test']['visitors'] = np.clip(np.expm1(test_preds), a_min=0, a_max=10000)

In [20]:
test = data['test'][['id', 'visitors']]
test = test.groupby('id', as_index=False).mean()

In [21]:
test.to_csv('result/submission.csv', index=False)