In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, classification_report, make_scorer
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

In [3]:
!pip install catboost

Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/7e/c1/c1c4707013f9e2f8a96899dd3a87f66c9167d6d776a6dc8fe7ec8678d446/catboost-0.24.3-cp36-none-manylinux1_x86_64.whl (66.3MB)
[K     |████████████████████████████████| 66.3MB 53kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.24.3


In [4]:
from catboost import CatBoostClassifier, Pool

In [4]:
clf = CatBoostClassifier(loss_function='MultiClass')
params = {'iterations': [1000],
          'depth': [6],
          'leaf_estimation_iterations': [10],
          'random_seed': [42]
         }
clf_grid = GridSearchCV(estimator=clf, param_grid=params, scoring='f1_weighted', cv=5)

In [None]:
train = pd.read_csv('train_dropped_290_without_nans.csv')
test = pd.read_csv('test_dropped_290_without_nans.csv')
print(train.shape, test.shape)

(2828, 294) (939, 293)


In [None]:
folds = 3
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = RandomizedSearchCV(clf, param_distributions=params, n_iter=param_comb, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train,y_train), verbose=3, random_state=42 )
random_search.fit(train,y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed: 41.4min finished


0:	learn: 3.1229551	total: 1.22s	remaining: 16m 12s
1:	learn: 2.9682540	total: 2.45s	remaining: 16m 16s
2:	learn: 2.8213513	total: 3.67s	remaining: 16m 14s
3:	learn: 2.6825932	total: 4.91s	remaining: 16m 16s
4:	learn: 2.5539568	total: 6.15s	remaining: 16m 18s
5:	learn: 2.4313178	total: 7.37s	remaining: 16m 14s
6:	learn: 2.3102687	total: 8.58s	remaining: 16m 11s
7:	learn: 2.2009803	total: 9.79s	remaining: 16m 9s
8:	learn: 2.0940835	total: 11s	remaining: 16m 5s
9:	learn: 1.9980595	total: 12.2s	remaining: 16m 4s
10:	learn: 1.9121805	total: 13.5s	remaining: 16m 5s
11:	learn: 1.8298829	total: 14.7s	remaining: 16m 3s
12:	learn: 1.7478976	total: 15.9s	remaining: 16m 2s
13:	learn: 1.6704577	total: 17.1s	remaining: 16m
14:	learn: 1.6065779	total: 18.3s	remaining: 15m 58s
15:	learn: 1.5429942	total: 19.6s	remaining: 15m 58s
16:	learn: 1.4843661	total: 20.8s	remaining: 15m 56s
17:	learn: 1.4276177	total: 22s	remaining: 15m 55s
18:	learn: 1.3748486	total: 23.2s	remaining: 15m 53s
19:	learn: 1.3273

154:	learn: 0.2852596	total: 3m 9s	remaining: 13m 8s
155:	learn: 0.2837598	total: 3m 10s	remaining: 13m 7s
156:	learn: 0.2818525	total: 3m 11s	remaining: 13m 6s
157:	learn: 0.2806770	total: 3m 13s	remaining: 13m 4s
158:	learn: 0.2781964	total: 3m 14s	remaining: 13m 3s
159:	learn: 0.2770767	total: 3m 15s	remaining: 13m 2s
160:	learn: 0.2755429	total: 3m 16s	remaining: 13m 1s
161:	learn: 0.2741083	total: 3m 18s	remaining: 12m 59s
162:	learn: 0.2726059	total: 3m 19s	remaining: 12m 58s
163:	learn: 0.2707880	total: 3m 20s	remaining: 12m 57s
164:	learn: 0.2693995	total: 3m 21s	remaining: 12m 55s
165:	learn: 0.2681273	total: 3m 22s	remaining: 12m 54s
166:	learn: 0.2668969	total: 3m 24s	remaining: 12m 53s
167:	learn: 0.2653102	total: 3m 25s	remaining: 12m 52s
168:	learn: 0.2641456	total: 3m 26s	remaining: 12m 51s
169:	learn: 0.2629652	total: 3m 27s	remaining: 12m 49s
170:	learn: 0.2609013	total: 3m 28s	remaining: 12m 48s
171:	learn: 0.2590873	total: 3m 30s	remaining: 12m 47s
172:	learn: 0.2579

305:	learn: 0.1392591	total: 6m 14s	remaining: 10m 4s
306:	learn: 0.1387256	total: 6m 15s	remaining: 10m 3s
307:	learn: 0.1383294	total: 6m 16s	remaining: 10m 1s
308:	learn: 0.1372472	total: 6m 17s	remaining: 10m
309:	learn: 0.1366893	total: 6m 19s	remaining: 9m 59s
310:	learn: 0.1362436	total: 6m 20s	remaining: 9m 58s
311:	learn: 0.1356434	total: 6m 21s	remaining: 9m 56s
312:	learn: 0.1352605	total: 6m 22s	remaining: 9m 55s
313:	learn: 0.1341863	total: 6m 24s	remaining: 9m 54s
314:	learn: 0.1335198	total: 6m 25s	remaining: 9m 53s
315:	learn: 0.1329723	total: 6m 26s	remaining: 9m 52s
316:	learn: 0.1325563	total: 6m 27s	remaining: 9m 50s
317:	learn: 0.1319578	total: 6m 28s	remaining: 9m 49s
318:	learn: 0.1316582	total: 6m 30s	remaining: 9m 48s
319:	learn: 0.1312023	total: 6m 31s	remaining: 9m 47s
320:	learn: 0.1308175	total: 6m 32s	remaining: 9m 45s
321:	learn: 0.1303081	total: 6m 33s	remaining: 9m 44s
322:	learn: 0.1300386	total: 6m 35s	remaining: 9m 43s
323:	learn: 0.1296014	total: 6m

458:	learn: 0.0801934	total: 9m 21s	remaining: 6m 56s
459:	learn: 0.0798755	total: 9m 22s	remaining: 6m 55s
460:	learn: 0.0796000	total: 9m 23s	remaining: 6m 54s
461:	learn: 0.0793513	total: 9m 24s	remaining: 6m 53s
462:	learn: 0.0790814	total: 9m 25s	remaining: 6m 51s
463:	learn: 0.0787574	total: 9m 27s	remaining: 6m 50s
464:	learn: 0.0784513	total: 9m 28s	remaining: 6m 49s
465:	learn: 0.0780412	total: 9m 29s	remaining: 6m 48s
466:	learn: 0.0777197	total: 9m 30s	remaining: 6m 47s
467:	learn: 0.0774305	total: 9m 32s	remaining: 6m 45s
468:	learn: 0.0770370	total: 9m 33s	remaining: 6m 44s
469:	learn: 0.0766294	total: 9m 34s	remaining: 6m 43s
470:	learn: 0.0764968	total: 9m 35s	remaining: 6m 42s
471:	learn: 0.0763357	total: 9m 37s	remaining: 6m 40s
472:	learn: 0.0760860	total: 9m 38s	remaining: 6m 39s
473:	learn: 0.0758943	total: 9m 39s	remaining: 6m 38s
474:	learn: 0.0755476	total: 9m 40s	remaining: 6m 37s
475:	learn: 0.0750862	total: 9m 41s	remaining: 6m 36s
476:	learn: 0.0749833	total:

609:	learn: 0.0508535	total: 12m 25s	remaining: 3m 52s
610:	learn: 0.0507318	total: 12m 26s	remaining: 3m 51s
611:	learn: 0.0505511	total: 12m 28s	remaining: 3m 49s
612:	learn: 0.0504066	total: 12m 29s	remaining: 3m 48s
613:	learn: 0.0501588	total: 12m 30s	remaining: 3m 47s
614:	learn: 0.0499527	total: 12m 31s	remaining: 3m 46s
615:	learn: 0.0497621	total: 12m 32s	remaining: 3m 44s
616:	learn: 0.0496161	total: 12m 34s	remaining: 3m 43s
617:	learn: 0.0494999	total: 12m 35s	remaining: 3m 42s
618:	learn: 0.0493337	total: 12m 36s	remaining: 3m 41s
619:	learn: 0.0492030	total: 12m 37s	remaining: 3m 39s
620:	learn: 0.0491116	total: 12m 38s	remaining: 3m 38s
621:	learn: 0.0489550	total: 12m 40s	remaining: 3m 37s
622:	learn: 0.0487685	total: 12m 41s	remaining: 3m 36s
623:	learn: 0.0486062	total: 12m 42s	remaining: 3m 35s
624:	learn: 0.0484597	total: 12m 43s	remaining: 3m 33s
625:	learn: 0.0483865	total: 12m 45s	remaining: 3m 32s
626:	learn: 0.0482828	total: 12m 46s	remaining: 3m 31s
627:	learn

760:	learn: 0.0350499	total: 15m 30s	remaining: 47.7s
761:	learn: 0.0349716	total: 15m 31s	remaining: 46.5s
762:	learn: 0.0349228	total: 15m 32s	remaining: 45.2s
763:	learn: 0.0348158	total: 15m 33s	remaining: 44s
764:	learn: 0.0347494	total: 15m 35s	remaining: 42.8s
765:	learn: 0.0346664	total: 15m 36s	remaining: 41.6s
766:	learn: 0.0345535	total: 15m 37s	remaining: 40.3s
767:	learn: 0.0344153	total: 15m 38s	remaining: 39.1s
768:	learn: 0.0343170	total: 15m 40s	remaining: 37.9s
769:	learn: 0.0342694	total: 15m 41s	remaining: 36.7s
770:	learn: 0.0341895	total: 15m 42s	remaining: 35.5s
771:	learn: 0.0341176	total: 15m 43s	remaining: 34.2s
772:	learn: 0.0340488	total: 15m 45s	remaining: 33s
773:	learn: 0.0339588	total: 15m 46s	remaining: 31.8s
774:	learn: 0.0339067	total: 15m 47s	remaining: 30.6s
775:	learn: 0.0338362	total: 15m 48s	remaining: 29.3s
776:	learn: 0.0337967	total: 15m 49s	remaining: 28.1s
777:	learn: 0.0336490	total: 15m 51s	remaining: 26.9s
778:	learn: 0.0335423	total: 15m

RandomizedSearchCV(cv=<generator object _BaseKFold.split at 0x000001B3B73170C8>,
                   estimator=<catboost.core.CatBoostClassifier object at 0x000001B3B72FD148>,
                   n_iter=5, n_jobs=-1,
                   param_distributions={'depth': [6], 'iterations': [800],
                                        'leaf_estimation_iterations': [10],
                                        'random_seed': [42]},
                   random_state=42, scoring='f1_weighted', verbose=3)

In [None]:
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])
pd.Series(predss).to_csv('preds/submission-random-grid-search-catboost.csv', index=False, header=['Culture'])

In [None]:
train.drop(['0','1','2'],axis=1,inplace=True)
test.drop(['0','1','2'],axis=1,inplace=True)

In [None]:
folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = RandomizedSearchCV(clf, param_distributions=params, n_iter=param_comb, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train,y_train), verbose=3, random_state=42 )
random_search.fit(train,y_train)
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])
pd.Series(predss).to_csv('preds/submission-random-grid-search-catboost_290.csv', index=False, header=['Culture'])

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed: 69.3min remaining: 103.9min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed: 69.9min finished


0:	learn: 3.1632428	total: 1.48s	remaining: 19m 43s
1:	learn: 3.0046057	total: 2.85s	remaining: 18m 55s
2:	learn: 2.8785129	total: 4.14s	remaining: 18m 20s
3:	learn: 2.7470170	total: 5.55s	remaining: 18m 25s
4:	learn: 2.6228268	total: 6.88s	remaining: 18m 14s
5:	learn: 2.5016085	total: 8.25s	remaining: 18m 12s
6:	learn: 2.3778618	total: 9.59s	remaining: 18m 6s
7:	learn: 2.2660537	total: 10.9s	remaining: 17m 57s
8:	learn: 2.1580136	total: 12.1s	remaining: 17m 45s
9:	learn: 2.0635516	total: 13.4s	remaining: 17m 42s
10:	learn: 1.9653023	total: 14.9s	remaining: 17m 48s
11:	learn: 1.8797831	total: 16.2s	remaining: 17m 45s
12:	learn: 1.8003884	total: 17.5s	remaining: 17m 36s
13:	learn: 1.7205699	total: 18.7s	remaining: 17m 30s
14:	learn: 1.6465401	total: 19.9s	remaining: 17m 22s
15:	learn: 1.5790366	total: 21.2s	remaining: 17m 18s
16:	learn: 1.5199511	total: 22.6s	remaining: 17m 21s
17:	learn: 1.4656457	total: 23.9s	remaining: 17m 18s
18:	learn: 1.4139395	total: 25.2s	remaining: 17m 14s
19:	

154:	learn: 0.2988826	total: 3m 11s	remaining: 13m 18s
155:	learn: 0.2970145	total: 3m 13s	remaining: 13m 17s
156:	learn: 0.2957600	total: 3m 14s	remaining: 13m 15s
157:	learn: 0.2944118	total: 3m 15s	remaining: 13m 14s
158:	learn: 0.2922998	total: 3m 16s	remaining: 13m 13s
159:	learn: 0.2901739	total: 3m 17s	remaining: 13m 11s
160:	learn: 0.2885199	total: 3m 19s	remaining: 13m 10s
161:	learn: 0.2875347	total: 3m 20s	remaining: 13m 8s
162:	learn: 0.2857581	total: 3m 21s	remaining: 13m 7s
163:	learn: 0.2840724	total: 3m 22s	remaining: 13m 6s
164:	learn: 0.2826227	total: 3m 23s	remaining: 13m 4s
165:	learn: 0.2811875	total: 3m 25s	remaining: 13m 3s
166:	learn: 0.2795513	total: 3m 26s	remaining: 13m 1s
167:	learn: 0.2771919	total: 3m 27s	remaining: 13m
168:	learn: 0.2763536	total: 3m 28s	remaining: 12m 59s
169:	learn: 0.2749355	total: 3m 29s	remaining: 12m 57s
170:	learn: 0.2741810	total: 3m 31s	remaining: 12m 56s
171:	learn: 0.2731607	total: 3m 32s	remaining: 12m 55s
172:	learn: 0.271876

305:	learn: 0.1506587	total: 6m 15s	remaining: 10m 6s
306:	learn: 0.1499987	total: 6m 16s	remaining: 10m 5s
307:	learn: 0.1493461	total: 6m 18s	remaining: 10m 3s
308:	learn: 0.1486576	total: 6m 19s	remaining: 10m 2s
309:	learn: 0.1481544	total: 6m 20s	remaining: 10m 1s
310:	learn: 0.1475082	total: 6m 21s	remaining: 10m
311:	learn: 0.1470206	total: 6m 22s	remaining: 9m 59s
312:	learn: 0.1466997	total: 6m 24s	remaining: 9m 57s
313:	learn: 0.1458271	total: 6m 25s	remaining: 9m 56s
314:	learn: 0.1455527	total: 6m 26s	remaining: 9m 55s
315:	learn: 0.1453475	total: 6m 27s	remaining: 9m 54s
316:	learn: 0.1450899	total: 6m 29s	remaining: 9m 52s
317:	learn: 0.1446001	total: 6m 30s	remaining: 9m 51s
318:	learn: 0.1439396	total: 6m 31s	remaining: 9m 50s
319:	learn: 0.1434105	total: 6m 32s	remaining: 9m 49s
320:	learn: 0.1430742	total: 6m 33s	remaining: 9m 47s
321:	learn: 0.1425403	total: 6m 35s	remaining: 9m 46s
322:	learn: 0.1419306	total: 6m 36s	remaining: 9m 45s
323:	learn: 0.1415249	total: 6m

458:	learn: 0.0913491	total: 9m 21s	remaining: 6m 56s
459:	learn: 0.0910335	total: 9m 22s	remaining: 6m 55s
460:	learn: 0.0905995	total: 9m 23s	remaining: 6m 54s
461:	learn: 0.0903807	total: 9m 24s	remaining: 6m 53s
462:	learn: 0.0901916	total: 9m 25s	remaining: 6m 51s
463:	learn: 0.0900460	total: 9m 27s	remaining: 6m 50s
464:	learn: 0.0897502	total: 9m 28s	remaining: 6m 49s
465:	learn: 0.0893389	total: 9m 29s	remaining: 6m 48s
466:	learn: 0.0890495	total: 9m 30s	remaining: 6m 46s
467:	learn: 0.0886714	total: 9m 31s	remaining: 6m 45s
468:	learn: 0.0883498	total: 9m 33s	remaining: 6m 44s
469:	learn: 0.0880347	total: 9m 34s	remaining: 6m 43s
470:	learn: 0.0878214	total: 9m 35s	remaining: 6m 42s
471:	learn: 0.0874677	total: 9m 36s	remaining: 6m 40s
472:	learn: 0.0871210	total: 9m 37s	remaining: 6m 39s
473:	learn: 0.0868602	total: 9m 39s	remaining: 6m 38s
474:	learn: 0.0864477	total: 9m 40s	remaining: 6m 37s
475:	learn: 0.0862709	total: 9m 41s	remaining: 6m 35s
476:	learn: 0.0860436	total:

609:	learn: 0.0603845	total: 12m 24s	remaining: 3m 51s
610:	learn: 0.0602470	total: 12m 25s	remaining: 3m 50s
611:	learn: 0.0600980	total: 12m 26s	remaining: 3m 49s
612:	learn: 0.0599753	total: 12m 27s	remaining: 3m 48s
613:	learn: 0.0598348	total: 12m 28s	remaining: 3m 46s
614:	learn: 0.0597314	total: 12m 30s	remaining: 3m 45s
615:	learn: 0.0596364	total: 12m 31s	remaining: 3m 44s
616:	learn: 0.0593738	total: 12m 32s	remaining: 3m 43s
617:	learn: 0.0592897	total: 12m 33s	remaining: 3m 41s
618:	learn: 0.0590777	total: 12m 34s	remaining: 3m 40s
619:	learn: 0.0589567	total: 12m 36s	remaining: 3m 39s
620:	learn: 0.0587800	total: 12m 37s	remaining: 3m 38s
621:	learn: 0.0586380	total: 12m 38s	remaining: 3m 37s
622:	learn: 0.0585044	total: 12m 39s	remaining: 3m 35s
623:	learn: 0.0583419	total: 12m 41s	remaining: 3m 34s
624:	learn: 0.0582250	total: 12m 42s	remaining: 3m 33s
625:	learn: 0.0580458	total: 12m 43s	remaining: 3m 32s
626:	learn: 0.0578335	total: 12m 44s	remaining: 3m 30s
627:	learn

760:	learn: 0.0416704	total: 15m 27s	remaining: 47.5s
761:	learn: 0.0416418	total: 15m 28s	remaining: 46.3s
762:	learn: 0.0415298	total: 15m 30s	remaining: 45.1s
763:	learn: 0.0414352	total: 15m 31s	remaining: 43.9s
764:	learn: 0.0414041	total: 15m 32s	remaining: 42.7s
765:	learn: 0.0413143	total: 15m 33s	remaining: 41.4s
766:	learn: 0.0411865	total: 15m 34s	remaining: 40.2s
767:	learn: 0.0411307	total: 15m 36s	remaining: 39s
768:	learn: 0.0410208	total: 15m 37s	remaining: 37.8s
769:	learn: 0.0409437	total: 15m 38s	remaining: 36.6s
770:	learn: 0.0408498	total: 15m 39s	remaining: 35.3s
771:	learn: 0.0407328	total: 15m 40s	remaining: 34.1s
772:	learn: 0.0406428	total: 15m 42s	remaining: 32.9s
773:	learn: 0.0405511	total: 15m 43s	remaining: 31.7s
774:	learn: 0.0404520	total: 15m 44s	remaining: 30.5s
775:	learn: 0.0403873	total: 15m 45s	remaining: 29.3s
776:	learn: 0.0403159	total: 15m 47s	remaining: 28s
777:	learn: 0.0402254	total: 15m 48s	remaining: 26.8s
778:	learn: 0.0401390	total: 15m

In [None]:
train = pd.read_csv('train_290_without_low1.csv')
test = pd.read_csv('test_dropped_290_without_nans.csv')
print(train.shape, test.shape)

(2828, 294) (939, 293)


In [None]:
folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = RandomizedSearchCV(clf, param_distributions=params, n_iter=param_comb, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train.drop('Culture',axis=1),train['Culture']), verbose=3, random_state=42 )
random_search.fit(train.drop('Culture',axis=1),train['Culture'])
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed: 93.5min finished


0:	learn: 2.9428163	total: 1.19s	remaining: 19m 44s
1:	learn: 2.8008257	total: 2.34s	remaining: 19m 28s
2:	learn: 2.6766670	total: 3.52s	remaining: 19m 29s
3:	learn: 2.5618067	total: 4.68s	remaining: 19m 24s
4:	learn: 2.4418281	total: 5.84s	remaining: 19m 23s
5:	learn: 2.3270490	total: 7s	remaining: 19m 19s
6:	learn: 2.2192367	total: 8.13s	remaining: 19m 13s
7:	learn: 2.1119806	total: 9.3s	remaining: 19m 13s
8:	learn: 2.0079923	total: 10.4s	remaining: 19m 9s
9:	learn: 1.9158916	total: 11.6s	remaining: 19m 8s
10:	learn: 1.8330550	total: 12.8s	remaining: 19m 10s
11:	learn: 1.7538018	total: 13.9s	remaining: 19m 7s
12:	learn: 1.6787777	total: 15.1s	remaining: 19m 6s
13:	learn: 1.6085554	total: 16.3s	remaining: 19m 4s
14:	learn: 1.5430247	total: 17.4s	remaining: 19m 3s
15:	learn: 1.4812250	total: 18.6s	remaining: 19m 1s
16:	learn: 1.4218530	total: 19.7s	remaining: 19m
17:	learn: 1.3732509	total: 20.9s	remaining: 18m 59s
18:	learn: 1.3230615	total: 22.1s	remaining: 18m 59s
19:	learn: 1.28429

In [None]:
pd.Series(predss).to_csv('catboost_290_1.csv', index=False, header=['Culture'])

In [None]:
train = pd.read_csv('train_290_without_low2.csv')
test = pd.read_csv('test_dropped_290_without_nans.csv')
print(train.shape, test.shape)

(2794, 294) (939, 293)


In [None]:
folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = RandomizedSearchCV(clf, param_distributions=params, n_iter=param_comb, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train.drop('Culture',axis=1),train['Culture']), verbose=3, random_state=42 )
random_search.fit(train.drop('Culture',axis=1),train['Culture'])
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])
pd.Series(predss).to_csv('catboost_290_2.csv', index=False, header=['Culture'])

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed: 71.6min finished


0:	learn: 2.6987850	total: 916ms	remaining: 15m 14s
1:	learn: 2.5621761	total: 1.84s	remaining: 15m 17s
2:	learn: 2.4371023	total: 2.76s	remaining: 15m 16s
3:	learn: 2.3231288	total: 3.65s	remaining: 15m 8s
4:	learn: 2.2116407	total: 4.55s	remaining: 15m 4s
5:	learn: 2.1049413	total: 5.48s	remaining: 15m 7s
6:	learn: 1.9995491	total: 6.38s	remaining: 15m 5s
7:	learn: 1.8965648	total: 7.29s	remaining: 15m 4s
8:	learn: 1.8047545	total: 8.2s	remaining: 15m 2s
9:	learn: 1.7297489	total: 9.14s	remaining: 15m 4s
10:	learn: 1.6565991	total: 10s	remaining: 15m 2s
11:	learn: 1.5902212	total: 11s	remaining: 15m 1s
12:	learn: 1.5209215	total: 11.9s	remaining: 15m 1s
13:	learn: 1.4596145	total: 12.8s	remaining: 15m
14:	learn: 1.3994418	total: 13.7s	remaining: 14m 58s
15:	learn: 1.3468757	total: 14.6s	remaining: 14m 57s
16:	learn: 1.2997156	total: 15.5s	remaining: 14m 55s
17:	learn: 1.2513589	total: 16.4s	remaining: 14m 55s
18:	learn: 1.2095788	total: 17.3s	remaining: 14m 53s
19:	learn: 1.1724305	t

In [7]:
train = pd.read_csv('train_pca_0.9999999.csv')
test = pd.read_csv('test_pca_0.9999999.csv')
print(train.shape, test.shape)

(2828, 258) (939, 257)


In [8]:
train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,0.1,1.1,2.1,Culture
0,4.644999,-1.171835,-0.081057,-0.125770,0.254611,0.226908,-0.009404,-0.265658,-0.015255,-0.025979,0.016203,-0.068886,-0.009408,-0.023298,-0.137708,0.054728,-0.037730,0.014789,-0.033191,-0.012056,0.025822,-0.002696,0.008628,0.020674,-0.035934,-0.011617,0.038664,0.032714,-0.020640,0.025445,0.030165,0.041540,0.012404,0.010338,0.015300,0.007940,-0.008166,0.005004,-0.020457,-0.004794,...,-0.000063,-0.000203,-0.000011,-0.000070,-0.000019,0.000233,0.000109,-0.000055,0.000049,0.000064,-0.000035,0.000070,-0.000070,0.000037,-0.000159,-0.000058,-0.000041,0.000036,-0.000012,-0.000147,0.000069,-9.515302e-07,0.000038,0.000033,-0.000043,-0.000153,0.000069,-0.000065,0.000107,-0.000011,-0.000046,0.000005,-0.000019,-0.000065,0.000147,0.000163,2018.0,517.0,48.64,4
1,-2.485984,-0.186353,-1.274751,0.825462,0.331752,-0.499354,-0.161753,-0.192708,0.221191,0.177418,-0.044281,-0.203562,0.598316,0.120112,-0.106679,0.011605,0.044220,0.243228,0.301607,-0.022361,-0.092866,-0.014104,0.056771,0.132678,0.004705,-0.044727,0.030074,-0.039683,0.041885,0.003817,0.064115,-0.021486,-0.031173,0.019309,-0.026486,-0.006856,-0.038107,0.012808,-0.025417,0.007186,...,-0.000480,-0.000263,0.000582,-0.000146,0.000632,0.000252,0.000504,-0.000072,0.000233,-0.000165,-0.000216,-0.000583,-0.000270,0.000449,-0.000322,0.000989,0.000372,0.000058,0.000094,-0.000553,0.000564,1.000692e-03,0.000180,-0.000568,0.000897,-0.000260,-0.000585,-0.000315,-0.000339,0.000061,-0.000704,-0.000472,0.000432,-0.000059,-0.000032,0.000327,2019.0,1395.0,80.41,17
2,-2.730865,-0.298112,0.203354,-0.638905,-0.765642,0.268721,0.748522,-0.193241,0.182282,-0.145103,-0.138574,-0.025066,0.134268,0.089305,-0.017930,-0.043172,0.153086,0.061300,0.006876,0.056258,0.005934,-0.007576,-0.008665,0.029937,-0.033080,-0.034185,-0.012714,0.016667,0.025733,0.007033,0.017506,-0.018954,0.013881,0.012255,0.019117,0.009602,-0.002400,0.017691,0.015124,0.030314,...,0.000227,0.000136,-0.000026,-0.000142,-0.000063,-0.000193,0.000006,0.000155,-0.000016,0.000057,-0.000018,0.000008,0.000291,-0.000229,0.000009,-0.000226,0.000216,0.000157,-0.000127,-0.000051,-0.000128,1.135418e-04,-0.000251,-0.000152,-0.000060,0.000074,-0.000142,-0.000035,-0.000045,-0.000071,-0.000065,-0.000168,-0.000186,-0.000012,0.000149,-0.000063,2017.0,44.0,43.64,2
3,3.204312,-0.274076,0.840615,0.594383,-1.140873,-0.472444,-0.460494,0.421908,0.721556,-0.056575,0.020825,0.017103,0.125200,-0.096451,-0.160459,-0.053573,-0.046299,-0.043165,0.061827,-0.070974,-0.083550,0.014755,-0.207426,-0.119508,0.084571,-0.121902,0.008568,0.049005,0.008818,-0.039734,0.010878,-0.005138,-0.007987,0.041235,-0.013687,0.034825,0.009069,-0.001665,-0.006770,0.018122,...,-0.000734,-0.000446,-0.000207,0.000012,0.000317,0.000215,0.000141,-0.000166,0.000085,-0.000255,-0.000391,0.000091,-0.000239,0.000284,-0.000069,0.000261,0.000002,0.000026,-0.000058,-0.000221,0.000009,-1.416930e-04,-0.000380,-0.000070,0.000336,-0.000161,0.000205,0.000308,0.000028,-0.000187,-0.000098,-0.000004,-0.000291,-0.000014,0.000011,-0.000124,2018.0,1591.0,79.34,4
4,-2.360852,1.509807,-1.100292,-0.678479,0.642358,0.218669,-0.525703,-0.450308,0.220883,-0.461842,-0.396663,-0.267731,0.103251,-0.066329,0.105321,-0.076317,-0.115595,0.042029,-0.008253,0.009648,-0.020119,-0.087029,-0.041499,0.058610,-0.040561,-0.036400,-0.018470,-0.002518,0.024417,-0.002593,-0.003610,-0.058575,0.076010,0.018805,-0.017250,-0.015220,-0.012102,0.005074,-0.001156,-0.022222,...,0.000081,-0.000182,-0.000154,-0.000007,0.000604,0.000161,-0.000019,-0.000257,0.000262,-0.000224,0.000532,-0.000327,-0.000073,-0.000166,0.000639,-0.000395,0.000338,0.000077,-0.000131,-0.000283,0.000242,-5.167358e-05,0.000301,-0.000345,-0.000329,0.000101,0.000027,-0.000118,-0.000069,0.000108,0.000245,0.000063,0.000052,0.000158,0.000350,-0.000120,2017.0,681.0,144.76,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2823,3.370614,-0.443714,0.743663,0.624816,0.236538,0.262859,0.499478,-0.501979,0.144991,0.049731,-0.116549,0.019021,0.080422,0.031500,0.061619,-0.058139,0.032976,-0.097002,0.091336,-0.086628,0.083729,0.032955,0.001899,-0.025549,-0.001237,-0.028952,-0.011327,-0.012894,0.007812,0.008710,-0.008412,-0.007385,-0.034227,0.002076,0.002490,-0.011214,0.008141,0.020163,0.014252,-0.012129,...,0.000813,-0.000151,-0.000647,-0.000253,-0.000146,-0.000370,0.000387,-0.000088,-0.000137,-0.000097,0.000598,0.000809,-0.000321,-0.000192,0.000044,0.000398,0.000782,-0.000417,0.000172,-0.000715,0.000152,7.715699e-05,-0.000515,-0.000271,0.000014,0.000425,-0.000182,-0.000033,0.000456,0.000211,-0.000011,-0.000281,-0.000058,-0.000075,-0.000190,-0.000283,2019.0,1631.0,117.04,4
2824,2.034094,0.137510,-1.211978,-0.005544,-0.689247,0.510110,-0.142857,-0.368863,-0.026139,-0.132320,-0.122130,0.297841,0.158232,-0.002575,-0.153293,-0.029523,-0.001669,-0.114086,0.016813,0.002493,-0.024617,-0.027669,0.028803,-0.005375,-0.028931,-0.006806,-0.001378,-0.037319,0.010228,0.038686,-0.042297,-0.010439,0.005147,0.001254,0.018116,0.018807,-0.007728,-0.012220,-0.000367,0.018276,...,0.000015,-0.000125,0.000029,0.000108,-0.000136,-0.000022,0.000072,-0.000137,-0.000141,0.000093,-0.000367,-0.000141,0.000012,0.000047,0.000101,0.000128,-0.000389,0.000020,-0.000053,-0.000069,-0.000110,-1.468822e-04,0.000042,0.000002,-0.000006,0.000150,0.000050,-0.000038,-0.000073,0.000139,0.000077,-0.000138,0.000164,0.000070,0.000177,0.000029,2016.0,336.0,70.61,4
2825,1.537344,0.073370,-0.965067,-0.083822,-0.955435,0.014964,-0.471531,-0.440293,-0.150382,-0.043515,0.016796,0.180172,0.181872,0.212831,-0.011235,-0.072618,-0.065670,-0.033378,-0.062807,0.011719,0.040706,-0.053006,-0.020851,-0.008425,0.005397,-0.026594,0.000001,0.024724,0.001845,-0.011136,-0.007368,0.004660,-0.013792,-0.018961,0.011884,0.026090,0.000179,-0.014206,-0.018028,0.016305,...,-0.000477,0.000127,-0.000080,-0.000082,0.000121,0.000218,-0.000119,-0.000128,0.000127,-0.000015,0.000333,-0.000058,0.000087,-0.000214,-0.000166,-0.000247,0.000866,-0.000087,-0.000077,0.000515,-0.000042,-2.109630e-05,0.000024,0.000290,-0.000121,-0.000351,-0.000220,-0.000459,-0.000012,0.000052,0.000063,0.000021,0.000113,0.000269,-0.000255,-0.000309,2016.0,371.0,60.26,4
2826,-1.134091,1.653277,0.014823,-1.263081,-0.546080,0.447197,0.111064,0.299207,-0.104738,0.193487,-0.061365,-0.233216,0.217556,-0.136962,0.018890,0.213283,-0.145937,-0.018788,-0.001809,0.106127,0.029090,0.029836,-0.034795,0.010378,0.029009,-0.002061,0.027027,-0.039157,-0.041596,0.014723,0.010572,0.026422,0.021979,0.034760,-0.009386,0.007934,0.013733,-0.008039,0.018282,-0.004562,...,-0.000380,0.000180,0.000082,-0.000321,0.000819,-0.000919,0.000650,-0.000899,-0.000809,0.000265,-0.000597,0.000642,0.000599,-0.000244,-0.000119,0.000188,-0.000248,0.000504,0.000901,0.000495,0.000076,3.338460e-04,0.000241,0.000189,-0.000206,0.000132,0.000211,-0.000367,-0.000379,-0.000060,-0.000106,-0.000559,-0.000199,-0.000296,0.000466,0.000227,2017.0,115.0,237.86,5


In [10]:
clf = CatBoostClassifier(loss_function='MultiClass')
params = {'iterations': [1000],
          'depth': [6],
          'leaf_estimation_iterations': [10],
          'random_seed': [42]
         }
clf_grid = GridSearchCV(estimator=clf, param_grid=params, scoring='f1_weighted', cv=5)

In [13]:
folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = GridSearchCV(clf, param_grid=params, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train.drop('Culture',axis=1),train['Culture']), verbose=3 )
random_search.fit(train.drop('Culture',axis=1),train['Culture'])
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])
pd.Series(predss).to_csv('catboost_pca1.csv', index=False, header=['Culture'])

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed: 85.4min finished


0:	learn: 2.9792436	total: 1.11s	remaining: 18m 33s
1:	learn: 2.8777136	total: 2.19s	remaining: 18m 14s
2:	learn: 2.7799671	total: 3.26s	remaining: 18m 3s
3:	learn: 2.6816782	total: 4.33s	remaining: 17m 57s
4:	learn: 2.5815662	total: 5.45s	remaining: 18m 4s
5:	learn: 2.4858483	total: 6.54s	remaining: 18m 2s
6:	learn: 2.4062350	total: 7.62s	remaining: 18m
7:	learn: 2.3197431	total: 8.73s	remaining: 18m 3s
8:	learn: 2.2325245	total: 9.82s	remaining: 18m 1s
9:	learn: 2.1370975	total: 10.9s	remaining: 17m 59s
10:	learn: 2.0527659	total: 12s	remaining: 17m 57s
11:	learn: 1.9930497	total: 13.1s	remaining: 17m 54s
12:	learn: 1.9229106	total: 14.1s	remaining: 17m 51s
13:	learn: 1.8641417	total: 15.2s	remaining: 17m 51s
14:	learn: 1.7990716	total: 16.3s	remaining: 17m 49s
15:	learn: 1.7439749	total: 17.4s	remaining: 17m 48s
16:	learn: 1.6918833	total: 18.4s	remaining: 17m 46s
17:	learn: 1.6393551	total: 19.5s	remaining: 17m 45s
18:	learn: 1.5874443	total: 20.6s	remaining: 17m 42s
19:	learn: 1.5

In [None]:
folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = GridSearchCV(clf, param_grid=params, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train.drop('Culture',axis=1),train['Culture']), verbose=3 )
random_search.fit(train.drop('Culture',axis=1),train['Culture'])
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])
pd.Series(predss).to_csv('catboost_pca1.csv', index=False, header=['Culture'])