In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, classification_report, make_scorer
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

In [2]:
from catboost import CatBoostClassifier, Pool

In [3]:
clf = CatBoostClassifier(loss_function='MultiClass')
params = {'iterations': [800],
          'depth': [6],
          'leaf_estimation_iterations': [10],
          'random_seed': [42]
         }
clf_grid = GridSearchCV(estimator=clf, param_grid=params, scoring='f1_weighted', cv=5)

In [5]:
train = pd.read_csv('data/train_dropped_290_without_nans.csv')
test = pd.read_csv('data/test_dropped_290_without_nans.csv')
y_train=pd.read_csv('data/y_train.csv')['Culture']
print(train.shape, test.shape)

(2838, 293) (939, 293)


In [15]:
folds = 3
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = RandomizedSearchCV(clf, param_distributions=params, n_iter=param_comb, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train,y_train), verbose=3, random_state=42 )
random_search.fit(train,y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed: 41.4min finished


0:	learn: 3.1229551	total: 1.22s	remaining: 16m 12s
1:	learn: 2.9682540	total: 2.45s	remaining: 16m 16s
2:	learn: 2.8213513	total: 3.67s	remaining: 16m 14s
3:	learn: 2.6825932	total: 4.91s	remaining: 16m 16s
4:	learn: 2.5539568	total: 6.15s	remaining: 16m 18s
5:	learn: 2.4313178	total: 7.37s	remaining: 16m 14s
6:	learn: 2.3102687	total: 8.58s	remaining: 16m 11s
7:	learn: 2.2009803	total: 9.79s	remaining: 16m 9s
8:	learn: 2.0940835	total: 11s	remaining: 16m 5s
9:	learn: 1.9980595	total: 12.2s	remaining: 16m 4s
10:	learn: 1.9121805	total: 13.5s	remaining: 16m 5s
11:	learn: 1.8298829	total: 14.7s	remaining: 16m 3s
12:	learn: 1.7478976	total: 15.9s	remaining: 16m 2s
13:	learn: 1.6704577	total: 17.1s	remaining: 16m
14:	learn: 1.6065779	total: 18.3s	remaining: 15m 58s
15:	learn: 1.5429942	total: 19.6s	remaining: 15m 58s
16:	learn: 1.4843661	total: 20.8s	remaining: 15m 56s
17:	learn: 1.4276177	total: 22s	remaining: 15m 55s
18:	learn: 1.3748486	total: 23.2s	remaining: 15m 53s
19:	learn: 1.3273

154:	learn: 0.2852596	total: 3m 9s	remaining: 13m 8s
155:	learn: 0.2837598	total: 3m 10s	remaining: 13m 7s
156:	learn: 0.2818525	total: 3m 11s	remaining: 13m 6s
157:	learn: 0.2806770	total: 3m 13s	remaining: 13m 4s
158:	learn: 0.2781964	total: 3m 14s	remaining: 13m 3s
159:	learn: 0.2770767	total: 3m 15s	remaining: 13m 2s
160:	learn: 0.2755429	total: 3m 16s	remaining: 13m 1s
161:	learn: 0.2741083	total: 3m 18s	remaining: 12m 59s
162:	learn: 0.2726059	total: 3m 19s	remaining: 12m 58s
163:	learn: 0.2707880	total: 3m 20s	remaining: 12m 57s
164:	learn: 0.2693995	total: 3m 21s	remaining: 12m 55s
165:	learn: 0.2681273	total: 3m 22s	remaining: 12m 54s
166:	learn: 0.2668969	total: 3m 24s	remaining: 12m 53s
167:	learn: 0.2653102	total: 3m 25s	remaining: 12m 52s
168:	learn: 0.2641456	total: 3m 26s	remaining: 12m 51s
169:	learn: 0.2629652	total: 3m 27s	remaining: 12m 49s
170:	learn: 0.2609013	total: 3m 28s	remaining: 12m 48s
171:	learn: 0.2590873	total: 3m 30s	remaining: 12m 47s
172:	learn: 0.2579

305:	learn: 0.1392591	total: 6m 14s	remaining: 10m 4s
306:	learn: 0.1387256	total: 6m 15s	remaining: 10m 3s
307:	learn: 0.1383294	total: 6m 16s	remaining: 10m 1s
308:	learn: 0.1372472	total: 6m 17s	remaining: 10m
309:	learn: 0.1366893	total: 6m 19s	remaining: 9m 59s
310:	learn: 0.1362436	total: 6m 20s	remaining: 9m 58s
311:	learn: 0.1356434	total: 6m 21s	remaining: 9m 56s
312:	learn: 0.1352605	total: 6m 22s	remaining: 9m 55s
313:	learn: 0.1341863	total: 6m 24s	remaining: 9m 54s
314:	learn: 0.1335198	total: 6m 25s	remaining: 9m 53s
315:	learn: 0.1329723	total: 6m 26s	remaining: 9m 52s
316:	learn: 0.1325563	total: 6m 27s	remaining: 9m 50s
317:	learn: 0.1319578	total: 6m 28s	remaining: 9m 49s
318:	learn: 0.1316582	total: 6m 30s	remaining: 9m 48s
319:	learn: 0.1312023	total: 6m 31s	remaining: 9m 47s
320:	learn: 0.1308175	total: 6m 32s	remaining: 9m 45s
321:	learn: 0.1303081	total: 6m 33s	remaining: 9m 44s
322:	learn: 0.1300386	total: 6m 35s	remaining: 9m 43s
323:	learn: 0.1296014	total: 6m

458:	learn: 0.0801934	total: 9m 21s	remaining: 6m 56s
459:	learn: 0.0798755	total: 9m 22s	remaining: 6m 55s
460:	learn: 0.0796000	total: 9m 23s	remaining: 6m 54s
461:	learn: 0.0793513	total: 9m 24s	remaining: 6m 53s
462:	learn: 0.0790814	total: 9m 25s	remaining: 6m 51s
463:	learn: 0.0787574	total: 9m 27s	remaining: 6m 50s
464:	learn: 0.0784513	total: 9m 28s	remaining: 6m 49s
465:	learn: 0.0780412	total: 9m 29s	remaining: 6m 48s
466:	learn: 0.0777197	total: 9m 30s	remaining: 6m 47s
467:	learn: 0.0774305	total: 9m 32s	remaining: 6m 45s
468:	learn: 0.0770370	total: 9m 33s	remaining: 6m 44s
469:	learn: 0.0766294	total: 9m 34s	remaining: 6m 43s
470:	learn: 0.0764968	total: 9m 35s	remaining: 6m 42s
471:	learn: 0.0763357	total: 9m 37s	remaining: 6m 40s
472:	learn: 0.0760860	total: 9m 38s	remaining: 6m 39s
473:	learn: 0.0758943	total: 9m 39s	remaining: 6m 38s
474:	learn: 0.0755476	total: 9m 40s	remaining: 6m 37s
475:	learn: 0.0750862	total: 9m 41s	remaining: 6m 36s
476:	learn: 0.0749833	total:

609:	learn: 0.0508535	total: 12m 25s	remaining: 3m 52s
610:	learn: 0.0507318	total: 12m 26s	remaining: 3m 51s
611:	learn: 0.0505511	total: 12m 28s	remaining: 3m 49s
612:	learn: 0.0504066	total: 12m 29s	remaining: 3m 48s
613:	learn: 0.0501588	total: 12m 30s	remaining: 3m 47s
614:	learn: 0.0499527	total: 12m 31s	remaining: 3m 46s
615:	learn: 0.0497621	total: 12m 32s	remaining: 3m 44s
616:	learn: 0.0496161	total: 12m 34s	remaining: 3m 43s
617:	learn: 0.0494999	total: 12m 35s	remaining: 3m 42s
618:	learn: 0.0493337	total: 12m 36s	remaining: 3m 41s
619:	learn: 0.0492030	total: 12m 37s	remaining: 3m 39s
620:	learn: 0.0491116	total: 12m 38s	remaining: 3m 38s
621:	learn: 0.0489550	total: 12m 40s	remaining: 3m 37s
622:	learn: 0.0487685	total: 12m 41s	remaining: 3m 36s
623:	learn: 0.0486062	total: 12m 42s	remaining: 3m 35s
624:	learn: 0.0484597	total: 12m 43s	remaining: 3m 33s
625:	learn: 0.0483865	total: 12m 45s	remaining: 3m 32s
626:	learn: 0.0482828	total: 12m 46s	remaining: 3m 31s
627:	learn

760:	learn: 0.0350499	total: 15m 30s	remaining: 47.7s
761:	learn: 0.0349716	total: 15m 31s	remaining: 46.5s
762:	learn: 0.0349228	total: 15m 32s	remaining: 45.2s
763:	learn: 0.0348158	total: 15m 33s	remaining: 44s
764:	learn: 0.0347494	total: 15m 35s	remaining: 42.8s
765:	learn: 0.0346664	total: 15m 36s	remaining: 41.6s
766:	learn: 0.0345535	total: 15m 37s	remaining: 40.3s
767:	learn: 0.0344153	total: 15m 38s	remaining: 39.1s
768:	learn: 0.0343170	total: 15m 40s	remaining: 37.9s
769:	learn: 0.0342694	total: 15m 41s	remaining: 36.7s
770:	learn: 0.0341895	total: 15m 42s	remaining: 35.5s
771:	learn: 0.0341176	total: 15m 43s	remaining: 34.2s
772:	learn: 0.0340488	total: 15m 45s	remaining: 33s
773:	learn: 0.0339588	total: 15m 46s	remaining: 31.8s
774:	learn: 0.0339067	total: 15m 47s	remaining: 30.6s
775:	learn: 0.0338362	total: 15m 48s	remaining: 29.3s
776:	learn: 0.0337967	total: 15m 49s	remaining: 28.1s
777:	learn: 0.0336490	total: 15m 51s	remaining: 26.9s
778:	learn: 0.0335423	total: 15m

RandomizedSearchCV(cv=<generator object _BaseKFold.split at 0x000001B3B73170C8>,
                   estimator=<catboost.core.CatBoostClassifier object at 0x000001B3B72FD148>,
                   n_iter=5, n_jobs=-1,
                   param_distributions={'depth': [6], 'iterations': [800],
                                        'leaf_estimation_iterations': [10],
                                        'random_seed': [42]},
                   random_state=42, scoring='f1_weighted', verbose=3)

In [16]:
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])
pd.Series(predss).to_csv('preds/submission-random-grid-search-catboost.csv', index=False, header=['Culture'])

In [6]:
train.drop(['0','1','2'],axis=1,inplace=True)
test.drop(['0','1','2'],axis=1,inplace=True)

In [7]:
folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 0)

random_search = RandomizedSearchCV(clf, param_distributions=params, n_iter=param_comb, scoring='f1_weighted', n_jobs=-1, cv=skf.split(train,y_train), verbose=3, random_state=42 )
random_search.fit(train,y_train)
preds = random_search.predict(test)
predss=[]
for i in preds:
    predss.append(i[0])
pd.Series(predss).to_csv('preds/submission-random-grid-search-catboost_290.csv', index=False, header=['Culture'])

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed: 69.3min remaining: 103.9min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed: 69.9min finished


0:	learn: 3.1632428	total: 1.48s	remaining: 19m 43s
1:	learn: 3.0046057	total: 2.85s	remaining: 18m 55s
2:	learn: 2.8785129	total: 4.14s	remaining: 18m 20s
3:	learn: 2.7470170	total: 5.55s	remaining: 18m 25s
4:	learn: 2.6228268	total: 6.88s	remaining: 18m 14s
5:	learn: 2.5016085	total: 8.25s	remaining: 18m 12s
6:	learn: 2.3778618	total: 9.59s	remaining: 18m 6s
7:	learn: 2.2660537	total: 10.9s	remaining: 17m 57s
8:	learn: 2.1580136	total: 12.1s	remaining: 17m 45s
9:	learn: 2.0635516	total: 13.4s	remaining: 17m 42s
10:	learn: 1.9653023	total: 14.9s	remaining: 17m 48s
11:	learn: 1.8797831	total: 16.2s	remaining: 17m 45s
12:	learn: 1.8003884	total: 17.5s	remaining: 17m 36s
13:	learn: 1.7205699	total: 18.7s	remaining: 17m 30s
14:	learn: 1.6465401	total: 19.9s	remaining: 17m 22s
15:	learn: 1.5790366	total: 21.2s	remaining: 17m 18s
16:	learn: 1.5199511	total: 22.6s	remaining: 17m 21s
17:	learn: 1.4656457	total: 23.9s	remaining: 17m 18s
18:	learn: 1.4139395	total: 25.2s	remaining: 17m 14s
19:	

154:	learn: 0.2988826	total: 3m 11s	remaining: 13m 18s
155:	learn: 0.2970145	total: 3m 13s	remaining: 13m 17s
156:	learn: 0.2957600	total: 3m 14s	remaining: 13m 15s
157:	learn: 0.2944118	total: 3m 15s	remaining: 13m 14s
158:	learn: 0.2922998	total: 3m 16s	remaining: 13m 13s
159:	learn: 0.2901739	total: 3m 17s	remaining: 13m 11s
160:	learn: 0.2885199	total: 3m 19s	remaining: 13m 10s
161:	learn: 0.2875347	total: 3m 20s	remaining: 13m 8s
162:	learn: 0.2857581	total: 3m 21s	remaining: 13m 7s
163:	learn: 0.2840724	total: 3m 22s	remaining: 13m 6s
164:	learn: 0.2826227	total: 3m 23s	remaining: 13m 4s
165:	learn: 0.2811875	total: 3m 25s	remaining: 13m 3s
166:	learn: 0.2795513	total: 3m 26s	remaining: 13m 1s
167:	learn: 0.2771919	total: 3m 27s	remaining: 13m
168:	learn: 0.2763536	total: 3m 28s	remaining: 12m 59s
169:	learn: 0.2749355	total: 3m 29s	remaining: 12m 57s
170:	learn: 0.2741810	total: 3m 31s	remaining: 12m 56s
171:	learn: 0.2731607	total: 3m 32s	remaining: 12m 55s
172:	learn: 0.271876

305:	learn: 0.1506587	total: 6m 15s	remaining: 10m 6s
306:	learn: 0.1499987	total: 6m 16s	remaining: 10m 5s
307:	learn: 0.1493461	total: 6m 18s	remaining: 10m 3s
308:	learn: 0.1486576	total: 6m 19s	remaining: 10m 2s
309:	learn: 0.1481544	total: 6m 20s	remaining: 10m 1s
310:	learn: 0.1475082	total: 6m 21s	remaining: 10m
311:	learn: 0.1470206	total: 6m 22s	remaining: 9m 59s
312:	learn: 0.1466997	total: 6m 24s	remaining: 9m 57s
313:	learn: 0.1458271	total: 6m 25s	remaining: 9m 56s
314:	learn: 0.1455527	total: 6m 26s	remaining: 9m 55s
315:	learn: 0.1453475	total: 6m 27s	remaining: 9m 54s
316:	learn: 0.1450899	total: 6m 29s	remaining: 9m 52s
317:	learn: 0.1446001	total: 6m 30s	remaining: 9m 51s
318:	learn: 0.1439396	total: 6m 31s	remaining: 9m 50s
319:	learn: 0.1434105	total: 6m 32s	remaining: 9m 49s
320:	learn: 0.1430742	total: 6m 33s	remaining: 9m 47s
321:	learn: 0.1425403	total: 6m 35s	remaining: 9m 46s
322:	learn: 0.1419306	total: 6m 36s	remaining: 9m 45s
323:	learn: 0.1415249	total: 6m

458:	learn: 0.0913491	total: 9m 21s	remaining: 6m 56s
459:	learn: 0.0910335	total: 9m 22s	remaining: 6m 55s
460:	learn: 0.0905995	total: 9m 23s	remaining: 6m 54s
461:	learn: 0.0903807	total: 9m 24s	remaining: 6m 53s
462:	learn: 0.0901916	total: 9m 25s	remaining: 6m 51s
463:	learn: 0.0900460	total: 9m 27s	remaining: 6m 50s
464:	learn: 0.0897502	total: 9m 28s	remaining: 6m 49s
465:	learn: 0.0893389	total: 9m 29s	remaining: 6m 48s
466:	learn: 0.0890495	total: 9m 30s	remaining: 6m 46s
467:	learn: 0.0886714	total: 9m 31s	remaining: 6m 45s
468:	learn: 0.0883498	total: 9m 33s	remaining: 6m 44s
469:	learn: 0.0880347	total: 9m 34s	remaining: 6m 43s
470:	learn: 0.0878214	total: 9m 35s	remaining: 6m 42s
471:	learn: 0.0874677	total: 9m 36s	remaining: 6m 40s
472:	learn: 0.0871210	total: 9m 37s	remaining: 6m 39s
473:	learn: 0.0868602	total: 9m 39s	remaining: 6m 38s
474:	learn: 0.0864477	total: 9m 40s	remaining: 6m 37s
475:	learn: 0.0862709	total: 9m 41s	remaining: 6m 35s
476:	learn: 0.0860436	total:

609:	learn: 0.0603845	total: 12m 24s	remaining: 3m 51s
610:	learn: 0.0602470	total: 12m 25s	remaining: 3m 50s
611:	learn: 0.0600980	total: 12m 26s	remaining: 3m 49s
612:	learn: 0.0599753	total: 12m 27s	remaining: 3m 48s
613:	learn: 0.0598348	total: 12m 28s	remaining: 3m 46s
614:	learn: 0.0597314	total: 12m 30s	remaining: 3m 45s
615:	learn: 0.0596364	total: 12m 31s	remaining: 3m 44s
616:	learn: 0.0593738	total: 12m 32s	remaining: 3m 43s
617:	learn: 0.0592897	total: 12m 33s	remaining: 3m 41s
618:	learn: 0.0590777	total: 12m 34s	remaining: 3m 40s
619:	learn: 0.0589567	total: 12m 36s	remaining: 3m 39s
620:	learn: 0.0587800	total: 12m 37s	remaining: 3m 38s
621:	learn: 0.0586380	total: 12m 38s	remaining: 3m 37s
622:	learn: 0.0585044	total: 12m 39s	remaining: 3m 35s
623:	learn: 0.0583419	total: 12m 41s	remaining: 3m 34s
624:	learn: 0.0582250	total: 12m 42s	remaining: 3m 33s
625:	learn: 0.0580458	total: 12m 43s	remaining: 3m 32s
626:	learn: 0.0578335	total: 12m 44s	remaining: 3m 30s
627:	learn

760:	learn: 0.0416704	total: 15m 27s	remaining: 47.5s
761:	learn: 0.0416418	total: 15m 28s	remaining: 46.3s
762:	learn: 0.0415298	total: 15m 30s	remaining: 45.1s
763:	learn: 0.0414352	total: 15m 31s	remaining: 43.9s
764:	learn: 0.0414041	total: 15m 32s	remaining: 42.7s
765:	learn: 0.0413143	total: 15m 33s	remaining: 41.4s
766:	learn: 0.0411865	total: 15m 34s	remaining: 40.2s
767:	learn: 0.0411307	total: 15m 36s	remaining: 39s
768:	learn: 0.0410208	total: 15m 37s	remaining: 37.8s
769:	learn: 0.0409437	total: 15m 38s	remaining: 36.6s
770:	learn: 0.0408498	total: 15m 39s	remaining: 35.3s
771:	learn: 0.0407328	total: 15m 40s	remaining: 34.1s
772:	learn: 0.0406428	total: 15m 42s	remaining: 32.9s
773:	learn: 0.0405511	total: 15m 43s	remaining: 31.7s
774:	learn: 0.0404520	total: 15m 44s	remaining: 30.5s
775:	learn: 0.0403873	total: 15m 45s	remaining: 29.3s
776:	learn: 0.0403159	total: 15m 47s	remaining: 28s
777:	learn: 0.0402254	total: 15m 48s	remaining: 26.8s
778:	learn: 0.0401390	total: 15m