In [None]:
!pip install catboost

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from catboost.datasets import titanic
from library1 import *

In [None]:
# Load the Titanic train/test frames via catboost.datasets.
train, test = titanic()

# Name of the target column.
ycol = "Survived"

# Feature columns: every column that appears in the test frame
# (presumably the test frame carries no target column -- TODO confirm).
xcols = list(test.columns)

In [None]:
# NOTE(review): Pipeline, SimpleImputer, StandardScaler, OneHotEncoder,
# ColumnTransformer, make_column_selector, numeric_filtering and
# categorical_filtering are not imported explicitly in this notebook; they
# presumably arrive through `from library1 import *` -- confirm.

# Numeric branch: mean-impute, apply the project's numeric filter, standardize.
pipe1 = Pipeline([
    ('step1', SimpleImputer(strategy="mean")),
    ('step2', numeric_filtering()),
    ('step3', StandardScaler()),
])

# Categorical branch: mode-impute, apply the project's categorical filter,
# then one-hot encode.
pipe2 = Pipeline([
    ('step1', SimpleImputer(strategy="most_frequent")),
    ('step2', categorical_filtering()),
    # handle_unknown="ignore": a category that shows up only in a validation
    # fold (or at predict time) is encoded as all zeros instead of raising,
    # which would otherwise turn that cross-validation fit into a NaN score.
    ('step3', OneHotEncoder(handle_unknown="ignore")),
])

# Route columns to a branch by dtype: numeric -> pipe1, everything else -> pipe2.
transform = ColumnTransformer([
    ('num', pipe1, make_column_selector(dtype_include=np.number)),
    ('cat', pipe2, make_column_selector(dtype_exclude=np.number)),
])

# Full estimator: preprocessing followed by a random forest.
# The step name 'model' is what the `model__*` search parameters refer to.
pipe = Pipeline([
    ('transform', transform),
    # Fixed seed so repeated runs of the notebook build the same forests.
    ('model', RandomForestClassifier(random_state=42)),
])

In [None]:
# Hyper-parameter grid for the exhaustive search; the `model__` prefix routes
# each entry to the pipeline step named 'model'.
# 4 depths x 5 leaf-node limits x 1 criterion = 20 candidates.
param_grid = {
    "model__max_depth": [4, 5, 6, 7],
    "model__max_leaf_nodes": [3, 5, 7, 9, 10],
    "model__criterion": ["entropy"],
}

In [None]:
# Exhaustive search over every combination in `param_grid`, scored by
# accuracy with 5-fold cross-validation.
gs = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
# Fit on the training frame; the fitted search object is the cell's output.
gs.fit(train[xcols], train[ycol])

In [None]:
# Tabulate the grid-search cross-validation results, ordered by rank_test_score.
pd.DataFrame(gs.cv_results_).sort_values(by='rank_test_score', ascending=True)

In [None]:
# Search space for the randomized search: 20 depths x 20 leaf-node limits x
# 2 criteria = 800 possible combinations.
# NOTE: this rebinds `param_grid`, which earlier held the grid-search grid;
# re-run that earlier definition before re-fitting `gs`.
param_grid = {
    "model__max_depth": range(1, 21),
    "model__max_leaf_nodes": range(5, 101, 5),
    "model__criterion": ["entropy", "gini"],
}

In [None]:
# Randomized search: evaluate 20 candidates sampled from `param_grid`, scored
# by accuracy with 5-fold cross-validation.
rs = RandomizedSearchCV(
    pipe,
    param_distributions=param_grid,
    n_iter=20,
    scoring='accuracy',
    cv=5,
    # Fixed seed so the same 20 candidates are sampled on every run; without
    # it the results table changes each time the notebook is re-executed.
    random_state=42,
)
# Fit on the training frame; the fitted search object is the cell's output.
rs.fit(train[xcols], train[ycol])

In [None]:
# Tabulate the randomized-search cross-validation results, ordered by rank_test_score.
pd.DataFrame(rs.cv_results_).sort_values(by='rank_test_score', ascending=True)