In [None]:
import pandas as pd
import numpy as np
from sklearn.inspection import permutation_importance
from library import *
from sklearn.ensemble import RandomForestClassifier
from catboost.datasets import titanic


In [None]:
class pipeline_model:
    """Preprocessing + random-forest classifier with permutation importances.

    Columns are routed by dtype: numeric columns go through mean imputation,
    ``numeric_filtering`` and standard scaling; all other columns go through
    most-frequent imputation, ``categorical_filtering`` and one-hot encoding.
    The transformed matrix feeds a ``RandomForestClassifier``.

    ``Pipeline``, ``SimpleImputer``, ``StandardScaler``, ``OneHotEncoder``,
    ``ColumnTransformer``, ``make_column_selector``, ``numeric_filtering`` and
    ``categorical_filtering`` are expected to already be in scope
    (presumably via the notebook's ``from library import *`` -- TODO confirm).

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to the random forest and to the permutation shuffling
        done in ``fit``. ``None`` keeps the original, unseeded behaviour.
    n_repeats : int, default=5
        Number of times each column is shuffled when computing permutation
        importances.

    Attributes
    ----------
    pipe : Pipeline
        The full preprocessing + model pipeline.
    columns : list
        Column labels of the frame passed to ``fit`` (set by ``fit``).
    imp : mapping
        Result of ``permutation_importance`` (set by ``fit``).
    """

    def __init__(self, random_state=None, n_repeats=5):
        """Assemble the unfitted pipeline; see the class docstring for parameters."""
        self.random_state = random_state
        self.n_repeats = n_repeats

        # Numeric branch: fill gaps with the column mean, apply the custom
        # numeric filter, then standardise.
        numeric_branch = Pipeline([
            ('step1', SimpleImputer(strategy="mean")),
            ('step2', numeric_filtering()),
            ('step3', StandardScaler()),
        ])

        # Non-numeric branch: fill gaps with the most frequent value, apply the
        # custom categorical filter, then one-hot encode.
        # handle_unknown="ignore": with the default ("error") the encoder raises
        # at predict time on any category value it did not see during fit;
        # with "ignore" such values are encoded as an all-zero row instead.
        # NOTE(review): categorical_filtering may already guard against unseen
        # values -- its implementation is not visible here.
        categorical_branch = Pipeline([
            ('step1', SimpleImputer(strategy="most_frequent")),
            ('step2', categorical_filtering()),
            ('step3', OneHotEncoder(handle_unknown="ignore")),
        ])

        # Route each input column to a branch based on its dtype.
        by_dtype = ColumnTransformer([
            ('num', numeric_branch, make_column_selector(dtype_include=np.number)),
            ('cat', categorical_branch, make_column_selector(dtype_exclude=np.number)),
        ])

        self.pipe = Pipeline([
            ('transform', by_dtype),
            ('model', RandomForestClassifier(random_state=random_state)),
        ])

    def fit(self, X, y):
        """Fit the pipeline and compute permutation importances on the same data.

        Parameters
        ----------
        X : pandas.DataFrame
            Feature frame; must expose ``.columns`` (the labels are stored for
            ``feature_importances``).
        y : array-like
            Target values.

        Returns
        -------
        pipeline_model
            ``self``, to allow call chaining.
        """
        self.columns = X.columns.tolist()
        self.pipe.fit(X, y)
        # Importances are measured against the whole pipeline (raw input
        # columns, not the encoded ones) and on the very data used for
        # fitting, so they describe the training set rather than held-out data.
        self.imp = permutation_importance(
            estimator=self.pipe,
            X=X,
            y=y,
            scoring="accuracy",
            n_repeats=self.n_repeats,
            random_state=self.random_state,
        )
        return self

    def predict(self, X):
        """Return the fitted pipeline's predictions for ``X``."""
        return self.pipe.predict(X)

    def feature_importances(self):
        """Return the mean permutation importance of every input column.

        Returns
        -------
        pandas.DataFrame
            One row per column seen in ``fit`` (index = column label) and a
            single column named ``'features'`` holding ``importances_mean``.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        """
        importances = getattr(self, "imp", None)
        if importances is None:
            # Same exception type the unguarded attribute access would raise,
            # but with an actionable message.
            raise AttributeError(
                "feature importances are not available: call fit() first"
            )
        return pd.DataFrame(
            importances['importances_mean'],
            index=self.columns,
            columns=['features'],
        )

In [None]:
# Fetch the Titanic train/test frames via catboost.datasets.titanic.
train, test = titanic()

# Target column; every other column of the training frame is used as a feature.
ycol = 'Survived'
xcol = [name for name in train.columns if name != ycol]

In [None]:
# Build the pipeline wrapper, then train it on the feature/target split.
model = pipeline_model()
# fit() returns the model itself, so it is also what this cell displays.
model.fit(X=train[xcol], y=train[ycol])

In [None]:
# Horizontal bar chart of the permutation importances computed during fit.
# The single series is named 'features', which would be a misleading legend
# entry, so the legend is hidden and the axes are labelled explicitly.
ax = model.feature_importances().plot.barh(legend=False)
ax.set(
    title="Permutation importance (computed on the training data)",
    xlabel="Mean drop in accuracy when the column is shuffled",
    ylabel="Input column",
);

In [None]:
# Predict on exactly the feature columns the model was fitted on, mirroring
# the column selection used in the fit cell.
pred = model.predict(test[xcol])
print("prediction for testset: ")
print(pred)