This experiment uses a random train/test split, so we should not detect any data drift.

In [None]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor

import cinnamon
from cinnamon.drift import ModelDriftExplainer

# Boston Data

In [None]:
# Load the Boston housing data.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# accessing it raises ImportError. Fall back to rebuilding the dataset from
# the original source, following the recipe given in scikit-learn's removal
# notice, so the notebook keeps running on recent versions.
try:
    boston = datasets.load_boston()
except (ImportError, AttributeError):
    import numpy as np
    from sklearn.utils import Bunch

    # Each record spans two physical lines in the raw file: 11 values on the
    # first line, then 3 values (two features + the target) on the second.
    _raw = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    ).values
    # Only the attributes used by this notebook are rebuilt
    # (`data`, `target`, `feature_names`).
    boston = Bunch(
        data=np.hstack([_raw[::2, :], _raw[1::2, :2]]),
        target=_raw[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )

In [None]:
# Wrap the raw feature matrix in a DataFrame labelled with the feature names.
boston_df = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
)

In [None]:
# Preview the first five rows of the feature table.
boston_df.head(n=5)

# Build CatBoost model

In [None]:
# Random split (30% held out) with a fixed seed so the run is reproducible.
_split = train_test_split(
    boston_df,
    boston.target,
    test_size=0.3,
    random_state=2021,
)
X_train, X_test, y_train, y_test = _split

In [None]:
# Hyper-parameters for the CatBoost regressor, gathered in one place.
catboost_params = {
    'loss_function': 'RMSE',
    'learning_rate': 0.1,
    'iterations': 1000,
    'max_depth': 6,
    'early_stopping_rounds': 20,
    'random_seed': 2021,
    'verbose': 10,
}
model = CatBoostRegressor(**catboost_params)

In [None]:
# Train on the training split; the test split is passed as the evaluation set.
validation_sets = [(X_test, y_test)]
model.fit(X_train, y_train, eval_set=validation_sets)

# Analyze data drift with ModelDriftExplainer

In [None]:
# Build the explainer around the trained model, then fit it with the
# training split as dataset 1 and the test split as dataset 2.
drift_explainer = ModelDriftExplainer(model)
drift_explainer.fit(
    X1=X_train,
    X2=X_test,
    y1=y_train,
    y2=y_test,
)

In [None]:
# Prediction drift: draw the plot, then show the values returned by the explainer.
cinnamon.plot_prediction_drift(drift_explainer)
prediction_drift = drift_explainer.get_prediction_drift()
prediction_drift

In [None]:
# Target drift: draw the plot, then show the values returned by the explainer.
cinnamon.plot_target_drift(drift_explainer)
target_drift = drift_explainer.get_target_drift()
target_drift

In [None]:
# Show the performance-metrics drift reported by the explainer.
performance_metrics_drift = drift_explainer.get_performance_metrics_drift()
performance_metrics_drift

In [None]:
# Plot the drift importances computed with the tree-based approach,
# using the 'node_size' variant.
tree_based_type = 'node_size'
cinnamon.plot_tree_based_drift_importances(drift_explainer, type=tree_based_type)

In [None]:
# Show the tree-based drift importances as values rather than a plot.
# NOTE(review): the plotting cell uses type='node_size' while this one uses
# type='mean' — confirm the difference is intended.
tree_based_importances = drift_explainer.get_tree_based_drift_importances(type='mean')
tree_based_importances

In [None]:
# Drift importances computed with the model-agnostic approach:
# plot them, then show the underlying values. Both calls use the same type.
agnostic_type = 'mean'
cinnamon.plot_model_agnostic_drift_importances(drift_explainer, type=agnostic_type)
drift_explainer.get_model_agnostic_drift_importances(type=agnostic_type)

In [None]:
# Drift of a single feature: plot it, then show the values from the explainer.
feature_name = 'LSTAT'
cinnamon.plot_feature_drift(drift_explainer, feature_name)
drift_explainer.get_feature_drift(feature_name)