In [None]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from cinnamon.drift import ModelDriftExplainer

# Seed passed as random_state to train_test_split so the train/test split is reproducible.
RANDOM_SEED = 2021

# Boston Housing Data

In [None]:
# Load the Boston housing dataset shipped with scikit-learn; later cells read its
# .data, .feature_names and .target attributes.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so this
# cell presumably requires an older scikit-learn — confirm the pinned version.
boston = datasets.load_boston()

In [None]:
# Wrap the raw feature matrix in a DataFrame, labelling the columns with the dataset's feature names.
boston_df = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
)

In [None]:
# Preview the first rows of the feature table.
boston_df.head()

In [None]:
# (number of rows, number of feature columns)
boston_df.shape

# Build linear model

In [None]:
# Random 70/30 train/test split of features and target, seeded with RANDOM_SEED for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    boston_df,
    boston.target,
    test_size=0.3,
    random_state=RANDOM_SEED,
)

In [None]:
# scikit-learn LinearRegression with its default parameters.
model = LinearRegression()

In [None]:
# Fit on the training split only; the test split is kept aside for the drift comparison.
model.fit(X=X_train, y=y_train)

# ModelDriftExplainer

The train and test sets come from a random split of the same dataset, so we should not detect any data drift.

In [None]:
# There is no specific parser in CinnaMon for scikit-learn LinearRegression, but the model can be treated as a black box
# with model-agnostic methods. To do so, the "task" parameter should be provided to ModelDriftExplainer.
drift_explainer = ModelDriftExplainer(model, task='regression')

In [None]:
# Fit the explainer on the two datasets to compare: the train split first, the test split second,
# each followed by its target values.
drift_explainer.fit(X_train, X_test, y_train, y_test)

In [None]:
# Prediction drift between the two datasets passed to fit(): the plot call draws the figure,
# and the get call (last expression) is displayed as the cell output.
# No apparent drift in the distributions of predictions.
drift_explainer.plot_prediction_drift()
drift_explainer.get_prediction_drift()

In [None]:
# Target drift between the two datasets passed to fit(): the plot call draws the figure,
# and the get call (last expression) is displayed as the cell output.
# No apparent drift in the distributions of target labels.
drift_explainer.plot_target_drift()
drift_explainer.get_target_drift()

In [None]:
# The performance metrics differ between the two datasets, which on its own could be read as
# a data drift; here the gap simply comes from comparing the train set with the test set.
drift_explainer.get_performance_metrics_drift()

In [None]:
# Plot, then return, the drift values computed with the model-agnostic approach
# (type='wasserstein'); no tree-based method is used for this LinearRegression model.
drift_explainer.plot_model_agnostic_drift_values(type='wasserstein')
drift_explainer.get_model_agnostic_drift_values(type='wasserstein')

In [None]:
# No apparent drift for LSTAT, the first feature indicated by the drift values.
# The feature is selected by its column name here.
drift_explainer.plot_feature_drift('LSTAT')
drift_explainer.get_feature_drift('LSTAT')

In [None]:
# Same call with an integer instead of a column name — presumably the feature's positional
# index (0 being the first column); confirm against the CinnaMon documentation.
drift_explainer.get_feature_drift(0)

In [None]:
# Plural variant called without a feature argument — presumably returns the drift measures
# for every feature at once; confirm against the CinnaMon documentation.
drift_explainer.get_feature_drifts()