In [None]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import cinnamon
from cinnamon.drift import ModelDriftExplainer

# Iris Data

In [None]:
# Load the Iris dataset shipped with scikit-learn; the returned object's
# .data, .feature_names and .target attributes are used in the next cell.
dataset = datasets.load_iris()

In [None]:
# Class labels as the target vector, measurements as a labelled DataFrame
y = dataset.target
X = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)

In [None]:
# Preview the first rows of the feature table
X.head()

In [None]:
# This is a multiclass classification problem with 3 classes:
# list the distinct labels in y together with the number of samples per label
np.unique(y, return_counts=True)

# Build Logistic Regression model

In [None]:
# Random 70/30 train/test split; the fixed random_state keeps it reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2021,
)

In [None]:
# Logistic regression classifier with the solver's iteration cap set to 1000
clf = LogisticRegression(max_iter=1000)

In [None]:
# Train the classifier on the training split only
clf.fit(X_train, y_train)

# Analyze data drift with ModelDriftExplainer

The train/test split is random, so we should not detect any data drift.

In [None]:
# There is no specific parser in CinnaMon for scikit-learn LogisticRegression, but the model can be treated as a black box
# with model-agnostic methods. To do so, the "task" parameter should be provided to ModelDriftExplainer.
drift_explainer = ModelDriftExplainer(clf, task='classification')

In [None]:
# Fit the explainer on the two samples to compare:
# sample 1 = training split (X1, y1), sample 2 = test split (X2, y2)
drift_explainer.fit(X1=X_train, X2=X_test, y1=y_train, y2=y_test)

In [None]:
# no apparent drift in distributions of predicted probabilities
# NOTE(review): prediction_type='raw' is requested below - confirm that 'raw'
# predictions are indeed probabilities for this model-agnostic explainer.
# The plot is drawn first; the get_* call is the cell's displayed value.
cinnamon.plot_prediction_drift(drift_explainer, prediction_type='raw')
drift_explainer.get_prediction_drift(prediction_type='raw')

In [None]:
# Drift of the target between the two fitted samples (presumably y1 vs y2 - confirm).
# NOTE(review): the same call is repeated in the next cell, after the plot.
drift_explainer.get_target_drift()

In [None]:
# no apparent drift in distributions of target labels
# (plot first, then the get_* call is the cell's displayed value)
cinnamon.plot_target_drift(drift_explainer)
drift_explainer.get_target_drift()

In [None]:
# plot drift values computed with the model agnostic approach (only for demonstration
# purpose since there is no data drift)
cinnamon.plot_model_agnostic_drift_values(drift_explainer, type='mean')
drift_explainer.get_model_agnostic_drift_values(type='mean')

In [None]:
# Drift results for the input features of the two fitted samples
# (presumably one entry per column of X - confirm against the CinnaMon docs)
drift_explainer.get_feature_drifts()