In [None]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

from cinnamon.drift import OutputDriftExplainer

# Single seed reused by the train/test split and the XGBoost model below,
# so that a fresh "Restart & Run All" reproduces the same results.
RANDOM_SEED = 2021

# Boston Housing Data

In [None]:
# Load the Boston housing dataset through scikit-learn's dataset loaders.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned scikit-learn version, otherwise this cell fails on a fresh environment.
boston = datasets.load_boston()

In [None]:
# Wrap the raw feature matrix in a DataFrame whose columns carry the dataset's feature names.
boston_df = pd.DataFrame(data=boston.data, columns=boston.feature_names)

In [None]:
# Preview the first rows of the feature frame as a sanity check.
boston_df.head()

In [None]:
# Show the (rows, columns) dimensions of the feature frame.
boston_df.shape

# Build XGBoost model

In [None]:
# Randomly hold out 30% of the rows as the test sample; the fixed seed keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    boston_df,
    boston.target,
    test_size=0.3,
    random_state=RANDOM_SEED,
)

In [None]:
# Gradient-boosted tree regressor trained with a squared-error objective.
# - `random_state` is the scikit-learn-style seed parameter; it replaces the
#   legacy `seed` keyword while still using the shared RANDOM_SEED.
# - `use_label_encoder` is intentionally not passed: it is an XGBClassifier
#   option, and on a regressor it is forwarded as an unknown booster parameter
#   (which can emit a "might not be used" warning).
model = XGBRegressor(
    n_estimators=1000,
    booster="gbtree",
    objective="reg:squarederror",
    learning_rate=0.05,
    max_depth=6,
    random_state=RANDOM_SEED,
)

In [None]:
# Train on the training split while monitoring the held-out split for early
# stopping; `verbose=10` controls how often evaluation output is printed.
# NOTE(review): recent XGBoost releases expect `early_stopping_rounds` on the
# estimator constructor rather than on fit() — confirm against the installed version.
model.fit(
    X=X_train,
    y=y_train,
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=20,
    verbose=10,
)

# OutputDriftExplainer

The train/test split is random, so we should not detect any data drift between the two samples.

In [None]:
# Create the explainer for a regression task (continuous predictions and targets).
output_drift_explainer = OutputDriftExplainer(task='regression')

In [None]:
# Fit the explainer on the model's predictions for the train and test samples,
# followed by the matching true targets for each sample.
output_drift_explainer.fit(
    model.predict(X_train),
    model.predict(X_test),
    y_train,
    y_test,
)

In [None]:
# Plot the prediction drift between the two samples, then display the
# explainer's prediction-drift result as the cell output.
output_drift_explainer.plot_prediction_drift()
output_drift_explainer.get_prediction_drift()

In [None]:
# Plot the target drift between the two samples, then display the
# explainer's target-drift result as the cell output.
output_drift_explainer.plot_target_drift()
output_drift_explainer.get_target_drift()

In [None]:
# Display how the performance metrics differ between the two samples.
# NOTE(review): the test split was also used for early stopping during training —
# keep that in mind when reading the test-side metrics.
output_drift_explainer.get_performance_metrics_drift()