In [2]:
import sys

# Directory added to the import search path; the `src.*` imports below are
# presumably resolved from here (verify against the project layout).
# NOTE(review): this is an absolute, environment-specific path.
PROJECT_ROOT = "/workspaces/lgcns-mlops-practice"

# Guard the append so re-running this cell does not pile up duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)


In [None]:
import os

import joblib
from datetime import datetime

import numpy as np
import pandas as pd
from deepchecks.tabular import Dataset
from deepchecks.tabular.suites import train_test_validation, model_evaluation

from src.common.constants import (
    ARTIFACT_PATH,
    DATA_PATH,
)
from src.preprocess import CAT_FEATURES

In [4]:
# Name of the target column in the rent CSV files.
LABEL_NAME = "rent"

# Run date as YYYYMMDD; used as a prefix for the saved drift report file name.
DATE = f"{datetime.now():%Y%m%d}"

In [23]:
# Columns skipped when reading either CSV.
EXCLUDED_COLUMNS = ["area_locality", "posted_on", "id"]

# Read the training file and the new file with the same column filter,
# so both frames end up with the same set of columns.
train_df, new_df = (
    pd.read_csv(
        os.path.join(DATA_PATH, csv_name),
        usecols=lambda column: column not in EXCLUDED_COLUMNS,
    )
    for csv_name in ("house_rent_train.csv", "house_rent_new.csv")
)

## Data Drift

In [24]:
# Both raw frames are wrapped with identical Dataset settings: the label is
# referenced by column name and the categorical features come from
# src.preprocess.
raw_dataset_options = {
    "label": LABEL_NAME,
    "cat_features": CAT_FEATURES,
}

train_set = Dataset(train_df, **raw_dataset_options)
new_set = Dataset(new_df, **raw_dataset_options)

In [25]:
# Build the train/test validation suite and run it with the training data as
# the reference set and the new data as the comparison set.
validation_suite = train_test_validation()
suite_result = validation_suite.run(
    train_dataset=train_set,
    test_dataset=new_set,
)

In [26]:
# Directory under ARTIFACT_PATH where the drift report is written.
DRIFT_DETECTION_PATH = os.path.join(ARTIFACT_PATH, "drift_detection")

# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs(DRIFT_DETECTION_PATH, exist_ok=True)

In [None]:
# Report each check that did not pass. A check can carry several conditions,
# and the first one is not necessarily the one that failed, so print the
# details of every non-passing condition rather than only index 0.
for result in suite_result.get_not_passed_checks():
    print(result.header)
    for condition in result.conditions_results:
        if not condition.is_pass():
            print(condition.details)

In [None]:
# Write the suite result as an HTML file named after the run date.
drift_report_file = f"{DATE}_drift_detection.html"
suite_result.save_as_html(os.path.join(DRIFT_DETECTION_PATH, drift_report_file))

In [None]:
# Display the train-vs-new validation suite result produced above.
suite_result.show()

---

## Model Drift

In [30]:
from src.preprocess import preprocess_pipeline

In [31]:
# Load the persisted model artifact; it is later indexed by step name
# (model["regr"]) when running the evaluation suite.
# NOTE(review): joblib.load deserializes via pickle - only load artifacts from
# a trusted location.
model = joblib.load(os.path.join(ARTIFACT_PATH, "model.pkl"))

In [32]:
# Targets are log1p-transformed for both sets.
# NOTE(review): presumably this mirrors the target transform used at training
# time - confirm against the training code.
y_train = np.log1p(train_df[LABEL_NAME])
y_new = np.log1p(new_df[LABEL_NAME])

# Fit the preprocessing pipeline on the training data only.
x_train = preprocess_pipeline.fit_transform(
    X=train_df.drop([LABEL_NAME], axis=1),
    y=y_train,
)

# Apply the already-fitted pipeline to the new data. Calling fit_transform
# here would re-fit the pipeline on the new data, so the two sets would be
# encoded with different fitted state and the comparison would no longer
# measure how the model behaves on unseen inputs.
# NOTE(review): if the saved model artifact bundles its own fitted
# preprocessor, reusing that one would be preferable - confirm.
x_new = preprocess_pipeline.transform(new_df.drop([LABEL_NAME], axis=1))

In [33]:
# Re-wrap the preprocessed features for model evaluation. Here the label is
# passed as the transformed target values rather than as a column name.
# Note: this rebinds train_set / new_set from the data-drift section.
preprocessed_pairs = {
    "train": (x_train, y_train),
    "new": (x_new, y_new),
}

train_set = Dataset(
    preprocessed_pairs["train"][0],
    label=preprocessed_pairs["train"][1],
    cat_features=CAT_FEATURES,
)
new_set = Dataset(
    preprocessed_pairs["new"][0],
    label=preprocessed_pairs["new"][1],
    cat_features=CAT_FEATURES,
)

In [None]:
# Only the "regr" entry of the loaded model is evaluated, since the features
# in train_set / new_set have already been preprocessed above.
regressor = model["regr"]

# Run the model evaluation suite (rebinds suite_result from the drift section).
evaluation_suite = model_evaluation()
suite_result = evaluation_suite.run(train_set, new_set, regressor)

In [None]:
# Display the model evaluation suite result produced above.
suite_result.show()

In [None]:
# Report each evaluation check that did not pass. A check can carry several
# conditions and the first is not necessarily the failing one, so gather the
# details of every non-passing condition rather than only index 0.
for result in suite_result.get_not_passed_checks():
    failed_details = "; ".join(
        str(condition.details)
        for condition in result.conditions_results
        if not condition.is_pass()
    )
    print(
        "The following test failed!\n"
        f"{result.header}: {failed_details}\n"
    )